# VO2 regression: compare validation performance across regressors and feature sets

In [1]:
import numpy as np
import argparse

import os
import math
from math import sin

import json

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import seaborn as sns

import matplotlib
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
matplotlib.rc( 'savefig', facecolor = 'white' )
from matplotlib import pyplot
%matplotlib inline

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms, datasets, models
from torchsummary import summary
torch.manual_seed(0)

i_seed = 0

import sys
sys.path.append('../') # add this line so Data and data are visible in this file
sys.path.append('../../') # add this line so Data and data are visible in this file
sys.path.append('../PatchWand/') # add this line so Data and data are visible in this file

# from PatchWand import *
from plotting_tools import *
from setting import *
# from models import *
# from models_CNN import *
from evaluate import *

from stage3_preprocess import *
# from training_util import *
# from dataset_util import *
from dataIO import *
from stage4_regression import *

from importlib import reload
%load_ext autoreload
%autoreload 2

In [2]:
# Command-line interface: every option uses its own name as metavar and help
# text, so the options are declared as (name, default) pairs.
parser = argparse.ArgumentParser(description='SpO2_estimate')
for option_name, option_default in (
    ('input_folder', '../'),
    ('output_folder', '../'),
    ('training_params_file', 'training_params_list.json'),
):
    parser.add_argument('--' + option_name, metavar=option_name,
                        help=option_name, default=option_default)

# checklist 3: inside the notebook the arguments are hard-coded below; when
# running as a script, replace the call with `parser.parse_args()` so the real
# command line is read instead.
# Optional overrides that were used previously:
#   '--training_params_file', 'training_params_ML.json'
#   '--training_params_file', 'training_params_baseline.json'
notebook_argv = [
    '--input_folder', '../../data/stage4/',
    '--output_folder', '../../data/stage5_VO2/',
]
args = parser.parse_args(notebook_argv)
print(args)

Namespace(input_folder='../../data/stage4/', output_folder='../../data/stage5_VO2/', training_params_file='training_params_list.json')


In [3]:
# Resolve the input/output locations from the parsed arguments.
inputdir = args.input_folder
outputdir = args.output_folder

# Create the output folder (and any missing parents). `exist_ok=True` makes
# this safe to re-run and avoids the check-then-create race of testing
# `os.path.exists` first; it still raises if the path exists but is a file.
os.makedirs(outputdir, exist_ok=True)

    

In [4]:
# --- Experiment grid -------------------------------------------------------
# Number of repetitions; the loading loops iterate rep0 .. rep{repN-1}.
repN = 5

# Regressors and feature sets whose results are loaded; each combination is
# labelled '<regressor>-<feature set>' in the merged table.
regressor_names = ['MLPRegressor', 'XGBRegressor']
feature_names = ['4feat', '26feat']  # alternative tried earlier: ['4feat', '33feat']

# Value substituted into the 'auxweight{}' part of the deep-feature result path.
aux_weight = 0.3

In [5]:
# Template for the per-run validation results; placeholders are filled with
# (repetition index, regressor name, feature-set name).
# os.path.join keeps the path valid even if --input_folder has no trailing slash.
path_str = os.path.join(
    inputdir,
    'ML_regression/exp_features/rep{}/{}-{}/numeric_results/df_performance_val.csv',
)


In [6]:
# Load every validation-performance CSV addressed by `path_str`
# (one per repetition x regressor x feature set) and stack them.
# Frames are collected in a list and concatenated once: `DataFrame.append`
# was removed in pandas 2.0 and re-copied the whole table on every call.
performance_frames = []

for i_rep in range(repN):
    for regressor_name in regressor_names:
        for feature_name in feature_names:

            print(regressor_name, feature_name, i_rep)

            df_dir = path_str.format(i_rep, regressor_name, feature_name)
            df = pd.read_csv(df_dir)

            # Flag files whose row count differs from the expected 17
            # (presumably one row per validation subject/fold -- TODO confirm).
            if df.shape[0]!=17:
                print('\t', df.shape)

            # Tag each row with its run so the runs can be grouped later.
            df['i_rep'] = i_rep
            df['reg-feat'] = '{}-{}'.format(regressor_name, feature_name)

            performance_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
df_performance_merged = pd.concat(performance_frames) if performance_frames else pd.DataFrame()

MLPRegressor 4feat 0
MLPRegressor 26feat 0
XGBRegressor 4feat 0
XGBRegressor 26feat 0
MLPRegressor 4feat 1
MLPRegressor 26feat 1
XGBRegressor 4feat 1
XGBRegressor 26feat 1
MLPRegressor 4feat 2
MLPRegressor 26feat 2
XGBRegressor 4feat 2
XGBRegressor 26feat 2
MLPRegressor 4feat 3
MLPRegressor 26feat 3
XGBRegressor 4feat 3
XGBRegressor 26feat 3
MLPRegressor 4feat 4
MLPRegressor 26feat 4
XGBRegressor 4feat 4
XGBRegressor 26feat 4


# Load deep-feature model results

In [7]:
# Regressors whose results are loaded next; `feature_names` is reused from
# the earlier configuration cell.
regressor_names = ['CardioRespXGBRegression', 'CardioRespRegression']

# Filled into the trailing 'df_performance_val_{}.csv' part of the path.
target_name = 'VO2_cosmedweighted'

# Template for the per-run validation results; placeholders are filled with
# (repetition index, aux weight, feature-set name, regressor name, target name).
# os.path.join keeps the path valid even if --input_folder has no trailing slash.
path_str = os.path.join(
    inputdir,
    'DFR_DL1111/exp_features/rep{}/auxweight{}-fixed-{}-HR_patch+RR_cosmed-cardioresp_multiverse-{}-ch=1|spec=1-4dem/numeric_results/df_performance_val_{}.csv',
)


In [8]:
# Load every validation-performance CSV addressed by `path_str` for the
# current `regressor_names` and add them to the existing merged table.
# Frames are collected in a list and concatenated once: `DataFrame.append`
# was removed in pandas 2.0 and re-copied the whole table on every call.
# NOTE: this cell extends `df_performance_merged` in place, so running it
# twice without re-running the cell that creates the table duplicates rows.
deep_performance_frames = []

for i_rep in range(repN):
    for regressor_name in regressor_names:
        for feature_name in feature_names:

            print(regressor_name, feature_name, i_rep)

            df_dir = path_str.format(i_rep, aux_weight, feature_name, regressor_name, target_name)
            df = pd.read_csv(df_dir)

            # Flag files whose row count differs from the expected 17
            # (presumably one row per validation subject/fold -- TODO confirm).
            if df.shape[0]!=17:
                print('\t', df.shape)

            # Tag each row with its run so the runs can be grouped later.
            df['i_rep'] = i_rep
            df['reg-feat'] = '{}-{}'.format(regressor_name, feature_name)

            deep_performance_frames.append(df)

df_performance_merged = pd.concat([df_performance_merged, *deep_performance_frames])

CardioRespXGBRegression 4feat 0
CardioRespXGBRegression 26feat 0
CardioRespRegression 4feat 0
CardioRespRegression 26feat 0
CardioRespXGBRegression 4feat 1
CardioRespXGBRegression 26feat 1
CardioRespRegression 4feat 1
CardioRespRegression 26feat 1
CardioRespXGBRegression 4feat 2
CardioRespXGBRegression 26feat 2
CardioRespRegression 4feat 2
CardioRespRegression 26feat 2
CardioRespXGBRegression 4feat 3
CardioRespXGBRegression 26feat 3
CardioRespRegression 4feat 3
CardioRespRegression 26feat 3
CardioRespXGBRegression 4feat 4
CardioRespXGBRegression 26feat 4
CardioRespRegression 4feat 4
CardioRespRegression 26feat 4


In [9]:
# Metric column to summarise; must be a column of df_performance_merged
# ('PCC' was the alternative used previously).
metric_name = 'rmse'

# Average the metric within each (model, repetition) run.
# The column is selected *before* aggregating so unrelated columns are never
# averaged; aggregating everything first raises a TypeError on pandas >= 2.0
# when any other column is non-numeric.
df_table = df_performance_merged.groupby(['reg-feat', 'i_rep'])[[metric_name]].mean()
df_table

Unnamed: 0_level_0,Unnamed: 1_level_0,rmse
reg-feat,i_rep,Unnamed: 2_level_1
CardioRespRegression-26feat,0,2.98436
CardioRespRegression-26feat,1,2.766578
CardioRespRegression-26feat,2,2.644478
CardioRespRegression-26feat,3,2.96043
CardioRespRegression-26feat,4,2.979141
CardioRespRegression-4feat,0,3.285136
CardioRespRegression-4feat,1,2.879589
CardioRespRegression-4feat,2,2.877258
CardioRespRegression-4feat,3,2.573298
CardioRespRegression-4feat,4,2.716936


In [10]:
# Summarise across repetitions: mean and (sample) standard deviation of the
# per-run metric for each model, grouping directly on the 'reg-feat' index level.
metric_by_model = df_table.groupby(level='reg-feat')[[metric_name]]

# Move 'reg-feat' back into a regular column so both tables have the
# layout: [reg-feat, <metric_name>].
df_table_mean = metric_by_model.mean().reset_index()
df_table_std = metric_by_model.std().reset_index()

display(df_table_mean)
display(df_table_std)


Unnamed: 0,reg-feat,rmse
0,CardioRespRegression-26feat,2.866997
1,CardioRespRegression-4feat,2.866443
2,CardioRespXGBRegression-26feat,2.443798
3,CardioRespXGBRegression-4feat,2.507274
4,MLPRegressor-26feat,2.998763
5,MLPRegressor-4feat,5.133875
6,XGBRegressor-26feat,2.703141
7,XGBRegressor-4feat,4.152511


Unnamed: 0,reg-feat,rmse
0,CardioRespRegression-26feat,0.15385
1,CardioRespRegression-4feat,0.266402
2,CardioRespXGBRegression-26feat,0.051641
3,CardioRespXGBRegression-4feat,0.052936
4,MLPRegressor-26feat,0.081911
5,MLPRegressor-4feat,1.963495
6,XGBRegressor-26feat,0.061827
7,XGBRegressor-4feat,0.005709


In [11]:
# Build the report table: one 'mean±std' string per model.
# Both summary tables are keyed by 'reg-feat' and looked up by label, so the
# result no longer relies on the two tables having identical row order
# (the std table used to be filtered with a mask built from the mean table).
mean_by_model = df_table_mean.set_index('reg-feat')[metric_name]
std_by_model = df_table_std.set_index('reg-feat')[metric_name]

# Collect plain records and build the frame once: `DataFrame.append` was
# removed in pandas 2.0, and appending one-row frames left every row with
# index 0. The result now has a unique 0..n-1 index.
summary_rows = [
    {
        'reg_feat': reg_feat,
        'merged_stats': '{:.2f}±{:.2f}'.format(mean_by_model[reg_feat], std_by_model[reg_feat]),
    }
    for reg_feat in mean_by_model.index
]

df_table_merged = pd.DataFrame(summary_rows, columns=['reg_feat', 'merged_stats'])

df_table_merged

Unnamed: 0,reg_feat,merged_stats
0,CardioRespRegression-26feat,2.87±0.15
0,CardioRespRegression-4feat,2.87±0.27
0,CardioRespXGBRegression-26feat,2.44±0.05
0,CardioRespXGBRegression-4feat,2.51±0.05
0,MLPRegressor-26feat,3.00±0.08
0,MLPRegressor-4feat,5.13±1.96
0,XGBRegressor-26feat,2.70±0.06
0,XGBRegressor-4feat,4.15±0.01
