# regression ultimate code

In [1]:
import numpy as np
import argparse

import os
import math
from math import sin

import json

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import seaborn as sns

import matplotlib
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
matplotlib.rc( 'savefig', facecolor = 'white' )
from matplotlib import pyplot
%matplotlib inline

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms, datasets, models
from torchsummary import summary
torch.manual_seed(0)

i_seed = 0

import sys
sys.path.append('../') # add this line so Data and data are visible in this file
sys.path.append('../../') # add this line so Data and data are visible in this file
sys.path.append('../PatchWand/') # add this line so Data and data are visible in this file

# from PatchWand import *
from plotting_tools import *
from setting import *
# from models import *
# from models_CNN import *
from evaluate import *

from stage3_preprocess import *
# from training_util import *
# from dataset_util import *
from dataIO import *
from stage4_regression import *

from importlib import reload
%load_ext autoreload
%autoreload 2

In [2]:
# Build the argument parser. Each option uses its own name as metavar and help text.
parser = argparse.ArgumentParser(description='SpO2_estimate')
cli_options = (
    ('--input_folder', 'input_folder', '../'),
    ('--output_folder', 'output_folder', '../'),
    ('--training_params_file', 'training_params_file', 'training_params_list.json'),
)
for flag, label, default_value in cli_options:
    parser.add_argument(flag, metavar=label, help=label, default=default_value)


# checklist 3: to read the real command line instead, comment out the explicit
# argument list below and uncomment the bare parser.parse_args() call.
notebook_argv = [
    '--input_folder', '../../data/stage4/',
    '--output_folder', '../../data/stage5_VO2/',
    # optional overrides of the default training params file:
    # '--training_params_file', 'training_params_ML.json',
    # '--training_params_file', 'training_params_baseline.json',
]
args = parser.parse_args(notebook_argv)
# args = parser.parse_args()
print(args)

Namespace(input_folder='../../data/stage4/', output_folder='../../data/stage5_VO2/', training_params_file='training_params_list.json')


In [3]:
# Folders selected through the parsed arguments.
inputdir = args.input_folder
outputdir = args.output_folder

# exist_ok=True creates the folder (and any missing parents) only when needed,
# without the check-then-create race of a separate os.path.exists() test.
os.makedirs(outputdir, exist_ok=True)

    

In [4]:

def load_best(inputdir, repN, target_name):
    """Load the validation-performance tables of the ch=1|spec=1 model for all repetitions.

    For every repetition index in ``range(repN)`` the file
    ``df_performance_val_<target_name>.csv`` is read from the hard-coded
    ``DFR_DL1111/exp_weights/rep<i>/.../numeric_results/`` folder below
    ``inputdir``, tagged with an ``i_rep`` column and stacked row-wise.

    Parameters
    ----------
    inputdir : str
        Root data folder. It is prepended verbatim, so it must end with a
        path separator.
    repN : int
        Number of repetitions to load (folders ``rep0`` ... ``rep<repN-1>``).
    target_name : str
        Task name used to pick the CSV file.

    Returns
    -------
    pandas.DataFrame
        All per-repetition tables concatenated, original row indices kept.
        An empty DataFrame when ``repN`` is 0.
    """
    path_str = inputdir + 'DFR_DL1111/exp_weights/rep{}/auxweight0.3-fixed-4feat-HR_patch+RR_cosmed-cardioresp_multiverse-CardioRespRegression-ch=1|spec=1-4dem/numeric_results/df_performance_val_{}.csv'

    # Collect the frames and concatenate once: DataFrame.append was removed in
    # pandas 2.0 and re-copied the whole frame on every iteration.
    frames = []

    for i_rep in range(repN):
        print(i_rep)
        df = pd.read_csv(path_str.format(i_rep, target_name))

        # 17 rows are expected per file (presumably one per CV fold -- TODO confirm);
        # print the shape of any file that deviates.
        if df.shape[0] != 17:
            print('\t', df.shape)
        df['i_rep'] = i_rep

        frames.append(df)

    # pd.concat raises on an empty list, so keep the old "empty frame" result.
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames)

# ablation study: configuration and loading of the per-repetition results

In [13]:
# Ablation-study configuration.
# (earlier scratch values, kept for reference: i_rep = 0, aux_weight = 0.1,
#  aux_weights = [0.001, 0.4, 0.45, 0.495, 0.4995] or [0.001, 0.01, 0.1, 0.4995])
repN = 5
ablation_names = ['exp_nothing', 'exp_FT', 'exp_MTL', 'exp_FTMTL', 'exp_FTMTLCA']
target_name = 'VO2_cosmedweighted'


# All result folders share one naming pattern; the ablations differ only in the
# aux weight, the "...verse" tag and the ch/spec flags.
folder_template = 'auxweight{}-fixed-4feat-HR_patch+RR_cosmed-cardioresp_{}-CardioRespRegression-ch={}|spec={}-4dem'
ablation_settings = {
    # name: (aux weight, verse tag, ch, spec)
    'exp_nothing': ('0', 'shittyverse', 0, 0),
    'exp_FT': ('0', 'multiverse', 0, 0),
    'exp_MTL': ('0.3', 'shittyverse', 0, 0),
    'exp_FTMTL': ('0.3', 'multiverse', 0, 0),
    'exp_FTMTLCA': ('0.3', 'multiverse', 0, 1),
    # 'exp_FTMTLCASA': ('0.3', 'multiverse', 1, 1),  # read separately by load_best
}
ablation_folders = {
    name: folder_template.format(*fields)
    for name, fields in ablation_settings.items()
}

In [14]:
# Folder template of the ablation runs, filled with (ablation name, repetition index).
ablation_root = 'DFR_DL1111/ablation/{}/rep{}/'
path_str = inputdir + ablation_root

# Earlier full-path variant, kept for reference:
# /auxweight{}-fixed-4feat-HR_patch+RR_cosmed-cardioresp_multiverse-CardioRespXGBRegression-ch=1|spec=1-4dem/numeric_results/df_performance_val_{}.csv'


In [18]:
# folder  # debug peek, disabled: `folder` is only assigned inside the loading loop of a later cell, so evaluating it here raises NameError on a fresh kernel

'auxweight0.3-fixed-4feat-HR_patch+RR_cosmed-cardioresp_multiverse-CardioRespXGBRegression-ch=0|spec=1-4dem'

'auxweight0.3-fixed-4feat-HR_patch+RR_cosmed-cardioresp_multiverse-CardioRespRegression-ch=0|spec=1-4dem'

In [19]:
# Stack the validation-performance table of every (repetition, ablation) pair.
# Frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the whole frame on every iteration.
performance_frames = []

for i_rep in range(repN):

    for ablation_name in ablation_names:

        df_folder = path_str.format(ablation_name, i_rep)

        folder = ablation_folders[ablation_name]

        # Result folders of the XGB regressor are not part of this comparison.
        if 'CardioRespXGBRegression' in folder:
            continue

        df_dir = df_folder + folder + '/numeric_results/df_performance_val_{}.csv'.format(target_name)

        df = pd.read_csv(df_dir)

        # 17 rows are expected per file (presumably one per CV fold -- TODO confirm);
        # print the shape of any file that deviates.
        if df.shape[0] != 17:
            print('\t', df.shape)
        df['i_rep'] = i_rep
        df['ablation_name'] = ablation_name

        performance_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame as before.
df_performance_merged = pd.concat(performance_frames) if performance_frames else pd.DataFrame()


In [20]:
# folder

In [21]:
# Add the ch=1|spec=1 model (stored under exp_weights/, read by load_best) as a
# sixth ablation entry. pd.concat replaces DataFrame.append, which was removed in
# pandas 2.0.
# NOTE: re-running this cell without re-running the loading cell above adds the
# rows a second time.
df_performance_best = load_best(inputdir, repN, target_name)
df_performance_best['ablation_name'] = 'exp_FTMTLCASA'
df_performance_merged = pd.concat([df_performance_merged, df_performance_best])


0
1
2
3
4


In [22]:
# df_performance_best

In [23]:
# Show the stacked results of all ablations and repetitions loaded so far.
df_performance_merged

Unnamed: 0,CV,task,Rsquared,PCC,rmse,mae,mape,i_rep,ablation_name
0,105,VO2_cosmedweighted,-1.330406,0.587659,3.628489,2.705612,0.400046,0,exp_nothing
1,115,VO2_cosmedweighted,-3.211710,0.845132,8.874121,8.597563,2.346723,0,exp_nothing
2,107,VO2_cosmedweighted,-2.648232,0.639916,6.718734,6.132153,0.752437,0,exp_nothing
3,117,VO2_cosmedweighted,-0.062899,0.880646,3.814043,3.487020,0.511126,0,exp_nothing
4,106,VO2_cosmedweighted,0.215142,0.909167,3.679055,2.931018,0.354930,0,exp_nothing
...,...,...,...,...,...,...,...,...,...
12,111,VO2_cosmedweighted,0.366065,0.896454,2.111966,1.622658,0.204855,4,exp_FTMTLCASA
13,118,VO2_cosmedweighted,0.849035,0.922346,1.834405,1.423136,0.227044,4,exp_FTMTLCASA
14,120,VO2_cosmedweighted,0.595905,0.824527,2.267525,1.648608,0.338391,4,exp_FTMTLCASA
15,121,VO2_cosmedweighted,0.740308,0.930325,2.136204,1.819393,0.301105,4,exp_FTMTLCASA


In [24]:
# Metric to summarise; 'PCC' is the alternative used previously.
metric_name = 'rmse'

# Mean of the metric over all rows of each (ablation, repetition) pair.
# The metric column is selected before aggregating: with pandas >= 2.0,
# mean() over the whole frame raises on the string column 'task'.
df_table = df_performance_merged.groupby(['ablation_name', 'i_rep'])[[metric_name]].mean()


In [25]:
# Per-repetition mean of the selected metric for each ablation.
df_table

Unnamed: 0_level_0,Unnamed: 1_level_0,rmse
ablation_name,i_rep,Unnamed: 2_level_1
exp_FT,0,3.354683
exp_FT,1,3.258643
exp_FT,2,3.202298
exp_FT,3,3.125854
exp_FT,4,3.537141
exp_FTMTL,0,3.026947
exp_FTMTL,1,2.487669
exp_FTMTL,2,3.064186
exp_FTMTL,3,2.813796
exp_FTMTL,4,2.889742


In [26]:
# Summarise across repetitions: mean and standard deviation of the
# per-repetition scores of each ablation.
per_rep_scores = df_table.reset_index()
scores_by_ablation = per_rep_scores.groupby('ablation_name')

df_table_mean = scores_by_ablation.mean()[[metric_name]].reset_index()
df_table_std = scores_by_ablation.std()[[metric_name]].reset_index()

display(df_table_mean)
display(df_table_std)


Unnamed: 0,ablation_name,rmse
0,exp_FT,3.295724
1,exp_FTMTL,2.856468
2,exp_FTMTLCA,2.908311
3,exp_FTMTLCASA,2.779402
4,exp_MTL,4.01445
5,exp_nothing,3.910469


Unnamed: 0,ablation_name,rmse
0,exp_FT,0.15868
1,exp_FTMTL,0.229756
2,exp_FTMTLCA,0.280883
3,exp_FTMTLCASA,0.156834
4,exp_MTL,0.488358
5,exp_nothing,0.312511


In [27]:
# Format "mean±std" of the selected metric for every ablation.
stats_rows = []

for ablation_name in df_table_mean['ablation_name'].unique():

    # Each frame is filtered with its own mask. The std frame used to be indexed
    # with a mask built from the mean frame, which only worked because both
    # frames happen to share the same row order.
    merged_mean = df_table_mean.loc[df_table_mean['ablation_name'] == ablation_name, metric_name].values[0]
    merged_std = df_table_std.loc[df_table_std['ablation_name'] == ablation_name, metric_name].values[0]

    stats_rows.append({
        'ablation_name': ablation_name,
        'merged_stats': '{:.2f}±{:.2f}'.format(merged_mean, merged_std),
    })

# Build the frame once; DataFrame.append was removed in pandas 2.0.
df_table_merged = pd.DataFrame(stats_rows, columns=['ablation_name', 'merged_stats'])

df_table_merged.set_index(['ablation_name'])
# .loc[ablation_names]


Unnamed: 0_level_0,merged_stats
ablation_name,Unnamed: 1_level_1
exp_FT,3.30±0.16
exp_FTMTL,2.86±0.23
exp_FTMTLCA,2.91±0.28
exp_FTMTLCASA,2.78±0.16
exp_MTL,4.01±0.49
exp_nothing,3.91±0.31


In [28]:
# Second comparison: the HR_patch and RR_cosmed targets, for the three ablations
# listed below.
repN = 5
ablation_names = ['exp_MTL', 'exp_FTMTL', 'exp_FTMTLCA']
target_names = ['merged-{}'.format(task) for task in ('HR_patch', 'RR_cosmed')]

In [29]:
# Folder template of the ablation runs, filled with (ablation name, repetition index).
ablation_root = 'DFR_DL1111/ablation/{}/rep{}/'
path_str = inputdir + ablation_root

In [30]:
# Stack the validation-performance table of every (repetition, ablation, target)
# combination. Frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration.
performance_frames = []

for i_rep in range(repN):

    for ablation_name in ablation_names:
        for target_name in target_names:

            df_folder = path_str.format(ablation_name, i_rep)
            folder = ablation_folders[ablation_name]

            # Result folders of the XGB regressor are not part of this comparison.
            if 'CardioRespXGBRegression' in folder:
                continue

            df_dir = df_folder + folder + '/numeric_results/df_performance_val_{}.csv'.format(target_name)

            df = pd.read_csv(df_dir)

            # 17 rows are expected per file (presumably one per CV fold -- TODO confirm);
            # print the shape of any file that deviates.
            if df.shape[0] != 17:
                print('\t', df.shape)
            df['i_rep'] = i_rep
            df['ablation_target'] = ablation_name+'-'+target_name

            performance_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame as before.
df_performance_merged = pd.concat(performance_frames) if performance_frames else pd.DataFrame()


In [31]:
# Show the stacked results of every ablation/target combination loaded so far.
df_performance_merged

Unnamed: 0,CV,task,Rsquared,PCC,rmse,mae,mape,i_rep,ablation_target
0,105,merged-HR_patch,0.533487,0.909348,11.306383,9.072909,0.102885,0,exp_MTL-merged-HR_patch
1,115,merged-HR_patch,0.886911,0.979187,6.201058,3.727732,0.028661,0,exp_MTL-merged-HR_patch
2,107,merged-HR_patch,0.937919,0.980398,4.070647,3.001665,0.037436,0,exp_MTL-merged-HR_patch
3,117,merged-HR_patch,0.843476,0.935155,6.551111,4.558159,0.038187,0,exp_MTL-merged-HR_patch
4,106,merged-HR_patch,0.719970,0.970013,8.440413,7.267683,0.066591,0,exp_MTL-merged-HR_patch
...,...,...,...,...,...,...,...,...,...
12,111,merged-RR_cosmed,0.722205,0.881911,2.037602,1.721632,0.077653,4,exp_FTMTLCA-merged-RR_cosmed
13,118,merged-RR_cosmed,0.603582,0.916596,4.281392,3.453451,0.153335,4,exp_FTMTLCA-merged-RR_cosmed
14,120,merged-RR_cosmed,0.830025,0.928551,1.132492,0.861091,0.047413,4,exp_FTMTLCA-merged-RR_cosmed
15,121,merged-RR_cosmed,-1.600242,0.834166,6.784829,6.246900,0.336172,4,exp_FTMTLCA-merged-RR_cosmed


In [32]:
# Add the ch=1|spec=1 model (read by load_best) for every target, labelled
# 'exp_FTMTLCASA-<target>'. The frames are gathered and concatenated once;
# DataFrame.append was removed in pandas 2.0.
# NOTE: re-running this cell without re-running the loading cell above adds the
# rows a second time.
best_frames = [df_performance_merged]

for target_name in target_names:
    df_performance_best = load_best(inputdir, repN, target_name)
    df_performance_best['ablation_target'] = 'exp_FTMTLCASA'+'-'+target_name

    best_frames.append(df_performance_best)

df_performance_merged = pd.concat(best_frames)


0
1
2
3
4
0
1
2
3
4


In [33]:
# path_str = inputdir + 'DFR_DL1111/exp_weights/rep{}/auxweight0.2-fixed-4feat-HR_patch+RR_cosmed-cardioresp_multiverse-CardioRespRegression-ch=1|spec=1-4dem/numeric_results/df_performance_val_{}.csv'
# ablation_name = 'exp_FTMTLCASA'

# for i_rep in range(repN):
    
#     for target_name in target_names:

#         print(i_rep, target_name)

#         df_dir = path_str.format(i_rep, target_name)

#         df = pd.read_csv(df_dir)

#         if df.shape[0]!=17:
#             print('\t', df.shape)
#         df['i_rep'] = i_rep
#         # df['ablation_name'] = ablation_name
#         df['ablation_target'] = ablation_name+'-'+target_name

#         df_performance_merged = df_performance_merged.append(df)

In [34]:
# Metric to summarise; 'PCC' is the alternative used previously.
metric_name = 'rmse'

# Mean of the metric over all rows of each (ablation-target, repetition) pair.
# The metric column is selected before aggregating: with pandas >= 2.0,
# mean() over the whole frame raises on the string column 'task'.
df_table = df_performance_merged.groupby(['ablation_target', 'i_rep'])[[metric_name]].mean()


In [35]:
# Per-repetition mean of the selected metric for each ablation/target combination.
df_table

Unnamed: 0_level_0,Unnamed: 1_level_0,rmse
ablation_target,i_rep,Unnamed: 2_level_1
exp_FTMTL-merged-HR_patch,0,3.469577
exp_FTMTL-merged-HR_patch,1,3.516387
exp_FTMTL-merged-HR_patch,2,3.866804
exp_FTMTL-merged-HR_patch,3,3.34772
exp_FTMTL-merged-HR_patch,4,3.79074
exp_FTMTL-merged-RR_cosmed,0,3.7631
exp_FTMTL-merged-RR_cosmed,1,3.966893
exp_FTMTL-merged-RR_cosmed,2,3.443115
exp_FTMTL-merged-RR_cosmed,3,3.999763
exp_FTMTL-merged-RR_cosmed,4,3.6956


In [36]:
# Summarise across repetitions: mean and standard deviation of the
# per-repetition scores of each ablation/target combination.
per_rep_scores = df_table.reset_index()
scores_by_target = per_rep_scores.groupby('ablation_target')

df_table_mean = scores_by_target.mean()[[metric_name]].reset_index()
df_table_std = scores_by_target.std()[[metric_name]].reset_index()

display(df_table_mean)
display(df_table_std)


Unnamed: 0,ablation_target,rmse
0,exp_FTMTL-merged-HR_patch,3.598246
1,exp_FTMTL-merged-RR_cosmed,3.773694
2,exp_FTMTLCA-merged-HR_patch,4.053961
3,exp_FTMTLCA-merged-RR_cosmed,3.308804
4,exp_FTMTLCASA-merged-HR_patch,4.146885
5,exp_FTMTLCASA-merged-RR_cosmed,3.624812
6,exp_MTL-merged-HR_patch,8.1169
7,exp_MTL-merged-RR_cosmed,4.707028


Unnamed: 0,ablation_target,rmse
0,exp_FTMTL-merged-HR_patch,0.220906
1,exp_FTMTL-merged-RR_cosmed,0.225791
2,exp_FTMTLCA-merged-HR_patch,0.306202
3,exp_FTMTLCA-merged-RR_cosmed,0.307872
4,exp_FTMTLCASA-merged-HR_patch,0.474425
5,exp_FTMTLCASA-merged-RR_cosmed,0.290869
6,exp_MTL-merged-HR_patch,0.589729
7,exp_MTL-merged-RR_cosmed,0.925202


In [37]:
# Format "mean±std" of the selected metric for every ablation/target combination.
stats_rows = []

for ablation_target in df_table_mean['ablation_target'].unique():

    # Each frame is filtered with its own mask. The std frame used to be indexed
    # with a mask built from the mean frame, which only worked because both
    # frames happen to share the same row order.
    merged_mean = df_table_mean.loc[df_table_mean['ablation_target'] == ablation_target, metric_name].values[0]
    merged_std = df_table_std.loc[df_table_std['ablation_target'] == ablation_target, metric_name].values[0]

    stats_rows.append({
        'ablation_target': ablation_target,
        'merged_stats': '{:.2f}±{:.2f}'.format(merged_mean, merged_std),
    })

# Build the frame once; DataFrame.append was removed in pandas 2.0.
df_table_merged = pd.DataFrame(stats_rows, columns=['ablation_target', 'merged_stats'])

df_table_merged.set_index(['ablation_target'])


Unnamed: 0_level_0,merged_stats
ablation_target,Unnamed: 1_level_1
exp_FTMTL-merged-HR_patch,3.60±0.22
exp_FTMTL-merged-RR_cosmed,3.77±0.23
exp_FTMTLCA-merged-HR_patch,4.05±0.31
exp_FTMTLCA-merged-RR_cosmed,3.31±0.31
exp_FTMTLCASA-merged-HR_patch,4.15±0.47
exp_FTMTLCASA-merged-RR_cosmed,3.62±0.29
exp_MTL-merged-HR_patch,8.12±0.59
exp_MTL-merged-RR_cosmed,4.71±0.93
