In [7]:
# import torch
import argparse
import joblib
from pathlib import Path
import os

from tqdm import tqdm

import numpy as np
import pandas as pd

from sklearn.model_selection import KFold
from sklearn.svm import SVR, LinearSVR
from sklearn.linear_model import Lasso,Ridge,ElasticNet
from sklearn.cross_decomposition import PLSRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor as KNN
from sklearn.model_selection import GridSearchCV
import xgboost as XGB

import plotnine as p9

In [145]:
data_path = '/nobackup/users/hmbaghda/metastatic_potential/'

In [394]:
res = pd.read_csv(os.path.join(data_path, 'processed', 'coarse_model_tests.csv'), index_col = 0)
res.groupby('model_type').test_pearson.mean().sort_values(ascending = False)

model_type
PLSR          0.376988
elasticNet    0.370737
svm           0.362755
rf            0.302660
Name: test_pearson, dtype: float64

In [346]:
res_hvg_out = pd.read_csv(os.path.join(data_path, 'processed', 'hvg_coarse_model_tests.csv'), index_col = 0)
res_hvg_out.groupby('model_type').test_pearson.mean().sort_values(ascending = False)

model_type
PLSR          0.396928
svm           0.371084
elasticNet    0.366895
rf            0.336032
knn           0.235397
xgboost       0.221204
Name: test_pearson, dtype: float64

In [390]:
res_hvg_in = pd.read_csv(os.path.join(data_path, 'processed', 'hvg_internal_coarse_model_tests.csv'), index_col = 0)
res_hvg_in.groupby('model_type').test_pearson.mean().sort_values(ascending = False)



model_type
elasticNet    0.376176
svm           0.374258
PLSR          0.323554
rf            0.320223
knn           0.259951
xgboost       0.229400
Name: test_pearson, dtype: float64

In [348]:
res_da = pd.read_csv(os.path.join(data_path, 'processed', 'da_coarse_model_tests.csv'), index_col = 0)
res_da.groupby('model_type').test_pearson.mean().sort_values(ascending = False)


model_type
PLSR     0.390296
ridge    0.366935
Name: test_pearson, dtype: float64

In [378]:
ast.literal_eval(res.loc[idx, 'best_params'])

{'C': 0.001, 'degree': 2, 'gamma': 0.001, 'kernel': 'linear'}

In [381]:
res.head(10)

Unnamed: 0,model_type,fold,train_pearson,test_pearson,best_params
0,PLSR,0,0.999541,0.206836,{'n_components': 12}
1,PLSR,1,0.887889,0.208972,{'n_components': 4}
2,PLSR,2,0.895865,0.464922,{'n_components': 4}
3,PLSR,3,0.900876,0.434596,{'n_components': 4}
4,PLSR,4,0.892195,0.532019,{'n_components': 4}
5,PLSR,5,0.908728,0.275733,{'n_components': 4}
6,PLSR,6,0.999904,0.435449,{'n_components': 14}
7,PLSR,7,0.907147,0.359425,{'n_components': 4}
8,PLSR,8,0.888426,0.499843,{'n_components': 4}
9,PLSR,9,0.892109,0.352088,{'n_components': 4}


In [393]:
res

Unnamed: 0,model_type,fold,train_pearson,test_pearson,best_params
0,PLSR,0,0.999541,0.206836,{'n_components': 12}
1,PLSR,1,0.887889,0.208972,{'n_components': 4}
2,PLSR,2,0.895865,0.464922,{'n_components': 4}
3,PLSR,3,0.900876,0.434596,{'n_components': 4}
4,PLSR,4,0.892195,0.532019,{'n_components': 4}
5,PLSR,5,0.908728,0.275733,{'n_components': 4}
6,PLSR,6,0.999904,0.435449,{'n_components': 14}
7,PLSR,7,0.907147,0.359425,{'n_components': 4}
8,PLSR,8,0.888426,0.499843,{'n_components': 4}
9,PLSR,9,0.892109,0.352088,{'n_components': 4}


In [388]:
for idx in res[res.model_type == 'svm'].index.tolist():
    print(ast.literal_eval(res.loc[idx, 'best_params'])['epsilon'])

KeyError: 'epsilon'

In [391]:
res

Unnamed: 0,model_type,fold,train_pearson,test_pearson,best_params
0,PLSR,0,0.999541,0.206836,{'n_components': 12}
1,PLSR,1,0.887889,0.208972,{'n_components': 4}
2,PLSR,2,0.895865,0.464922,{'n_components': 4}
3,PLSR,3,0.900876,0.434596,{'n_components': 4}
4,PLSR,4,0.892195,0.532019,{'n_components': 4}
5,PLSR,5,0.908728,0.275733,{'n_components': 4}
6,PLSR,6,0.999904,0.435449,{'n_components': 14}
7,PLSR,7,0.907147,0.359425,{'n_components': 4}
8,PLSR,8,0.888426,0.499843,{'n_components': 4}
9,PLSR,9,0.892109,0.352088,{'n_components': 4}
