In [1]:
import pandas as pd
import numpy as np
import itertools
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import roc_auc_score
from tqdm import tqdm

### AKI

In [2]:
from blood_panel_data_preprocessing import blood_panel_data, aki_data, sepsis_data

In [3]:
# Load the AKI dataset; aki_data() returns four values, unpacked here as
# index, data, block and cost. `cost` is not referenced again in the visible cells.
index, data, block, cost = aki_data()

In [4]:
from data_loader import Data_Loader
# Build the loader whose .train / .val / .test arrays are sliced in the next cell.
# test_ratio / val_ratio are presumably fractions of all rows -- confirm in Data_Loader.
data_loader = Data_Loader(data, block, test_ratio = 0.2, val_ratio = 0.2, index= index)


In [7]:
# Split each partition into features (all columns but the last) and the
# label (last column), pairing X and y per partition.
X_train, y_train = data_loader.train[:, :-1], data_loader.train[:, -1]
X_test, y_test = data_loader.test[:, :-1], data_loader.test[:, -1]
X_val, y_val = data_loader.val[:, :-1], data_loader.val[:, -1]

In [29]:
from sklearn.ensemble import RandomForestClassifier
# Grid search: random forest on the AKI split.
# hp1 = n_estimators, hp2 = max_depth; one row of `results` per combination.
n_estimators = np.linspace(20, 620, 11, dtype=int)
max_depths = [3, 7, 11, 21, 31, 61]
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
# Iterate over `max_depths` (defined just above). The original loop referenced
# `max_depth`, which this cell never defines, so it raised NameError on a fresh kernel.
grid = list(itertools.product(n_estimators, max_depths))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    clf = RandomForestClassifier(class_weight='balanced', n_estimators=hp1, max_depth=hp2)
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once; the call returns
    # (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/RFAKI.csv')

100%|███████████████████████████████████████████| 66/66 [11:11<00:00, 10.18s/it]


In [43]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
8,80.0,11.0,0.504432,0.785962,0.43932,0.763929,0.418014,0.462916
2,20.0,11.0,0.500406,0.781915,0.416157,0.756495,0.399061,0.434783
26,260.0,11.0,0.500402,0.792163,0.454321,0.775117,0.439141,0.470588
37,380.0,7.0,0.499345,0.79091,0.452611,0.770829,0.363919,0.598465
13,140.0,7.0,0.499343,0.789083,0.453307,0.770194,0.365777,0.595908


In [None]:
from sklearn.linear_model import LogisticRegression
# Grid search: logistic regression on the AKI split.
# hp1 = C, hp2 = penalty; one row of `results` per combination.
C = [0.0001, 0.001, 0.1, 1, 10, 100]
# NOTE(review): newer scikit-learn releases spell the unpenalised option as None
# rather than the string 'none' -- confirm against the installed version.
penalty = ['l1', 'l2', 'elasticnet', 'none']
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
grid = list(itertools.product(C, penalty))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    # l1_ratio is passed for every penalty; presumably it only matters for 'elasticnet'.
    clf = LogisticRegression(class_weight='balanced', C=hp1, penalty=hp2,
                             solver='saga', max_iter=1000, l1_ratio=0.5)
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once instead of recomputing the
    # same test metrics three times; the call returns (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/LRAKI.csv')

In [39]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
12,1.0,l1,0.474725,0.798561,0.452303,0.797081,0.333333,0.703325
11,0.1,none,0.474725,0.798619,0.452675,0.797155,0.333738,0.703325
22,100.0,elasticnet,0.474725,0.79862,0.452675,0.797153,0.333738,0.703325
21,100.0,l2,0.474725,0.798623,0.452675,0.797157,0.333738,0.703325
20,100.0,l1,0.474725,0.798618,0.452675,0.797149,0.333738,0.703325


In [44]:
from xgboost import XGBClassifier
# Grid search: XGBoost on the AKI split.
# hp1 = n_estimators, hp2 = max_depth; one row of `results` per combination.
n_estimators = np.linspace(20, 620, 11, dtype=int)
max_depths = [3, 7, 11, 21, 31, 61]
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
# Iterate over `max_depths` (defined just above). The original loop referenced
# `max_depth`, which this cell never defines, so it raised NameError on a fresh kernel.
grid = list(itertools.product(n_estimators, max_depths))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    # NOTE(review): the RF / LGBM / LR baselines pass class_weight='balanced';
    # no class weighting is configured here -- confirm this is intentional.
    clf = XGBClassifier(n_estimators=hp1, max_depth=hp2)
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once; the call returns
    # (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/XGAKI.csv')

100%|███████████████████████████████████████████| 66/66 [05:11<00:00,  4.72s/it]


In [45]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
42,440.0,3.0,0.440567,0.78634,0.404605,0.784796,0.56682,0.314578
48,500.0,3.0,0.436681,0.784338,0.411862,0.783264,0.578704,0.319693
36,380.0,3.0,0.436205,0.787949,0.422259,0.784493,0.586364,0.329923
30,320.0,3.0,0.428256,0.788449,0.413223,0.788339,0.584112,0.319693
7,80.0,7.0,0.426667,0.777801,0.391003,0.766269,0.604278,0.289003


In [47]:
from lightgbm import LGBMClassifier
# Grid search: LightGBM on the AKI split.
# hp1 = n_estimators, hp2 = max_depth; one row of `results` per combination.
n_estimators = np.linspace(20, 620, 11, dtype=int)
max_depths = [3, 7, 11, 21, 31, 61]
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
# Iterate over `max_depths` (defined just above). The original loop referenced
# `max_depth`, which this cell never defines, so it raised NameError on a fresh kernel.
grid = list(itertools.product(n_estimators, max_depths))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    clf = LGBMClassifier(n_estimators=hp1, max_depth=hp2, class_weight='balanced')
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once; the call returns
    # (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/LGAKI.csv')

100%|███████████████████████████████████████████| 66/66 [00:57<00:00,  1.16it/s]


In [48]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
41,380.0,61.0,0.523577,0.790138,0.474453,0.789645,0.452436,0.498721
40,380.0,31.0,0.523577,0.790138,0.474453,0.789645,0.452436,0.498721
39,380.0,21.0,0.523577,0.790138,0.474453,0.789645,0.452436,0.498721
38,380.0,11.0,0.523002,0.794636,0.472906,0.78702,0.456057,0.491049
44,440.0,11.0,0.522314,0.79165,0.465823,0.783818,0.461153,0.470588


### Sepsis

In [49]:
from blood_panel_data_preprocessing import blood_panel_data, aki_data, sepsis_data

In [50]:
# Load the sepsis dataset; sepsis_data() returns three values (no index, unlike aki_data()).
# This rebinds data / block / cost from the AKI section above.
data, block, cost = sepsis_data()

In [51]:
from data_loader import Data_Loader
# Rebuild the loader for the sepsis data; no `index` argument is passed here.
# test_ratio / val_ratio are presumably fractions of all rows -- confirm in Data_Loader.
data_loader = Data_Loader(data, block, test_ratio = 0.2, val_ratio = 0.2)


In [52]:
# Split each partition into features (all columns but the last) and the
# label (last column), pairing X and y per partition.
X_train, y_train = data_loader.train[:, :-1], data_loader.train[:, -1]
X_test, y_test = data_loader.test[:, :-1], data_loader.test[:, -1]
X_val, y_val = data_loader.val[:, :-1], data_loader.val[:, -1]

In [53]:
from sklearn.ensemble import RandomForestClassifier
# Grid search: random forest on the sepsis split.
# hp1 = n_estimators, hp2 = max_depth; one row of `results` per combination.
n_estimators = np.linspace(20, 620, 11, dtype=int)
max_depths = [3, 7, 11, 21, 31, 61]
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
# Iterate over `max_depths` (defined just above). The original loop referenced
# `max_depth`, which this cell never defines, so it raised NameError on a fresh kernel.
grid = list(itertools.product(n_estimators, max_depths))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    clf = RandomForestClassifier(class_weight='balanced', n_estimators=hp1, max_depth=hp2)
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once; the call returns
    # (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/RFSepsis.csv')

100%|███████████████████████████████████████████| 66/66 [02:39<00:00,  2.42s/it]


In [54]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
6,80.0,3.0,0.437908,0.7661,0.45614,0.800963,0.374101,0.58427
18,200.0,3.0,0.422442,0.759139,0.480349,0.80931,0.392857,0.617978
61,620.0,7.0,0.421525,0.78838,0.423077,0.830291,0.492537,0.370787
31,320.0,7.0,0.416667,0.790831,0.38961,0.830887,0.461538,0.337079
36,380.0,3.0,0.410774,0.765872,0.477477,0.806856,0.398496,0.595506


In [55]:
from sklearn.linear_model import LogisticRegression
# Grid search: logistic regression on the sepsis split.
# hp1 = C, hp2 = penalty; one row of `results` per combination.
C = [0.0001, 0.001, 0.1, 1, 10, 100]
# NOTE(review): newer scikit-learn releases spell the unpenalised option as None
# rather than the string 'none' -- confirm against the installed version.
penalty = ['l1', 'l2', 'elasticnet', 'none']
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
grid = list(itertools.product(C, penalty))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    # l1_ratio is passed for every penalty; presumably it only matters for 'elasticnet'.
    clf = LogisticRegression(class_weight='balanced', C=hp1, penalty=hp2,
                             solver='saga', max_iter=1000, l1_ratio=0.5)
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once instead of recomputing the
    # same test metrics three times; the call returns (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/LRSepsis.csv')

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


100%|███████████████████████████████████████████| 24/24 [00:01<00:00, 12.64it/s]


In [56]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
10,0.1,elasticnet,0.455882,0.790852,0.505929,0.824994,0.390244,0.719101
12,1.0,l1,0.455206,0.79139,0.505929,0.823848,0.390244,0.719101
13,1.0,l2,0.455206,0.791566,0.498024,0.823137,0.384146,0.707865
22,100.0,elasticnet,0.455206,0.791514,0.498024,0.822885,0.384146,0.707865
21,100.0,l2,0.455206,0.791493,0.498024,0.822908,0.384146,0.707865


In [57]:
from xgboost import XGBClassifier
# Grid search: XGBoost on the sepsis split.
# hp1 = n_estimators, hp2 = max_depth; one row of `results` per combination.
n_estimators = np.linspace(20, 620, 11, dtype=int)
max_depths = [3, 7, 11, 21, 31, 61]
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
# Iterate over `max_depths` (defined just above). The original loop referenced
# `max_depth`, which this cell never defines, so it raised NameError on a fresh kernel.
grid = list(itertools.product(n_estimators, max_depths))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    # NOTE(review): the RF / LGBM / LR baselines pass class_weight='balanced';
    # no class weighting is configured here -- confirm this is intentional.
    clf = XGBClassifier(n_estimators=hp1, max_depth=hp2)
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once; the call returns
    # (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/XGSepsis.csv')

100%|███████████████████████████████████████████| 66/66 [01:45<00:00,  1.60s/it]


In [58]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
24,260.0,3.0,0.373737,0.781636,0.430769,0.82754,0.682927,0.314607
12,140.0,3.0,0.369231,0.790904,0.459259,0.829466,0.673913,0.348315
6,80.0,3.0,0.366492,0.799561,0.390625,0.832653,0.641026,0.280899
18,200.0,3.0,0.336842,0.783022,0.466165,0.825843,0.704545,0.348315
60,620.0,3.0,0.336634,0.770217,0.415385,0.825086,0.658537,0.303371


In [59]:
from lightgbm import LGBMClassifier
# Grid search: LightGBM on the sepsis split.
# hp1 = n_estimators, hp2 = max_depth; one row of `results` per combination.
n_estimators = np.linspace(20, 620, 11, dtype=int)
max_depths = [3, 7, 11, 21, 31, 61]
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
# Iterate over `max_depths` (defined just above). The original loop referenced
# `max_depth`, which this cell never defines, so it raised NameError on a fresh kernel.
grid = list(itertools.product(n_estimators, max_depths))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    clf = LGBMClassifier(n_estimators=hp1, max_depth=hp2, class_weight='balanced')
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once; the call returns
    # (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/LGSepsis.csv')

100%|███████████████████████████████████████████| 66/66 [00:41<00:00,  1.59it/s]


In [60]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
6,80.0,3.0,0.464183,0.778523,0.5,0.843729,0.410072,0.640449
18,200.0,3.0,0.463415,0.778957,0.502415,0.846549,0.440678,0.58427
12,140.0,3.0,0.461538,0.780695,0.497696,0.846595,0.421875,0.606742
24,260.0,3.0,0.461538,0.776723,0.525253,0.842055,0.477064,0.58427
30,320.0,3.0,0.441558,0.775792,0.529101,0.841023,0.5,0.561798


### Blood Panel (BP)

In [61]:
# Load the blood-panel dataset; blood_panel_data() returns three values.
# This rebinds data / block / cost from the previous section.
data, block, cost = blood_panel_data()

In [62]:
from data_loader import Data_Loader
# Rebuild the loader for the blood-panel data; no `index` argument is passed here.
# test_ratio / val_ratio are presumably fractions of all rows -- confirm in Data_Loader.
data_loader = Data_Loader(data, block, test_ratio = 0.2, val_ratio = 0.2)


In [63]:
# Split each partition into features (all columns but the last) and the
# label (last column), pairing X and y per partition.
X_train, y_train = data_loader.train[:, :-1], data_loader.train[:, -1]
X_test, y_test = data_loader.test[:, :-1], data_loader.test[:, -1]
X_val, y_val = data_loader.val[:, :-1], data_loader.val[:, -1]

In [64]:
from sklearn.ensemble import RandomForestClassifier
# Grid search: random forest on the blood-panel (BP) split.
# hp1 = n_estimators, hp2 = max_depth; one row of `results` per combination.
n_estimators = np.linspace(20, 620, 11, dtype=int)
max_depths = [3, 7, 11, 21, 31, 61]
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
# Iterate over `max_depths` (defined just above). The original loop referenced
# `max_depth`, which this cell never defines, so it raised NameError on a fresh kernel.
grid = list(itertools.product(n_estimators, max_depths))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    clf = RandomForestClassifier(class_weight='balanced', n_estimators=hp1, max_depth=hp2)
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once; the call returns
    # (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/RFBP.csv')

100%|███████████████████████████████████████████| 66/66 [23:54<00:00, 21.74s/it]


In [65]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
32,320.0,11.0,0.649046,0.945845,0.595059,0.937782,0.515829,0.703046
14,140.0,11.0,0.646628,0.94655,0.604502,0.938462,0.523191,0.715736
50,500.0,11.0,0.644543,0.946376,0.596567,0.937597,0.516729,0.705584
38,380.0,11.0,0.641537,0.94599,0.593383,0.937345,0.511971,0.705584
20,200.0,11.0,0.640408,0.945425,0.603854,0.93712,0.522222,0.715736


In [66]:
from sklearn.linear_model import LogisticRegression
# Grid search: logistic regression on the blood-panel (BP) split.
# hp1 = C, hp2 = penalty; one row of `results` per combination.
C = [0.0001, 0.001, 0.1, 1, 10, 100]
# NOTE(review): newer scikit-learn releases spell the unpenalised option as None
# rather than the string 'none' -- confirm against the installed version.
penalty = ['l1', 'l2', 'elasticnet', 'none']
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
grid = list(itertools.product(C, penalty))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    # l1_ratio is passed for every penalty; presumably it only matters for 'elasticnet'.
    clf = LogisticRegression(class_weight='balanced', C=hp1, penalty=hp2,
                             solver='saga', max_iter=1000, l1_ratio=0.5)
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once instead of recomputing the
    # same test metrics three times; the call returns (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/LRBP.csv')



100%|███████████████████████████████████████████| 24/24 [05:46<00:00, 14.46s/it]


In [67]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
12,1.0,l1,0.561497,0.938746,0.5375,0.934131,0.388262,0.873096
10,0.1,elasticnet,0.561497,0.939356,0.539484,0.934505,0.389831,0.875635
14,1.0,elasticnet,0.561197,0.938878,0.538763,0.934058,0.389581,0.873096
18,10.0,elasticnet,0.561159,0.938319,0.535938,0.933115,0.387133,0.870558
13,1.0,l2,0.560897,0.938913,0.538341,0.933983,0.38914,0.873096


In [68]:
from xgboost import XGBClassifier
# Grid search: XGBoost on the blood-panel (BP) split.
# hp1 = n_estimators, hp2 = max_depth; one row of `results` per combination.
n_estimators = np.linspace(20, 620, 11, dtype=int)
max_depths = [3, 7, 11, 21, 31, 61]
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
# Iterate over `max_depths` (defined just above). The original loop referenced
# `max_depth`, which this cell never defines, so it raised NameError on a fresh kernel.
grid = list(itertools.product(n_estimators, max_depths))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    # NOTE(review): the RF / LGBM / LR baselines pass class_weight='balanced';
    # no class weighting is configured here -- confirm this is intentional.
    clf = XGBClassifier(n_estimators=hp1, max_depth=hp2)
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once; the call returns
    # (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/XGBP.csv')

100%|███████████████████████████████████████████| 66/66 [06:47<00:00,  6.18s/it]


In [69]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
37,380.0,7.0,0.617188,0.948749,0.617143,0.93808,0.705882,0.548223
41,380.0,61.0,0.61629,0.950904,0.62029,0.94441,0.722973,0.543147
40,380.0,31.0,0.61629,0.950904,0.62029,0.94441,0.722973,0.543147
49,500.0,7.0,0.614786,0.948335,0.606581,0.938363,0.695082,0.538071
61,620.0,7.0,0.613437,0.947937,0.605452,0.938191,0.69637,0.535533


In [70]:
from lightgbm import LGBMClassifier
# Grid search: LightGBM on the blood-panel (BP) split.
# hp1 = n_estimators, hp2 = max_depth; one row of `results` per combination.
n_estimators = np.linspace(20, 620, 11, dtype=int)
max_depths = [3, 7, 11, 21, 31, 61]
results = pd.DataFrame(columns=['hp1', 'hp2', 'val_f1', 'val_auc',
                                'test_f1', 'test_auc', 'test_prec', 'test_recall'])
# Iterate over `max_depths` (defined just above). The original loop referenced
# `max_depth`, which this cell never defines, so it raised NameError on a fresh kernel.
grid = list(itertools.product(n_estimators, max_depths))
for i, (hp1, hp2) in enumerate(tqdm(grid)):
    clf = LGBMClassifier(n_estimators=hp1, max_depth=hp2, class_weight='balanced')
    clf.fit(X_train, y_train)
    # Probability of the positive class (column 1 of predict_proba).
    y_hat_val = clf.predict_proba(X_val)[:, 1]
    y_hat_test = clf.predict_proba(X_test)[:, 1]
    # Threshold at 0.5 and score each split once; the call returns
    # (precision, recall, f-score, support).
    _, _, val_f1, _ = precision_recall_fscore_support(
        y_val, y_hat_val > 0.5, average='binary')
    test_prec, test_recall, test_f1, _ = precision_recall_fscore_support(
        y_test, y_hat_test > 0.5, average='binary')
    results.loc[i] = [hp1, hp2,
                      val_f1,
                      roc_auc_score(y_val, y_hat_val),
                      test_f1,
                      roc_auc_score(y_test, y_hat_test),
                      test_prec,
                      test_recall]
# NOTE(review): absolute, user-specific output path; the directory must already exist.
results.to_csv('/home/ylo7832/Blood_Panel-master/baseline/LGBP.csv')

100%|███████████████████████████████████████████| 66/66 [01:17<00:00,  1.17s/it]


In [71]:
# Show the configurations with the highest validation F1 (head() keeps the first 5 rows).
results.sort_values('val_f1', ascending=False).head()

Unnamed: 0,hp1,hp2,val_f1,val_auc,test_f1,test_auc,test_prec,test_recall
38,380.0,11.0,0.675266,0.952017,0.626932,0.940576,0.554688,0.720812
31,320.0,7.0,0.672071,0.947787,0.627282,0.943151,0.543762,0.741117
61,620.0,7.0,0.67101,0.947281,0.636472,0.942239,0.6,0.677665
56,560.0,11.0,0.669348,0.952441,0.631829,0.942014,0.59375,0.675127
32,320.0,11.0,0.669118,0.951881,0.625806,0.941327,0.54291,0.738579
