In [1]:
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

In [2]:
csv_path = os.path.join("..", "..", "data","LINKED_DATA", "TSR_EHR", "TSR_12_CLEANED.csv")
tsr_12 = pd.read_csv(csv_path)
tsr_12.head()

Unnamed: 0,height_nm,weight_nm,edu_id,pro_id,opc_id,ih_fl,ivtpamg_nm,hospitalised_time,nivtpa_id,nivtpa1_fl,...,nihs_9_out,nihs_10_out,nihs_11_out,total_out,SexName,Age,mrs_tx_1,mrs_tx_3,mrs_tx_6,mrs_tx_12
0,153.0,62.0,3,1,3,0,0.0,8.0,0,999,...,0,1,0,4,0,67.0,1,1,1,1
1,152.0,62.0,3,1,2,0,0.0,4.0,0,999,...,0,0,0,1,0,69.0,1,0,0,0
2,148.0,56.0,2,1,2,0,0.0,5.0,0,999,...,0,0,0,2,0,71.0,0,0,0,0
3,152.0,56.0,4,1,2,0,0.0,3.0,1,0,...,0,0,0,0,0,71.0,0,0,0,0
4,170.0,87.8,5,8,3,0,0.0,8.491,0,999,...,0,0,2,2,1,59.0,5,5,5,4


In [3]:
tsr_12_input = tsr_12.drop(["mrs_tx_12"], axis=1)
tsr_12_input[tsr_12_input == "N"] = 0
tsr_12_input[tsr_12_input == "Y"] = 1
tsr_12_input = tsr_12_input.astype("float64")
tsr_12_input = np.array(tsr_12_input.values)

tsr_12_input_nomrs = tsr_12.drop(["mrs_tx_12", "mrs_tx_6", "mrs_tx_3", "mrs_tx_1"], axis=1)
tsr_12_input_nomrs[tsr_12_input_nomrs == "N"] = 0
tsr_12_input_nomrs[tsr_12_input_nomrs == "Y"] = 1
tsr_12_input_nomrs = tsr_12_input_nomrs.astype("float64")
tsr_12_input_nomrs = np.array(tsr_12_input_nomrs.values)

# 6 classes

In [4]:
tsr_12_output = tsr_12.mrs_tx_12
tsr_12_output = tsr_12_output.astype("float64")
tsr_12_output = np.array(tsr_12_output.values)

## SVM

In [5]:
svc = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores = cross_val_score(svc,tsr_12_input,tsr_12_output,cv = 10,scoring='accuracy')
print(svc_scores)
print(svc_scores.mean(), svc_scores.std())



[0.53571429 0.46785714 0.53571429 0.46785714 0.575      0.51785714
 0.51785714 0.609319   0.59856631 0.56272401]
0.5388466461853558 0.0462490485469709


In [6]:
svc.fit(tsr_12_input,tsr_12_output)
svc_predict =svc.predict_proba(tsr_12_input)
print(svc_predict)

[[0.26862567 0.42985033 0.12548006 0.05352898 0.09252688 0.02998809]
 [0.52726051 0.29378585 0.0744953  0.0409451  0.05668026 0.00683299]
 [0.52288646 0.27226025 0.0675113  0.03200687 0.09966727 0.00566785]
 ...
 [0.53660072 0.26364246 0.11760841 0.05957725 0.01800509 0.00456607]
 [0.00077262 0.12611465 0.14062475 0.07678097 0.12375893 0.53194808]
 [0.32776374 0.43132432 0.09309819 0.09738773 0.04097868 0.00944734]]


In [7]:
svc_pred = cross_val_predict(svc,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, svc_pred)



array([[453, 158,   2,   0,   3,   0],
       [179, 717,  11,   3,   6,   3],
       [ 23, 278,  11,  15,  22,  12],
       [  6, 175,  17,  13,  50,  15],
       [  8,  93,  15,  10, 108,  96],
       [  6,  24,   3,   5,  52, 205]], dtype=int64)

In [8]:
svc1 = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores1 = cross_val_score(svc1,tsr_12_input_nomrs,tsr_12_output,cv = 10,scoring='accuracy')
print(svc_scores1)
print(svc_scores1.mean(), svc_scores1.std())



[0.42142857 0.35357143 0.42857143 0.38571429 0.43928571 0.44642857
 0.41071429 0.50179211 0.46594982 0.44802867]
0.43014848950332824 0.039214069394878315


In [9]:
svc1.fit(tsr_12_input_nomrs,tsr_12_output)
svc_predict1 =svc1.predict_proba(tsr_12_input_nomrs)
print(svc_predict1)

[[0.283366   0.40323795 0.11620381 0.05835906 0.09216713 0.04666606]
 [0.36038494 0.35354252 0.09388962 0.06166255 0.10100427 0.0295161 ]
 [0.31098667 0.32804006 0.09045273 0.05595231 0.18360576 0.03096248]
 ...
 [0.34107005 0.32517816 0.16877138 0.09649115 0.03765395 0.03083532]
 [0.01556813 0.13911473 0.15329409 0.08473056 0.11879156 0.48850092]
 [0.38093119 0.39544099 0.07620147 0.08998908 0.03755514 0.01988214]]


In [10]:
svc_pred1 = cross_val_predict(svc,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, svc_pred1)



array([[453, 158,   2,   0,   3,   0],
       [179, 717,  11,   3,   6,   3],
       [ 23, 278,  11,  15,  22,  12],
       [  6, 175,  17,  13,  50,  15],
       [  8,  93,  15,  10, 108,  96],
       [  6,  24,   3,   5,  52, 205]], dtype=int64)

## RF

In [11]:
rf = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8)) 
rf_scores = cross_val_score(rf,tsr_12_input,tsr_12_output,cv = 10,scoring='accuracy')
print(rf_scores)
print(rf_scores.mean(), rf_scores.std())

[0.77142857 0.68214286 0.69285714 0.60357143 0.675      0.69285714
 0.8        0.81362007 0.85663082 0.88530466]
0.7473412698412699 0.08649567788891958


In [12]:
rf.fit(tsr_12_input,tsr_12_output)
rf_predict =rf.predict_proba(tsr_12_input)
print(rf_predict)

[[0.05662813 0.82855727 0.04319412 0.02727794 0.02654978 0.01779277]
 [0.83645516 0.05585939 0.04010879 0.02573722 0.02499647 0.01684298]
 [0.83645516 0.05585939 0.04010879 0.02573722 0.02499647 0.01684298]
 ...
 [0.63184022 0.23672176 0.0565786  0.03016374 0.0268531  0.01784257]
 [0.04234577 0.06645309 0.0428842  0.02917601 0.29997862 0.51916232]
 [0.11910171 0.73456312 0.05469441 0.04433156 0.02834939 0.01895981]]


In [13]:
rf_pred = cross_val_predict(rf,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, rf_pred)

array([[458, 145,   8,   5,   0,   0],
       [ 51, 744,  81,  36,   6,   1],
       [ 14,  64, 188,  77,  13,   5],
       [  5,  19,  20, 199,  29,   4],
       [  6,  12,  12,  26, 263,  11],
       [  5,   5,   4,   9,  34, 238]], dtype=int64)

In [14]:
rf1 = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8)) 
rf_scores1 = cross_val_score(rf1,tsr_12_input_nomrs,tsr_12_output,cv = 10,scoring='accuracy')
print(rf_scores1)
print(rf_scores1.mean(), rf_scores1.std())

[0.53571429 0.40357143 0.525      0.425      0.46071429 0.45714286
 0.45714286 0.49820789 0.56272401 0.52688172]
0.4852099334357399 0.04951110600630904


In [15]:
rf1.fit(tsr_12_input_nomrs,tsr_12_output)
rf_predict1 =rf1.predict_proba(tsr_12_input_nomrs)
print(rf_predict1)

[[0.12812284 0.68101822 0.07097383 0.0467219  0.04732446 0.02583876]
 [0.60046639 0.21772178 0.06661134 0.0437208  0.04448425 0.02699543]
 [0.67632141 0.1425348  0.06856071 0.04250215 0.04653346 0.02354747]
 ...
 [0.52237154 0.21471618 0.13236512 0.0551207  0.04654724 0.02887922]
 [0.05874565 0.10309731 0.07079875 0.04339208 0.09787484 0.62609137]
 [0.17933556 0.58330904 0.1052152  0.05673492 0.04879848 0.02660679]]


In [16]:
rf_pred1 = cross_val_predict(rf1,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, rf_pred1)

array([[458, 145,   8,   5,   0,   0],
       [ 51, 744,  81,  36,   6,   1],
       [ 14,  64, 188,  77,  13,   5],
       [  5,  19,  20, 199,  29,   4],
       [  6,  12,  12,  26, 263,  11],
       [  5,   5,   4,   9,  34, 238]], dtype=int64)

## XGBoost

In [17]:
xgb = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="multi:softprob", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19)) 
xgb_scores = cross_val_score(xgb,tsr_12_input,tsr_12_output,cv = 10)
print(xgb_scores)
print(xgb_scores.mean(), xgb_scores.std())

[0.76071429 0.675      0.69642857 0.625      0.68214286 0.68928571
 0.79642857 0.80645161 0.85663082 0.89247312]
0.7480555555555556 0.08336595260962269


In [18]:
xgb.fit(tsr_12_input,tsr_12_output)
xgb_predict =xgb.predict_proba(tsr_12_input)
print(xgb_predict)

[[0.06396884 0.78702614 0.05939582 0.03831351 0.0299266  0.02136908]
 [0.79570418 0.06793865 0.05429286 0.03501803 0.02747711 0.01956916]
 [0.79556722 0.06800748 0.05430523 0.03502392 0.02752726 0.01956888]
 ...
 [0.79351711 0.06952384 0.05477236 0.03507073 0.02749117 0.01962478]
 [0.0504763  0.06353923 0.05050435 0.03277261 0.20158263 0.60112488]
 [0.06843061 0.78029102 0.06030222 0.039416   0.03001825 0.0215419 ]]


In [19]:
xgb_pred = cross_val_predict(xgb,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, xgb_pred)

array([[465, 138,   9,   4,   0,   0],
       [ 47, 755,  79,  31,   6,   1],
       [ 14,  68, 194,  62,  18,   5],
       [  7,  25,  34, 178,  28,   4],
       [  6,  14,   9,  26, 262,  13],
       [  5,   5,   4,   9,  34, 238]], dtype=int64)

In [20]:
xgb1 = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="multi:softprob", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19)) 
xgb_scores1 = cross_val_score(xgb1,tsr_12_input_nomrs,tsr_12_output,cv = 10)
print(xgb_scores1)
print(xgb_scores1.mean(), xgb_scores1.std())

[0.52142857 0.375      0.47142857 0.42857143 0.46428571 0.41428571
 0.46785714 0.52688172 0.5734767  0.52688172]
0.4770097286226319 0.05763394579881807


In [21]:
xgb1.fit(tsr_12_input_nomrs,tsr_12_output)
xgb_predict1 =xgb1.predict_proba(tsr_12_input_nomrs)
print(xgb_predict1)

[[0.19023124 0.52540152 0.10374019 0.07016727 0.07161628 0.03884349]
 [0.60921063 0.14203524 0.09100051 0.06130604 0.0625158  0.03393178]
 [0.58146314 0.16303523 0.09304935 0.06305768 0.06455678 0.03483783]
 ...
 [0.5247633  0.15058007 0.15080192 0.06778914 0.06936884 0.03669672]
 [0.08386744 0.11937508 0.0820815  0.05583905 0.05872342 0.60011351]
 [0.21455837 0.49574631 0.10656391 0.07139273 0.07234626 0.03939242]]


In [22]:
xgb_pred1 = cross_val_predict(xgb1,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, xgb_pred1)

array([[465, 138,   9,   4,   0,   0],
       [ 47, 755,  79,  31,   6,   1],
       [ 14,  68, 194,  62,  18,   5],
       [  7,  25,  34, 178,  28,   4],
       [  6,  14,   9,  26, 262,  13],
       [  5,   5,   4,   9,  34, 238]], dtype=int64)

# 2 classes

In [23]:
tsr_12_output[(tsr_12_output == 0)|(tsr_12_output == 1)|(tsr_12_output == 2)] = 0
tsr_12_output[(tsr_12_output == 3)|(tsr_12_output == 4)|(tsr_12_output == 5)] = 1

## SVM

In [24]:
svc2 = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores2 = cross_val_score(svc2,tsr_12_input,tsr_12_output,cv = 10,scoring='accuracy')
print(svc_scores2)
print(svc_scores2.mean(), svc_scores2.std())

[0.84285714 0.87857143 0.90714286 0.91071429 0.86428571 0.89642857
 0.91428571 0.94982079 0.92831541 0.91397849]
0.9006400409626216 0.029762104068616728


In [25]:
svc2.fit(tsr_12_input,tsr_12_output)
svc_predict2 =svc2.predict_proba(tsr_12_input)
print(svc_predict2)

[[0.94202835 0.05797165]
 [0.96532179 0.03467821]
 [0.96307711 0.03692289]
 ...
 [0.98329803 0.01670197]
 [0.18638126 0.81361874]
 [0.88578938 0.11421062]]


In [26]:
svc_pred2 = cross_val_predict(svc2,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, svc_pred2)

array([[1788,  108],
       [ 170,  731]], dtype=int64)

In [27]:
svc3 = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores3 = cross_val_score(svc3,tsr_12_input_nomrs,tsr_12_output,cv = 10,scoring='accuracy')
print(svc_scores3)
print(svc_scores3.mean(), svc_scores3.std())

[0.775      0.81071429 0.85714286 0.85714286 0.84285714 0.82142857
 0.85       0.87096774 0.8781362  0.8172043 ]
0.8380593958013313 0.030031358071791324


In [28]:
svc3.fit(tsr_12_input_nomrs,tsr_12_output)
svc_predict3 =svc3.predict_proba(tsr_12_input_nomrs)
print(svc_predict3)

[[0.88859438 0.11140562]
 [0.87022052 0.12977948]
 [0.80416561 0.19583439]
 ...
 [0.90347411 0.09652589]
 [0.19564977 0.80435023]
 [0.88294622 0.11705378]]


In [29]:
svc_pred3 = cross_val_predict(svc3,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, svc_pred3)

array([[1788,  108],
       [ 170,  731]], dtype=int64)

## RF

In [30]:
rf2 = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8)) 
rf_scores2 = cross_val_score(rf2,tsr_12_input,tsr_12_output,cv = 10,scoring='accuracy')
print(rf_scores2)
print(rf_scores2.mean(), rf_scores2.std())

[0.91785714 0.88214286 0.89285714 0.92142857 0.85       0.9
 0.92142857 0.9390681  0.94265233 0.94982079]
0.911725550435228 0.02925731736761189


In [31]:
rf2.fit(tsr_12_input,tsr_12_output)
rf_predict2 =rf2.predict_proba(tsr_12_input)
print(rf_predict2)

[[0.94881985 0.05118015]
 [0.95692315 0.04307685]
 [0.96608224 0.03391776]
 ...
 [0.96608224 0.03391776]
 [0.0519897  0.9480103 ]
 [0.94762963 0.05237037]]


In [32]:
rf_pred2 = cross_val_predict(rf2,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, rf_pred2)

array([[1758,  138],
       [ 109,  792]], dtype=int64)

In [33]:
rf3 = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8))  
rf_scores3 = cross_val_score(rf3,tsr_12_input_nomrs,tsr_12_output,cv = 10,scoring='accuracy')
print(rf_scores3)
print(rf_scores3.mean(), rf_scores3.std())

[0.85714286 0.83571429 0.86428571 0.86428571 0.81071429 0.83928571
 0.86071429 0.84229391 0.84946237 0.84587814]
0.8469777265745009 0.015561316806108338


In [34]:
rf3.fit(tsr_12_input_nomrs,tsr_12_output)
rf_predict3 =rf3.predict_proba(tsr_12_input_nomrs)
print(rf_predict3)

[[0.93237391 0.06762609]
 [0.94462744 0.05537256]
 [0.93298557 0.06701443]
 ...
 [0.93169674 0.06830326]
 [0.1044069  0.8955931 ]
 [0.91104731 0.08895269]]


In [35]:
rf_pred3 = cross_val_predict(rf3,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, rf_pred3)

array([[1758,  138],
       [ 109,  792]], dtype=int64)

## XGBoost

In [36]:
xgb2 = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="binary:logistic", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19))
xgb_scores2 = cross_val_score(xgb2,tsr_12_input,tsr_12_output,cv = 10)
print(xgb_scores2)
print(xgb_scores2.mean(), xgb_scores2.std())

[0.91071429 0.88571429 0.89642857 0.91428571 0.85       0.89285714
 0.92857143 0.95340502 0.94265233 0.95340502]
0.9128033794162826 0.03128451548459597


In [37]:
xgb2.fit(tsr_12_input,tsr_12_output)
xgb_predict2 =xgb2.predict_proba(tsr_12_input)
print(xgb_predict2)

[[0.94687044 0.05312956]
 [0.94653153 0.05346847]
 [0.94693236 0.05306764]
 ...
 [0.94704338 0.05295662]
 [0.09892663 0.90107337]
 [0.94697237 0.05302763]]


In [38]:
xgb_pred2 = cross_val_predict(xgb2,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, xgb_pred2)

array([[1772,  124],
       [ 120,  781]], dtype=int64)

In [39]:
xgb3 = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="binary:logistic", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19))
xgb_scores3 = cross_val_score(xgb3,tsr_12_input_nomrs,tsr_12_output,cv = 10)
print(xgb_scores3)
print(xgb_scores3.mean(), xgb_scores3.std())

[0.80357143 0.83571429 0.87857143 0.87142857 0.82142857 0.83214286
 0.85       0.84946237 0.86738351 0.86021505]
0.8469918074756784 0.022553799731251673


In [40]:
xgb3.fit(tsr_12_input_nomrs,tsr_12_output)
xgb_predict3 =xgb3.predict_proba(tsr_12_input_nomrs)
print(xgb_predict3)

[[0.89878808 0.10121192]
 [0.8983423  0.1016577 ]
 [0.89525222 0.10474778]
 ...
 [0.89909796 0.10090204]
 [0.14860073 0.85139927]
 [0.8965817  0.1034183 ]]


In [41]:
xgb_pred3 = cross_val_predict(xgb3,tsr_12_input,tsr_12_output,cv = 10)
confusion_matrix(tsr_12_output, xgb_pred3)

array([[1772,  124],
       [ 120,  781]], dtype=int64)

# Summary

## Mean & Std

In [42]:
svc_mean = np.array([svc_scores.mean(), svc_scores.std(), svc_scores[0], svc_scores[1], svc_scores[2], svc_scores[3],
                     svc_scores[4], svc_scores[5], svc_scores[6], svc_scores[7], svc_scores[8], svc_scores[9]])
rf_mean = np.array([rf_scores.mean(), rf_scores.std(), rf_scores[0], rf_scores[1], rf_scores[2], rf_scores[3],rf_scores[4], 
                    rf_scores[5], rf_scores[6], rf_scores[7], rf_scores[8], rf_scores[9]])
xgb_mean = np.array([xgb_scores.mean(), xgb_scores.std(), xgb_scores[0], xgb_scores[1], xgb_scores[2], xgb_scores[3],
                     xgb_scores[4], xgb_scores[5], xgb_scores[6], xgb_scores[7], xgb_scores[8], xgb_scores[9]])

svc_mean1 = np.array([svc_scores1.mean(), svc_scores1.std(), svc_scores1[0], svc_scores1[1], svc_scores1[2], svc_scores1[3],
                     svc_scores1[4], svc_scores1[5], svc_scores1[6], svc_scores1[7], svc_scores1[8], svc_scores1[9]])
rf_mean1 = np.array([rf_scores1.mean(), rf_scores1.std(), rf_scores1[0], rf_scores1[1], rf_scores1[2], rf_scores1[3],rf_scores1[4], 
                    rf_scores1[5], rf_scores1[6], rf_scores1[7], rf_scores1[8], rf_scores1[9]])
xgb_mean1 = np.array([xgb_scores1.mean(), xgb_scores1.std(), xgb_scores1[0], xgb_scores1[1], xgb_scores1[2], xgb_scores1[3],
                     xgb_scores1[4], xgb_scores1[5], xgb_scores1[6], xgb_scores1[7], xgb_scores1[8], xgb_scores1[9]])

svc_mean2 = np.array([svc_scores2.mean(), svc_scores2.std(), svc_scores2[0], svc_scores2[1], svc_scores2[2], svc_scores2[3],
                     svc_scores2[4], svc_scores2[5], svc_scores2[6], svc_scores2[7], svc_scores2[8], svc_scores2[9]])
rf_mean2 = np.array([rf_scores2.mean(), rf_scores2.std(), rf_scores2[0], rf_scores2[1], rf_scores2[2], rf_scores2[3],rf_scores2[4], 
                    rf_scores2[5], rf_scores2[6], rf_scores2[7], rf_scores2[8], rf_scores2[9]])
xgb_mean2 = np.array([xgb_scores2.mean(), xgb_scores2.std(), xgb_scores2[0], xgb_scores2[1], xgb_scores2[2], xgb_scores2[3],
                     xgb_scores2[4], xgb_scores2[5], xgb_scores2[6], xgb_scores2[7], xgb_scores2[8], xgb_scores2[9]])

svc_mean3 = np.array([svc_scores3.mean(), svc_scores3.std(), svc_scores3[0], svc_scores3[1], svc_scores3[2], svc_scores3[3],
                     svc_scores3[4], svc_scores3[5], svc_scores3[6], svc_scores3[7], svc_scores3[8], svc_scores3[9]])
rf_mean3 = np.array([rf_scores3.mean(), rf_scores3.std(), rf_scores3[0], rf_scores3[1], rf_scores3[2], rf_scores3[3],rf_scores3[4], 
                    rf_scores3[5], rf_scores3[6], rf_scores3[7], rf_scores3[8], rf_scores3[9]])
xgb_mean3 = np.array([xgb_scores3.mean(), xgb_scores3.std(), xgb_scores3[0], xgb_scores3[1], xgb_scores3[2], xgb_scores3[3],
                     xgb_scores3[4], xgb_scores3[5], xgb_scores3[6], xgb_scores3[7], xgb_scores3[8], xgb_scores3[9]])

In [43]:
tsr_12_mean = pd.DataFrame([svc_mean, rf_mean, xgb_mean,svc_mean1, rf_mean1, xgb_mean1, svc_mean2, rf_mean2, xgb_mean2,
                          svc_mean3, rf_mean3, xgb_mean3]).T
tsr_12_mean.index = ["Mean", "Std", "mean_1", "mean_2", "mean_3", "mean_4", "mean_5", "mean_6", "mean_7", "mean_8", "mean_9", "mean_10"]
tsr_12_mean.columns = ["svc", 'rf', 'xgb', "svc1", 'rf1', 'xgb1', 'svc2', 'rf2', 'xgb2', "svc3", 'rf3', 'xgb3']

In [44]:
csv_save = os.path.join(".", "tsr_12_mean.csv")
tsr_12_mean.to_csv(csv_save, index = True)

## Predicted Probability

In [45]:
svc_predict_0 = svc_predict[:, 0]
svc_predict_1 = svc_predict[:, 1]
svc_predict_2 = svc_predict[:, 2]
svc_predict_3 = svc_predict[:, 3]
svc_predict_4 = svc_predict[:, 4]
svc_predict_5 = svc_predict[:, 5]

rf_predict_0 = rf_predict[:, 0]
rf_predict_1 = rf_predict[:, 1]
rf_predict_2 = rf_predict[:, 2]
rf_predict_3 = rf_predict[:, 3]
rf_predict_4 = rf_predict[:, 4]
rf_predict_5 = rf_predict[:, 5]

xgb_predict_0 = xgb_predict[:, 0]
xgb_predict_1 = xgb_predict[:, 1]
xgb_predict_2 = xgb_predict[:, 2]
xgb_predict_3 = xgb_predict[:, 3]
xgb_predict_4 = xgb_predict[:, 4]
xgb_predict_5 = xgb_predict[:, 5]

svc_predict1_0 = svc_predict1[:, 0]
svc_predict1_1 = svc_predict1[:, 1]
svc_predict1_2 = svc_predict1[:, 2]
svc_predict1_3 = svc_predict1[:, 3]
svc_predict1_4 = svc_predict1[:, 4]
svc_predict1_5 = svc_predict1[:, 5]

rf_predict1_0 = rf_predict1[:, 0]
rf_predict1_1 = rf_predict1[:, 1]
rf_predict1_2 = rf_predict1[:, 2]
rf_predict1_3 = rf_predict1[:, 3]
rf_predict1_4 = rf_predict1[:, 4]
rf_predict1_5 = rf_predict1[:, 5]

xgb_predict1_0 = xgb_predict1[:, 0]
xgb_predict1_1 = xgb_predict1[:, 1]
xgb_predict1_2 = xgb_predict1[:, 2]
xgb_predict1_3 = xgb_predict1[:, 3]
xgb_predict1_4 = xgb_predict1[:, 4]
xgb_predict1_5 = xgb_predict1[:, 5]

svc_predict2_0 = svc_predict2[:, 0]
svc_predict2_1 = svc_predict2[:, 1]

rf_predict2_0 = rf_predict2[:, 0]
rf_predict2_1 = rf_predict2[:, 1]

xgb_predict2_0 = xgb_predict2[:, 0]
xgb_predict2_1 = xgb_predict2[:, 1]

svc_predict3_0 = svc_predict3[:, 0]
svc_predict3_1 = svc_predict3[:, 1]

rf_predict3_0 = rf_predict3[:, 0]
rf_predict3_1 = rf_predict3[:, 1]

xgb_predict3_0 = xgb_predict3[:, 0]
xgb_predict3_1 = xgb_predict3[:, 1]

In [46]:
tsr_12_pred_prob = pd.DataFrame([svc_predict_0, svc_predict_1, svc_predict_2, svc_predict_3, svc_predict_4, svc_predict_5, 
                               rf_predict_0, rf_predict_1, rf_predict_2, rf_predict_3, rf_predict_4, rf_predict_5,
                               xgb_predict_0, xgb_predict_1, xgb_predict_2, xgb_predict_3, xgb_predict_4, xgb_predict_5,
                                svc_predict1_0, svc_predict1_1, svc_predict1_2, svc_predict1_3, svc_predict1_4, svc_predict1_5, 
                               rf_predict1_0, rf_predict1_1, rf_predict1_2, rf_predict1_3, rf_predict1_4, rf_predict1_5,
                               xgb_predict1_0, xgb_predict1_1, xgb_predict1_2, xgb_predict1_3, xgb_predict1_4, xgb_predict1_5,
                               svc_predict2_0, svc_predict2_1, rf_predict2_0, rf_predict2_1, xgb_predict2_0, xgb_predict2_1,
                               svc_predict3_0, svc_predict3_1, rf_predict3_0, rf_predict3_1, xgb_predict3_0, xgb_predict3_1]).T
tsr_12_pred_prob.columns = ["svc_predict_0", "svc_predict_1", "svc_predict_2", "svc_predict_3", "svc_predict_4", "svc_predict_5", 
                           "rf_predict_0", "rf_predict_1", "rf_predict_2", "rf_predict_3", "rf_predict_4", "rf_predict_5",
                           "xgb_predict_0", "xgb_predict_1", "xgb_predict_2", 'xgb_predict_3', "xgb_predict_4", "xgb_predict_5",
                           "svc_predict1_0", "svc_predict1_1", "svc_predict1_2", "svc_predict1_3", "svc_predict1_4", "svc_predict1_5", 
                           "rf_predict1_0", "rf_predict1_1", "rf_predict1_2", "rf_predict1_3", "rf_predict1_4", "rf_predict1_5",
                           "xgb_predict1_0", "xgb_predict1_1", "xgb_predict1_2", 'xgb_predict1_3', "xgb_predict1_4", "xgb_predict1_5",
                           "svc_predict2_0", "svc_predict2_1", "rf_predict2_0", "rf_predict2_1", "xgb_predict2_0", "xgb_predict2_1",
                           "svc_predict3_0", "svc_predict3_1", "rf_predict3_0", "rf_predict3_1", "xgb_predict3_0", "xgb_predict3_1"]

In [47]:
csv_save2 = os.path.join(".", "tsr_12_pred_prob.csv")
tsr_12_pred_prob.to_csv(csv_save2, index = False)