In [1]:
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

In [2]:
csv_path = os.path.join("..", "..", "data","LINKED_DATA", "TSR_EHR", "TSR_3_CLEANED.csv")
tsr_3 = pd.read_csv(csv_path)
tsr_3.head()

Unnamed: 0,height_nm,weight_nm,edu_id,pro_id,opc_id,ih_fl,ivtpamg_nm,hospitalised_time,nivtpa_id,nivtpa1_fl,...,nihs_7_out,nihs_8_out,nihs_9_out,nihs_10_out,nihs_11_out,total_out,SexName,Age,mrs_tx_1,mrs_tx_3
0,153.0,62.0,3,1,3,0,0.0,8.0,0,999,...,1,1,0,1,0,4,0,67.0,1,1
1,152.0,62.0,3,1,2,0,0.0,4.0,0,999,...,0,1,0,0,0,1,0,69.0,1,0
2,148.0,56.0,2,999,2,0,0.0,5.0,0,999,...,0,1,0,0,0,2,0,71.0,0,0
3,152.0,56.0,4,1,2,0,0.0,3.0,1,0,...,0,0,0,0,0,0,0,71.0,0,0
4,160.0,60.0,2,1,3,0,0.0,4.0,0,999,...,0,0,0,0,0,4,0,62.0,3,3


In [3]:
tsr_3_input = tsr_3.drop(["mrs_tx_3"], axis=1)
tsr_3_input[tsr_3_input == "N"] = 0
tsr_3_input[tsr_3_input == "Y"] = 1
tsr_3_input = tsr_3_input.astype("float64")
tsr_3_input = np.array(tsr_3_input.values)

tsr_3_input_nomrs = tsr_3.drop(["mrs_tx_3", "mrs_tx_1"], axis=1)
tsr_3_input_nomrs[tsr_3_input_nomrs == "N"] = 0
tsr_3_input_nomrs[tsr_3_input_nomrs == "Y"] = 1
tsr_3_input_nomrs = tsr_3_input_nomrs.astype("float64")
tsr_3_input_nomrs = np.array(tsr_3_input_nomrs.values)

# 6 classes

In [4]:
tsr_3_output = tsr_3.mrs_tx_3
tsr_3_output = tsr_3_output.astype("float64")
tsr_3_output = np.array(tsr_3_output.values)

## SVM

In [5]:
svc = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores = cross_val_score(svc,tsr_3_input,tsr_3_output,cv = 10,scoring='accuracy')
print(svc_scores)
print(svc_scores.mean(), svc_scores.std())

[0.53151261 0.54947368 0.56       0.52842105 0.57684211 0.56631579
 0.58315789 0.57052632 0.59157895 0.46105263]
0.5518881026094649 0.03608321614436032


In [6]:
svc.fit(tsr_3_input,tsr_3_output)
svc_predict =svc.predict_proba(tsr_3_input)
print(svc_predict)

[[1.76307332e-01 4.30350821e-01 2.49061730e-01 6.82342610e-02
  7.48439376e-02 1.20191865e-03]
 [2.17854260e-01 4.50075064e-01 1.74372120e-01 6.69150808e-02
  9.01903025e-02 5.93172028e-04]
 [3.91336502e-01 4.63598413e-01 6.14407853e-02 3.43684433e-02
  4.91386481e-02 1.17208131e-04]
 ...
 [1.91806082e-02 4.14167610e-03 1.31714780e-02 4.59814858e-02
  4.71209469e-02 8.70403805e-01]
 [3.03693915e-03 1.86436448e-02 1.75840865e-02 1.27932878e-01
  3.28786214e-01 5.04016238e-01]
 [1.28946271e-03 6.50358419e-03 1.13843921e-02 2.67352562e-01
  3.63539560e-01 3.49930439e-01]]


In [7]:
svc_pred = cross_val_predict(svc,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, svc_pred)

array([[390, 227,   7,   5,   4,   2],
       [157, 927,  96,  24,  22,   5],
       [ 14, 400, 130,  95,  55,   5],
       [  4,  78, 145, 215, 212,  27],
       [  4,  27,  47, 115, 425, 217],
       [  1,   7,   7,  16, 104, 535]], dtype=int64)

In [8]:
svc1 = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores1 = cross_val_score(svc1,tsr_3_input_nomrs,tsr_3_output,cv = 10,scoring='accuracy')
print(svc_scores1)
print(svc_scores1.mean(), svc_scores1.std())

[0.45378151 0.49684211 0.52421053 0.46947368 0.51789474 0.50105263
 0.53473684 0.50105263 0.53473684 0.39789474]
0.4931676249447148 0.040445681801667445


In [9]:
svc1.fit(tsr_3_input_nomrs,tsr_3_output)
svc_predict1 =svc1.predict_proba(tsr_3_input_nomrs)
print(svc_predict1)

[[0.16855196 0.40317621 0.26057512 0.07412501 0.08419971 0.00937199]
 [0.24281638 0.42950373 0.16551773 0.06601409 0.09203533 0.00411273]
 [0.25682072 0.51277684 0.0896385  0.05519765 0.07971618 0.00585011]
 ...
 [0.02073372 0.00402605 0.0142895  0.0534655  0.04065332 0.86683191]
 [0.01917575 0.02673935 0.01629599 0.14309402 0.35888324 0.43581164]
 [0.11024199 0.02008327 0.00916914 0.31057019 0.32300409 0.22693132]]


In [10]:
svc_pred1 = cross_val_predict(svc,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, svc_pred1)

array([[390, 227,   7,   5,   4,   2],
       [157, 927,  96,  24,  22,   5],
       [ 14, 400, 130,  95,  55,   5],
       [  4,  78, 145, 215, 212,  27],
       [  4,  27,  47, 115, 425, 217],
       [  1,   7,   7,  16, 104, 535]], dtype=int64)

## RF

In [11]:
rf = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8)) 
rf_scores = cross_val_score(rf,tsr_3_input,tsr_3_output,cv = 10,scoring='accuracy')
print(rf_scores)
print(rf_scores.mean(), rf_scores.std())

[0.76470588 0.68       0.69052632 0.70315789 0.76631579 0.73684211
 0.74526316 0.81894737 0.83157895 0.71578947]
0.7453126934984521 0.04865064616462876


In [12]:
rf.fit(tsr_3_input,tsr_3_output)
rf_predict =rf.predict_proba(tsr_3_input)
print(rf_predict)

[[0.03899871 0.79945784 0.07153687 0.03665986 0.03260206 0.02074465]
 [0.54492772 0.33431638 0.03567008 0.03447077 0.0308678  0.01974727]
 [0.83853846 0.04530087 0.03431816 0.03340202 0.02960393 0.01883656]
 ...
 [0.02579372 0.04100794 0.03333556 0.0325007  0.03581615 0.83154593]
 [0.02647307 0.04206947 0.03752733 0.04102439 0.07045703 0.78244871]
 [0.0265463  0.0419321  0.0343228  0.04114929 0.10380873 0.75224078]]


In [13]:
rf_pred = cross_val_predict(rf,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, rf_pred)

array([[432, 176,  16,   5,   5,   1],
       [ 56, 941, 182,  40,  10,   2],
       [  5,  63, 474, 123,  30,   4],
       [  1,  14,  27, 474, 149,  16],
       [  2,   8,  19,  40, 663, 103],
       [  1,   2,   4,   5, 101, 557]], dtype=int64)

In [14]:
rf1 = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8)) 
rf_scores1 = cross_val_score(rf1,tsr_3_input_nomrs,tsr_3_output,cv = 10,scoring='accuracy')
print(rf_scores1)
print(rf_scores1.mean(), rf_scores1.std())

[0.59663866 0.56631579 0.58526316 0.52210526 0.61894737 0.57263158
 0.60421053 0.66526316 0.68421053 0.51578947]
0.5931375497567448 0.05150497453782392


In [15]:
rf1.fit(tsr_3_input_nomrs,tsr_3_output)
rf_predict1 =rf1.predict_proba(tsr_3_input_nomrs)
print(rf_predict1)

[[0.07446657 0.75168179 0.05408466 0.04553275 0.04901812 0.02521611]
 [0.52529396 0.30906069 0.05281334 0.04446261 0.04376494 0.02460445]
 [0.66897697 0.15757792 0.06031427 0.04468925 0.04383532 0.02460627]
 ...
 [0.03651016 0.05903353 0.04727445 0.03975426 0.04651298 0.77091463]
 [0.03781923 0.06114883 0.04903608 0.05316796 0.1491875  0.6496404 ]
 [0.04148468 0.06950572 0.04968638 0.06530419 0.20207518 0.57194385]]


In [16]:
rf_pred1 = cross_val_predict(rf1,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, rf_pred1)

array([[432, 176,  16,   5,   5,   1],
       [ 56, 941, 182,  40,  10,   2],
       [  5,  63, 474, 123,  30,   4],
       [  1,  14,  27, 474, 149,  16],
       [  2,   8,  19,  40, 663, 103],
       [  1,   2,   4,   5, 101, 557]], dtype=int64)

## XGBoost

In [17]:
xgb = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="multi:softprob", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19)) 
xgb_scores = cross_val_score(xgb,tsr_3_input,tsr_3_output,cv = 10)
print(xgb_scores)
print(xgb_scores.mean(), xgb_scores.std())

[0.75630252 0.68       0.67578947 0.68631579 0.75789474 0.73894737
 0.72421053 0.81263158 0.82315789 0.69684211]
0.7352091994692614 0.05026436447249889


In [18]:
xgb.fit(tsr_3_input,tsr_3_output)
xgb_predict =xgb.predict_proba(tsr_3_input)
print(xgb_predict)

[[0.04636045 0.78814483 0.05238291 0.04541253 0.04218967 0.02550961]
 [0.63608592 0.21126584 0.04686133 0.04253801 0.03951601 0.0237329 ]
 [0.79603102 0.05404138 0.04630509 0.04166527 0.03869733 0.0232599 ]
 ...
 [0.03654637 0.05168943 0.04516958 0.0413263  0.03841453 0.78685378]
 [0.03703376 0.05254928 0.04588284 0.04206854 0.04593821 0.77652737]
 [0.03933196 0.05604725 0.05028176 0.04617041 0.09088218 0.71728643]]


In [19]:
xgb_pred = cross_val_predict(xgb,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, xgb_pred)

array([[430, 179,  16,   6,   3,   1],
       [ 55, 954, 169,  42,   9,   2],
       [  5,  97, 440, 128,  25,   4],
       [  1,  12,  43, 465, 146,  14],
       [  2,   8,  18,  59, 650,  98],
       [  1,   2,   5,   7, 101, 554]], dtype=int64)

In [20]:
xgb1 = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="multi:softprob", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19)) 
xgb_scores1 = cross_val_score(xgb1,tsr_3_input_nomrs,tsr_3_output,cv = 10)
print(xgb_scores1)
print(xgb_scores1.mean(), xgb_scores1.std())

[0.5987395  0.6        0.58526316 0.53052632 0.58947368 0.55789474
 0.58736842 0.64421053 0.66526316 0.50526316]
0.5864002653693057 0.045197302273031384


In [21]:
xgb1.fit(tsr_3_input_nomrs,tsr_3_output)
xgb_predict1 =xgb1.predict_proba(tsr_3_input_nomrs)
print(xgb_predict1)

[[0.06312771 0.7079936  0.07416856 0.06135311 0.05764879 0.03570823]
 [0.55166744 0.23638274 0.06785108 0.05712492 0.05387502 0.0330988 ]
 [0.61584298 0.17014106 0.06892728 0.057482   0.0544954  0.03311128]
 ...
 [0.04564278 0.07262122 0.06168569 0.05228112 0.0494812  0.71828798]
 [0.04844621 0.07820047 0.06537962 0.05651013 0.10126498 0.65019858]
 [0.05025452 0.08211499 0.0750388  0.09524979 0.11725396 0.58008795]]


In [22]:
xgb_pred1 = cross_val_predict(xgb1,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, xgb_pred1)

array([[430, 179,  16,   6,   3,   1],
       [ 55, 954, 169,  42,   9,   2],
       [  5,  97, 440, 128,  25,   4],
       [  1,  12,  43, 465, 146,  14],
       [  2,   8,  18,  59, 650,  98],
       [  1,   2,   5,   7, 101, 554]], dtype=int64)

# 2 classes

In [23]:
tsr_3_output[(tsr_3_output == 0)|(tsr_3_output == 1)|(tsr_3_output == 2)] = 0
tsr_3_output[(tsr_3_output == 3)|(tsr_3_output == 4)|(tsr_3_output == 5)] = 1

## SVM

In [24]:
svc2 = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores2 = cross_val_score(svc2,tsr_3_input,tsr_3_output,cv = 10,scoring='accuracy')
print(svc_scores2)
print(svc_scores2.mean(), svc_scores2.std())

[0.85294118 0.92842105 0.93684211 0.92842105 0.94526316 0.92631579
 0.89473684 0.87578947 0.95789474 0.89894737]
0.9145572755417957 0.031267037893870746


In [25]:
svc2.fit(tsr_3_input,tsr_3_output)
svc_predict2 =svc2.predict_proba(tsr_3_input)
print(svc_predict2)

[[0.96872861 0.03127139]
 [0.96537622 0.03462378]
 [0.99146159 0.00853841]
 ...
 [0.00850917 0.99149083]
 [0.00427977 0.99572023]
 [0.00320323 0.99679677]]


In [26]:
svc_pred2 = cross_val_predict(svc2,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, svc_pred2)

array([[2359,  206],
       [ 200, 1986]], dtype=int64)

In [27]:
svc3 = CalibratedClassifierCV(LinearSVC(penalty = "l2", dual=False, loss = "squared_hinge", C = 1, multi_class = "ovr", 
                                       random_state = 19)) 
svc_scores3 = cross_val_score(svc3,tsr_3_input_nomrs,tsr_3_output,cv = 10,scoring='accuracy')
print(svc_scores3)
print(svc_scores3.mean(), svc_scores3.std())

[0.85714286 0.88842105 0.90947368 0.88210526 0.91157895 0.88421053
 0.86315789 0.80631579 0.93052632 0.84421053]
0.8777142857142858 0.03460638850869574


In [28]:
svc3.fit(tsr_3_input_nomrs,tsr_3_output)
svc_predict3 =svc3.predict_proba(tsr_3_input_nomrs)
print(svc_predict3)

[[0.93250694 0.06749306]
 [0.94201006 0.05798994]
 [0.95992297 0.04007703]
 ...
 [0.01068807 0.98931193]
 [0.01059869 0.98940131]
 [0.02622772 0.97377228]]


In [29]:
svc_pred3 = cross_val_predict(svc3,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, svc_pred3)

array([[2359,  206],
       [ 200, 1986]], dtype=int64)

## RF

In [30]:
rf2 = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8)) 
rf_scores2 = cross_val_score(rf2,tsr_3_input,tsr_3_output,cv = 10,scoring='accuracy')
print(rf_scores2)
print(rf_scores2.mean(), rf_scores2.std())

[0.90546218 0.93473684 0.94315789 0.95368421 0.96210526 0.93052632
 0.91789474 0.90947368 0.98526316 0.89894737]
0.9341251658558161 0.026087580662657553


In [31]:
rf2.fit(tsr_3_input,tsr_3_output)
rf_predict2 =rf2.predict_proba(tsr_3_input)
print(rf_predict2)

[[0.96950238 0.03049762]
 [0.97106949 0.02893051]
 [0.97106949 0.02893051]
 ...
 [0.03577185 0.96422815]
 [0.03924607 0.96075393]
 [0.04315408 0.95684592]]


In [32]:
rf_pred2 = cross_val_predict(rf2,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, rf_pred2)

array([[2345,  220],
       [  93, 2093]], dtype=int64)

In [33]:
rf3 = CalibratedClassifierCV(RandomForestClassifier(criterion = "gini", n_estimators = 15, bootstrap=True, random_state = 19,
                                                  max_features = 0.8))  
rf_scores3 = cross_val_score(rf3,tsr_3_input_nomrs,tsr_3_output,cv = 10,scoring='accuracy')
print(rf_scores3)
print(rf_scores3.mean(), rf_scores3.std())

[0.83613445 0.88210526 0.89894737 0.87368421 0.92       0.88210526
 0.88210526 0.81684211 0.94105263 0.84631579]
0.8779292348518355 0.03569938844965633


In [34]:
rf3.fit(tsr_3_input_nomrs,tsr_3_output)
rf_predict3 =rf3.predict_proba(tsr_3_input_nomrs)
print(rf_predict3)

[[0.89905583 0.10094417]
 [0.95508925 0.04491075]
 [0.95508925 0.04491075]
 ...
 [0.05056272 0.94943728]
 [0.05543192 0.94456808]
 [0.07511621 0.92488379]]


In [35]:
rf_pred3 = cross_val_predict(rf3,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, rf_pred3)

array([[2345,  220],
       [  93, 2093]], dtype=int64)

## XGBoost

In [36]:
xgb2 = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="binary:logistic", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19))
xgb_scores2 = cross_val_score(xgb2,tsr_3_input,tsr_3_output,cv = 10)
print(xgb_scores2)
print(xgb_scores2.mean(), xgb_scores2.std())

[0.90756303 0.93052632 0.94736842 0.95157895 0.96421053 0.93052632
 0.91789474 0.89684211 0.97894737 0.91368421]
0.9339141972578506 0.02489270741362081


In [37]:
xgb2.fit(tsr_3_input,tsr_3_output)
xgb_predict2 =xgb2.predict_proba(tsr_3_input)
print(xgb_predict2)

[[0.95956128 0.04043872]
 [0.95959253 0.04040747]
 [0.9595347  0.0404653 ]
 ...
 [0.06557068 0.93442932]
 [0.06590487 0.93409513]
 [0.07075953 0.92924047]]


In [38]:
xgb_pred2 = cross_val_predict(xgb2,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, xgb_pred2)

array([[2361,  204],
       [ 110, 2076]], dtype=int64)

In [39]:
xgb3 = CalibratedClassifierCV(XGBClassifier(booster = "gbtree", objective="binary:logistic", eval_metric = "auc", 
                                            use_label_encoder = False, random_state = 19))
xgb_scores3 = cross_val_score(xgb3,tsr_3_input_nomrs,tsr_3_output,cv = 10)
print(xgb_scores3)
print(xgb_scores3.mean(), xgb_scores3.std())

[0.85504202 0.90105263 0.91368421 0.89684211 0.90947368 0.88842105
 0.88       0.81473684 0.94315789 0.87578947]
0.8878199911543565 0.03328425201144544


In [40]:
xgb3.fit(tsr_3_input_nomrs,tsr_3_output)
xgb_predict3 =xgb3.predict_proba(tsr_3_input_nomrs)
print(xgb_predict3)

[[0.92822533 0.07177467]
 [0.92816759 0.07183241]
 [0.92795494 0.07204506]
 ...
 [0.0860306  0.9139694 ]
 [0.08615253 0.91384747]
 [0.08753951 0.91246049]]


In [41]:
xgb_pred3 = cross_val_predict(xgb3,tsr_3_input,tsr_3_output,cv = 10)
confusion_matrix(tsr_3_output, xgb_pred3)

array([[2361,  204],
       [ 110, 2076]], dtype=int64)

# Summary

## Mean & Std

In [42]:
svc_mean = np.array([svc_scores.mean(), svc_scores.std(), svc_scores[0], svc_scores[1], svc_scores[2], svc_scores[3],
                     svc_scores[4], svc_scores[5], svc_scores[6], svc_scores[7], svc_scores[8], svc_scores[9]])
rf_mean = np.array([rf_scores.mean(), rf_scores.std(), rf_scores[0], rf_scores[1], rf_scores[2], rf_scores[3],rf_scores[4], 
                    rf_scores[5], rf_scores[6], rf_scores[7], rf_scores[8], rf_scores[9]])
xgb_mean = np.array([xgb_scores.mean(), xgb_scores.std(), xgb_scores[0], xgb_scores[1], xgb_scores[2], xgb_scores[3],
                     xgb_scores[4], xgb_scores[5], xgb_scores[6], xgb_scores[7], xgb_scores[8], xgb_scores[9]])

svc_mean1 = np.array([svc_scores1.mean(), svc_scores1.std(), svc_scores1[0], svc_scores1[1], svc_scores1[2], svc_scores1[3],
                     svc_scores1[4], svc_scores1[5], svc_scores1[6], svc_scores1[7], svc_scores1[8], svc_scores1[9]])
rf_mean1 = np.array([rf_scores1.mean(), rf_scores1.std(), rf_scores1[0], rf_scores1[1], rf_scores1[2], rf_scores1[3],rf_scores1[4], 
                    rf_scores1[5], rf_scores1[6], rf_scores1[7], rf_scores1[8], rf_scores1[9]])
xgb_mean1 = np.array([xgb_scores1.mean(), xgb_scores1.std(), xgb_scores1[0], xgb_scores1[1], xgb_scores1[2], xgb_scores1[3],
                     xgb_scores1[4], xgb_scores1[5], xgb_scores1[6], xgb_scores1[7], xgb_scores1[8], xgb_scores1[9]])

svc_mean2 = np.array([svc_scores2.mean(), svc_scores2.std(), svc_scores2[0], svc_scores2[1], svc_scores2[2], svc_scores2[3],
                     svc_scores2[4], svc_scores2[5], svc_scores2[6], svc_scores2[7], svc_scores2[8], svc_scores2[9]])
rf_mean2 = np.array([rf_scores2.mean(), rf_scores2.std(), rf_scores2[0], rf_scores2[1], rf_scores2[2], rf_scores2[3],rf_scores2[4], 
                    rf_scores2[5], rf_scores2[6], rf_scores2[7], rf_scores2[8], rf_scores2[9]])
xgb_mean2 = np.array([xgb_scores2.mean(), xgb_scores2.std(), xgb_scores2[0], xgb_scores2[1], xgb_scores2[2], xgb_scores2[3],
                     xgb_scores2[4], xgb_scores2[5], xgb_scores2[6], xgb_scores2[7], xgb_scores2[8], xgb_scores2[9]])

svc_mean3 = np.array([svc_scores3.mean(), svc_scores3.std(), svc_scores3[0], svc_scores3[1], svc_scores3[2], svc_scores3[3],
                     svc_scores3[4], svc_scores3[5], svc_scores3[6], svc_scores3[7], svc_scores3[8], svc_scores3[9]])
rf_mean3 = np.array([rf_scores3.mean(), rf_scores3.std(), rf_scores3[0], rf_scores3[1], rf_scores3[2], rf_scores3[3],rf_scores3[4], 
                    rf_scores3[5], rf_scores3[6], rf_scores3[7], rf_scores3[8], rf_scores3[9]])
xgb_mean3 = np.array([xgb_scores3.mean(), xgb_scores3.std(), xgb_scores3[0], xgb_scores3[1], xgb_scores3[2], xgb_scores3[3],
                     xgb_scores3[4], xgb_scores3[5], xgb_scores3[6], xgb_scores3[7], xgb_scores3[8], xgb_scores3[9]])

In [43]:
tsr_3_mean = pd.DataFrame([svc_mean, rf_mean, xgb_mean,svc_mean1, rf_mean1, xgb_mean1, svc_mean2, rf_mean2, xgb_mean2,
                          svc_mean3, rf_mean3, xgb_mean3]).T
tsr_3_mean.index = ["Mean", "Std", "mean_1", "mean_2", "mean_3", "mean_4", "mean_5", "mean_6", "mean_7", "mean_8", "mean_9", "mean_10"]
tsr_3_mean.columns = ["svc", 'rf', 'xgb', "svc1", 'rf1', 'xgb1', 'svc2', 'rf2', 'xgb2', "svc3", 'rf3', 'xgb3']

In [44]:
csv_save = os.path.join(".", "tsr_3_mean.csv")
tsr_3_mean.to_csv(csv_save, index = True)

## Predicted Probability

In [45]:
svc_predict_0 = svc_predict[:, 0]
svc_predict_1 = svc_predict[:, 1]
svc_predict_2 = svc_predict[:, 2]
svc_predict_3 = svc_predict[:, 3]
svc_predict_4 = svc_predict[:, 4]
svc_predict_5 = svc_predict[:, 5]

rf_predict_0 = rf_predict[:, 0]
rf_predict_1 = rf_predict[:, 1]
rf_predict_2 = rf_predict[:, 2]
rf_predict_3 = rf_predict[:, 3]
rf_predict_4 = rf_predict[:, 4]
rf_predict_5 = rf_predict[:, 5]

xgb_predict_0 = xgb_predict[:, 0]
xgb_predict_1 = xgb_predict[:, 1]
xgb_predict_2 = xgb_predict[:, 2]
xgb_predict_3 = xgb_predict[:, 3]
xgb_predict_4 = xgb_predict[:, 4]
xgb_predict_5 = xgb_predict[:, 5]

svc_predict1_0 = svc_predict1[:, 0]
svc_predict1_1 = svc_predict1[:, 1]
svc_predict1_2 = svc_predict1[:, 2]
svc_predict1_3 = svc_predict1[:, 3]
svc_predict1_4 = svc_predict1[:, 4]
svc_predict1_5 = svc_predict1[:, 5]

rf_predict1_0 = rf_predict1[:, 0]
rf_predict1_1 = rf_predict1[:, 1]
rf_predict1_2 = rf_predict1[:, 2]
rf_predict1_3 = rf_predict1[:, 3]
rf_predict1_4 = rf_predict1[:, 4]
rf_predict1_5 = rf_predict1[:, 5]

xgb_predict1_0 = xgb_predict1[:, 0]
xgb_predict1_1 = xgb_predict1[:, 1]
xgb_predict1_2 = xgb_predict1[:, 2]
xgb_predict1_3 = xgb_predict1[:, 3]
xgb_predict1_4 = xgb_predict1[:, 4]
xgb_predict1_5 = xgb_predict1[:, 5]

svc_predict2_0 = svc_predict2[:, 0]
svc_predict2_1 = svc_predict2[:, 1]

rf_predict2_0 = rf_predict2[:, 0]
rf_predict2_1 = rf_predict2[:, 1]

xgb_predict2_0 = xgb_predict2[:, 0]
xgb_predict2_1 = xgb_predict2[:, 1]

svc_predict3_0 = svc_predict3[:, 0]
svc_predict3_1 = svc_predict3[:, 1]

rf_predict3_0 = rf_predict3[:, 0]
rf_predict3_1 = rf_predict3[:, 1]

xgb_predict3_0 = xgb_predict3[:, 0]
xgb_predict3_1 = xgb_predict3[:, 1]

In [46]:
tsr_3_pred_prob = pd.DataFrame([svc_predict_0, svc_predict_1, svc_predict_2, svc_predict_3, svc_predict_4, svc_predict_5, 
                               rf_predict_0, rf_predict_1, rf_predict_2, rf_predict_3, rf_predict_4, rf_predict_5,
                               xgb_predict_0, xgb_predict_1, xgb_predict_2, xgb_predict_3, xgb_predict_4, xgb_predict_5,
                                svc_predict1_0, svc_predict1_1, svc_predict1_2, svc_predict1_3, svc_predict1_4, svc_predict1_5, 
                               rf_predict1_0, rf_predict1_1, rf_predict1_2, rf_predict1_3, rf_predict1_4, rf_predict1_5,
                               xgb_predict1_0, xgb_predict1_1, xgb_predict1_2, xgb_predict1_3, xgb_predict1_4, xgb_predict1_5,
                               svc_predict2_0, svc_predict2_1, rf_predict2_0, rf_predict2_1, xgb_predict2_0, xgb_predict2_1,
                               svc_predict3_0, svc_predict3_1, rf_predict3_0, rf_predict3_1, xgb_predict3_0, xgb_predict3_1]).T
tsr_3_pred_prob.columns = ["svc_predict_0", "svc_predict_1", "svc_predict_2", "svc_predict_3", "svc_predict_4", "svc_predict_5", 
                           "rf_predict_0", "rf_predict_1", "rf_predict_2", "rf_predict_3", "rf_predict_4", "rf_predict_5",
                           "xgb_predict_0", "xgb_predict_1", "xgb_predict_2", 'xgb_predict_3', "xgb_predict_4", "xgb_predict_5",
                           "svc_predict1_0", "svc_predict1_1", "svc_predict1_2", "svc_predict1_3", "svc_predict1_4", "svc_predict1_5", 
                           "rf_predict1_0", "rf_predict1_1", "rf_predict1_2", "rf_predict1_3", "rf_predict1_4", "rf_predict1_5",
                           "xgb_predict1_0", "xgb_predict1_1", "xgb_predict1_2", 'xgb_predict1_3', "xgb_predict1_4", "xgb_predict1_5",
                           "svc_predict2_0", "svc_predict2_1", "rf_predict2_0", "rf_predict2_1", "xgb_predict2_0", "xgb_predict2_1",
                           "svc_predict3_0", "svc_predict3_1", "rf_predict3_0", "rf_predict3_1", "xgb_predict3_0", "xgb_predict3_1"]

In [47]:
csv_save2 = os.path.join(".", "tsr_3_pred_prob.csv")
tsr_3_pred_prob.to_csv(csv_save2, index = False)