In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from imblearn.metrics import sensitivity_score
from imblearn.metrics import specificity_score
from sklearn.metrics import precision_score
from sklearn.svm import SVC

In [2]:
# Single random seed shared by the CV splitter and the SVC estimators below.
seed = 34_675_341

In [3]:
# Cross-validation scheme used by the tuning objective: 5 stratified folds, repeated 3 times.
sp = RepeatedStratifiedKFold(
    n_splits=5,
    n_repeats=3,
    random_state=seed,
)

In [4]:
def _features_and_labels(table):
    """Split a loaded table into (features, labels).

    Features are every column from position 2 onward; labels are the column
    at position 1. Column 0 is not used by either part.
    """
    return table.iloc[:, 2:], table.iloc[:, 1]


# Cohort the model is tuned and fitted on.
TCGA = pd.read_csv('data_TCGA.csv')
X_TCGA, y_TCGA = _features_and_labels(TCGA)

# Remaining cohorts: in this notebook they are only predicted on and scored, never fitted.
GSE5764 = pd.read_csv('data_GSE5764.csv')
X_GSE5764, y_GSE5764 = _features_and_labels(GSE5764)
GSE7904 = pd.read_csv('data_GSE7904.csv')
X_GSE7904, y_GSE7904 = _features_and_labels(GSE7904)
GSE10780 = pd.read_csv('data_GSE10780.csv')
X_GSE10780, y_GSE10780 = _features_and_labels(GSE10780)
GSE10810 = pd.read_csv('data_GSE10810.csv')
X_GSE10810, y_GSE10810 = _features_and_labels(GSE10810)
GSE26910 = pd.read_csv('data_GSE26910.csv')
X_GSE26910, y_GSE26910 = _features_and_labels(GSE26910)
GSE29431 = pd.read_csv('data_GSE29431.csv')
X_GSE29431, y_GSE29431 = _features_and_labels(GSE29431)
GSE31192 = pd.read_csv('data_GSE31192.csv')
X_GSE31192, y_GSE31192 = _features_and_labels(GSE31192)
GSE33447 = pd.read_csv('data_GSE33447.csv')
X_GSE33447, y_GSE33447 = _features_and_labels(GSE33447)
GSE36295 = pd.read_csv('data_GSE36295.csv')
X_GSE36295, y_GSE36295 = _features_and_labels(GSE36295)
GSE38959 = pd.read_csv('data_GSE38959.csv')
X_GSE38959, y_GSE38959 = _features_and_labels(GSE38959)
GSE42568 = pd.read_csv('data_GSE42568.csv')
X_GSE42568, y_GSE42568 = _features_and_labels(GSE42568)
GSE65216 = pd.read_csv('data_GSE65216.csv')
X_GSE65216, y_GSE65216 = _features_and_labels(GSE65216)
GSE71053 = pd.read_csv('data_GSE71053.csv')
X_GSE71053, y_GSE71053 = _features_and_labels(GSE71053)
GSE76250 = pd.read_csv('data_GSE76250.csv')
X_GSE76250, y_GSE76250 = _features_and_labels(GSE76250)
GSE86374 = pd.read_csv('data_GSE86374.csv')
X_GSE86374, y_GSE86374 = _features_and_labels(GSE86374)
GSE162228 = pd.read_csv('data_GSE162228.csv')
X_GSE162228, y_GSE162228 = _features_and_labels(GSE162228)
GSE233242 = pd.read_csv('data_GSE233242.csv')
X_GSE233242, y_GSE233242 = _features_and_labels(GSE233242)

In [13]:
import hyperopt
from hyperopt import hp

In [14]:
def objective(params):
    """Hyperopt loss for tuning the SVC on the TCGA data.

    For every train/validation split produced by ``sp`` an SVC is fitted on
    the training part and scored by ROC-AUC on the validation part, using the
    probability in column 1 of ``predict_proba``.

    Parameters
    ----------
    params : dict
        Sampled hyperparameters; must contain the keys ``'C'`` and ``'gamma'``.

    Returns
    -------
    float
        Negative mean validation AUC over all splits (negated because
        hyperopt minimises the objective).
    """
    aucs = []
    for train_index, test_index in sp.split(X_TCGA, y_TCGA):
        X_train = X_TCGA.iloc[train_index, :]
        X_vali = X_TCGA.iloc[test_index, :]
        # split() yields positional indices, so the labels must be selected
        # positionally too. Plain y_TCGA[idx] is label-based and only matches
        # the feature rows when the Series carries a default RangeIndex.
        y_train = y_TCGA.iloc[train_index]
        y_vali = y_TCGA.iloc[test_index]
        model = SVC(random_state=seed,
                    C=params['C'],
                    gamma=params['gamma'],
                    probability=True)
        model.fit(X_train, y_train)
        pro_vali = model.predict_proba(X_vali)[:, 1]
        auc_vali = roc_auc_score(y_vali, pro_vali)
        aucs.append(auc_vali)
    return -np.mean(aucs)

In [15]:
# Hyperopt search space: C and gamma are each drawn uniformly between 0 and 10.
space = dict(
    C=hp.uniform('C', 0, 10),
    gamma=hp.uniform('gamma', 0, 10),
)

In [16]:
# Run 50 TPE trials minimising `objective` over `space`.
# rstate seeds the sampler so the search, and therefore best_param, is
# repeatable after a kernel restart; without it every run draws from a fresh
# unseeded generator and can land on different hyperparameters.
best_param = hyperopt.fmin(
    objective,
    space,
    hyperopt.tpe.suggest,
    max_evals=50,
    rstate=np.random.default_rng(seed),
)

100%|██████████| 50/50 [03:40<00:00,  4.41s/trial, best loss: -1.0]                 


In [17]:
# Show the C and gamma values returned by the hyperopt search.
best_param

{'C': np.float64(7.919027695105535), 'gamma': np.float64(0.013708528499826934)}

In [5]:
# Final classifier, fitted on the full TCGA data.
# C and gamma are the values printed for best_param in the tuning cell.
model = SVC(
    C=7.919027695105535,
    gamma=0.013708528499826934,
    probability=True,
    random_state=seed,
)
model.fit(X_TCGA, y_TCGA)

In [6]:
def _predict_both(features):
    """Return (probability from column 1 of predict_proba, hard label) from the fitted `model`."""
    return model.predict_proba(features)[:, 1], model.predict(features)


# pro_* holds the probabilities (used for AUC), pre_* the hard class labels
# (used for the threshold-based metrics and the CSV exports).
pro_TCGA, pre_TCGA = _predict_both(X_TCGA)
pro_GSE5764, pre_GSE5764 = _predict_both(X_GSE5764)
pro_GSE7904, pre_GSE7904 = _predict_both(X_GSE7904)
pro_GSE10780, pre_GSE10780 = _predict_both(X_GSE10780)
pro_GSE10810, pre_GSE10810 = _predict_both(X_GSE10810)
pro_GSE26910, pre_GSE26910 = _predict_both(X_GSE26910)
pro_GSE29431, pre_GSE29431 = _predict_both(X_GSE29431)
pro_GSE31192, pre_GSE31192 = _predict_both(X_GSE31192)
pro_GSE33447, pre_GSE33447 = _predict_both(X_GSE33447)
pro_GSE36295, pre_GSE36295 = _predict_both(X_GSE36295)
pro_GSE38959, pre_GSE38959 = _predict_both(X_GSE38959)
pro_GSE42568, pre_GSE42568 = _predict_both(X_GSE42568)
pro_GSE65216, pre_GSE65216 = _predict_both(X_GSE65216)
pro_GSE71053, pre_GSE71053 = _predict_both(X_GSE71053)
pro_GSE76250, pre_GSE76250 = _predict_both(X_GSE76250)
pro_GSE86374, pre_GSE86374 = _predict_both(X_GSE86374)
pro_GSE162228, pre_GSE162228 = _predict_both(X_GSE162228)
pro_GSE233242, pre_GSE233242 = _predict_both(X_GSE233242)

In [8]:
def _write_predictions(truth, predicted, path):
    """Save true labels ('True') and predicted labels ('Pre') side by side as a CSV without the index."""
    pd.DataFrame({'True': truth, 'Pre': predicted}).to_csv(path, index=False)


# One file per non-TCGA cohort; TCGA itself is not exported.
_write_predictions(y_GSE5764, pre_GSE5764, 'SVM_GSE5764.csv')
_write_predictions(y_GSE7904, pre_GSE7904, 'SVM_GSE7904.csv')
_write_predictions(y_GSE10780, pre_GSE10780, 'SVM_GSE10780.csv')
_write_predictions(y_GSE10810, pre_GSE10810, 'SVM_GSE10810.csv')
_write_predictions(y_GSE26910, pre_GSE26910, 'SVM_GSE26910.csv')
_write_predictions(y_GSE29431, pre_GSE29431, 'SVM_GSE29431.csv')
_write_predictions(y_GSE31192, pre_GSE31192, 'SVM_GSE31192.csv')
_write_predictions(y_GSE33447, pre_GSE33447, 'SVM_GSE33447.csv')
_write_predictions(y_GSE36295, pre_GSE36295, 'SVM_GSE36295.csv')
_write_predictions(y_GSE38959, pre_GSE38959, 'SVM_GSE38959.csv')
_write_predictions(y_GSE42568, pre_GSE42568, 'SVM_GSE42568.csv')
_write_predictions(y_GSE65216, pre_GSE65216, 'SVM_GSE65216.csv')
_write_predictions(y_GSE71053, pre_GSE71053, 'SVM_GSE71053.csv')
_write_predictions(y_GSE76250, pre_GSE76250, 'SVM_GSE76250.csv')
_write_predictions(y_GSE86374, pre_GSE86374, 'SVM_GSE86374.csv')
_write_predictions(y_GSE162228, pre_GSE162228, 'SVM_GSE162228.csv')
_write_predictions(y_GSE233242, pre_GSE233242, 'SVM_GSE233242.csv')

In [20]:
# TCGA (the data the model was fitted on): AUC is computed from the
# probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_TCGA),
    ('Accuracy = {:.3f}', accuracy_score, pre_TCGA),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_TCGA),
    ('Specificity = {:.3f}', specificity_score, pre_TCGA),
    ('Precision = {:.3f}', precision_score, pre_TCGA),
    ('F1 = {:.3f}', f1_score, pre_TCGA),
):
    print(template.format(metric(y_TCGA, scores)))

AUC = 1.000
Accuracy = 1.000
Sensitivity = 1.000
Specificity = 1.000
Precision = 1.000
F1 = 1.000


In [21]:
# GSE5764: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE5764),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE5764),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE5764),
    ('Specificity = {:.3f}', specificity_score, pre_GSE5764),
    ('Precision = {:.3f}', precision_score, pre_GSE5764),
    ('F1 = {:.3f}', f1_score, pre_GSE5764),
):
    print(template.format(metric(y_GSE5764, scores)))

AUC = 0.580
Accuracy = 0.333
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.333
F1 = 0.500


In [22]:
# GSE7904: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE7904),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE7904),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE7904),
    ('Specificity = {:.3f}', specificity_score, pre_GSE7904),
    ('Precision = {:.3f}', precision_score, pre_GSE7904),
    ('F1 = {:.3f}', f1_score, pre_GSE7904),
):
    print(template.format(metric(y_GSE7904, scores)))

AUC = 0.618
Accuracy = 0.860
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.860
F1 = 0.925


In [23]:
# GSE10780: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE10780),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE10780),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE10780),
    ('Specificity = {:.3f}', specificity_score, pre_GSE10780),
    ('Precision = {:.3f}', precision_score, pre_GSE10780),
    ('F1 = {:.3f}', f1_score, pre_GSE10780),
):
    print(template.format(metric(y_GSE10780, scores)))

AUC = 0.602
Accuracy = 0.238
Sensitivity = 1.000
Specificity = 0.014
Precision = 0.230
F1 = 0.373


In [24]:
# GSE10810: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE10810),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE10810),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE10810),
    ('Specificity = {:.3f}', specificity_score, pre_GSE10810),
    ('Precision = {:.3f}', precision_score, pre_GSE10810),
    ('F1 = {:.3f}', f1_score, pre_GSE10810),
):
    print(template.format(metric(y_GSE10810, scores)))

AUC = 0.615
Accuracy = 0.534
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.534
F1 = 0.697


In [25]:
# GSE26910: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE26910),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE26910),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE26910),
    ('Specificity = {:.3f}', specificity_score, pre_GSE26910),
    ('Precision = {:.3f}', precision_score, pre_GSE26910),
    ('F1 = {:.3f}', f1_score, pre_GSE26910),
):
    print(template.format(metric(y_GSE26910, scores)))

AUC = 0.917
Accuracy = 0.500
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.500
F1 = 0.667


In [26]:
# GSE29431: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE29431),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE29431),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE29431),
    ('Specificity = {:.3f}', specificity_score, pre_GSE29431),
    ('Precision = {:.3f}', precision_score, pre_GSE29431),
    ('F1 = {:.3f}', f1_score, pre_GSE29431),
):
    print(template.format(metric(y_GSE29431, scores)))

AUC = 0.981
Accuracy = 0.818
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.818
F1 = 0.900


In [27]:
# GSE31192: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE31192),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE31192),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE31192),
    ('Specificity = {:.3f}', specificity_score, pre_GSE31192),
    ('Precision = {:.3f}', precision_score, pre_GSE31192),
    ('F1 = {:.3f}', f1_score, pre_GSE31192),
):
    print(template.format(metric(y_GSE31192, scores)))

AUC = 0.535
Accuracy = 0.606
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.606
F1 = 0.755


In [28]:
# GSE33447: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE33447),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE33447),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE33447),
    ('Specificity = {:.3f}', specificity_score, pre_GSE33447),
    ('Precision = {:.3f}', precision_score, pre_GSE33447),
    ('F1 = {:.3f}', f1_score, pre_GSE33447),
):
    print(template.format(metric(y_GSE33447, scores)))

AUC = 0.750
Accuracy = 0.562
Sensitivity = 1.000
Specificity = 0.125
Precision = 0.533
F1 = 0.696


In [29]:
# GSE36295: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE36295),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE36295),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE36295),
    ('Specificity = {:.3f}', specificity_score, pre_GSE36295),
    ('Precision = {:.3f}', precision_score, pre_GSE36295),
    ('F1 = {:.3f}', f1_score, pre_GSE36295),
):
    print(template.format(metric(y_GSE36295, scores)))

AUC = 0.791
Accuracy = 0.900
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.900
F1 = 0.947


In [30]:
# GSE38959: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE38959),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE38959),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE38959),
    ('Specificity = {:.3f}', specificity_score, pre_GSE38959),
    ('Precision = {:.3f}', precision_score, pre_GSE38959),
    ('F1 = {:.3f}', f1_score, pre_GSE38959),
):
    print(template.format(metric(y_GSE38959, scores)))

AUC = 0.504
Accuracy = 0.638
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.638
F1 = 0.779


In [31]:
# GSE42568: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE42568),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE42568),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE42568),
    ('Specificity = {:.3f}', specificity_score, pre_GSE42568),
    ('Precision = {:.3f}', precision_score, pre_GSE42568),
    ('F1 = {:.3f}', f1_score, pre_GSE42568),
):
    print(template.format(metric(y_GSE42568, scores)))

AUC = 0.899
Accuracy = 0.860
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.860
F1 = 0.924


In [32]:
# GSE65216: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE65216),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE65216),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE65216),
    ('Specificity = {:.3f}', specificity_score, pre_GSE65216),
    ('Precision = {:.3f}', precision_score, pre_GSE65216),
    ('F1 = {:.3f}', f1_score, pre_GSE65216),
):
    print(template.format(metric(y_GSE65216, scores)))

AUC = 0.997
Accuracy = 0.936
Sensitivity = 1.000
Specificity = 0.045
Precision = 0.936
F1 = 0.967


In [33]:
# GSE71053: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE71053),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE71053),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE71053),
    ('Specificity = {:.3f}', specificity_score, pre_GSE71053),
    ('Precision = {:.3f}', precision_score, pre_GSE71053),
    ('F1 = {:.3f}', f1_score, pre_GSE71053),
):
    print(template.format(metric(y_GSE71053, scores)))

AUC = 0.653
Accuracy = 0.333
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.333
F1 = 0.500


In [34]:
# GSE76250: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE76250),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE76250),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE76250),
    ('Specificity = {:.3f}', specificity_score, pre_GSE76250),
    ('Precision = {:.3f}', precision_score, pre_GSE76250),
    ('F1 = {:.3f}', f1_score, pre_GSE76250),
):
    print(template.format(metric(y_GSE76250, scores)))

AUC = 0.250
Accuracy = 0.833
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.833
F1 = 0.909


In [35]:
# GSE86374: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE86374),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE86374),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE86374),
    ('Specificity = {:.3f}', specificity_score, pre_GSE86374),
    ('Precision = {:.3f}', precision_score, pre_GSE86374),
    ('F1 = {:.3f}', f1_score, pre_GSE86374),
):
    print(template.format(metric(y_GSE86374, scores)))

AUC = 0.344
Accuracy = 0.780
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.780
F1 = 0.876


In [36]:
# GSE162228: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE162228),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE162228),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE162228),
    ('Specificity = {:.3f}', specificity_score, pre_GSE162228),
    ('Precision = {:.3f}', precision_score, pre_GSE162228),
    ('F1 = {:.3f}', f1_score, pre_GSE162228),
):
    print(template.format(metric(y_GSE162228, scores)))

AUC = 0.650
Accuracy = 0.827
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.827
F1 = 0.905


In [37]:
# GSE233242: AUC from the probabilities, the other five metrics from the hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE233242),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE233242),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE233242),
    ('Specificity = {:.3f}', specificity_score, pre_GSE233242),
    ('Precision = {:.3f}', precision_score, pre_GSE233242),
    ('F1 = {:.3f}', f1_score, pre_GSE233242),
):
    print(template.format(metric(y_GSE233242, scores)))

AUC = 0.889
Accuracy = 0.570
Sensitivity = 1.000
Specificity = 0.140
Precision = 0.537
F1 = 0.699
