In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from imblearn.metrics import sensitivity_score
from imblearn.metrics import specificity_score
from sklearn.metrics import precision_score

In [5]:
# Single random seed reused by the CV splitter and the SVC estimators in this notebook.
seed = 873_456_345

In [6]:
def _read_cohort(csv_path):
    """Read one cohort CSV and return ``(frame, X, y)``.

    ``X`` is every column from position 2 onward and ``y`` is the column at
    position 1 of the loaded frame.
    """
    frame = pd.read_csv(csv_path)
    return frame, frame.iloc[:, 2:], frame.iloc[:, 1]


# One (raw frame, feature matrix, label column) triple per cohort file.
TCGA, X_TCGA, y_TCGA = _read_cohort('data_TCGA.csv')
GSE4183, X_GSE4183, y_GSE4183 = _read_cohort('data_GSE4183.csv')
GSE9348, X_GSE9348, y_GSE9348 = _read_cohort('data_GSE9348.csv')
GSE20916, X_GSE20916, y_GSE20916 = _read_cohort('data_GSE20916.csv')
GSE22598, X_GSE22598, y_GSE22598 = _read_cohort('data_GSE22598.csv')
GSE37364, X_GSE37364, y_GSE37364 = _read_cohort('data_GSE37364.csv')
GSE44076, X_GSE44076, y_GSE44076 = _read_cohort('data_GSE44076.csv')
GSE44861, X_GSE44861, y_GSE44861 = _read_cohort('data_GSE44861.csv')
GSE81558, X_GSE81558, y_GSE81558 = _read_cohort('data_GSE81558.csv')
GSE110224, X_GSE110224, y_GSE110224 = _read_cohort('data_GSE110224.csv')
GSE113513, X_GSE113513, y_GSE113513 = _read_cohort('data_GSE113513.csv')

In [7]:
# Cross-validation splitter used by the tuning objective:
# 5 stratified folds repeated 3 times, with `seed` as its random state.
sp = RepeatedStratifiedKFold(
    n_splits=5,
    n_repeats=3,
    random_state=seed,
)

In [19]:
import hyperopt
from hyperopt import hp

In [20]:
def objective(params):
    """Hyperopt loss: negated mean cross-validated ROC AUC of an SVC on TCGA.

    Parameters
    ----------
    params : dict
        Must provide ``'C'`` and ``'gamma'``; both are forwarded to ``SVC``.

    Returns
    -------
    float
        ``-mean(AUC)`` over every split yielded by ``sp``. The sign is
        flipped because the value is handed to ``hyperopt.fmin``, which
        minimises its objective.
    """
    aucs = []
    for train_index, test_index in sp.split(X_TCGA, y_TCGA):
        X_train = X_TCGA.iloc[train_index, :]
        X_vali = X_TCGA.iloc[test_index, :]
        # split() yields positional indices, so the labels must be selected
        # positionally as well. Plain y[...] is a label-based lookup and only
        # coincides with positions while the index is a default RangeIndex.
        y_train = y_TCGA.iloc[train_index]
        y_vali = y_TCGA.iloc[test_index]
        model = SVC(random_state=seed,
                    C=params['C'],
                    gamma=params['gamma'],
                    probability=True)
        model.fit(X_train, y_train)
        # Score the held-out fold with the second predict_proba column.
        pro_vali = model.predict_proba(X_vali)[:, 1]
        auc_vali = roc_auc_score(y_vali, pro_vali)
        aucs.append(auc_vali)
    return -np.mean(aucs)

In [21]:
# Search space for the tuner: one hp.uniform(label, 0, 1) entry per SVC
# hyper-parameter, where each hyperopt label equals its dict key.
space = {name: hp.uniform(name, 0, 1) for name in ('C', 'gamma')}

In [22]:
# Run 100 TPE trials over `space`, minimising `objective`.
# The sampler is seeded through `rstate` so that a re-run of this cell proposes
# the same trial sequence; without it the search result changes on every run.
best_param = hyperopt.fmin(
    fn=objective,
    space=space,
    algo=hyperopt.tpe.suggest,
    max_evals=100,
    rstate=np.random.default_rng(seed),
)

100%|██████████| 100/100 [00:39<00:00,  2.53trial/s, best loss: -1.0]


In [23]:
# Show the hyper-parameter dict returned by hyperopt.fmin as this cell's output.
best_param

{'C': np.float64(0.13527689628982287), 'gamma': np.float64(0.496799293554835)}

In [8]:
# Final estimator settings. C and gamma are the literal values shown in the
# recorded `best_param` output of the tuning run.
svc_settings = {
    'C': 0.13527689628982287,
    'gamma': 0.496799293554835,
    'probability': True,
    'random_state': seed,
}
model = SVC(**svc_settings)
# Fit on the complete TCGA table (no hold-out at this stage).
model.fit(X_TCGA, y_TCGA)

In [9]:
# Feature matrices in a fixed order: the TCGA table the model was fitted on,
# followed by the ten GSE cohorts.
_feature_sets = (
    X_TCGA, X_GSE4183, X_GSE9348, X_GSE20916, X_GSE22598, X_GSE37364,
    X_GSE44076, X_GSE44861, X_GSE81558, X_GSE110224, X_GSE113513,
)

# Second predict_proba column for every cohort.
(
    pro_TCGA, pro_GSE4183, pro_GSE9348, pro_GSE20916, pro_GSE22598, pro_GSE37364,
    pro_GSE44076, pro_GSE44861, pro_GSE81558, pro_GSE110224, pro_GSE113513,
) = (model.predict_proba(features)[:, 1] for features in _feature_sets)

# Hard class labels for every cohort.
# NOTE(review): the metric outputs recorded in this notebook report
# Sensitivity = 1.000 and Specificity = 0.000 for every cohort while AUC stays
# high, i.e. predict() appears to return a single class everywhere even though
# the probabilities rank samples well. Confirm whether that is intended before
# relying on these labels.
(
    pre_TCGA, pre_GSE4183, pre_GSE9348, pre_GSE20916, pre_GSE22598, pre_GSE37364,
    pre_GSE44076, pre_GSE44861, pre_GSE81558, pre_GSE110224, pre_GSE113513,
) = (model.predict(features) for features in _feature_sets)

In [10]:
# Write one two-column CSV ('True' labels, 'Pre' predicted labels) per GSE cohort.
for _out_path, _truth, _predicted in (
    ('SVM_GSE4183.csv', y_GSE4183, pre_GSE4183),
    ('SVM_GSE9348.csv', y_GSE9348, pre_GSE9348),
    ('SVM_GSE20916.csv', y_GSE20916, pre_GSE20916),
    ('SVM_GSE22598.csv', y_GSE22598, pre_GSE22598),
    ('SVM_GSE37364.csv', y_GSE37364, pre_GSE37364),
    ('SVM_GSE44076.csv', y_GSE44076, pre_GSE44076),
    ('SVM_GSE44861.csv', y_GSE44861, pre_GSE44861),
    ('SVM_GSE81558.csv', y_GSE81558, pre_GSE81558),
    ('SVM_GSE110224.csv', y_GSE110224, pre_GSE110224),
    ('SVM_GSE113513.csv', y_GSE113513, pre_GSE113513),
):
    _table = pd.DataFrame({'True': _truth, 'Pre': _predicted})
    _table.to_csv(_out_path, index=False)

In [28]:
# TCGA report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_TCGA),
    ('Accuracy = {:.3f}', accuracy_score, pre_TCGA),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_TCGA),
    ('Specificity = {:.3f}', specificity_score, pre_TCGA),
    ('Precision = {:.3f}', precision_score, pre_TCGA),
    ('F1 = {:.3f}', f1_score, pre_TCGA),
):
    print(_template.format(_metric(y_TCGA, _scores)))

AUC = 1.000
Accuracy = 0.882
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.882
F1 = 0.938


In [29]:
# GSE4183 report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE4183),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE4183),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE4183),
    ('Specificity = {:.3f}', specificity_score, pre_GSE4183),
    ('Precision = {:.3f}', precision_score, pre_GSE4183),
    ('F1 = {:.3f}', f1_score, pre_GSE4183),
):
    print(_template.format(_metric(y_GSE4183, _scores)))

AUC = 0.975
Accuracy = 0.652
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.652
F1 = 0.789


In [30]:
# GSE9348 report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE9348),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE9348),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE9348),
    ('Specificity = {:.3f}', specificity_score, pre_GSE9348),
    ('Precision = {:.3f}', precision_score, pre_GSE9348),
    ('F1 = {:.3f}', f1_score, pre_GSE9348),
):
    print(_template.format(_metric(y_GSE9348, _scores)))

AUC = 1.000
Accuracy = 0.854
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.854
F1 = 0.921


In [31]:
# GSE20916 report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE20916),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE20916),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE20916),
    ('Specificity = {:.3f}', specificity_score, pre_GSE20916),
    ('Precision = {:.3f}', precision_score, pre_GSE20916),
    ('F1 = {:.3f}', f1_score, pre_GSE20916),
):
    print(_template.format(_metric(y_GSE20916, _scores)))

AUC = 0.976
Accuracy = 0.469
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.469
F1 = 0.638


In [32]:
# GSE22598 report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE22598),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE22598),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE22598),
    ('Specificity = {:.3f}', specificity_score, pre_GSE22598),
    ('Precision = {:.3f}', precision_score, pre_GSE22598),
    ('F1 = {:.3f}', f1_score, pre_GSE22598),
):
    print(_template.format(_metric(y_GSE22598, _scores)))

AUC = 1.000
Accuracy = 0.500
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.500
F1 = 0.667


In [33]:
# GSE37364 report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE37364),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE37364),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE37364),
    ('Specificity = {:.3f}', specificity_score, pre_GSE37364),
    ('Precision = {:.3f}', precision_score, pre_GSE37364),
    ('F1 = {:.3f}', f1_score, pre_GSE37364),
):
    print(_template.format(_metric(y_GSE37364, _scores)))

AUC = 0.994
Accuracy = 0.269
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.269
F1 = 0.424


In [34]:
# GSE44076 report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE44076),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE44076),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE44076),
    ('Specificity = {:.3f}', specificity_score, pre_GSE44076),
    ('Precision = {:.3f}', precision_score, pre_GSE44076),
    ('F1 = {:.3f}', f1_score, pre_GSE44076),
):
    print(_template.format(_metric(y_GSE44076, _scores)))

AUC = 0.993
Accuracy = 0.500
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.500
F1 = 0.667


In [35]:
# GSE44861 report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE44861),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE44861),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE44861),
    ('Specificity = {:.3f}', specificity_score, pre_GSE44861),
    ('Precision = {:.3f}', precision_score, pre_GSE44861),
    ('F1 = {:.3f}', f1_score, pre_GSE44861),
):
    print(_template.format(_metric(y_GSE44861, _scores)))

AUC = 0.923
Accuracy = 0.505
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.505
F1 = 0.671


In [36]:
# GSE81558 report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE81558),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE81558),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE81558),
    ('Specificity = {:.3f}', specificity_score, pre_GSE81558),
    ('Precision = {:.3f}', precision_score, pre_GSE81558),
    ('F1 = {:.3f}', f1_score, pre_GSE81558),
):
    print(_template.format(_metric(y_GSE81558, _scores)))

AUC = 1.000
Accuracy = 0.719
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.719
F1 = 0.836


In [37]:
# GSE110224 report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE110224),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE110224),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE110224),
    ('Specificity = {:.3f}', specificity_score, pre_GSE110224),
    ('Precision = {:.3f}', precision_score, pre_GSE110224),
    ('F1 = {:.3f}', f1_score, pre_GSE110224),
):
    print(_template.format(_metric(y_GSE110224, _scores)))

AUC = 0.851
Accuracy = 0.500
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.500
F1 = 0.667


In [38]:
# GSE113513 report: AUC is scored on probabilities, the other metrics on hard labels.
for _template, _metric, _scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE113513),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE113513),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE113513),
    ('Specificity = {:.3f}', specificity_score, pre_GSE113513),
    ('Precision = {:.3f}', precision_score, pre_GSE113513),
    ('F1 = {:.3f}', f1_score, pre_GSE113513),
):
    print(_template.format(_metric(y_GSE113513, _scores)))

AUC = 0.949
Accuracy = 0.500
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.500
F1 = 0.667
