In [2]:
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides any warnings emitted by the
# model fits further down — confirm that silencing them is intended.
warnings.filterwarnings('ignore')

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from imblearn.metrics import sensitivity_score
from imblearn.metrics import specificity_score
from sklearn.metrics import precision_score
from sklearn.linear_model import LogisticRegression

In [4]:
# Single random seed shared by the CV splitter and every LogisticRegression below.
seed = 73467541

In [5]:
# Cross-validation scheme used inside the hyperparameter search:
# stratified 5-fold, repeated 3 times, seeded for repeatable splits.
sp = RepeatedStratifiedKFold(
    n_splits=5,
    n_repeats=3,
    random_state=seed,
)

In [6]:
def _read_cohort(csv_path):
    """Read one cohort CSV and split it into (frame, features, labels).

    The label is taken from the column at position 1 and the features from
    every column at position 2 onward; the column at position 0 is not used
    by the model (presumably a sample identifier — TODO confirm).
    """
    frame = pd.read_csv(csv_path)
    features = frame.iloc[:, 2:]
    labels = frame.iloc[:, 1]
    return frame, features, labels


# One (raw frame, X, y) triple per GEO cohort; names are kept because later
# cells refer to each X_*/y_* variable directly.
GSE13904, X_GSE13904, y_GSE13904 = _read_cohort('data_GSE13904.csv')
GSE54514, X_GSE54514, y_GSE54514 = _read_cohort('data_GSE54514.csv')
GSE64457, X_GSE64457, y_GSE64457 = _read_cohort('data_GSE64457.csv')
GSE65682, X_GSE65682, y_GSE65682 = _read_cohort('data_GSE65682.csv')
GSE69063, X_GSE69063, y_GSE69063 = _read_cohort('data_GSE69063.csv')
GSE95233, X_GSE95233, y_GSE95233 = _read_cohort('data_GSE95233.csv')
GSE100159, X_GSE100159, y_GSE100159 = _read_cohort('data_GSE100159.csv')
GSE134347, X_GSE134347, y_GSE134347 = _read_cohort('data_GSE134347.csv')
GSE137342, X_GSE137342, y_GSE137342 = _read_cohort('data_GSE137342.csv')
GSE154918, X_GSE154918, y_GSE154918 = _read_cohort('data_GSE154918.csv')
GSE205672, X_GSE205672, y_GSE205672 = _read_cohort('data_GSE205672.csv')
GSE210797, X_GSE210797, y_GSE210797 = _read_cohort('data_GSE210797.csv')
GSE211210, X_GSE211210, y_GSE211210 = _read_cohort('data_GSE211210.csv')
GSE236713, X_GSE236713, y_GSE236713 = _read_cohort('data_GSE236713.csv')

In [7]:
import hyperopt
from hyperopt import hp

In [8]:
def objective(params):
    """Hyperopt loss for tuning logistic regression on the GSE205672 cohort.

    Parameters
    ----------
    params : dict
        Sampled hyperparameters; only ``params['C']`` is read and passed to
        ``LogisticRegression`` as its ``C`` argument.

    Returns
    -------
    float
        The negated mean ROC AUC over every train/validation split produced
        by ``sp`` (negated so that minimising the loss maximises the AUC).
    """
    fold_aucs = []
    for train_index, vali_index in sp.split(X_GSE205672, y_GSE205672):
        X_train = X_GSE205672.iloc[train_index, :]
        X_vali = X_GSE205672.iloc[vali_index, :]
        # The splitter yields positional indices, so the labels must be taken
        # by position as well; plain ``y[idx]`` is label-based and only agrees
        # with the feature rows while the Series has a default RangeIndex.
        y_train = y_GSE205672.iloc[train_index]
        y_vali = y_GSE205672.iloc[vali_index]
        model = LogisticRegression(random_state=seed,
                                   C=params['C'])
        model.fit(X_train, y_train)
        # Probability of the positive class (second column) drives the AUC.
        pro_vali = model.predict_proba(X_vali)[:, 1]
        fold_aucs.append(roc_auc_score(y_vali, pro_vali))
    return -np.mean(fold_aucs)

In [9]:
# Search space for the hyperopt run: C drawn uniformly between 0 and 1.
# NOTE(review): scikit-learn documents C as a strictly positive float, and the
# recorded optimum sits close to the upper bound — confirm these bounds are
# the intended ones.
space = dict(C=hp.uniform('C', 0, 1))

In [10]:
# Run 100 TPE trials minimising `objective` over `space`.
# The sampler is seeded with the notebook-wide `seed` so that the search, and
# therefore the selected C, is repeatable across kernel restarts.
# (Recent hyperopt releases expect a numpy Generator for `rstate`; very old
# releases expected np.random.RandomState(seed) instead.)
best_param = hyperopt.fmin(
    objective,
    space,
    hyperopt.tpe.suggest,
    max_evals=100,
    rstate=np.random.default_rng(seed),
)

100%|██████████| 100/100 [01:03<00:00,  1.58trial/s, best loss: -0.9977811483478857]


In [11]:
# Display the hyperparameter values returned by the search.
best_param

{'C': np.float64(0.9613851675417848)}

In [7]:
# Final model: logistic regression refitted on the complete GSE205672 cohort.
# The C value is hard-coded and matches the optimum printed by the hyperopt
# search, so this cell does not require the search to be re-run.
final_C = 0.9613851675417848
model = LogisticRegression(C=final_C, random_state=seed)
model.fit(X_GSE205672, y_GSE205672)

In [8]:
def _predict_both(features):
    """Return (positive-class probabilities, predicted labels) from `model`."""
    probabilities = model.predict_proba(features)[:, 1]
    labels = model.predict(features)
    return probabilities, labels


# pro_* holds the probability of the positive class, pre_* the hard label;
# names are kept because the export and metric cells refer to them directly.
pro_GSE13904, pre_GSE13904 = _predict_both(X_GSE13904)
pro_GSE54514, pre_GSE54514 = _predict_both(X_GSE54514)
pro_GSE64457, pre_GSE64457 = _predict_both(X_GSE64457)
pro_GSE65682, pre_GSE65682 = _predict_both(X_GSE65682)
pro_GSE69063, pre_GSE69063 = _predict_both(X_GSE69063)
pro_GSE95233, pre_GSE95233 = _predict_both(X_GSE95233)
pro_GSE100159, pre_GSE100159 = _predict_both(X_GSE100159)
pro_GSE134347, pre_GSE134347 = _predict_both(X_GSE134347)
pro_GSE137342, pre_GSE137342 = _predict_both(X_GSE137342)
pro_GSE154918, pre_GSE154918 = _predict_both(X_GSE154918)
pro_GSE205672, pre_GSE205672 = _predict_both(X_GSE205672)
pro_GSE210797, pre_GSE210797 = _predict_both(X_GSE210797)
pro_GSE211210, pre_GSE211210 = _predict_both(X_GSE211210)
pro_GSE236713, pre_GSE236713 = _predict_both(X_GSE236713)

In [9]:
# Write one CSV per cohort with the true label next to the predicted label.
# GSE205672 (the cohort the model was fitted on) is not exported here.
exports = {
    'Logistic_GSE13904.csv': (y_GSE13904, pre_GSE13904),
    'Logistic_GSE54514.csv': (y_GSE54514, pre_GSE54514),
    'Logistic_GSE64457.csv': (y_GSE64457, pre_GSE64457),
    'Logistic_GSE65682.csv': (y_GSE65682, pre_GSE65682),
    'Logistic_GSE69063.csv': (y_GSE69063, pre_GSE69063),
    'Logistic_GSE95233.csv': (y_GSE95233, pre_GSE95233),
    'Logistic_GSE100159.csv': (y_GSE100159, pre_GSE100159),
    'Logistic_GSE134347.csv': (y_GSE134347, pre_GSE134347),
    'Logistic_GSE137342.csv': (y_GSE137342, pre_GSE137342),
    'Logistic_GSE154918.csv': (y_GSE154918, pre_GSE154918),
    'Logistic_GSE210797.csv': (y_GSE210797, pre_GSE210797),
    'Logistic_GSE211210.csv': (y_GSE211210, pre_GSE211210),
    'Logistic_GSE236713.csv': (y_GSE236713, pre_GSE236713),
}
for csv_path, (truth, predicted) in exports.items():
    table = pd.DataFrame({'True': truth, 'Pre': predicted})
    table.to_csv(csv_path, index=False)

In [15]:
# Metrics on GSE205672. The model was fitted on this same cohort, so these
# figures describe the training data rather than held-out performance.
# AUC is computed from probabilities, every other metric from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE205672),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE205672),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE205672),
    ('Specificity = {:.3f}', specificity_score, pre_GSE205672),
    ('Precision = {:.3f}', precision_score, pre_GSE205672),
    ('F1 = {:.3f}', f1_score, pre_GSE205672),
):
    print(template.format(metric(y_GSE205672, scores)))

AUC = 0.999
Accuracy = 0.987
Sensitivity = 0.975
Specificity = 0.993
Precision = 0.987
F1 = 0.981


In [16]:
# Metrics on GSE13904: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE13904),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE13904),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE13904),
    ('Specificity = {:.3f}', specificity_score, pre_GSE13904),
    ('Precision = {:.3f}', precision_score, pre_GSE13904),
    ('F1 = {:.3f}', f1_score, pre_GSE13904),
):
    print(template.format(metric(y_GSE13904, scores)))

AUC = 0.941
Accuracy = 0.864
Sensitivity = 0.854
Specificity = 0.944
Precision = 0.993
F1 = 0.918


In [17]:
# Metrics on GSE54514: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE54514),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE54514),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE54514),
    ('Specificity = {:.3f}', specificity_score, pre_GSE54514),
    ('Precision = {:.3f}', precision_score, pre_GSE54514),
    ('F1 = {:.3f}', f1_score, pre_GSE54514),
):
    print(template.format(metric(y_GSE54514, scores)))

AUC = 0.381
Accuracy = 0.463
Sensitivity = 0.484
Specificity = 0.444
Precision = 0.429
F1 = 0.455


In [18]:
# Metrics on GSE64457: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE64457),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE64457),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE64457),
    ('Specificity = {:.3f}', specificity_score, pre_GSE64457),
    ('Precision = {:.3f}', precision_score, pre_GSE64457),
    ('F1 = {:.3f}', f1_score, pre_GSE64457),
):
    print(template.format(metric(y_GSE64457, scores)))

AUC = 0.950
Accuracy = 0.870
Sensitivity = 0.800
Specificity = 1.000
Precision = 1.000
F1 = 0.889


In [19]:
# Metrics on GSE65682: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE65682),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE65682),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE65682),
    ('Specificity = {:.3f}', specificity_score, pre_GSE65682),
    ('Precision = {:.3f}', precision_score, pre_GSE65682),
    ('F1 = {:.3f}', f1_score, pre_GSE65682),
):
    print(template.format(metric(y_GSE65682, scores)))

AUC = 0.528
Accuracy = 0.581
Sensitivity = 0.725
Specificity = 0.405
Precision = 0.597
F1 = 0.655


In [20]:
# Metrics on GSE69063: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE69063),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE69063),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE69063),
    ('Specificity = {:.3f}', specificity_score, pre_GSE69063),
    ('Precision = {:.3f}', precision_score, pre_GSE69063),
    ('F1 = {:.3f}', f1_score, pre_GSE69063),
):
    print(template.format(metric(y_GSE69063, scores)))

AUC = 0.812
Accuracy = 0.711
Sensitivity = 0.684
Specificity = 0.758
Precision = 0.830
F1 = 0.750


In [21]:
# Metrics on GSE95233: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE95233),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE95233),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE95233),
    ('Specificity = {:.3f}', specificity_score, pre_GSE95233),
    ('Precision = {:.3f}', precision_score, pre_GSE95233),
    ('F1 = {:.3f}', f1_score, pre_GSE95233),
):
    print(template.format(metric(y_GSE95233, scores)))

AUC = 0.922
Accuracy = 0.863
Sensitivity = 0.863
Specificity = 0.864
Precision = 0.967
F1 = 0.912


In [22]:
# Metrics on GSE100159: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE100159),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE100159),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE100159),
    ('Specificity = {:.3f}', specificity_score, pre_GSE100159),
    ('Precision = {:.3f}', precision_score, pre_GSE100159),
    ('F1 = {:.3f}', f1_score, pre_GSE100159),
):
    print(template.format(metric(y_GSE100159, scores)))

AUC = 1.000
Accuracy = 0.978
Sensitivity = 0.970
Specificity = 1.000
Precision = 1.000
F1 = 0.985


In [23]:
# Metrics on GSE134347: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE134347),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE134347),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE134347),
    ('Specificity = {:.3f}', specificity_score, pre_GSE134347),
    ('Precision = {:.3f}', precision_score, pre_GSE134347),
    ('F1 = {:.3f}', f1_score, pre_GSE134347),
):
    print(template.format(metric(y_GSE134347, scores)))

AUC = 0.756
Accuracy = 0.707
Sensitivity = 0.776
Specificity = 0.578
Precision = 0.776
F1 = 0.776


In [24]:
# Metrics on GSE137342: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE137342),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE137342),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE137342),
    ('Specificity = {:.3f}', specificity_score, pre_GSE137342),
    ('Precision = {:.3f}', precision_score, pre_GSE137342),
    ('F1 = {:.3f}', f1_score, pre_GSE137342),
):
    print(template.format(metric(y_GSE137342, scores)))

AUC = 0.855
Accuracy = 0.800
Sensitivity = 0.837
Specificity = 0.667
Precision = 0.900
F1 = 0.867


In [25]:
# Metrics on GSE154918: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE154918),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE154918),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE154918),
    ('Specificity = {:.3f}', specificity_score, pre_GSE154918),
    ('Precision = {:.3f}', precision_score, pre_GSE154918),
    ('F1 = {:.3f}', f1_score, pre_GSE154918),
):
    print(template.format(metric(y_GSE154918, scores)))

AUC = 0.639
Accuracy = 0.717
Sensitivity = 0.600
Specificity = 0.775
Precision = 0.571
F1 = 0.585


In [26]:
# Metrics on GSE210797: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE210797),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE210797),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE210797),
    ('Specificity = {:.3f}', specificity_score, pre_GSE210797),
    ('Precision = {:.3f}', precision_score, pre_GSE210797),
    ('F1 = {:.3f}', f1_score, pre_GSE210797),
):
    print(template.format(metric(y_GSE210797, scores)))

AUC = 0.472
Accuracy = 0.417
Sensitivity = 0.500
Specificity = 0.333
Precision = 0.429
F1 = 0.462


In [27]:
# Metrics on GSE211210: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE211210),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE211210),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE211210),
    ('Specificity = {:.3f}', specificity_score, pre_GSE211210),
    ('Precision = {:.3f}', precision_score, pre_GSE211210),
    ('F1 = {:.3f}', f1_score, pre_GSE211210),
):
    print(template.format(metric(y_GSE211210, scores)))

AUC = 0.920
Accuracy = 0.800
Sensitivity = 1.000
Specificity = 0.600
Precision = 0.714
F1 = 0.833


In [28]:
# Metrics on GSE236713: AUC from probabilities, the rest from hard labels.
for template, metric, scores in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE236713),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE236713),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE236713),
    ('Specificity = {:.3f}', specificity_score, pre_GSE236713),
    ('Precision = {:.3f}', precision_score, pre_GSE236713),
    ('F1 = {:.3f}', f1_score, pre_GSE236713),
):
    print(template.format(metric(y_GSE236713, scores)))

AUC = 0.995
Accuracy = 0.890
Sensitivity = 0.880
Specificity = 1.000
Precision = 1.000
F1 = 0.936
