In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from imblearn.metrics import sensitivity_score
from imblearn.metrics import specificity_score
from sklearn.metrics import precision_score

In [3]:
# Single random seed shared by the CV splitter and the LogisticRegression models.
seed = 63_456_341

In [4]:
# Stratified 5-fold cross-validation repeated 3 times (15 splits in total),
# seeded so the splits are identical on every run.
sp = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=seed)

In [5]:
def _load_cohort(csv_path):
    """Read one cohort CSV and return ``(frame, features, labels)``.

    The split is purely positional: column 1 becomes the label Series and
    columns 2 onward become the feature frame. Column 0 is left out of both
    (presumably a sample identifier -- TODO confirm against the CSV files).
    """
    frame = pd.read_csv(csv_path)
    return frame, frame.iloc[:, 2:], frame.iloc[:, 1]


# GSE57691 is the cohort the model is tuned and fit on; the other four are
# only ever scored with the fitted model.
GSE57691, X_GSE57691, y_GSE57691 = _load_cohort('data_GSE57691.csv')
GSE47472, X_GSE47472, y_GSE47472 = _load_cohort('data_GSE47472.csv')
GSE183464, X_GSE183464, y_GSE183464 = _load_cohort('data_GSE183464.csv')
GSE232911, X_GSE232911, y_GSE232911 = _load_cohort('data_GSE232911.csv')
GSE235161, X_GSE235161, y_GSE235161 = _load_cohort('data_GSE235161.csv')

In [23]:
import hyperopt
from hyperopt import hp

In [24]:
def objective(params):
    """Hyperopt loss: negative mean cross-validated AUC on GSE57691.

    For every split produced by the module-level splitter ``sp``, a
    ``LogisticRegression`` with the candidate ``C`` is fit on the training
    part of GSE57691 and scored by ROC AUC on the held-out part.

    Parameters
    ----------
    params : dict
        Sampled hyperparameters; only ``params['C']`` is read.

    Returns
    -------
    float
        ``-mean(AUC)`` over all splits. The sign is flipped because
        ``hyperopt.fmin`` minimises the value returned here.
    """
    aucs = []
    for train_index, test_index in sp.split(X_GSE57691, y_GSE57691):
        X_train = X_GSE57691.iloc[train_index, :]
        X_vali = X_GSE57691.iloc[test_index, :]
        # The splitter yields positional indices, so labels must be selected
        # by position as well; plain ``y[idx]`` is a label lookup that only
        # happens to agree while the Series carries a default RangeIndex.
        y_train = y_GSE57691.iloc[train_index]
        y_vali = y_GSE57691.iloc[test_index]
        model = LogisticRegression(random_state=seed,
                                   C=params['C'])
        model.fit(X_train, y_train)
        # Column 1 of predict_proba is the probability of the second class.
        pro_vali = model.predict_proba(X_vali)[:, 1]
        auc_vali = roc_auc_score(y_vali, pro_vali)
        aucs.append(auc_vali)
    return -np.mean(aucs)

In [25]:
# Search space: one hyperparameter, C, drawn uniformly between 0 and 10.
# NOTE(review): scikit-learn requires C to be strictly positive; a draw of
# exactly 0 is vanishingly unlikely but the lower bound is technically invalid.
space = dict(C=hp.uniform('C', 0, 10))

In [26]:
# Run 100 TPE trials minimising `objective` over `space`.
# `rstate` is seeded so the sequence of sampled candidates -- and therefore
# best_param -- is the same on every Restart & Run All, consistent with the
# fixed seed already used for the CV splitter and the models.
# (hyperopt >= 0.2.7 expects a numpy.random.Generator here.)
best_param = hyperopt.fmin(
    fn=objective,
    space=space,
    algo=hyperopt.tpe.suggest,
    max_evals=100,
    rstate=np.random.default_rng(seed),
)

100%|██████████| 100/100 [00:25<00:00,  3.96trial/s, best loss: -0.9833333333333333]


In [27]:
# Display the hyperparameter dict returned by hyperopt.fmin.
best_param

{'C': np.float64(9.398468741195186)}

In [6]:
# Final model: refit on the complete GSE57691 cohort. The C value is the
# one reported by the hyperopt search, pasted in as a literal so this cell
# does not depend on re-running the search.
model = LogisticRegression(C=9.398468741195186, random_state=seed)
model.fit(X_GSE57691, y_GSE57691)

In [7]:
# Score all five cohorts with the fitted model, in the same order throughout.
#   pro_* : probability of the second class (column 1 of predict_proba)
#   pre_* : hard class predictions
_feature_sets = (
    X_GSE57691,
    X_GSE47472,
    X_GSE183464,
    X_GSE232911,
    X_GSE235161,
)
(
    pro_GSE57691,
    pro_GSE47472,
    pro_GSE183464,
    pro_GSE232911,
    pro_GSE235161,
) = [model.predict_proba(features)[:, 1] for features in _feature_sets]
(
    pre_GSE57691,
    pre_GSE47472,
    pre_GSE183464,
    pre_GSE232911,
    pre_GSE235161,
) = [model.predict(features) for features in _feature_sets]

In [8]:
# Write true vs. predicted labels for the four cohorts that were not used
# for fitting; the training cohort GSE57691 is not exported.
_exports = (
    ('Logistic_GSE47472.csv', y_GSE47472, pre_GSE47472),
    ('Logistic_GSE183464.csv', y_GSE183464, pre_GSE183464),
    ('Logistic_GSE232911.csv', y_GSE232911, pre_GSE232911),
    ('Logistic_GSE235161.csv', y_GSE235161, pre_GSE235161),
)
for _out_path, _y_true, _y_pred in _exports:
    _table = pd.DataFrame({'True': _y_true, 'Pre': _y_pred})
    _table.to_csv(_out_path, index=False)

In [30]:
# In-sample metrics for GSE57691 (the cohort the model was fit on).
# AUC is computed from the probabilities, everything else from hard labels.
for _template, _scorer, _y_hat in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE57691),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE57691),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE57691),
    ('Specificity = {:.3f}', specificity_score, pre_GSE57691),
    ('Precision = {:.3f}', precision_score, pre_GSE57691),
    ('F1 = {:.3f}', f1_score, pre_GSE57691),
):
    print(_template.format(_scorer(y_GSE57691, _y_hat)))

AUC = 0.992
Accuracy = 0.983
Sensitivity = 1.000
Specificity = 0.900
Precision = 0.980
F1 = 0.990


In [31]:
# Metrics on GSE47472, a cohort the model was not fit on.
# AUC is computed from the probabilities, everything else from hard labels.
for _template, _scorer, _y_hat in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE47472),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE47472),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE47472),
    ('Specificity = {:.3f}', specificity_score, pre_GSE47472),
    ('Precision = {:.3f}', precision_score, pre_GSE47472),
    ('F1 = {:.3f}', f1_score, pre_GSE47472),
):
    print(_template.format(_scorer(y_GSE47472, _y_hat)))

AUC = 0.884
Accuracy = 0.636
Sensitivity = 1.000
Specificity = 0.000
Precision = 0.636
F1 = 0.778


In [32]:
# Metrics on GSE183464, a cohort the model was not fit on.
# AUC is computed from the probabilities, everything else from hard labels.
for _template, _scorer, _y_hat in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE183464),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE183464),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE183464),
    ('Specificity = {:.3f}', specificity_score, pre_GSE183464),
    ('Precision = {:.3f}', precision_score, pre_GSE183464),
    ('F1 = {:.3f}', f1_score, pre_GSE183464),
):
    print(_template.format(_scorer(y_GSE183464, _y_hat)))

AUC = 0.592
Accuracy = 0.429
Sensitivity = 0.571
Specificity = 0.286
Precision = 0.444
F1 = 0.500


In [33]:
# Metrics on GSE232911, a cohort the model was not fit on.
# AUC is computed from the probabilities, everything else from hard labels.
for _template, _scorer, _y_hat in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE232911),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE232911),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE232911),
    ('Specificity = {:.3f}', specificity_score, pre_GSE232911),
    ('Precision = {:.3f}', precision_score, pre_GSE232911),
    ('F1 = {:.3f}', f1_score, pre_GSE232911),
):
    print(_template.format(_scorer(y_GSE232911, _y_hat)))

AUC = 0.848
Accuracy = 0.866
Sensitivity = 0.900
Specificity = 0.577
Precision = 0.947
F1 = 0.923


In [34]:
# Metrics on GSE235161, a cohort the model was not fit on.
# AUC is computed from the probabilities, everything else from hard labels.
for _template, _scorer, _y_hat in (
    ('AUC = {:.3f}', roc_auc_score, pro_GSE235161),
    ('Accuracy = {:.3f}', accuracy_score, pre_GSE235161),
    ('Sensitivity = {:.3f}', sensitivity_score, pre_GSE235161),
    ('Specificity = {:.3f}', specificity_score, pre_GSE235161),
    ('Precision = {:.3f}', precision_score, pre_GSE235161),
    ('F1 = {:.3f}', f1_score, pre_GSE235161),
):
    print(_template.format(_scorer(y_GSE235161, _y_hat)))

AUC = 0.733
Accuracy = 0.707
Sensitivity = 0.867
Specificity = 0.273
Precision = 0.765
F1 = 0.812
