In [1]:
# Mount Google Drive (Colab only) so the CSV files under /content/drive can be read below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## ANN

In [2]:
import sys
import os

import pandas as pd
import numpy as np
from scipy import stats
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import auc, roc_curve

from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn import ensemble
from scipy.spatial.distance import pdist, squareform
from sklearn.model_selection import cross_val_score, ShuffleSplit


import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Plot configuration: use the FangSong font for sans-serif text and keep the minus sign
# as a plain hyphen (presumably so CJK labels and negative ticks render -- confirm).
matplotlib.rcParams.update({
    'font.sans-serif': ['FangSong'],
    'axes.unicode_minus': False,
})
# Provide stats.chisqprob as the chi-square survival function
# (presumably for code that still expects this scipy name -- confirm).
stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)

### ANN without 1 h / 2 h average daytime SBP & DBP

In [3]:
import torch
# Load the study table from the mounted Drive: GBK-encoded CSV, first column used as index.
data_range_pro = pd.read_csv(
    '/content/drive/My Drive/hypertension test/Nocturnal_hypertension by C/data_10factors_gbk_dayBP_201222.csv',
    encoding='gbk',
    index_col=0,
)
def preprocess(data0):
    """Standardize continuous predictors and one-hot encode low-cardinality ones.

    Every column except 'nocturnal_hypertension' is a predictor. Predictors with
    fewer than 5 distinct values are cast to int64 and dummy-encoded (first level
    dropped); all other predictors are z-scored with their own mean and sample
    standard deviation. The input frame is not modified.

    Parameters
    ----------
    data0 : pandas.DataFrame
        Raw table containing the predictors and the 'nocturnal_hypertension' column.

    Returns
    -------
    tuple
        (encoded frame with a fresh 0..n-1 index, means of the continuous columns,
        standard deviations of the continuous columns, continuous column names,
        dummy-encoded column names, all predictor names, target column name).
    """
    target = 'nocturnal_hypertension'
    data = data0.copy()
    X_col = [col for col in data.columns if col != target]
    dummy_col = [col for col in X_col if len(data[col].unique()) < 5]
    data[dummy_col] = data[dummy_col].astype('int64')
    float_col = [col for col in X_col if col not in dummy_col]

    # Standardize with the statistics that are also returned, computed once
    # (previously they were recomputed column by column inside a loop).
    m, s = data[float_col].mean(), data[float_col].std()
    if float_col:
        data[float_col] = (data[float_col] - m) / s

    # Build all dummy frames first and concatenate once instead of growing a
    # DataFrame with pd.concat inside the loop.
    dummy_frames = [
        pd.get_dummies(data[col], prefix=col, prefix_sep='_', drop_first=True)
        for col in dummy_col
    ]
    encoded = pd.concat([data[float_col]] + dummy_frames + [data[target]], axis=1)
    encoded.index = pd.Index(range(len(encoded)))
    return encoded, m, s, float_col, dummy_col, X_col, target

# Encode the raw table and cast the outcome column to 0/1 integers.
# NOTE(review): preprocess() standardizes with statistics of the whole table, i.e. before
# the K-fold split below, so every held-out fold contributes to the scaling -- confirm
# that this is acceptable for the reported test metrics.
data, m, s, float_col, dummy_col, X_col, y_col = preprocess(data_range_pro)
data[y_col] = data[y_col].astype('int')  # convert bool to int

R = 115306  # seed used for the K-fold shuffle and for the torch RNG

feature_cols = ['Clinic SBP,mmHg', 'eGFR,mL/ (min·1.73 m2)', 'BUN,mmol/L', 'Clinic DBP,mmHg',
                'nRAAS drugs intake_1', 'Hypertension_1', 'Age,y']
temp = feature_cols + [y_col]

# Cast to float64 first: a frame mixing float and bool (dummy) columns would give an
# object array, which torch.from_numpy cannot convert.
data_tensor = torch.from_numpy(data[temp].astype('float64').values).float()

X = data_tensor[:, :-1]
# reshape(-1, 1) replaces the deprecated Tensor.resize(3103, 1) and its hard-coded row count.
y = data_tensor[:, -1].reshape(-1, 1)

kf = KFold(10, shuffle=True, random_state=R)
torch.manual_seed(R)  # reproducible weight initialisation

metric_names = ('acc', 'sensitivity', 'specivity', 'auc', 'F1')
result = []  # one {'train': {...}, 'test': {...}} dict of plain floats per fold
for train_idx, test_idx in kf.split(range(X.shape[0])):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # One hidden layer of 15 sigmoid units; input width follows the feature matrix.
    model = torch.nn.Sequential(torch.nn.Linear(X.shape[1], 15),
                                torch.nn.Sigmoid(),
                                torch.nn.Linear(15, 1),
                                torch.nn.Sigmoid())
    loss = torch.nn.MSELoss(reduction='none')  # per-sample loss (was deprecated reduce=False)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05, weight_decay=0.001)

    # Balanced class weights n / (2 * n_class); y_train only holds 0.0 and 1.0.
    n_train = y_train.shape[0]
    n_pos = y_train.sum().item()
    n_neg = n_train - n_pos
    sw = y_train * (n_train / (2 * n_pos)) + (1 - y_train) * (n_train / (2 * n_neg))

    # Full-batch gradient descent on the weighted mean squared error.
    for epoch in range(10000):
        optimizer.zero_grad()
        out = model(X_train)
        loss1 = (sw * loss(out, y_train)).sum() / sw.sum()
        loss1.backward()
        optimizer.step()

    # Evaluate both splits with the same code path; no gradients are needed here.
    fold_metrics = {}
    with torch.no_grad():
        for split, X_split, y_split in (('train', X_train, y_train), ('test', X_test, y_test)):
            prob = model(X_split)
            pred = prob > 0.5
            is_pos = y_split == 1
            correct = pred.float() == y_split
            sen = correct[is_pos].float().mean().item()
            spe = correct[y_split == 0].float().mean().item()
            fpr, tpr, _ = roc_curve(y_split.numpy().ravel(), prob.numpy().ravel())
            # Guard against folds where nothing is predicted positive.
            n_pred_pos = pred.sum().item()
            pre = (pred & is_pos).sum().item() / n_pred_pos if n_pred_pos else 0.0
            f1 = 2 * pre * sen / (pre + sen) if pre + sen > 0 else 0.0
            fold_metrics[split] = {'acc': correct.float().mean().item(), 'sensitivity': sen,
                                   'specivity': spe, 'auc': auc(fpr, tpr), 'F1': f1}
    result.append(fold_metrics)

# Average every metric over the folds actually run (no hard-coded 0.1 factor).
cv_mean = {split: {name: float(np.mean([fold[split][name] for fold in result]))
                   for name in metric_names}
           for split in ('train', 'test')}
acc_train, sen_train, spe_train, auc_train, F1_train = [cv_mean['train'][name] for name in metric_names]
acc_test, sen_test, spe_test, auc_test, F1_test = [cv_mean['test'][name] for name in metric_names]

pd.DataFrame(cv_mean)




### ANN with 1 h / 2 h average daytime SBP & DBP

In [4]:
# Reload the study table from the mounted Drive: GBK-encoded CSV, first column used as index.
data_range_pro = pd.read_csv(
    '/content/drive/My Drive/hypertension test/Nocturnal_hypertension by C/data_10factors_gbk_dayBP_201222.csv',
    encoding='gbk',
    index_col=0,
)
# Daytime blood-pressure columns, stored as consecutive (SBP, DBP) pairs per window label.
# The labels look like 1-hour windows followed by 2-hour windows -- confirm against the data.
X_daytime = [
    window + pressure
    for window in ('8-9', '9-10', '10-11', '11-12', '3-4', '4-5', '5-6',
                   '8-10', '9-11', '10-12', '3-5', '4-6')
    for pressure in ('SBP', 'DBP')
]

In [5]:
# Empty frame with one column per daytime window, SBP only
# (the matching DBP names were commented out in the original cell).
df = pd.DataFrame(columns=(
    '8-9SBP', '9-10SBP', '10-11SBP', '11-12SBP', '3-4SBP', '4-5SBP', '5-6SBP',
    '8-10SBP', '9-11SBP', '10-12SBP', '3-5SBP', '4-6SBP',
))

In [6]:
import torch
# Reload the study table from the mounted Drive: GBK-encoded CSV, first column used as index.
data_range_pro = pd.read_csv(
    '/content/drive/My Drive/hypertension test/Nocturnal_hypertension by C/data_10factors_gbk_dayBP_201222.csv',
    encoding='gbk',
    index_col=0,
)
# NOTE(review): this cell re-defines preprocess(), which an earlier cell of the notebook
# already defines with the same logic; consider keeping a single definition.
def preprocess(data0):
    """Standardize continuous predictors and one-hot encode low-cardinality ones.

    Every column except 'nocturnal_hypertension' is a predictor. Predictors with
    fewer than 5 distinct values are cast to int64 and dummy-encoded (first level
    dropped); all other predictors are z-scored with their own mean and sample
    standard deviation. The input frame is not modified.

    Parameters
    ----------
    data0 : pandas.DataFrame
        Raw table containing the predictors and the 'nocturnal_hypertension' column.

    Returns
    -------
    tuple
        (encoded frame with a fresh 0..n-1 index, means of the continuous columns,
        standard deviations of the continuous columns, continuous column names,
        dummy-encoded column names, all predictor names, target column name).
    """
    target = 'nocturnal_hypertension'
    data = data0.copy()
    X_col = [col for col in data.columns if col != target]
    dummy_col = [col for col in X_col if len(data[col].unique()) < 5]
    data[dummy_col] = data[dummy_col].astype('int64')
    float_col = [col for col in X_col if col not in dummy_col]

    # Standardize with the statistics that are also returned, computed once
    # (previously they were recomputed column by column inside a loop).
    m, s = data[float_col].mean(), data[float_col].std()
    if float_col:
        data[float_col] = (data[float_col] - m) / s

    # Build all dummy frames first and concatenate once instead of growing a
    # DataFrame with pd.concat inside the loop.
    dummy_frames = [
        pd.get_dummies(data[col], prefix=col, prefix_sep='_', drop_first=True)
        for col in dummy_col
    ]
    encoded = pd.concat([data[float_col]] + dummy_frames + [data[target]], axis=1)
    encoded.index = pd.Index(range(len(encoded)))
    return encoded, m, s, float_col, dummy_col, X_col, target

# Encode the raw table and cast the outcome column to 0/1 integers.
# NOTE(review): preprocess() standardizes with statistics of the whole table, i.e. before
# the K-fold split below, so every held-out fold contributes to the scaling -- confirm
# that this is acceptable for the reported test metrics.
data, m, s, float_col, dummy_col, X_col, y_col = preprocess(data_range_pro)
data[y_col] = data[y_col].astype('int')  # convert bool to int

R = 115306  # seed used for the K-fold shuffle and for the torch RNG

base_cols = ['Clinic SBP,mmHg', 'eGFR,mL/ (min·1.73 m2)', 'BUN,mmol/L', 'Clinic DBP,mmHg',
             'nRAAS drugs intake_1', 'Hypertension_1', 'Age,y']
metric_names = ('acc', 'sensitivity', 'specivity', 'auc', 'F1')
cv_summary = []  # one row of fold-averaged metrics per daytime window

# X_daytime stores consecutive (SBP, DBP) column pairs; train one model family per pair.
for sbp_col, dbp_col in zip(X_daytime[0::2], X_daytime[1::2]):
    temp = base_cols + [sbp_col, dbp_col, y_col]
    # Cast to float64 first: a frame mixing float and bool (dummy) columns would give an
    # object array, which torch.from_numpy cannot convert.
    data_tensor = torch.from_numpy(data[temp].astype('float64').values).float()

    X = data_tensor[:, :-1]
    # reshape(-1, 1) replaces the deprecated Tensor.resize(3103, 1) and its hard-coded row count.
    y = data_tensor[:, -1].reshape(-1, 1)

    kf = KFold(10, shuffle=True, random_state=R)
    torch.manual_seed(R)  # reproducible weight initialisation for every window

    result = []  # one {'train': {...}, 'test': {...}} dict of plain floats per fold
    for train_idx, test_idx in kf.split(range(X.shape[0])):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # One hidden layer of 15 sigmoid units; input width follows the feature matrix.
        model = torch.nn.Sequential(torch.nn.Linear(X.shape[1], 15),
                                    torch.nn.Sigmoid(),
                                    torch.nn.Linear(15, 1),
                                    torch.nn.Sigmoid())
        loss = torch.nn.MSELoss(reduction='none')  # per-sample loss (was deprecated reduce=False)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.05, weight_decay=0.001)

        # Balanced class weights n / (2 * n_class); y_train only holds 0.0 and 1.0.
        n_train = y_train.shape[0]
        n_pos = y_train.sum().item()
        n_neg = n_train - n_pos
        sw = y_train * (n_train / (2 * n_pos)) + (1 - y_train) * (n_train / (2 * n_neg))

        # Full-batch gradient descent on the weighted mean squared error.
        for epoch in range(10000):
            optimizer.zero_grad()
            out = model(X_train)
            loss1 = (sw * loss(out, y_train)).sum() / sw.sum()
            loss1.backward()
            optimizer.step()

        # Evaluate both splits with the same code path; no gradients are needed here.
        fold_metrics = {}
        with torch.no_grad():
            for split, X_split, y_split in (('train', X_train, y_train), ('test', X_test, y_test)):
                prob = model(X_split)
                pred = prob > 0.5
                is_pos = y_split == 1
                correct = pred.float() == y_split
                sen = correct[is_pos].float().mean().item()
                spe = correct[y_split == 0].float().mean().item()
                fpr, tpr, _ = roc_curve(y_split.numpy().ravel(), prob.numpy().ravel())
                # Guard against folds where nothing is predicted positive.
                n_pred_pos = pred.sum().item()
                pre = (pred & is_pos).sum().item() / n_pred_pos if n_pred_pos else 0.0
                f1 = 2 * pre * sen / (pre + sen) if pre + sen > 0 else 0.0
                fold_metrics[split] = {'acc': correct.float().mean().item(), 'sensitivity': sen,
                                       'specivity': spe, 'auc': auc(fpr, tpr), 'F1': f1}
        result.append(fold_metrics)

    # Average every metric over the folds actually run (no hard-coded 0.1 factor).
    cv_mean = {split: {name: float(np.mean([fold[split][name] for fold in result]))
                       for name in metric_names}
               for split in ('train', 'test')}
    acc_train, sen_train, spe_train, auc_train, F1_train = [cv_mean['train'][name] for name in metric_names]
    acc_test, sen_test, spe_test, auc_test, F1_test = [cv_mean['test'][name] for name in metric_names]

    # Keep this window's averages; previously each iteration overwrote the last one.
    row = {'SBP': sbp_col, 'DBP': dbp_col}
    for split in ('train', 'test'):
        for name in metric_names:
            row[name + '_' + split] = cv_mean[split][name]
    cv_summary.append(row)

pd.DataFrame(cv_summary)



### **Test with external validation data**

In [10]:
# Reload the study table and load the second table used for external validation
# (both GBK-encoded CSVs on the mounted Drive, first column used as index).
data_range_pro = pd.read_csv(
    '/content/drive/My Drive/hypertension test/Nocturnal_hypertension by C/data_10factors_gbk_dayBP_201222.csv',
    encoding='gbk',
    index_col=0,
)
data_ex_valid = pd.read_csv(
    '/content/drive/My Drive/hypertension test/Nocturnal_hypertension by C/data_ex_valid_new.csv',
    encoding='gbk',
    index_col=0,
)

In [11]:
# Daytime blood-pressure columns, stored as consecutive (SBP, DBP) pairs per window label.
# The labels look like 1-hour windows followed by 2-hour windows -- confirm against the data.
X_daytime = [
    window + pressure
    for window in ('8-9', '9-10', '10-11', '11-12', '3-4', '4-5', '5-6',
                   '8-10', '9-11', '10-12', '3-5', '4-6')
    for pressure in ('SBP', 'DBP')
]

In [13]:
# Encode the development table and the external table, casting both outcomes to 0/1 integers.
# NOTE(review): preprocess() standardizes each table with its own mean/std, so the external
# data are not scaled with the development-set statistics (m, s), and the development table
# is scaled before the K-fold split -- confirm both are intended.
data, m, s, float_col, dummy_col, X_col, y_col = preprocess(data_range_pro)
data['nocturnal_hypertension'] = data['nocturnal_hypertension'].astype('int')
data_ex, m_ex, s_ex, float_col_ex, dummy_col_ex, X_col_ex, y_col_ex = preprocess(data_ex_valid)
data_ex['nocturnal_hypertension'] = data_ex['nocturnal_hypertension'].astype('int')

R = 115306  # seed used for the K-fold shuffle and for the torch RNG

base_cols = ['Clinic SBP,mmHg', 'eGFR,mL/ (min·1.73 m2)', 'BUN,mmol/L', 'Clinic DBP,mmHg',
             'nRAAS drugs intake_1', 'Hypertension_1', 'Age,y']
metric_names = ('acc', 'sensitivity', 'specivity', 'auc', 'F1')
cv_summary = []  # one row of CV and external-validation metrics per daytime window

# X_daytime stores consecutive (SBP, DBP) column pairs; train one model family per pair.
for sbp_col, dbp_col in zip(X_daytime[0::2], X_daytime[1::2]):
    temp = base_cols + [sbp_col, dbp_col, y_col]

    # Cast to float64 first: a frame mixing float and bool (dummy) columns would give an
    # object array, which torch.from_numpy cannot convert.
    data_tensor = torch.from_numpy(data[temp].astype('float64').values).float()
    data_tensor_ex = torch.from_numpy(data_ex[temp].astype('float64').values).float()

    # reshape(-1, 1) replaces the deprecated Tensor.resize(3103, 1) / resize(229, 1)
    # and their hard-coded row counts.
    X = data_tensor[:, :-1]
    y = data_tensor[:, -1].reshape(-1, 1)

    X_ex = data_tensor_ex[:, :-1]
    y_ex = data_tensor_ex[:, -1].reshape(-1, 1)

    kf = KFold(10, shuffle=True, random_state=R)
    torch.manual_seed(R)  # reproducible weight initialisation for every window

    result = []  # one {'train': {...}, 'test': {...}} dict of plain floats per fold
    for train_idx, test_idx in kf.split(range(X.shape[0])):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # One hidden layer of 15 sigmoid units; input width follows the feature matrix.
        model = torch.nn.Sequential(torch.nn.Linear(X.shape[1], 15),
                                    torch.nn.Sigmoid(),
                                    torch.nn.Linear(15, 1),
                                    torch.nn.Sigmoid())
        loss = torch.nn.MSELoss(reduction='none')  # per-sample loss (was deprecated reduce=False)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.05, weight_decay=0.001)

        # Balanced class weights n / (2 * n_class); y_train only holds 0.0 and 1.0.
        n_train = y_train.shape[0]
        n_pos = y_train.sum().item()
        n_neg = n_train - n_pos
        sw = y_train * (n_train / (2 * n_pos)) + (1 - y_train) * (n_train / (2 * n_neg))

        # Full-batch gradient descent on the weighted mean squared error.
        for epoch in range(10000):
            optimizer.zero_grad()
            out = model(X_train)
            loss1 = (sw * loss(out, y_train)).sum() / sw.sum()
            loss1.backward()
            optimizer.step()

        # Evaluate both splits with the same code path; no gradients are needed here.
        fold_metrics = {}
        with torch.no_grad():
            for split, X_split, y_split in (('train', X_train, y_train), ('test', X_test, y_test)):
                prob = model(X_split)
                pred = prob > 0.5
                is_pos = y_split == 1
                correct = pred.float() == y_split
                sen = correct[is_pos].float().mean().item()
                spe = correct[y_split == 0].float().mean().item()
                fpr, tpr, _ = roc_curve(y_split.numpy().ravel(), prob.numpy().ravel())
                # Guard against folds where nothing is predicted positive.
                n_pred_pos = pred.sum().item()
                pre = (pred & is_pos).sum().item() / n_pred_pos if n_pred_pos else 0.0
                f1 = 2 * pre * sen / (pre + sen) if pre + sen > 0 else 0.0
                fold_metrics[split] = {'acc': correct.float().mean().item(), 'sensitivity': sen,
                                       'specivity': spe, 'auc': auc(fpr, tpr), 'F1': f1}
        result.append(fold_metrics)

    # Average every metric over the folds actually run (no hard-coded 0.1 factor).
    cv_mean = {split: {name: float(np.mean([fold[split][name] for fold in result]))
                       for name in metric_names}
               for split in ('train', 'test')}
    acc_train, sen_train, spe_train, auc_train, F1_train = [cv_mean['train'][name] for name in metric_names]
    acc_test, sen_test, spe_test, auc_test, F1_test = [cv_mean['test'][name] for name in metric_names]

    # External validation.
    # NOTE(review): `model` here is the network trained in the LAST cross-validation fold
    # only (as in the original cell); confirm whether a model refit on all data is wanted.
    with torch.no_grad():
        out_ex = model(X_ex)
    pred_ex = out_ex > 0.5
    correct_ex = pred_ex.float() == y_ex
    acc_ex = correct_ex.float().mean().item()
    sen_ex = correct_ex[y_ex == 1].float().mean().item()
    spe_ex = correct_ex[y_ex == 0].float().mean().item()

    fpr_ex, tpr_ex, _ = roc_curve(y_ex.numpy().ravel(), out_ex.numpy().ravel())
    auc_ex = auc(fpr_ex, tpr_ex)
    n_pred_pos_ex = pred_ex.sum().item()
    pre_ex = (pred_ex & (y_ex == 1)).sum().item() / n_pred_pos_ex if n_pred_pos_ex else 0.0

    # Bug fix: F1 on the external set must use the external sensitivity (sen_ex); the
    # original formula used sen_test, the cross-validation average.
    F1_ex = 2 * pre_ex * sen_ex / (pre_ex + sen_ex) if pre_ex + sen_ex > 0 else 0.0

    # Keep this window's metrics; previously each iteration overwrote the last one.
    row = {'SBP': sbp_col, 'DBP': dbp_col}
    for split in ('train', 'test'):
        for name in metric_names:
            row[name + '_' + split] = cv_mean[split][name]
    row.update({'acc_ex': acc_ex, 'sensitivity_ex': sen_ex, 'specivity_ex': spe_ex,
                'auc_ex': auc_ex, 'F1_ex': F1_ex})
    cv_summary.append(row)

pd.DataFrame(cv_summary)


