In [None]:
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import numpy as np
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from sklearn.preprocessing import StandardScaler
# K-fold cross-validation splitter
from sklearn.model_selection import KFold
import warnings
warnings.filterwarnings("ignore")

from sklearn import manifold, datasets


In [None]:

# One seed shared by every random number generator used in this notebook.
seed_value = 2021

# Exported for completeness. NOTE(review): hash randomisation is normally fixed
# when the interpreter starts, so this presumably only affects child processes
# launched after this cell - confirm if hash-order reproducibility matters.
os.environ['PYTHONHASHSEED'] = str(seed_value)

# Python's and NumPy's global generators.
random.seed(seed_value)
np.random.seed(seed_value)

# PyTorch: CPU generator, the current GPU, then every GPU.
torch.manual_seed(seed_value)
torch.cuda.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)

# Ask cuDNN to use deterministic algorithms.
torch.backends.cudnn.deterministic = True


In [None]:
# Padding variant of the exported data to load: 'pad_0' or 'interp'.
pad_pattern = 'pad_0'

# Real labelled samples, plus the two generated sets (normalised / plain file names).
dataset = pd.read_csv(f'./data/{pad_pattern}/classify.csv')
data_diffusion_withoutgan_norm = pd.read_csv(f'./data/{pad_pattern}/diffusion_norm.csv')
data_diffusion_withoutgan = pd.read_csv(f'./data/{pad_pattern}/diffusion.csv')


# data_diffusion_withoutgan_norm=pd.read_csv('../脉搏/data/hypertensionData/wgan_generated_norm.csv')
# #在data_diffusion_withoutgan_norm后面拼上0
# data_diffusion_withoutgan_norm['56']=0
# data_diffusion_withoutgan=pd.read_csv('../脉搏/data/hypertensionData/wgan_generated.csv')
# data_diffusion_withoutgan['56']=1



In [None]:
# Real dataset as a 2-D array: signal columns followed by the label column.
data = np.array(dataset)

# Plot the first sample whose label is 1. Only the signal columns are drawn;
# the trailing label value is not part of the waveform and would otherwise
# show up as a spurious last point on the curve.
positive_rows = data[data[:, -1] == 1]
if len(positive_rows) > 0:
    plt.plot(positive_rows[0, :-1])

In [None]:
# Show the first row of the raw array (signal values followed by the label).
data[0]

In [None]:
# The last column of each frame is the class label; everything before it is the signal.
label = dataset.iloc[:, -1]
data = dataset.iloc[:, :-1]
gen_data = data_diffusion_withoutgan.iloc[:, :-1]

# Stack the real rows labelled 1 on top of the first 500 generated rows.
data_0 = pd.concat([data.loc[label == 1], gen_data.iloc[:500]], axis=0)
data_0.shape

In [None]:
# Materialise the stacked frame as a NumPy array so rows can be indexed by position.
data_1=np.array(data_0)
# Plot row 503. NOTE(review): presumably a generated sample, which holds only if
# data_0 starts with exactly 500 real rows - confirm against data_0.shape.
plt.plot(data_1[503])

In [None]:
from scipy.stats import ttest_ind

# Split the stacked array at row 500.
# NOTE(review): assumes the first 500 rows are the real samples - confirm.
v1 = data_1[:500]
v2 = data_1[500:]

# Running sums of the p-values (p) and of the test statistics (f)
# over 500 row-by-row independent t-tests.
p = 0
f = 0
for i in range(500):
    res = ttest_ind(v1[i], v2[i])
    f += res.statistic
    p += res.pvalue

# Mean test statistic over the 500 pairs.
f / 500

In [None]:
# Mean p-value over the 500 row-by-row t-tests accumulated in the previous cell.
p/500

In [None]:

# Project the stacked samples to 2-D with t-SNE. The embedding is stochastic, so
# the notebook seed is passed explicitly: re-running this cell on its own then
# reproduces the same layout instead of depending on the global RNG state.
tsne = manifold.TSNE(n_components=2, random_state=seed_value)
X_tsne = tsne.fit_transform(data_0)

In [None]:
# Min-max normalise each embedding axis to [0, 1].
x_min, x_max = X_tsne.min(0), X_tsne.max(0)
X_norm = (X_tsne - x_min) / (x_max - x_min)

# Hide the axis ticks.
plt.xticks([])
plt.yticks([])

# First 500 rows in red, the remaining rows in blue. One vectorised call per
# group replaces one plt.scatter call per point; draw order (red first, then
# blue) and styling are unchanged.
# NOTE(review): the 500 boundary assumes data_0 starts with exactly 500 real
# rows followed by the generated ones - confirm.
plt.scatter(X_norm[:500, 0], X_norm[:500, 1], color='red', alpha=0.5)
plt.scatter(X_norm[500:, 0], X_norm[500:, 1], color='blue', alpha=0.5)

In [None]:
# Choose the training mode:
#   'real'          - real training split only
#   'generate'      - generated samples only
#   'real_gneerate' - real training split plus generated samples
# The list is called train_ways (not train) so that it does not share a name
# with the train() function defined later in this notebook; re-running this
# cell can therefore no longer clobber that function.
train_ways = ['real', 'generate', 'real_gneerate']
train_way = train_ways[2]
train_way

In [None]:
def _split_features_labels(frame):
    """Return (features, labels): every column but the last, and the last column."""
    features = np.array(frame.iloc[:, :-1])
    labels = np.array(frame.iloc[:, -1])
    return features, labels


def get_dataset(train_way,dataset,data_diffusion_withoutgan_norm,data_diffusion_withoutgan):
    """Build the train/test tensors and data loaders for the chosen training mode.

    The real ``dataset`` is always split 80/20 (``random_state=0``); the test
    part always comes from that split. What goes into the training part
    depends on ``train_way``:

    * ``'real'``: the real training split only.
    * ``'generate'``: the first 10000 rows of each generated frame only.
    * ``'real_gneerate'`` (the correctly spelled ``'real_generate'`` is accepted
      as an alias): the real training split plus the first 5000 rows of each
      generated frame.

    Args:
        train_way: One of the mode strings above.
        dataset: Real samples; last column is the label.
        data_diffusion_withoutgan_norm: Generated samples; last column is the label.
        data_diffusion_withoutgan: Generated samples; last column is the label.

    Returns:
        ``(train_loader, test_loader, train_x, train_y, test_x, test_y)`` where
        the four tensors are float tensors standardised with statistics fitted
        on the training features.

    Raises:
        ValueError: If ``train_way`` is not a known mode.
    """
    generated_only = train_way == 'generate'
    real_plus_generated = train_way in ('real_gneerate', 'real_generate')
    if not (train_way == 'real' or generated_only or real_plus_generated):
        # Previously this only printed 'error' and silently trained on real data.
        raise ValueError(
            f"unknown train_way {train_way!r}; expected 'real', 'generate' or 'real_gneerate'"
        )

    # Split the real data into train and test parts.
    data, label = _split_features_labels(dataset)
    train_x, test_x, train_y, test_y = train_test_split(data, label, test_size=0.2, random_state=0)

    if generated_only or real_plus_generated:
        data_diffusion, label_diffusion = _split_features_labels(data_diffusion_withoutgan)
        data_diffusion_norm, label_diffusion_norm = _split_features_labels(data_diffusion_withoutgan_norm)

        if generated_only:
            train_x = np.concatenate((data_diffusion_norm[:10000], data_diffusion[:10000]), axis=0)
            train_y = np.concatenate((label_diffusion_norm[:10000], label_diffusion[:10000]), axis=0)
        else:
            # Each generated block is paired with ITS OWN labels. The earlier code
            # attached label_diffusion_norm to data_diffusion, mislabelling those rows.
            train_x = np.concatenate((train_x, data_diffusion[:5000], data_diffusion_norm[:5000]), axis=0)
            train_y = np.concatenate((train_y, label_diffusion[:5000], label_diffusion_norm[:5000]), axis=0)

    print(len(train_x))

    # Standardise with statistics from the training features only.
    scaler = StandardScaler()
    scaler.fit(train_x)
    train_x = scaler.transform(train_x)
    test_x = scaler.transform(test_x)

    # Convert to float tensors.
    train_x = torch.tensor(train_x).float()
    train_y = torch.tensor(train_y).float()
    test_x = torch.tensor(test_x).float()
    test_y = torch.tensor(test_y).float()

    # Wrap in datasets / loaders.
    batch_size = 256
    train_dataset = torch.utils.data.TensorDataset(train_x, train_y)
    test_dataset = torch.utils.data.TensorDataset(test_x, test_y)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    # Evaluation must see every test sample exactly once, in a fixed order:
    # no shuffling and no dropping of the last partial batch.
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
    return train_loader, test_loader, train_x, train_y, test_x, test_y

In [None]:
# Build the loaders and tensors for the training mode selected above.
train_loader,test_loader,train_x,train_y,test_x,test_y=get_dataset(train_way,dataset,data_diffusion_withoutgan_norm,data_diffusion_withoutgan)

# 5-fold splitter; it is only created here and is not used by the cells visible in this notebook.
kf = KFold(n_splits = 5)

In [None]:
#LSTM


#Input: (batch, length)
#Output: (batch,)
class classifier(nn.Module):
    """Two-layer LSTM binary classifier.

    Takes a batch of univariate sequences shaped (batch, length) and returns
    one probability per sequence, shaped (batch,).
    """

    def __init__(self):
        super().__init__()
        # Recurrent encoder: one scalar feature per time step, 32 hidden units.
        self.lstm = nn.LSTM(input_size=1, hidden_size=32, num_layers=2, batch_first=True)
        # Linear head mapping the 32-dim hidden state to a single score.
        self.fc = nn.Sequential(nn.Linear(32, 1))

    def forward(self, x):
        # (batch, length) -> (batch, length, 1) so every step has one feature.
        sequence = x.unsqueeze(2)
        hidden_states, _ = self.lstm(sequence)
        # Keep only the output of the last time step.
        last_step = hidden_states[:, -1, :]
        score = self.fc(last_step)
        # Squash to (0, 1) and flatten to one value per sample.
        return torch.sigmoid(score).view(-1)

In [None]:
# #CNN

# #输入batch,length
# #输出batch,
# class classifier(nn.Module):
#     def __init__(self):
#         super(classifier, self).__init__()
#         self.sequential1=nn.Sequential(
#             nn.BatchNorm1d(1),
#             #两层卷积一层池化
#             nn.Conv1d(in_channels=1,out_channels=16,kernel_size=5,stride=1,padding=1),
#             nn.BatchNorm1d(16),
#             nn.ReLU(),
#             nn.Conv1d(in_channels=16,out_channels=32,kernel_size=5,stride=1,padding=1),
#             nn.BatchNorm1d(32),
#             nn.ReLU(),
#             nn.MaxPool1d(kernel_size=2,stride=2),
#         )
#         self.sequential2=nn.Sequential(
#             #两层卷积一层池化
#             nn.Conv1d(in_channels=32,out_channels=16,kernel_size=5,stride=1,padding=1),
#             nn.BatchNorm1d(16),
#             nn.ReLU(),
#             nn.Conv1d(in_channels=16,out_channels=1,kernel_size=5,stride=1,padding=1),
#             nn.BatchNorm1d(1),
#             nn.ReLU(),
#             nn.MaxPool1d(kernel_size=2,stride=2),
#         )

#         self.sequential3=nn.Sequential(
#             nn.Linear(11,1),
#             nn.Sigmoid()
#         )


#     def forward(self, x):
#         x1=x.view(x.size(0),1,-1)
#         x1=self.sequential1(x1)
#         x1=self.sequential2(x1)
#         x1 = x1.view(x1.size(0), -1)

#         x = self.sequential3(x1)
#         return x.view(-1)

In [None]:
# Smoke test: push one random sequence of length 56 through a fresh model.
model=classifier()
x=torch.randn(1,56)
# Not the last expression of the cell, so the notebook does not display this shape.
x.shape
model(x)

In [None]:
# Hyper-parameters and training objects.
# Use the first CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
epochs=200
# Binary cross-entropy on probabilities; the model already applies a sigmoid.
loss_func=nn.BCELoss().to(device)
# Fresh model; this replaces the instance created for the smoke test above.
model=classifier().to(device)
optimizer=torch.optim.AdamW(model.parameters(),lr=0.01)

In [None]:
def train(epochs,train_loader,test_loader,model,loss_func,optimizer):
    """Train ``model`` and evaluate it on ``test_loader`` after every epoch.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on a notebook-level ``device`` variable.
    Evaluation runs in ``model.eval()`` mode under ``torch.no_grad()``; the
    model is switched back to training mode afterwards, so it is in training
    mode when the function returns.

    Loss and accuracy are averaged over samples rather than over batches, so a
    smaller final batch does not skew them. With equally sized batches this is
    identical to a per-batch average.

    Args:
        epochs: Number of passes over ``train_loader``.
        train_loader: Iterable of ``(x, y)`` batches used for optimisation.
        test_loader: Iterable of ``(x, y)`` batches used for evaluation.
        model: Module returning one value per sample; values above 0.5 count
            as a positive prediction.
        loss_func: Callable ``loss_func(output, y)``. Assumed to return the
            mean loss of the batch, as ``nn.BCELoss()`` does by default.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        Seven lists with one entry per epoch, in this order: train loss,
        train accuracy, test loss, test accuracy, test precision, test recall
        and test F1 (the last three macro-averaged).

    Raises:
        ValueError: If a loader yields no samples during an epoch.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    train_loss = []
    train_accuracy = []
    test_loss = []
    test_accuracy = []
    test_precision = []
    test_recall = []
    test_f1 = []

    model.train()

    for epoch in range(epochs):
        # ---- optimisation pass ----
        loss_sum, correct, seen = 0.0, 0, 0
        for x, y in train_loader:
            x = x.to(run_device)
            y = y.to(run_device)
            output = model(x)
            loss = loss_func(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_len = len(y)
            loss_sum += loss.item() * batch_len
            correct += (output > 0.5).float().eq(y).sum().item()
            seen += batch_len

        if seen == 0:
            raise ValueError('train_loader yielded no samples')
        epoch_train_loss = loss_sum / seen
        epoch_train_accuracy = correct / seen
        train_loss.append(epoch_train_loss)
        train_accuracy.append(epoch_train_accuracy)

        print('epoch:{}, train_loss:{}, train_accuracy:{}'.format(epoch, epoch_train_loss, epoch_train_accuracy))

        # ---- evaluation pass ----
        # eval() disables training-only behaviour (dropout, batch-norm updates)
        # so the reported metrics describe the model as it would be used.
        model.eval()
        loss_sum, correct, seen = 0.0, 0, 0
        y_true = []
        y_pred = []
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(run_device)
                y = y.to(run_device)
                output = model(x)
                loss = loss_func(output, y)

                batch_len = len(y)
                loss_sum += loss.item() * batch_len
                correct += (output > 0.5).float().eq(y).sum().item()
                seen += batch_len
                y_true.extend(y.cpu().numpy())
                y_pred.extend((output > 0.5).cpu().numpy())
        model.train()

        if seen == 0:
            raise ValueError('test_loader yielded no samples')

        precision, recall, f1 = precision_recall_fscore_support(y_true,y_pred,average='macro')[:-1]
        test_precision.append(precision)
        test_recall.append(recall)
        test_f1.append(f1)

        epoch_test_loss = loss_sum / seen
        epoch_test_accuracy = correct / seen
        test_loss.append(epoch_test_loss)
        test_accuracy.append(epoch_test_accuracy)

        print('epoch:{}, test_loss:{}, test_accuracy:{}'.format(epoch, epoch_test_loss, epoch_test_accuracy))
    return train_loss,train_accuracy,test_loss,test_accuracy,test_precision,test_recall,test_f1


In [None]:
# Run the full training loop; each returned list holds one value per epoch.
train_loss,train_accuracy,test_loss,test_accuracy,test_precision,test_recall,test_f1=train(epochs,train_loader,test_loader,model,loss_func,optimizer)

In [None]:
# Test accuracy after the final epoch (the last epoch, not necessarily the best one).
test_accuracy[-1]

In [None]:
# Macro-averaged test precision after the final epoch.
test_precision[-1]

In [None]:
# Macro-averaged test recall after the final epoch.
test_recall[-1]

In [None]:
# Macro-averaged test F1 score after the final epoch.
test_f1[-1]