In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Paths and hyper-parameters
nsr_path = '../Dataset/mit-bih-normal-sinus-rhythm-database-1.0.0/'
scd_path = '../Dataset/sudden-cardiac-death-holter-database-1.0.0/'
ecg_info_save_path = '../Dataset/ecg_info/'
hrv_info_save_path = '../Dataset/hrv_info/'
# Presumably the sampling frequencies (Hz) of the two databases - TODO confirm.
nsr_fs, scd_fs = 128, 250
# Record selection for preprocessing: five SCD records are left out - three in
# which no SCA occurs and two with poor signal quality. They are the numbers
# skipped inside the 30-52 range below.
_scd_skipped = {37, 39, 40, 42, 49}
scd_indexs = [str(record) for record in range(30, 53) if record not in _scd_skipped]
nsr_indexs = (
    '16265 16272 16273 16420 16483 16539 16773 16786 16795 '
    '17052 17453 18177 18184 19088 19090 19093 19140 19830'
).split()

In [3]:
# Extract representative heartbeats (templates)
def beat_template_extraction(heart_beats,threshold=0.1):
    '''
    Group heartbeats into running-average templates.

    Beat '0' seeds the first template. Every following beat is compared
    (Euclidean distance) against all current templates: if the closest one is
    nearer than `threshold`, the beat is merged into it by updating that
    template's running mean; otherwise the beat starts a new template.

    Parameters
    ----------
    heart_beats : pandas.DataFrame
        One beat per column, with columns named '0', '1', ... .
        The loop runs up to ``heart_beats.shape[1] - 1``, i.e. it assumes the
        frame carries exactly one extra non-beat column (presumably the index
        column written by ``to_csv``) - TODO confirm against the file writer.
    threshold : float
        Maximum distance at which a beat is merged into an existing template.

    Returns
    -------
    temp : numpy.ndarray, shape (n_templates, beat_length)
        The templates (running means), always float64.
    nums : numpy.ndarray of int32, shape (n_templates,)
        Number of beats merged into each template.
    temp_map : dict
        Maps the beat column name (str) to the index of its template.
    '''
    # Force float64: with integer-valued columns the running-mean assignment
    # below would otherwise be silently truncated back to integers.
    temp = np.array([heart_beats['0']],dtype=np.float64)
    nums = np.array([1],dtype=np.int32)
    temp_map = {'0': 0}
    for i in range(1,heart_beats.shape[1]-1):
        key = str(i)
        beat = np.asarray(heart_beats[key],dtype=np.float64)
        dist = np.linalg.norm(temp-beat,axis=1)
        idx = int(np.argmin(dist))
        if dist[idx] < threshold:
            # Incremental mean: fold the new beat into the matched template.
            temp[idx] = (temp[idx]*nums[idx]+beat)/(nums[idx]+1)
            nums[idx] += 1
            temp_map[key] = idx
        else:
            # No template is close enough: this beat becomes a new template,
            # whose index equals the template count before appending.
            temp_map[key] = nums.shape[0]
            temp = np.append(temp,[beat],axis=0)
            nums = np.append(nums,[1])
    return temp,nums,temp_map

In [4]:
# Build a dataset of fixed-length heartbeats (input for the shapelet / CNN experiments).
# Every sample is one individual beat; beats from SCD records get label 1,
# beats from NSR records get label 0.
BEAT_LENGTH = 75          # number of leading samples kept from each beat
MIN_TEMPLATE_COUNT = 30   # keep a beat only if its template gathered more beats than this
TEMPLATE_THRESHOLD = 0.5  # merge distance passed to beat_template_extraction

dataset = []
labels = []
# One loop for both cohorts (SCD first, then NSR, same order as before).
for record_ids, label in ((scd_indexs, 1), (nsr_indexs, 0)):
    for index in record_ids:
        heart_beat = pd.read_csv(ecg_info_save_path+index+'_heartbeat_chan0.csv')
        temp,nums,temp_map = beat_template_extraction(heart_beat,threshold=TEMPLATE_THRESHOLD)
        for i in range(heart_beat.shape[1]-1):
            # Drop beats belonging to sparsely populated templates.
            if nums[temp_map[str(i)]] > MIN_TEMPLATE_COUNT:
                dataset.append(np.asarray(heart_beat[str(i)][:BEAT_LENGTH],dtype=np.float32))
                labels.append(label)
dataset = np.asarray(dataset,dtype=np.float32)
labels = np.asarray(labels,dtype=np.int32)
print(dataset.shape)
print(labels.shape)

(24745, 75)
(24745,)


In [104]:
from sklearn.model_selection import train_test_split

# Random train/test split of the individual beats (fixed seed), then wrap
# each NumPy part in a torch tensor.
# NOTE(review): beats are shuffled regardless of which record they came from,
# so beats of the same record can land in both train and test - consider a
# record-wise (grouped) split before trusting the test accuracy.
split_parts = train_test_split(dataset, labels, random_state=666)
x_train, x_test, y_train, y_test = (torch.from_numpy(part) for part in split_parts)
for features in (x_train, x_test):
    print(features.shape)

torch.Size([18558, 75])
torch.Size([6187, 75])


In [105]:
# Cast features and labels to float32 (BCELoss needs float targets).
# detach().clone() is the recommended way to copy an existing tensor;
# torch.tensor(tensor) raises a UserWarning.
x_train = x_train.detach().clone().to(torch.float32)
x_test = x_test.detach().clone().to(torch.float32)
y_train = y_train.detach().clone().to(torch.float32)
y_test = y_test.detach().clone().to(torch.float32)

# Give each sample its own (batch=1, channel=1, length) layout so that iterating
# over the first axis yields tensors Conv1d accepts directly; labels become
# (1, 1) to match the model output. Using -1 for the last axis keeps this
# cell safe to re-run.
x_train = x_train.reshape(x_train.shape[0], 1, 1, -1)
x_test = x_test.reshape(x_test.shape[0], 1, 1, -1)
y_train = y_train.reshape(y_train.shape[0], 1, 1)
y_test = y_test.reshape(y_test.shape[0], 1, 1)
print(x_train.shape,x_test.shape)

torch.Size([18558, 1, 1, 75]) torch.Size([6187, 1, 1, 75])
  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


In [97]:
# Build the 1-D convolutional neural network model
class cnn_model(nn.Module):
    """1-D CNN binary classifier for single-channel heartbeat segments.

    Architecture: three Conv1d layers (kernel 3, stride 1) with a
    MaxPool1d (kernel 3, stride 2) after the first two, followed by three
    fully connected layers and a sigmoid, giving one probability per sample.

    Args:
        input_size: number of samples in each input segment. The width of the
            first fully connected layer is derived from it (75 -> 560).

    Raises:
        ValueError: if ``input_size`` is too short to pass through the
            convolution / pooling stack.
    """

    def __init__(self,input_size):
        super(cnn_model,self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1,out_channels=10,kernel_size=3,stride=1)
        self.max_pool1 = nn.MaxPool1d(kernel_size=3,stride=2)
        self.conv2 = nn.Conv1d(10,20,3,1)
        self.max_pool2 = nn.MaxPool1d(3,2)
        self.conv3 = nn.Conv1d(20,40,3,1)

        # Sequence length left after conv1, pool1, conv2, pool2, conv3
        # (all use kernel 3 and no padding; only the stride differs).
        length = input_size
        for stride in (1, 2, 1, 2, 1):
            if length < 3:
                raise ValueError('input_size=%r is too short for this network' % (input_size,))
            length = (length - 3)//stride + 1

        self.linear1 = nn.Linear(40*length,70)
        self.linear2 = nn.Linear(70,10)
        self.linear3 = nn.Linear(10,1)
        self.input_size = input_size

    def forward(self,x):
        """Return sigmoid probabilities of shape (batch, 1) for x of shape (batch, 1, input_size)."""
        x = F.relu(self.conv1(x))
        x = self.max_pool1(x)
        x = F.relu(self.conv2(x))
        x = self.max_pool2(x)
        x = F.relu(self.conv3(x))
        # Flatten channels x length; the width is read from linear1 so the
        # value always matches the layer that consumes it.
        x = x.view(-1,self.linear1.in_features)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        # torch.sigmoid replaces the deprecated F.sigmoid.
        x = torch.sigmoid(self.linear3(x))
        return x
model = cnn_model(75)

In [98]:
# Inspect the first training sample (a 1 x 1 x 75 tensor once the reshape cell has run).
x_train[0]

tensor([[[0.3741, 0.3820, 0.3789, 0.3776, 0.3904, 0.3962, 0.3840, 0.3746,
          0.3774, 0.3793, 0.3763, 0.3742, 0.3655, 0.3468, 0.3347, 0.3326,
          0.3189, 0.2958, 0.2892, 0.2466, 0.0913, 0.0000, 0.2468, 0.6745,
          0.9291, 1.0000, 0.9050, 0.5518, 0.2112, 0.1806, 0.2619, 0.2570,
          0.2536, 0.2707, 0.2798, 0.3075, 0.3246, 0.3346, 0.3654, 0.3670,
          0.3516, 0.3725, 0.3821, 0.3649, 0.3668, 0.3723, 0.3732, 0.3838,
          0.3698, 0.3427, 0.3577, 0.3846, 0.3750, 0.3612, 0.3769, 0.3979,
          0.3993, 0.3986, 0.4120, 0.4193, 0.4104, 0.4069, 0.4116, 0.4053,
          0.3837, 0.3621, 0.3557, 0.3563, 0.3449, 0.3340, 0.3439, 0.3569,
          0.3527, 0.3470, 0.3564]]])

In [99]:
# Run a single forward pass on the first training sample to check the model output.
model(x_train[0])

tensor([[0.4881]], grad_fn=<SigmoidBackward>)

In [100]:
# Inspect the label of the first training sample.
y_train[0]

tensor([[0]])

In [101]:
# Optimizer and loss function:
# binary cross-entropy on the model's sigmoid output, optimized with Adam (lr = 0.001).
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(),lr=0.001)

In [102]:
# Sanity-check the loss: BCE of prediction 0.7 against target 1 is -ln(0.7), about 0.3567.
criterion(torch.Tensor([[0.7]]),torch.Tensor([[1]]))

tensor(0.3567)

In [79]:
# Inspect the label of the first training sample.
y_train[0]

tensor([0])

In [107]:
# Training loop: 10 epochs, one optimizer step per sample (each element of
# x_train is already a batch of one), reporting the mean loss every
# LOG_EVERY samples.
import time
from tqdm import tqdm

LOG_EVERY = 2000
start = time.time()
for epoch in tqdm(range(10)):
    running_loss = 0.0
    for i, input_data in enumerate(x_train):
        label = y_train[i]
        optimizer.zero_grad()

        outputs = model(input_data)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % LOG_EVERY == LOG_EVERY - 1:
            # tqdm.write keeps the message from being interleaved with the progress bar.
            tqdm.write('[%d, %5d] loss: %0.3f' % (epoch + 1, i + 1, running_loss / LOG_EVERY))
            running_loss = 0.0
# Sample the clock once so minutes and seconds come from the same instant.
elapsed = time.time() - start
print('time = %2dm:%2ds' % divmod(elapsed, 60))

  0%|          | 0/10 [00:00<?, ?it/s][1,  2000] loss: 0.153
[1,  4000] loss: 0.011
[1,  6000] loss: 0.016
[1,  8000] loss: 0.013
[1, 10000] loss: 0.000
[1, 12000] loss: 0.028
[1, 14000] loss: 0.009
[1, 16000] loss: 0.005
[1, 18000] loss: 0.009
 10%|█         | 1/10 [18:27<2:46:08, 1107.65s/it][2,  2000] loss: 0.017
[2,  4000] loss: 0.004
[2,  6000] loss: 0.009
[2,  8000] loss: 0.013
[2, 10000] loss: 0.001
[2, 12000] loss: 0.000
[2, 14000] loss: 0.005
[2, 16000] loss: 0.012
[2, 18000] loss: 0.007
 20%|██        | 2/10 [36:52<2:27:26, 1105.81s/it][3,  2000] loss: 0.001
[3,  4000] loss: 0.006
[3,  6000] loss: 0.000
[3,  8000] loss: 0.009
[3, 10000] loss: 0.000
[3, 12000] loss: 0.000
[3, 14000] loss: 0.011
[3, 16000] loss: 0.000
[3, 18000] loss: 0.000
 30%|███       | 3/10 [56:15<2:12:03, 1131.97s/it][4,  2000] loss: 0.013
[4,  4000] loss: 0.020
[4,  6000] loss: 0.000
[4,  8000] loss: 0.005
[4, 10000] loss: 0.000
[4, 12000] loss: 0.000
[4, 14000] loss: 0.015
[4, 16000] loss: 0.000
[4, 180

In [109]:
# Persist the trained model to disk.
# NOTE(review): saving the module object pickles the whole instance, so loading
# it later requires the cnn_model class definition to be available; saving
# model.state_dict() is the more portable option recommended by PyTorch.
torch.save(model,'./cnn_model.pt')

In [None]:
# Validate the results on the test set

In [124]:
# Score the whole test set in one batched forward pass.
# eval() + no_grad(): inference only, so no autograd graph is built.
# `res` is a float64 vector with one sigmoid score per test sample, as before
# (values may differ from a per-sample loop only by float rounding).
model.eval()
with torch.no_grad():
    test_scores = model(x_test.reshape(-1, 1, x_test.shape[-1]))
res = test_scores.reshape(-1).numpy().astype(np.float64)



0.30000001192092896

In [125]:
# Flatten the test labels into a 1-D NumPy array so they can be compared with the predictions.
a = np.asarray(y_test.flatten())

In [131]:
# Turn the sigmoid scores into hard 0/1 predictions with a 0.5 cut-off.
# Work on a copy: `new_res = res` would only alias the array, and the
# thresholding would then overwrite the raw scores kept in `res`.
new_res = res.copy()
new_res[res > 0.5] = 1
new_res[res <= 0.5] = 0

In [132]:
# Count the test samples whose thresholded prediction equals the label
# (the shape of the matching-index array is the number of correct predictions).
np.where(new_res==a)[0].shape

(6187,)

In [127]:
# Total number of test labels, for comparison with the number of correct predictions.
a.shape

(6187,)