In [57]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torchmetrics.functional import auroc as AUROC

In [58]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader,SubsetRandomSampler
from torch.autograd import Variable
import random
import os
import gc
import copy
import numpy as np

In [59]:
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything()

## 超参数设定

In [60]:
NUM_FOLDS = 5
NUM_CLASS =  13

## 数据集读取与归一化处理

In [61]:
label_map = {"0": 0,
             "1": 1,
             "2": 2,
             "3": 3,
             "4": 4,
             "5": 5,
             "6": 6,
             "7": 7,
             "8": 8,
             "9": 9,
             "10": 10,
             "11": 11,
             "12": 9,
             "13": 10,
             "14": 11,
             "15":12
             }

In [62]:
import glob
import pickle as pkl
import numpy as np

In [63]:
def get_all_samples(files):
    '''
    输出[batch_size,seq_len.feature_dim]的数据
    其中seq_len为平均序列长度，过长的截断，
    过短的使用时间序列最后一个时刻数据重复
    
    '''
    original_sample_features = []
    original_sample_label = []
    seq_len = [] 
    for filename in files:
        with open(filename,"rb") as f:
            num_label=filename.split(".")[0]
            _,sample_num_str,label_str = num_label.split("_")
            original_lable =label_str
            label=label_map[original_lable]
            original_sample_label.append(label)
            feature = pkl.load(f)
            original_sample_features.append(feature)
            seq_len.append(len(feature)) 
    mean_seq_len = int(np.mean(seq_len))
    ##重新处理数据,长的截断，短的补0 
    sample_features = []
    for idx,item in enumerate(original_sample_features):
        if item.shape[0] > mean_seq_len:
            item = item[:mean_seq_len,:]
        else:
            last = item[-1,:].reshape(1,-1)
            padding = np.ones((mean_seq_len-item.shape[0],item.shape[1]))*last
            item = np.concatenate((item,padding))
        sample_features.append(item)
    sample_features = np.array(sample_features)
    original_sample_label=np.array(original_sample_label)
    return sample_features,original_sample_label,mean_seq_len

In [64]:
files=glob.glob("data/*.pkl")
sample_features,sample_label,mean_seq_len=get_all_samples(files)

In [65]:
len(sample_features),len(sample_label)

(160, 160)

In [66]:
##直接对所有数据集归一化
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))
w=sample_features.reshape((-1,sample_features.shape[2]))
batch_size,seq_len,feature_dim = sample_features.shape
new_feature=scaler.fit_transform(w)
normaled_feature = new_feature.reshape((batch_size,seq_len,feature_dim))

## 切分数据集

 取出30% 数据作为最终的test 集,确保测试集和训练集一样各类别的样本个数相同

In [67]:
from sklearn.model_selection import StratifiedShuffleSplit

In [68]:
sp = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=0)

In [69]:
X,y=normaled_feature ,sample_label
for train_index, test_index in sp.split(X,y):
    X_train_vaild, X_test = X[train_index], X[test_index]
    y_train_vaild, y_test = y[train_index], y[test_index]

使用StratifiedKFold划分数据集

In [70]:
from sklearn.model_selection import StratifiedKFold

In [71]:
cv = StratifiedKFold(n_splits=NUM_FOLDS,random_state=42,shuffle=True)

In [72]:
kf= cv.split(X_train_vaild,y_train_vaild)

In [73]:
train_index_set = []
val_index_set = []
for fold, (train_index, val_index) in enumerate(kf):
    train_index_set.append(train_index)
    val_index_set.append(val_index)

# 转化为数据集

In [74]:
class  MyDataset(Dataset):
    def __init__(self,feature_data,target_data=None,train_lag=True):
        super(MyDataset,self).__init__()
        self.feature_data = feature_data
        self.train_lag = train_lag 
        self.target_data = target_data 
    def __len__(self):
        return len(self.feature_data)
    def __getitem__(self,idx):
        x = self.feature_data[idx]
        if self.train_lag:
            y = self.target_data[idx]
            return {"feature":torch.Tensor(x),
                    "label":torch.LongTensor([y])}
        else:
            return {"feature":torch.Tensor(x)}

In [75]:
class MyDataModule(pl.LightningDataModule):
    def __init__(self,train_index_set,val_index_set,fold_idx,batch_size,scaler=None):
        super().__init__()
        self.train_index_set = train_index_set
        self.val_index_set = val_index_set 
        self.fold_idx = fold_idx
        self.batch_size = batch_size
        self.scaler = scaler 
    def prepare(self,feature_data,target_data):
        self.total_dataset= self.get_input(feature_data,target_data,train_lag=True)
        train_indices = self.train_index_set[self.fold_idx]
        val_indices = self.val_index_set[self.fold_idx]
        #self.train_ds = torch.utils.data.Subset(self.total_dataset,train_indices)
        #self.val_ds = torch.utils.data.Subset(self.total_dataset,val_indices)
        self.train_sampler = SubsetRandomSampler(train_indices)
        self.valid_sampler = SubsetRandomSampler(val_indices)
    
    def train_dataloader(self):
        return DataLoader(self.total_dataset, self.batch_size,sampler=self.train_sampler,shuffle=False)     
    def val_dataloader(self):
        return DataLoader(self.total_dataset, self.batch_size,sampler =self.valid_sampler,shuffle=False)  
    @staticmethod
    def get_input(feature_data,target_data,train_lag=True,test_lag = False):
        ##额外的测试数据需要采用和训练/Valid一样的scaler归一化
        if test_lag:
            batch_size,seq_len,feature_dim = feature_data.shape
            w=feature_data.reshape((-1,feature_dim))
            new_feature=self.scaler.fit_transform(w)
            feature_data = new_feature.reshape((batch_size,seq_len,feature_dim))
        if train_lag:
            pass
        else:
            target_data = None 
        my_dataset = MyDataset(feature_data,target_data,train_lag)
        return my_dataset

'''测试 获取数据集'''

In [76]:
dm=MyDataModule(train_index_set,val_index_set,1,10)

In [77]:
dm=MyDataModule(train_index_set,val_index_set,1,10)
dm.prepare(X_train_vaild,y_train_vaild)

In [78]:
dataloader =dm.val_dataloader()

## 定义模型

In [79]:
class FocalLoss(nn.Module):

    def __init__(self, weight=None, reduction='mean', gamma=0, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.eps = eps
        self.ce = torch.nn.CrossEntropyLoss(weight=weight, reduction=reduction)

    def forward(self, input, target):
        logp = self.ce(input, target)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()

In [80]:
class CausalConv1d(nn.Module):
    """
    Input and output sizes will be the same.
    #[in_size+2*pad-dilation*(kernel_size-1)-1]/stride+1  
    if stride ==1:
    then 
        out_size = in_size+2*pad-dilation*(kernel_size-1)
    finially:
        out_size = in_size 
    recommand:
        stride = 1
        kernel_size % 2 =1
    """
    def __init__(self, in_size, out_size, kernel_size, dilation=1,stride=1):
        super(CausalConv1d, self).__init__()
        self.pad = (kernel_size-1) // 2 * dilation
        self.conv1 = nn.Conv1d(in_size, out_size, kernel_size, padding=self.pad, stride=stride,dilation=dilation)

    def forward(self, x):
        x = self.conv1(x)
        return x

In [81]:
class ResidualLayer(nn.Module):    
    def __init__(self, residual_size, skip_size, dilation):
        super(ResidualLayer, self).__init__()
        self.conv_filter = CausalConv1d(residual_size, residual_size,
                                         kernel_size=3, dilation=dilation)
        self.conv_gate = CausalConv1d(residual_size, residual_size,
                                         kernel_size=3, dilation=dilation)   
        self.resconv1_1 = nn.Conv1d(residual_size, residual_size, kernel_size=1)
        self.skipconv1_1 = nn.Conv1d(residual_size, skip_size, kernel_size=1)
        
   
    def forward(self, x):
        conv_filter = self.conv_filter(x)
        conv_gate = self.conv_gate(x)  
        fx = torch.tanh(conv_filter) * torch.sigmoid(conv_gate)
        fx = self.resconv1_1(fx) 
        skip = self.skipconv1_1(fx) 
        residual = fx + x  
        #residual=[batch,residual_size,seq_len]  skip=[batch,skip_size,seq_len]
        return skip, residual

In [82]:
class DilatedStack(nn.Module):
    def __init__(self, residual_size, skip_size, dilation_depth):
        super(DilatedStack, self).__init__()
        residual_stack = [ResidualLayer(residual_size, skip_size, 2**layer)
                         for layer in range(dilation_depth)]
        self.residual_stack = nn.ModuleList(residual_stack)
        
    def forward(self, x):
        skips = []
        for layer in self.residual_stack:
            skip, x = layer(x)
            skips.append(skip.unsqueeze(0))
            #skip =[1,batch,skip_size,seq_len]
        return torch.cat(skips, dim=0), x  # [layers,batch,skip_size,seq_len]

In [83]:
class WaveNet(pl.LightningModule):

    def __init__(self,args):
        super().__init__()
        self.args=args
        self.save_hyperparameters()
        self.input_size = args.input_size 
        #self.out_size = args.out_size
        self.num_class = args.num_class 
        self.residual_size =args.residual_size 
        self.skip_size =args.skip_size 
        self.dilation_cycles = args.dilation_cycles 
        self.dilation_depth = args.dilation_depth 
        
        self.input_conv = CausalConv1d(self.input_size,self.residual_size, kernel_size=3)        

        self.dilated_stacks = nn.ModuleList(

            [DilatedStack(self.residual_size, self.skip_size, self.dilation_depth)

             for cycle in range(self.dilation_cycles)]

        )

        self.convout_1 = nn.Conv1d(self.skip_size, self.skip_size, kernel_size=1)
        self.finial=nn.Linear(self.skip_size,self.num_class)
        
        #crition
        self.criterion = FocalLoss(gamma=1)

    def forward(self, x):

        x = x.permute(0,2,1)# [batch,input_feature_dim, seq_len] =[N,C_in,Lin]
     
        x = self.input_conv(x) # [batch,residual_size, seq_len]             

        skip_connections = []

        for cycle in self.dilated_stacks:

            skips, x = cycle(x)             
            skip_connections.append(skips)

        ## skip_connection=[total_layers,batch,skip_size,seq_len]
        skip_connections = torch.cat(skip_connections, dim=0)        

        # gather all output skip connections to generate output, discard last residual output

        out = skip_connections.sum(dim=0) # [batch,skip_size,seq_len]

        out = F.relu(out)
        out = self.convout_1(out) # [batch,out_size,seq_len]
        
        out=torch.mean(out,dim=2)#=[batch,out_size]

        out=self.finial(out) #[batch,out_size]=>[batch,num_class]
        return out
    def training_step(self, batch, batch_idx):
        X,y=batch["feature"],batch["label"]
        predict=self(X)
        target = torch.squeeze(y,1)
        loss=self.criterion(predict,target)
        self.log("train_loss",loss,on_step=False,on_epoch=True)
        return loss
    def validation_step(self,batch,batch_idx):
        X,y=batch["feature"],batch["label"]
        predict=self(X)
        target = torch.squeeze(y,1)
        loss=self.criterion(predict,target)
        self.log("val_loss",loss,on_step=False,on_epoch=True)
        return loss
    def predict_step(self, batch, batch_idx, dataloader_idx):
        X=batch["feature"]
        predict=self(X)
        return predict 
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(),lr=1e-3,weight_decay=1e-5)
        return optimizer

## 训练模型

## 测试模型

In [84]:
class Args:
    
    num_epochs=10
    batch_size=10

    input_size = feature_dim
    num_class = NUM_CLASS
    ##通道数，相当于特征数
    residual_size = 32
    skip_size = 256
    dilation_cycles = 3 
    dilation_depth =   8  #2**8 = 256 
                          #3*256 = 768 


In [85]:
##original paper seuqnece_length = 160000  dilation =[1,2,4,...,512]

In [86]:
args=Args()

In [89]:
def train_fold(fold):
    from pytorch_lightning.loggers import TensorBoardLogger 
    logger=TensorBoardLogger("tensorboard_logs",name=f"fold_{fold}")
    checkpoint_callback = ModelCheckpoint(
        filename=f"fold_{fold}_best_loss",
        monitor="train_loss",
        save_top_k=1,
        mode="min",
        save_last=False,
    )
    earystopping = EarlyStopping(monitor="val_loss")
    trainer = pl.Trainer(  
    gpus=-1 if torch.cuda.is_available() else None, 
    #precision=16,
    max_epochs=args.num_epochs,
    callbacks = [checkpoint_callback,earystopping],
    logger=logger
    )
    
    dm=MyDataModule(train_index_set,val_index_set,fold,args.batch_size)
    dm.prepare(X_train_vaild,y_train_vaild)
    model = WaveNet(args)
    
    trainer.fit(model,dm)

In [90]:
train_fold(0)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name           | Type         | Params
------------------------------------------------
0 | input_conv     | CausalConv1d | 416   
1 | dilated_stacks | ModuleList   | 377 K 
2 | convout_1      | Conv1d       | 65.8 K
3 | finial         | Linear       | 3.3 K 
4 | criterion      | FocalLoss    | 0     
------------------------------------------------
446 K     Trainable params
0         Non-trainable params
446 K     Total params
1.787     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

## 加载训练好的模型

In [34]:
def load_model(folds=5,path="tensorboard_logs/"):
    import os 
    models = []
    for i in range(folds):
        filename = os.path.join(path,f"fold_{i}/version_0/checkpoints/fold_{i}_best_loss.ckpt")
        model = WaveNet.load_from_checkpoint(filename)
        models.append(model)
    return models

In [35]:
models = load_model(1)

## 模型效果分析

In [70]:
from sklearn.metrics import f1_score,precision_score,recall_score 
def get_score(y_true,y_pred):
    y_true = np.array(y_true).reshape(-1,1)
    y_pred = np.array(y_pred).reshape(-1,1)
    f1 =f1_score(y_true,y_pred,average="macro")*100
    p =precision_score(y_true,y_pred,average="macro")*100
    r = recall_score(y_true,y_pred,average="macro")*100
    return str((reformat(p,2),reformat(r,2),reformat(f1,2))),reformat(f1,2)
def reformat(num,n):
    return float(format(num,"0."+str(n)+"f"))

把5折交叉验证中最好的模型作平均预测,针对X_test和y_test数据

In [71]:
##带有标签，因为所有数据一起做了归一化，无需再归一化
test_dataset = MyDataModule.get_input( 
    X_test,
    y_test,
    train_lag=True,
    test_lag=False)

In [72]:
test_dataloader = DataLoader(test_dataset,batch_size= 10,shuffle = True)

In [73]:
#features = torch.Size([batch_size,seq_len, feature_dim])
#label = torch.Size([batch_size,1])
#ouput = torch.Size([batch_size,num_class])

In [109]:
total_pro = []
total_true_labels = [] 
device = 'cuda'
for model in models:
    model.eval()
    pro_list= []
    for item in test_dataloader:
        features = item["feature"]
        label = item["label"]
        logist = model(features)
        pro = torch.softmax(logist,dim=1).detach().cpu().numpy()
        pro_list.extend(pro)
        
    total_pro.append(pro_list)
        #output=torch.softmax(logit,dim=1)
        #pred_label = torch.argmax(output,dim=1).cpu().numpy().tolist()
        #true_label = label.cpu().numpy().reshape(-1,)
        #results.extend(pred_label)
        #true_labels.extend(true_label)
    #total_results.append(results)
    #total_true_labels.append(true_labels)

In [110]:
total_pro = np.array(total_pro)
avg_pro = np.mean(total_pro,axis=0)
pred_labels = np.argmax(avg_pro,axis=1)

In [111]:
get_score(y_test,pred_labels)

  _warn_prf(average, modifier, msg_start, len(result))


('(2.34, 9.5, 3.29)', 3.29)

## classification report

In [112]:
from sklearn.metrics import classification_report

In [113]:
target_names = [str(item) for item in range(NUM_CLASS)]

In [118]:
if len(set(y_test))==NUM_CLASS:
    print(classification_report(y_test,pred_labels,target_names=target_names))

## ConfusionMatrix

In [11]:
from sklearn.metrics import confusion_matrix

In [10]:
unique_labels = np.unique(sample_label)

In [13]:
import itertools 
import matplotlib.pyplot as plt 
def plot_confusion_matrix(cm,classes,normalize = False,title="Confusion Matrix",
                          cmap = plt.cm.Blues):
    if normalize:
        cm = cm.astype("float")/cm.sum(axis = 1)[:np.newaxis]
        print("normalize confusion matrix")
    else:
        print("confusion matrix without normalize")
        print(cm)
    plt.imshow(cm,interpolation="nearest",cmap = cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks,classes,rotation = 45)
    plt.yticks(tick_marks,classes)
        
    fmt = '.3f' if normalize else 'd'
    thresh =cm.max()/2.0 
    for i,j in itertools.product(range(cm.shape[0]),range(cm.shape[1])):
        plt.text(j,i,format(cm[i,j],fmt),
                horizontalalignment = 'center',
                color = 'white' if cm[i,j] > thresh else 'black')
    plt.tight_layout()
    plt.ylabel("True label")
    plt.xlabel("Predicted label")