In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import numpy as np
# Load the pre-split training / validation arrays from .npy files in the
# current working directory.
# Features are cast to float32; targets are cast to int64 and only column 0
# of each target array is kept as the label vector.
traval_tra_features = np.load('traval_tra_features.npy').astype(np.float32)
traval_tra_targets = np.load('traval_tra_targets.npy').astype(np.int64)[:,0]
traval_val_features = np.load('traval_val_features.npy').astype(np.float32)
traval_val_targets = np.load('traval_val_targets.npy').astype(np.int64)[:,0]
# Last expression of the cell: show both feature-matrix shapes as the cell output.
traval_tra_features.shape, traval_val_features.shape

((687636, 2600), (76405, 2600))

In [2]:
import torch
from torch import optim
from torch import nn
import torch.nn.functional as F
from skorch import NeuralNetClassifier
import sklearn
from skorch.callbacks import EpochScoring, LRScheduler, Checkpoint
from torch.optim import Adam, SGD
import adamod



In [3]:
from skorch.dataset import Dataset
from skorch.helper import predefined_split
# Wrap the validation arrays in a skorch Dataset; the training cells pass it
# to predefined_split() so every run is evaluated on the same hold-out data.
valid_ds = Dataset(traval_val_features, traval_val_targets)

In [4]:
class DownSample2x(nn.Sequential):
    """Halve the length of a 1-D signal with a strided convolution.

    The block is a two-element ``nn.Sequential``: a ``Conv1d`` with
    ``kernel_size=2`` and ``stride=2`` followed by an in-place ReLU.

    Args:
        _in: number of input channels.
        _out: number of output channels.
    """

    def __init__(self, _in, _out):
        # Non-overlapping windows of two samples -> output length is halved.
        halving_conv = nn.Conv1d(
            in_channels=_in,
            out_channels=_out,
            kernel_size=2,
            stride=2,
        )
        activation = nn.ReLU(inplace=True)
        # Positional registration keeps the sub-module names "0" and "1".
        super().__init__(halving_conv, activation)

class SELayer(nn.Module):
    """Channel-gating layer for ``(batch, channels, length)`` tensors.

    Each channel is averaged over its length, the per-channel averages go
    through a small two-layer MLP ending in a sigmoid, and the resulting
    per-channel gate multiplies the input.

    Args:
        _in: number of channels of the input tensor.
        _hidden: width of the hidden layer inside the gating MLP.
    """

    def __init__(self, _in, _hidden=64):
        super(SELayer, self).__init__()
        # Collapses the length axis to a single value per channel.
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        gating_layers = [
            nn.Linear(_in, _hidden),
            nn.PReLU(),
            nn.Linear(_hidden, _in),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*gating_layers)

    def forward(self, x):
        """Return ``x`` scaled channel-wise by its learned gate."""
        # Unpacking into three names requires a 3-D input.
        batch, channels, _ = x.size()
        pooled = self.avg_pool(x)
        squeezed = pooled.view(batch, channels)
        gate = self.fc(squeezed)
        # Trailing singleton axis lets the gate broadcast along the length.
        gate = gate.view(batch, channels, 1)
        return x * gate
    
class ResConv1d(nn.Module):
    """Length-preserving residual block with channel gating.

    Main path: two ``kernel_size=3`` convolutions (each followed by batch
    norm, with a ReLU in between) and then an ``SELayer`` gate.
    Shortcut path: a ``kernel_size=1`` convolution followed by batch norm.
    The two paths are added and passed through a final ReLU.

    Args:
        _in: number of input channels.
        _out: number of output channels.
    """

    def __init__(self, _in, _out):
        super(ResConv1d, self).__init__()

        # padding=1 with kernel_size=3 and stride=1 keeps the length unchanged.
        same_length = dict(kernel_size=3, padding=1, stride=1)
        main_path = [
            nn.Conv1d(_in, _out, **same_length),
            nn.BatchNorm1d(_out),
            nn.ReLU(),
            nn.Conv1d(_out, _out, **same_length),
            nn.BatchNorm1d(_out),
        ]
        self.cal = nn.Sequential(*main_path)
        # The gate's hidden width is set equal to the channel count.
        self.se = SELayer(_out, _out)
        # Point-wise projection used on the shortcut path.
        self.conv = nn.Conv1d(_in, _out, kernel_size=1, padding=0, stride=1)
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm1d(_out)

    def forward(self, x):
        """Return ``relu(gated_main_path(x) + shortcut(x))``."""
        gated = self.se(self.cal(x))
        shortcut = self.conv(x)
        shortcut = self.bn(shortcut)
        merged = gated + shortcut
        return self.relu(merged)
        
class MyModule(nn.Module):
    """1-D convolutional classifier producing 3 class probabilities.

    The network L2-normalises each input row, treats it as a single-channel
    1-D signal, and applies six stages of ``DownSample2x`` (length halving)
    followed by ``ResConv1d`` (length preserving), all with 64 channels.
    The result is flattened, passed through dropout and a linear layer, and
    turned into probabilities with a softmax over the class axis.

    Args:
        nonlin: accepted for interface compatibility; it is not used
            anywhere in the module.

    Note:
        The classifier input size is hard-coded to ``64 * 40``: 64 channels
        times a remaining length of 40, which is what six halvings give for
        the 2600-feature inputs shown in the first cell's output
        (2600 -> 1300 -> 650 -> 325 -> 162 -> 81 -> 40). Inputs with a
        different number of features will not match ``self.cls``.
    """

    def __init__(self, nonlin=F.relu):
        super(MyModule, self).__init__()

        # Attribute names d1..d6 / c1..c6 are part of the state-dict keys
        # used by the saved checkpoints, so they are kept as-is.
        self.d1 = DownSample2x(1, 64)
        self.c1 = ResConv1d(64, 64)

        self.d2 = DownSample2x(64, 64)
        self.c2 = ResConv1d(64, 64)

        self.d3 = DownSample2x(64, 64)
        self.c3 = ResConv1d(64, 64)

        self.d4 = DownSample2x(64, 64)
        self.c4 = ResConv1d(64, 64)

        self.d5 = DownSample2x(64, 64)
        self.c5 = ResConv1d(64, 64)

        self.d6 = DownSample2x(64, 64)
        self.c6 = ResConv1d(64, 64)

        self.dropout = nn.Dropout(0.5)
        self.cls = nn.Linear(64 * 40, 3)

    def preprocess(self, x, p=2, eps=1e-8):
        """Normalise each row of ``x`` and add a channel axis.

        Args:
            x: tensor whose dimension 1 is the feature axis.
            p: order of the norm used for normalisation.
            eps: small constant added to the norm to avoid division by zero.

        Returns:
            ``x / (||x||_p + eps)`` with a new axis inserted at position 1.
        """
        x = x / (x.norm(p=p, dim=1, keepdim=True)+eps)
        x = x.unsqueeze(1)
        return x

    def forward(self, x):
        """Return class probabilities of shape ``(batch, 3)``."""
        bs = x.size(0)
        x = self.preprocess(x)

        stages = (
            (self.d1, self.c1),
            (self.d2, self.c2),
            (self.d3, self.c3),
            (self.d4, self.c4),
            (self.d5, self.c5),
            (self.d6, self.c6),
        )
        for downsample, residual in stages:
            x = downsample(x)
            x = residual(x)

        x = x.reshape(bs, -1)
        x = self.dropout(x)

        # `dim` is given explicitly: calling softmax without it relies on a
        # deprecated implicit choice and warns on every forward pass. For the
        # 2-D logits here the implicit choice was dim=1, so outputs are unchanged.
        return F.softmax(self.cls(x), dim=1)

    
def microf1(net, ds, y=None):
    """Score ``net`` on ``ds`` with the micro-averaged F1 measure.

    Args:
        net: object exposing ``predict(ds)``.
        ds: iterable of ``(features, label)`` pairs; also passed to ``net.predict``.
        y: accepted for the scoring-callback signature; not used.

    Returns:
        The value returned by ``sklearn.metrics.f1_score(..., average='micro')``.
    """
    # Ground-truth labels are taken from the dataset itself, one sample at a time.
    labels = []
    for _, label in ds:
        labels.append(label)
    predictions = net.predict(ds)
    return sklearn.metrics.f1_score(labels, predictions, average='micro')
def macrof1(net, ds, y=None):
    """Score ``net`` on ``ds`` with the macro-averaged F1 measure.

    Args:
        net: object exposing ``predict(ds)``.
        ds: iterable of ``(features, label)`` pairs; also passed to ``net.predict``.
        y: accepted for the scoring-callback signature; not used.

    Returns:
        The value returned by ``sklearn.metrics.f1_score(..., average='macro')``.
    """
    # Ground-truth labels are taken from the dataset itself, one sample at a time.
    labels = []
    for _, label in ds:
        labels.append(label)
    predictions = net.predict(ds)
    return sklearn.metrics.f1_score(labels, predictions, average='macro')

class F1Loss(nn.Module):
    """Differentiable F1-style surrogate loss on class probabilities.

    For every class ``k`` the soft overlap between the predicted probability
    column and the one-vs-rest target indicator is divided by the summed
    "mass" of both, the per-class ratios are added up, and the negative log
    of that sum is returned.

    The computation runs on whatever device ``input`` lives on and takes the
    number of classes from ``input``'s second dimension, so it works on CPU
    as well as GPU and is not tied to three classes.
    """

    def __init__(self):
        super(F1Loss, self).__init__()

    def forward(self, input, target, eps=1e-10):
        """Compute the loss.

        Args:
            input: ``(batch, n_classes)`` tensor of class probabilities.
            target: ``(batch,)`` tensor of integer class indices.
            eps: small constant added element-wise in the denominator and to
                the argument of the logarithm for numerical stability.

        Returns:
            A scalar tensor: ``-log(sum_k ratio_k + eps)``.
        """
        loss = 0
        n_classes = input.size(1)
        for idx in range(n_classes):
            # Probability assigned to class `idx` for every sample.
            y_pred_ = input[:, idx]
            # One-vs-rest indicator, in the dtype of the predictions.
            y_true_ = (target == idx).to(input.dtype)
            overlap = (y_true_ * y_pred_).sum()
            # eps is added per element before summing, as in the original formula.
            mass = (y_true_ + y_pred_ + eps).sum()
            loss = loss + 0.5 * overlap / mass
        return -torch.log(loss+eps)

## General Training

In [5]:
# Stage 1: train MyModule from scratch. No `criterion` is passed, so the
# classifier's default loss is used.
net = NeuralNetClassifier(
    module=MyModule,
    # --- optimisation ---
    optimizer=Adam,
    lr=0.001,
    batch_size=128,
    max_epochs=16,
    iterator_train__shuffle=True,
    # --- evaluation: always validate on the fixed hold-out dataset ---
    train_split=predefined_split(valid_ds),
    callbacks=[
        EpochScoring(macrof1, lower_is_better=False, use_caching=True),
        EpochScoring(microf1, lower_is_better=False, use_caching=True),
        # Save a checkpoint whenever the macro-F1 score reaches a new best.
        Checkpoint(dirname='03_model11', monitor='macrof1_best'),
    ],
    device='cuda',
)

net.fit(traval_tra_features, traval_tra_targets)



  epoch    macrof1    microf1    train_loss    valid_acc    valid_loss    cp       dur
-------  ---------  ---------  ------------  -----------  ------------  ----  --------
      1     [36m0.9652[0m     [32m0.9908[0m        [35m0.0342[0m       [31m0.9908[0m        [94m0.0298[0m     +  305.2410
      2     [36m0.9780[0m     [32m0.9940[0m        [35m0.0205[0m       [31m0.9940[0m        [94m0.0193[0m     +  305.7516
      3     [36m0.9789[0m     [32m0.9946[0m        [35m0.0177[0m       [31m0.9946[0m        [94m0.0174[0m     +  305.0816
      4     [36m0.9821[0m     [32m0.9954[0m        [35m0.0159[0m       [31m0.9954[0m        [94m0.0148[0m     +  305.0066
      5     0.9818     0.9954        [35m0.0150[0m       0.9954        0.0151        304.3461
      6     0.9820     [32m0.9956[0m        [35m0.0144[0m       [31m0.9956[0m        0.0150        304.3996
      7     0.9807     0.9954        [35m0.0136[0m       0.9954        [94m0.0146[0m

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (d1): DownSample2x(
      (0): Conv1d(1, 64, kernel_size=(2,), stride=(2,))
      (1): ReLU(inplace=True)
    )
    (c1): ResConv1d(
      (cal): Sequential(
        (0): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (se): SELayer(
        (avg_pool): AdaptiveAvgPool1d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=64, out_features=64, bias=True)
          (1): PReLU(num_parameters=1)
          (2): Linear(in_features=64, out_features=64, bias=True)
          (3): Sigmoid()
        )
      )
      (conv): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
      (relu): ReLU()
 

## Macro-F1 Loss Fine-tuning

In [7]:
# Stage 2: fine-tune the best stage-1 weights with the F1 surrogate loss and
# a 10x smaller learning rate.
net = NeuralNetClassifier(
    module=MyModule,
    criterion=F1Loss,
    # --- optimisation ---
    optimizer=Adam,
    lr=0.0001,
    batch_size=128,
    max_epochs=10,
    iterator_train__shuffle=True,
    # --- evaluation: always validate on the fixed hold-out dataset ---
    train_split=predefined_split(valid_ds),
    callbacks=[
        EpochScoring(macrof1, lower_is_better=False, use_caching=True),
        EpochScoring(microf1, lower_is_better=False, use_caching=True),
        # Same directory as stage 1; fn_prefix='f1' presumably keeps the
        # fine-tuning files from overwriting the stage-1 ones (verify).
        Checkpoint(dirname='03_model11', monitor='macrof1_best', fn_prefix='f1'),
    ],
    device='cuda',
)

# Build the module first so there is something to load the weights into.
net.initialize()
net.load_params(f_params='03_model11/params.pt')
# partial_fit rather than fit -- presumably so the loaded weights are kept
# instead of being re-initialised (confirm against skorch docs).
net.partial_fit(traval_tra_features, traval_tra_targets)



  epoch    macrof1    microf1    train_loss    valid_acc    valid_loss    cp       dur
-------  ---------  ---------  ------------  -----------  ------------  ----  --------
      1     [36m0.9841[0m     [32m0.9961[0m        [35m0.3046[0m       [31m0.9961[0m        [94m0.3086[0m     +  313.4550
      2     [36m0.9843[0m     [32m0.9962[0m        [35m0.3020[0m       [31m0.9962[0m        [94m0.3085[0m     +  312.7837
      3     [36m0.9846[0m     [32m0.9962[0m        [35m0.3009[0m       [31m0.9962[0m        [94m0.3084[0m     +  312.8702
      4     [36m0.9846[0m     0.9962        [35m0.3004[0m       0.9962        [94m0.3081[0m     +  312.5561
      5     0.9842     0.9962        0.3009       0.9962        0.3086        312.9365
      6     0.9840     0.9961        0.3004       0.9961        0.3089        312.1712
      7     0.9839     0.9961        0.3005       0.9961        0.3085        313.2180
      8     0.9843     0.9962        [35m0.2996[0m   

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (d1): DownSample2x(
      (0): Conv1d(1, 64, kernel_size=(2,), stride=(2,))
      (1): ReLU(inplace=True)
    )
    (c1): ResConv1d(
      (cal): Sequential(
        (0): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (se): SELayer(
        (avg_pool): AdaptiveAvgPool1d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=64, out_features=64, bias=True)
          (1): PReLU(num_parameters=1)
          (2): Linear(in_features=64, out_features=64, bias=True)
          (3): Sigmoid()
        )
      )
      (conv): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
      (relu): ReLU()
 

In [8]:
# Stage 1: train MyModule from scratch. No `criterion` is passed, so the
# classifier's default loss is used.
net = NeuralNetClassifier(
    module=MyModule,
    # --- optimisation ---
    optimizer=Adam,
    lr=0.001,
    batch_size=128,
    max_epochs=16,
    iterator_train__shuffle=True,
    # --- evaluation: always validate on the fixed hold-out dataset ---
    train_split=predefined_split(valid_ds),
    callbacks=[
        EpochScoring(macrof1, lower_is_better=False, use_caching=True),
        EpochScoring(microf1, lower_is_better=False, use_caching=True),
        # Save a checkpoint whenever the macro-F1 score reaches a new best.
        Checkpoint(dirname='03_model12', monitor='macrof1_best'),
    ],
    device='cuda',
)

net.fit(traval_tra_features, traval_tra_targets)

# Stage 2: fine-tune the best stage-1 weights with the F1 surrogate loss and
# a 10x smaller learning rate.
net = NeuralNetClassifier(
    module=MyModule,
    criterion=F1Loss,
    # --- optimisation ---
    optimizer=Adam,
    lr=0.0001,
    batch_size=128,
    max_epochs=10,
    iterator_train__shuffle=True,
    # --- evaluation: always validate on the fixed hold-out dataset ---
    train_split=predefined_split(valid_ds),
    callbacks=[
        EpochScoring(macrof1, lower_is_better=False, use_caching=True),
        EpochScoring(microf1, lower_is_better=False, use_caching=True),
        # Same directory as stage 1; fn_prefix='f1' presumably keeps the
        # fine-tuning files from overwriting the stage-1 ones (verify).
        Checkpoint(dirname='03_model12', monitor='macrof1_best', fn_prefix='f1'),
    ],
    device='cuda',
)

# Build the module first so there is something to load the weights into.
net.initialize()
net.load_params(f_params='03_model12/params.pt')
# partial_fit rather than fit -- presumably so the loaded weights are kept
# instead of being re-initialised (confirm against skorch docs).
net.partial_fit(traval_tra_features, traval_tra_targets)



  epoch    macrof1    microf1    train_loss    valid_acc    valid_loss    cp       dur
-------  ---------  ---------  ------------  -----------  ------------  ----  --------
      1     [36m0.9461[0m     [32m0.9853[0m        [35m0.0337[0m       [31m0.9853[0m        [94m0.0451[0m     +  304.6923
      2     [36m0.9726[0m     [32m0.9921[0m        [35m0.0203[0m       [31m0.9921[0m        [94m0.0247[0m     +  300.5658
      3     [36m0.9785[0m     [32m0.9947[0m        [35m0.0176[0m       [31m0.9947[0m        [94m0.0171[0m     +  301.8805
      4     [36m0.9791[0m     [32m0.9948[0m        [35m0.0161[0m       [31m0.9948[0m        0.0172     +  300.6212
      5     [36m0.9806[0m     [32m0.9951[0m        [35m0.0151[0m       [31m0.9951[0m        [94m0.0161[0m     +  297.3172
      6     [36m0.9816[0m     [32m0.9952[0m        [35m0.0144[0m       [31m0.9952[0m        [94m0.0159[0m     +  300.2840
      7     [36m0.9818[0m     0.9951     

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (d1): DownSample2x(
      (0): Conv1d(1, 64, kernel_size=(2,), stride=(2,))
      (1): ReLU(inplace=True)
    )
    (c1): ResConv1d(
      (cal): Sequential(
        (0): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (se): SELayer(
        (avg_pool): AdaptiveAvgPool1d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=64, out_features=64, bias=True)
          (1): PReLU(num_parameters=1)
          (2): Linear(in_features=64, out_features=64, bias=True)
          (3): Sigmoid()
        )
      )
      (conv): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
      (relu): ReLU()
 

In [9]:
# Stage 1: train MyModule from scratch. No `criterion` is passed, so the
# classifier's default loss is used.
net = NeuralNetClassifier(
    module=MyModule,
    # --- optimisation ---
    optimizer=Adam,
    lr=0.001,
    batch_size=128,
    max_epochs=16,
    iterator_train__shuffle=True,
    # --- evaluation: always validate on the fixed hold-out dataset ---
    train_split=predefined_split(valid_ds),
    callbacks=[
        EpochScoring(macrof1, lower_is_better=False, use_caching=True),
        EpochScoring(microf1, lower_is_better=False, use_caching=True),
        # Save a checkpoint whenever the macro-F1 score reaches a new best.
        Checkpoint(dirname='03_model13', monitor='macrof1_best'),
    ],
    device='cuda',
)

net.fit(traval_tra_features, traval_tra_targets)

# Stage 2: fine-tune the best stage-1 weights with the F1 surrogate loss and
# a 10x smaller learning rate.
net = NeuralNetClassifier(
    module=MyModule,
    criterion=F1Loss,
    # --- optimisation ---
    optimizer=Adam,
    lr=0.0001,
    batch_size=128,
    max_epochs=10,
    iterator_train__shuffle=True,
    # --- evaluation: always validate on the fixed hold-out dataset ---
    train_split=predefined_split(valid_ds),
    callbacks=[
        EpochScoring(macrof1, lower_is_better=False, use_caching=True),
        EpochScoring(microf1, lower_is_better=False, use_caching=True),
        # Same directory as stage 1; fn_prefix='f1' presumably keeps the
        # fine-tuning files from overwriting the stage-1 ones (verify).
        Checkpoint(dirname='03_model13', monitor='macrof1_best', fn_prefix='f1'),
    ],
    device='cuda',
)

# Build the module first so there is something to load the weights into.
net.initialize()
net.load_params(f_params='03_model13/params.pt')
# partial_fit rather than fit -- presumably so the loaded weights are kept
# instead of being re-initialised (confirm against skorch docs).
net.partial_fit(traval_tra_features, traval_tra_targets)



  epoch    macrof1    microf1    train_loss    valid_acc    valid_loss    cp       dur
-------  ---------  ---------  ------------  -----------  ------------  ----  --------
      1     [36m0.9718[0m     [32m0.9923[0m        [35m0.0338[0m       [31m0.9923[0m        [94m0.0241[0m     +  301.0853
      2     [36m0.9722[0m     [32m0.9930[0m        [35m0.0205[0m       [31m0.9930[0m        [94m0.0219[0m     +  302.4394
      3     [36m0.9803[0m     [32m0.9949[0m        [35m0.0177[0m       [31m0.9949[0m        [94m0.0171[0m     +  302.6816
      4     [36m0.9814[0m     [32m0.9951[0m        [35m0.0162[0m       [31m0.9951[0m        [94m0.0160[0m     +  301.5645
      5     0.9811     [32m0.9954[0m        [35m0.0151[0m       [31m0.9954[0m        [94m0.0156[0m        301.1116
      6     [36m0.9832[0m     [32m0.9956[0m        [35m0.0143[0m       [31m0.9956[0m        [94m0.0148[0m     +  303.1260
      7     0.9830     [32m0.9957[0m     

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (d1): DownSample2x(
      (0): Conv1d(1, 64, kernel_size=(2,), stride=(2,))
      (1): ReLU(inplace=True)
    )
    (c1): ResConv1d(
      (cal): Sequential(
        (0): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (se): SELayer(
        (avg_pool): AdaptiveAvgPool1d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=64, out_features=64, bias=True)
          (1): PReLU(num_parameters=1)
          (2): Linear(in_features=64, out_features=64, bias=True)
          (3): Sigmoid()
        )
      )
      (conv): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
      (relu): ReLU()
 

In [10]:
# Stage 1: train MyModule from scratch. No `criterion` is passed, so the
# classifier's default loss is used.
net = NeuralNetClassifier(
    module=MyModule,
    # --- optimisation ---
    optimizer=Adam,
    lr=0.001,
    batch_size=128,
    max_epochs=16,
    iterator_train__shuffle=True,
    # --- evaluation: always validate on the fixed hold-out dataset ---
    train_split=predefined_split(valid_ds),
    callbacks=[
        EpochScoring(macrof1, lower_is_better=False, use_caching=True),
        EpochScoring(microf1, lower_is_better=False, use_caching=True),
        # Save a checkpoint whenever the macro-F1 score reaches a new best.
        Checkpoint(dirname='03_model14', monitor='macrof1_best'),
    ],
    device='cuda',
)

net.fit(traval_tra_features, traval_tra_targets)

# Stage 2: fine-tune the best stage-1 weights with the F1 surrogate loss and
# a 10x smaller learning rate.
net = NeuralNetClassifier(
    module=MyModule,
    criterion=F1Loss,
    # --- optimisation ---
    optimizer=Adam,
    lr=0.0001,
    batch_size=128,
    max_epochs=10,
    iterator_train__shuffle=True,
    # --- evaluation: always validate on the fixed hold-out dataset ---
    train_split=predefined_split(valid_ds),
    callbacks=[
        EpochScoring(macrof1, lower_is_better=False, use_caching=True),
        EpochScoring(microf1, lower_is_better=False, use_caching=True),
        # Same directory as stage 1; fn_prefix='f1' presumably keeps the
        # fine-tuning files from overwriting the stage-1 ones (verify).
        Checkpoint(dirname='03_model14', monitor='macrof1_best', fn_prefix='f1'),
    ],
    device='cuda',
)

# Build the module first so there is something to load the weights into.
net.initialize()
net.load_params(f_params='03_model14/params.pt')
# partial_fit rather than fit -- presumably so the loaded weights are kept
# instead of being re-initialised (confirm against skorch docs).
net.partial_fit(traval_tra_features, traval_tra_targets)



  epoch    macrof1    microf1    train_loss    valid_acc    valid_loss    cp       dur
-------  ---------  ---------  ------------  -----------  ------------  ----  --------
      1     [36m0.9675[0m     [32m0.9908[0m        [35m0.0332[0m       [31m0.9908[0m        [94m0.0306[0m     +  303.0650
      2     [36m0.9749[0m     [32m0.9940[0m        [35m0.0203[0m       [31m0.9940[0m        [94m0.0190[0m     +  303.9570
      3     [36m0.9796[0m     [32m0.9949[0m        [35m0.0176[0m       [31m0.9949[0m        [94m0.0164[0m     +  300.4302
      4     0.9769     0.9940        [35m0.0161[0m       0.9940        0.0193        301.3334
      5     [36m0.9800[0m     [32m0.9951[0m        [35m0.0151[0m       [31m0.9951[0m        [94m0.0158[0m     +  302.5114
      6     [36m0.9815[0m     [32m0.9953[0m        [35m0.0144[0m       [31m0.9953[0m        [94m0.0156[0m     +  309.4205
      7     0.9809     0.9952        [35m0.0136[0m       0.9952     

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (d1): DownSample2x(
      (0): Conv1d(1, 64, kernel_size=(2,), stride=(2,))
      (1): ReLU(inplace=True)
    )
    (c1): ResConv1d(
      (cal): Sequential(
        (0): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
        (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (se): SELayer(
        (avg_pool): AdaptiveAvgPool1d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=64, out_features=64, bias=True)
          (1): PReLU(num_parameters=1)
          (2): Linear(in_features=64, out_features=64, bias=True)
          (3): Sigmoid()
        )
      )
      (conv): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
      (relu): ReLU()
 