# В этом ноутбуке эксперименты, связанные с инъекцией вектора eGeMAPS в полносвязный слой нейронной сети. <br>
### Гипотеза: это поможет улучшить результат по сравнению с использованием одних лишь спектрограмм.

In [1]:
from models.models_one_task_egemaps import AlexNetEgemaps2048, AlexNetEgemaps1792
from  datasets.iemocap import IemocapDataset, train_test_loaders
from constants import *
from torchsummary import summary
import torch
import torch.nn as nn
import skorch
from skorch.helper import predefined_split
from skorch.dataset import Dataset
from skorch.classifier import NeuralNetClassifier
import skorch.callbacks as callbacks
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
from IPython.display import display
import pandas as pd

## Модель 
Используется AlexNet как модель, показавшая один из наилучших результатов. Позже я также проведу этот эксперимент на VGG-11. Отобразим результаты этих моделей.

In [2]:
# Load the aggregated IEMOCAP results table (semicolon-separated CSV).
results_csv = os.path.join(RESULTS_FOLDER, 'iemocap_all_results.csv')
df = pd.read_csv(results_csv, delimiter=';')

# Row mask shared by both baselines: no preprocessing, augmentation enabled.
# Element-wise `== False` / `== True` is intentional for pandas columns.
baseline_rows = (df['preprocessing'] == False) & (df['augmentation'] == True)

# AlexNet baseline metrics at epoch 172.
# NOTE(review): presumably the epoch chosen for the baseline comparison - confirm.
alex_results = df.loc[(df['model'] == 'AlexNet') & baseline_rows]
display(alex_results.loc[alex_results['epochs'] == 172])

# VggNet baseline metrics at epoch 210 (same caveat as above).
vgg_results = df.loc[(df['model'] == 'VggNet') & baseline_rows]
display(vgg_results.loc[vgg_results['epochs'] == 210])

Unnamed: 0,result,epochs,subset,metric,model,dataset,preprocessing,augmentation
12731,0.701568,172,train,acc,AlexNet,IEMOCAP-4,False,True
12909,0.683091,172,valid,acc,AlexNet,IEMOCAP-4,False,True
13087,0.741412,172,train,loss,AlexNet,IEMOCAP-4,False,True
13265,0.808721,172,valid,loss,AlexNet,IEMOCAP-4,False,True


Unnamed: 0,result,epochs,subset,metric,model,dataset,preprocessing,augmentation
6037,0.739362,210,train,acc,VggNet,IEMOCAP-4,False,True
6283,0.683091,210,valid,acc,VggNet,IEMOCAP-4,False,True
6529,0.67123,210,train,loss,VggNet,IEMOCAP-4,False,True
6775,0.794945,210,valid,loss,VggNet,IEMOCAP-4,False,True


## Вопрос: куда мы будем конкатенировать eGeMAPS? 
У модели AlexNet три полносвязных слоя; размерности меняются так: 6400 (вход с выхода свёрточной части) → 2048 нейронов → 512 нейронов → 4 нейрона (классы). <br>
Для начала я попробую конкатенировать вектор eGeMAPS со слоем из 2048 нейронов. Есть и другие варианты, но начнём с простого.<br>
Этот слой будет подвергнут нормализации (torch.nn.LayerNorm). 

### 1. Конкатенация со слоем 2048 нейронов, LayerNorm

In [3]:
# Instantiate the AlexNetEgemaps2048 variant with 4 output classes and
# print its layer structure for inspection.
model = AlexNetEgemaps2048(num_classes=4)
print(model)

AlexNetEgemaps2048(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(4, 4), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(5, 5))
  (fc1): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=6400, out_features=2048, bias=Tru

## Датасет
Используется IEMOCAP без препроцессинга и без аугментации данных.

In [2]:
# Positional path arguments shared by the train and validation datasets.
iemocap_paths = (
    PATH_TO_PICKLE,
    IEMOCAP_PATH_TO_WAVS,
    IEMOCAP_PATH_TO_EGEMAPS,
    IEMOCAP_PATH_FOR_PARSER,
)

# Keyword settings common to both splits; only `mode` differs between them.
# NOTE(review): egemaps=True presumably makes each sample carry the eGeMAPS
# vector alongside the spectrogram - confirm in IemocapDataset.
shared_options = dict(
    base_name='IEMOCAP-4',
    label_type='four',
    preprocessing=False,
    augmentation=False,
    padding='repeat',
    spectrogram_shape=224,
    spectrogram_type='melspec',
    tasks='emotion',
    egemaps=True,
)

train_ds = IemocapDataset(*iemocap_paths, mode='train', **shared_options)
# The 'test' split of the dataset is used as the validation set during training.
valid_ds = IemocapDataset(*iemocap_paths, mode='test', **shared_options)



## Обучение
Будем обучать с помощью Skorch: оптимизатор Adam, learning rate 1e-5, максимум 300 эпох с ранней остановкой (сохраняем все гиперпараметры такими же, как и у бейслайна).

In [5]:
# Checkpoint file name encodes the model variant, the dataset name and the
# (lower-cased) augmentation flag.
filename = f'AlexNetEgemaps2048LN--{train_ds.name}_augmentation-{str(train_ds.augmentation).lower()}.md'
best_model_file_path = os.path.join(RESULTS_FOLDER, filename)

# Report accuracy on the training data every epoch under the 'train_acc' column.
callback_train_acc = callbacks.EpochScoring(
    scoring="accuracy",
    lower_is_better=False,
    on_train=True,
    name='train_acc',
)

# Only the pickled net is written; the separate params/optimizer/criterion/
# history dumps are switched off by passing None for their targets.
disabled_dump_targets = {
    'f_params': None,
    'f_optimizer': None,
    'f_criterion': None,
    'f_history': None,
}
callback_save_best = callbacks.Checkpoint(
    monitor='valid_loss_best',
    f_pickle=best_model_file_path,
    event_name='event_cp',
    **disabled_dump_targets,
)

# Stop once valid_loss has not improved for 30 consecutive epochs.
callback_early_stop = callbacks.EarlyStopping(
    monitor='valid_loss',
    patience=30,
    threshold_mode='rel',
    lower_is_better=True,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [6]:
# Wrap the model in a skorch classifier: Adam optimizer, cross-entropy loss,
# learning rate 1e-5, at most 300 epochs, mini-batches of 32.
net = NeuralNetClassifier(
    model,
    criterion=nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    lr=1e-5,
    max_epochs=300,
    batch_size=32,
    # Validate on the fixed valid_ds instead of an internally generated split.
    train_split=predefined_split(valid_ds),
    device=device,
    # Forwarded to the training data iterator: reshuffle samples every epoch.
    iterator_train__shuffle=True,
    callbacks=[callback_train_acc, callback_save_best, callback_early_stop],
)

In [7]:
# Train on train_ds. y is passed as None, so the targets are presumably
# supplied by the dataset itself - confirm against IemocapDataset.
net.fit(train_ds, None)

  epoch    train_acc    train_loss    valid_acc    valid_loss    cp      dur
-------  -----------  ------------  -----------  ------------  ----  -------
      1       [36m0.3519[0m        [32m1.3542[0m       [35m0.4211[0m        [31m1.2350[0m     +  18.6267
      2       [36m0.4516[0m        [32m1.2440[0m       [35m0.5185[0m        [31m1.1522[0m     +  18.3658
      3       [36m0.5003[0m        [32m1.1453[0m       [35m0.5745[0m        [31m1.0373[0m     +  18.2437
      4       [36m0.5428[0m        [32m1.0839[0m       [35m0.5823[0m        [31m1.0026[0m     +  18.2562
      5       [36m0.5622[0m        [32m1.0618[0m       0.5633        1.0266        18.3714
      6       0.5608        [32m1.0459[0m       [35m0.5857[0m        [31m0.9779[0m     +  18.3271
      7       [36m0.5719[0m        [32m1.0378[0m       0.5845        0.9885        18.5022
      8       [36m0.5767[0m        [32m1.0237[0m       [35m0.6114[0m        [31m0.9656[0m   

     88       [36m0.7427[0m        [32m0.6662[0m       0.6842        0.8163        18.4531
     89       [36m0.7492[0m        [32m0.6466[0m       [35m0.6898[0m        0.8071        18.5458
     90       0.7402        0.6645       0.6719        0.8440        18.4693
     91       [36m0.7531[0m        [32m0.6443[0m       0.6741        0.8162        18.4278
     92       [36m0.7623[0m        [32m0.6320[0m       0.6876        0.8144        18.3892
     93       0.7595        [32m0.6264[0m       0.6775        0.8075        18.3617
     94       0.7612        [32m0.6188[0m       0.6898        0.8254        18.3706
     95       0.7618        [32m0.6184[0m       [35m0.6920[0m        0.8247        18.4175
     96       0.7595        [32m0.6153[0m       0.6506        0.9086        18.2834
     97       0.7592        0.6183       0.6909        0.8360        18.3771
     98       [36m0.7696[0m        [32m0.6055[0m       0.6697        0.8883        18.6372
     99  

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=AlexNetEgemaps2048(
    (features): Sequential(
      (0): Conv2d(1, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): Conv2d(256, 256, kernel_size=(4, 4), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (avgpool): AdaptiveAvgPool2d(output_size=(5, 5))
    (fc1): Sequen

###  2. Изменяем структуру модели: второй полносвязный слой делаем не на 2048 нейронов, а на 1792. eGeMAPS пропускаем через полносвязный слой из 256 нейронов, и его выход конкатенируем со слоем из 1792 нейронов. <br>
Эту модель протестируем без LayerNorm и с LayerNorm.


#### 2.1. Без LayerNorm

In [3]:
# Instantiate the AlexNetEgemaps1792 variant (the positional argument 4 is
# presumably the number of classes, as in AlexNetEgemaps2048 - confirm) and
# print its layer structure for inspection.
model = AlexNetEgemaps1792(4)
print(model)

AlexNetEgemaps1792(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(4, 4), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(5, 5))
  (spec_fc): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=6400, out_features=1792, bias

In [4]:
# Checkpoint file name encodes the model variant, the dataset name and the
# (lower-cased) augmentation flag.
filename = f'AlexNetEgemaps1792--{train_ds.name}_augmentation-{str(train_ds.augmentation).lower()}.md'
best_model_file_path = os.path.join(RESULTS_FOLDER, filename)

# Report accuracy on the training data every epoch under the 'train_acc' column.
callback_train_acc = callbacks.EpochScoring(
    scoring="accuracy",
    lower_is_better=False,
    on_train=True,
    name='train_acc',
)

# Only the pickled net is written; the separate params/optimizer/criterion/
# history dumps are switched off by passing None for their targets.
disabled_dump_targets = {
    'f_params': None,
    'f_optimizer': None,
    'f_criterion': None,
    'f_history': None,
}
callback_save_best = callbacks.Checkpoint(
    monitor='valid_loss_best',
    f_pickle=best_model_file_path,
    event_name='event_cp',
    **disabled_dump_targets,
)

# Stop once valid_loss has not improved for 30 consecutive epochs.
callback_early_stop = callbacks.EarlyStopping(
    monitor='valid_loss',
    patience=30,
    threshold_mode='rel',
    lower_is_better=True,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
# Wrap the model in a skorch classifier: Adam optimizer, cross-entropy loss,
# learning rate 1e-5, at most 300 epochs, mini-batches of 32.
net = NeuralNetClassifier(
    model,
    criterion=nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    lr=1e-5,
    max_epochs=300,
    batch_size=32,
    # Validate on the fixed valid_ds instead of an internally generated split.
    train_split=predefined_split(valid_ds),
    device=device,
    # Forwarded to the training data iterator: reshuffle samples every epoch.
    iterator_train__shuffle=True,
    callbacks=[callback_train_acc, callback_save_best, callback_early_stop],
)

In [6]:
# Train on train_ds. y is passed as None, so the targets are presumably
# supplied by the dataset itself - confirm against IemocapDataset.
net.fit(train_ds, None)

  epoch    train_acc    train_loss    valid_acc    valid_loss    cp      dur
-------  -----------  ------------  -----------  ------------  ----  -------
      1       [36m0.3401[0m        [32m1.3461[0m       [35m0.3807[0m        [31m1.3145[0m     +  18.3485
      2       [36m0.3788[0m        [32m1.3119[0m       0.3807        [31m1.2991[0m     +  18.2514
      3       [36m0.4065[0m        [32m1.2798[0m       [35m0.4938[0m        [31m1.1809[0m     +  18.2304
      4       [36m0.5176[0m        [32m1.1303[0m       [35m0.5442[0m        [31m1.0659[0m     +  18.2034
      5       [36m0.5496[0m        [32m1.0674[0m       [35m0.5543[0m        [31m1.0241[0m     +  18.2525
      6       [36m0.5605[0m        [32m1.0487[0m       [35m0.5677[0m        [31m1.0010[0m     +  18.3884
      7       [36m0.5669[0m        [32m1.0409[0m       0.5610        1.0231        18.2834
      8       [36m0.5798[0m        [32m1.0304[0m       [35m0.5857[0m        

     91       [36m0.6719[0m        [32m0.8233[0m       [35m0.6540[0m        [31m0.8501[0m     +  18.3029
     92       0.6716        0.8269       0.6484        0.8624        18.3355
     93       [36m0.6727[0m        [32m0.8152[0m       0.6495        [31m0.8494[0m     +  18.3730
     94       0.6719        0.8175       0.6484        0.8561        18.1030
     95       [36m0.6781[0m        0.8161       0.6461        0.8898        18.1880
     96       0.6688        0.8217       0.6372        0.8578        18.1495
     97       [36m0.6809[0m        [32m0.8089[0m       0.6417        0.8684        18.2003
     98       0.6697        0.8109       0.6428        0.8745        18.2144
     99       0.6716        0.8192       0.6495        0.8604        18.1024
    100       0.6761        [32m0.7956[0m       0.6417        0.8670        18.2522
    101       0.6795        0.8002       0.6450        0.8695        18.1520
    102       [36m0.6828[0m        [32m0.7869[0m  

    187       [36m0.7788[0m        [32m0.5664[0m       0.6473        0.8608        18.4243
    188       0.7735        0.5835       0.6607        0.8648        18.1953
    189       [36m0.7830[0m        [32m0.5494[0m       0.6484        0.8884        18.2986
Stopping since valid_loss has not improved in the last 30 epochs.


<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=AlexNetEgemaps1792(
    (features): Sequential(
      (0): Conv2d(1, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): Conv2d(256, 256, kernel_size=(4, 4), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (avgpool): AdaptiveAvgPool2d(output_size=(5, 5))
    (spec_fc): Se