In [2]:
import models.models_multi_task as md_multi
from models.multitask_training_session import TrainingSession
import datasets.iemocap as ds
from constants import *
from torchsummary import summary
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle

# Проба многозадачного обучения IEMOCAP

## Модель:

Подготовил следующую архитектуру, основанную на AlexNet:

In [3]:
model = md_multi.AlexNetMultiTask(num_emotions=4, num_speakers=10, num_genders=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
model = model.to(device)
print(model)
summary(model, (1, 224, 224), batch_size=256)
model=None
torch.cuda.empty_cache()

AlexNetMultiTask(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(4, 4), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(5, 5))
  (joint_classifier): Sequential(
    (0): Linear(in_features=6400, out_features=2048, bias=True)
    (1): Dropout(p=0.5, i

## Проверяю модель и свой код для обучения на работоспособность

Мой код для обучения построен вокруг класса TrainingSession в модуле multitask_training_session.py <br>
У этого класса есть метод overfit_one_batch, этот метод позволяет проверить модель на малой выборке данных, это полезно для того, чтобы посмотреть, как модель переобучается. 
Если она переобучается хорошо, то хотя бы градиентный спуск работает нормально.

In [11]:
iemocap_224_noprep_train = ds.IemocapDataset(  # Без препроцессинга, тренировочный
    PATH_TO_PICKLE, IEMOCAP_PATH_TO_WAVS, IEMOCAP_PATH_TO_EGEMAPS, IEMOCAP_PATH_FOR_PARSER, 
    base_name='IEMOCAP-4', label_type='four', mode='train', preprocessing=False, 
    augmentation=False, padding='repeat', spectrogram_shape=224, spectrogram_type='melspec', tasks=('emotion', 'speaker', 'gender') 
)
iemocap_224_noprep_test = ds.IemocapDataset(  # Без препроцессинга, тестовый
    PATH_TO_PICKLE, IEMOCAP_PATH_TO_WAVS, IEMOCAP_PATH_TO_EGEMAPS, IEMOCAP_PATH_FOR_PARSER, 
    base_name='IEMOCAP-4', label_type='four', mode='test', preprocessing=False, 
    augmentation=False, padding='repeat', spectrogram_shape=224, spectrogram_type='melspec', tasks=('emotion', 'speaker', 'gender') 
)



In [12]:
train_ds = iemocap_224_noprep_train
test_ds = iemocap_224_noprep_test

In [13]:
model = md_multi.AlexNetMultiTask(4, 10, 2)
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu") 
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.1)

In [14]:
ts = TrainingSession(name='FirstTry',
                      model=model,
                      train_dataset=train_ds,
                      test_dataset=test_ds,
                      criterion=criterion,
                      optimizer=optimizer,
                      num_epochs=100,
                      batch_size=32,
                      device=device,
                     path_to_weights=WEIGHTS_FOLDER,
                     path_to_results=RESULTS_FOLDER)
ts.overfit_one_batch(num_epochs=10, batch_size=1)  # Размер батча = 1

INITIALIZING TRAINING SESSION...
Loaders ready
TRAINING SESSION FirstTry__IEMOCAP-4_four_prep-false_224_train INITIALIZED
Trying to load checkpoint from file
File not found, starting from scratch...
Epoch #0
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 0.0273 | speaker = 0.0464 | gender = 0.0146 |
# Train accuracies | emotion = 0.98 | speaker = 0.98 | gender = 0.98 |
Epoch #1
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 0.0000 | speaker = 0.0000 | gender = 0.0000 |
# Train accuracies | emotion = 1.0 | speaker = 1.0 | gender = 1.0 |
Epoch #2
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 0.0000 | speaker = 0.0000 | gender = 0.0000 |
# Train accuracies | emotion = 1.0 | speaker = 1.0 | gender = 1.0 |
Epoch #3
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 0.0000 | speaker = 0.0000 | gender = 0.0000 |
# Train accuracies | emotion = 1.0 | speaker = 1.0 | gender = 1.0 |
Ep

In [15]:
ts = TrainingSession(name='FirstTry',
                      model=model,
                      train_dataset=train_ds,
                      test_dataset=test_ds,
                      criterion=criterion,
                      optimizer=optimizer,
                      num_epochs=100,
                      batch_size=32,
                      device=device,
                     path_to_weights=WEIGHTS_FOLDER,
                     path_to_results=RESULTS_FOLDER)
ts.overfit_one_batch(num_epochs=10, batch_size=2)  # Размер батча = 2 

INITIALIZING TRAINING SESSION...
Loaders ready
TRAINING SESSION FirstTry__IEMOCAP-4_four_prep-false_224_train INITIALIZED
Trying to load checkpoint from file
File not found, starting from scratch...
Epoch #0
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 420152862531831552.0000 | speaker = 421003817559338368.0000 | gender = 0.0000 |
# Train accuracies | emotion = 0.51 | speaker = 0.51 | gender = 1.0 |
Epoch #1
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 4388.1059 | speaker = 4390.1507 | gender = 0.0000 |
# Train accuracies | emotion = 0.51 | speaker = 0.52 | gender = 1.0 |
Epoch #2
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 2762.5823 | speaker = 2763.4643 | gender = 0.0133 |
# Train accuracies | emotion = 0.48 | speaker = 0.47 | gender = 0.99 |
Epoch #3
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 1984.8288 | speaker = 1985.2761 | gender = 0.0000 |
# Train accura

In [16]:
ts = TrainingSession(name='FirstTry',
                      model=model,
                      train_dataset=train_ds,
                      test_dataset=test_ds,
                      criterion=criterion,
                      optimizer=optimizer,
                      num_epochs=100,
                      batch_size=32,
                      device=device,
                     path_to_weights=WEIGHTS_FOLDER,
                     path_to_results=RESULTS_FOLDER)
ts.overfit_one_batch(num_epochs=10, batch_size=3)  # Размер батча = 3

INITIALIZING TRAINING SESSION...
Loaders ready
TRAINING SESSION FirstTry__IEMOCAP-4_four_prep-false_224_train INITIALIZED
Trying to load checkpoint from file
File not found, starting from scratch...
Epoch #0
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 498.5154 | speaker = 596.9092 | gender = 848.8498 |
# Train accuracies | emotion = 0.54 | speaker = 0.37333333333333335 | gender = 0.6 |
Epoch #1
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 258.0081 | speaker = 302.0014 | gender = 79.1601 |
# Train accuracies | emotion = 0.5733333333333334 | speaker = 0.38666666666666666 | gender = 0.5333333333333333 |
Epoch #2
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 221.8909 | speaker = 232.0677 | gender = 40.7528 |
# Train accuracies | emotion = 0.6 | speaker = 0.36 | gender = 0.56 |
Epoch #3
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 0 s
# Epoch losses | emotion = 176.5353 | speaker = 181.5020 | gender

In [18]:
ts = TrainingSession(name='FirstTry',
                      model=model,
                      train_dataset=train_ds,
                      test_dataset=test_ds,
                      criterion=criterion,
                      optimizer=optimizer,
                      num_epochs=100,
                      batch_size=32,
                      device=device,
                     path_to_weights=WEIGHTS_FOLDER,
                     path_to_results=RESULTS_FOLDER)
ts.overfit_one_batch(num_epochs=10, batch_size=4)  # Размер батча = 4 

INITIALIZING TRAINING SESSION...
Loaders ready
TRAINING SESSION FirstTry__IEMOCAP-4_four_prep-false_224_train INITIALIZED
Trying to load checkpoint from file
File not found, starting from scratch...
Epoch #0
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 24.4942 | speaker = 38.9670 | gender = 28.9682 |
# Train accuracies | emotion = 0.46 | speaker = 0.255 | gender = 0.515 |
Epoch #1
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 17.2385 | speaker = 18.4951 | gender = 2.6695 |
# Train accuracies | emotion = 0.54 | speaker = 0.255 | gender = 0.51 |
Epoch #2
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 16.7553 | speaker = 19.1201 | gender = 2.0950 |
# Train accuracies | emotion = 0.49 | speaker = 0.22 | gender = 0.495 |
Epoch #3
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 13.4779 | speaker = 15.1130 | gender = 0.9554 |
# Train accuracies | emotion = 0.525 | speaker = 0.

In [19]:
ts = TrainingSession(name='FirstTry',
                      model=model,
                      train_dataset=train_ds,
                      test_dataset=test_ds,
                      criterion=criterion,
                      optimizer=optimizer,
                      num_epochs=100,
                      batch_size=32,
                      device=device,
                     path_to_weights=WEIGHTS_FOLDER,
                     path_to_results=RESULTS_FOLDER)
ts.overfit_one_batch(num_epochs=10, batch_size=10)  # Размер батча = 10 

INITIALIZING TRAINING SESSION...
Loaders ready
TRAINING SESSION FirstTry__IEMOCAP-4_four_prep-false_224_train INITIALIZED
Trying to load checkpoint from file
File not found, starting from scratch...
Epoch #0
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 24.4231 | speaker = 17.9875 | gender = 0.7383 |
# Train accuracies | emotion = 0.346 | speaker = 0.184 | gender = 0.58 |
Epoch #1
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 3.1070 | speaker = 3.3992 | gender = 0.8530 |
# Train accuracies | emotion = 0.392 | speaker = 0.19 | gender = 0.598 |
Epoch #2
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 1.9072 | speaker = 2.3282 | gender = 0.8951 |
# Train accuracies | emotion = 0.404 | speaker = 0.192 | gender = 0.602 |
Epoch #3
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 2.5902 | speaker = 3.0032 | gender = 0.6916 |
# Train accuracies | emotion = 0.392 | speaker = 0.2 | 

In [20]:
ts = TrainingSession(name='FirstTry',
                      model=model,
                      train_dataset=train_ds,
                      test_dataset=test_ds,
                      criterion=criterion,
                      optimizer=optimizer,
                      num_epochs=100,
                      batch_size=32,
                      device=device,
                     path_to_weights=WEIGHTS_FOLDER,
                     path_to_results=RESULTS_FOLDER)
ts.overfit_one_batch(num_epochs=20, batch_size=20)  # Размер батча = 20

INITIALIZING TRAINING SESSION...
Loaders ready
TRAINING SESSION FirstTry__IEMOCAP-4_four_prep-false_224_train INITIALIZED
Trying to load checkpoint from file
File not found, starting from scratch...
Epoch #0
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 1.5812 | speaker = 2.2304 | gender = 0.7151 |
# Train accuracies | emotion = 0.286 | speaker = 0.196 | gender = 0.502 |
Epoch #1
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 1.6121 | speaker = 2.3852 | gender = 0.6948 |
# Train accuracies | emotion = 0.299 | speaker = 0.197 | gender = 0.499 |
Epoch #2
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 1.4405 | speaker = 2.1315 | gender = 0.7169 |
# Train accuracies | emotion = 0.306 | speaker = 0.2 | gender = 0.499 |
Epoch #3
TRAIN SIZE 2858
TEST SIZE 714
# Time passed: 1 s
# Epoch losses | emotion = 1.7569 | speaker = 2.4243 | gender = 2.9411 |
# Train accuracies | emotion = 0.298 | speaker = 0.2 | ge

# Я НЕ ПОНИМАЮ, ПОЧЕМУ ТАК ПРОИСХОДИТ.