In [13]:
import models.models_multi_task as md_multi
from models.multitask_training_session import *
from datasets.iemocap import IemocapDataset
from datasets.ramas import RamasDataset
from constants import *
from torchsummary import summary
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import pandas as pd 
from torch.utils.data import DataLoader

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report

# МНОГОЗАДАЧНАЯ МОДЕЛЬ, БИНАРНЫЙ РАМАС

In [14]:
a = torch.load('models/best_models/VGGAverageWeighting__Ramas224BinaryMultiNormal_224_train__best_model.pt')
state_dict = a['state_dict']
model = md_multi.vgg(num_emotions=2, num_speakers=12, num_genders=2, type=11, bn=True)
model.load_state_dict(state_dict=state_dict)
device = torch.device('cuda')
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [15]:
ramas_224_test = RamasDataset(RAMAS_PATH_TO_WAVS_BINARY, 'Ramas224BinaryMulti',
                 spectrogram_shape=224,
                 augmentation=False, padding='repeat', mode='test',  tasks='multi', type='binary')



In [16]:
testloader = DataLoader(ramas_224_test, batch_size=1, shuffle=True, num_workers=4)

In [17]:
def get_predictions_multi(model, testloader, device):
    predictions = {
        'e_true': [],
        'e_pred': [],
        's_true': [],
        's_pred': [],
        'g_true': [],
        'g_pred': [],
    }
    dataset_size = len(testloader.dataset)
    model.eval()
    for i, (data, target) in enumerate(testloader):
        data = data.to(device)
        target_emotion, target_speaker, target_gender = target
        target_emotion = np.asscalar(target_emotion.numpy()[0])
        target_speaker = np.asscalar(target_speaker.numpy()[0])
        target_gender = np.asscalar(target_gender.numpy()[0]) 
        predictions['e_true'].append(target_emotion)
        predictions['s_true'].append(target_speaker)
        predictions['g_true'].append(target_gender)
        with torch.no_grad():
            predicted_emotion, predicted_speaker, predicted_gender = model(data)
        _, pred_labels_emotion = torch.max(predicted_emotion.data, 1)
        _, pred_labels_speaker = torch.max(predicted_speaker.data, 1)
        _, pred_labels_gender = torch.max(predicted_gender.data, 1)
        pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])
        pred_labels_speaker = np.asscalar(pred_labels_speaker.cpu().numpy()[0])
        pred_labels_gender = np.asscalar(pred_labels_gender.cpu().numpy()[0])
        predictions['e_pred'].append(pred_labels_emotion)
        predictions['s_pred'].append(pred_labels_speaker)
        predictions['g_pred'].append(pred_labels_gender)
    return predictions

In [18]:
emotions = {
    0: 'Angry',
    1: 'Not Angry'
}
genders = {
    0: 'Male',
    1: 'Female'
}

In [19]:
predictions = get_predictions_multi(model=model, testloader=testloader, device=torch.device('cuda'))
e_true = predictions['e_true']
e_true = [emotions[_] for _ in e_true]
e_pred = predictions['e_pred']
e_pred = [emotions[_] for _ in e_pred]

s_true = predictions['s_true']
s_pred = predictions['s_pred']

g_true = predictions['g_true']
g_true = [genders[_] for _ in g_true]
g_pred = predictions['g_pred']
g_pred = [genders[_] for _ in g_pred]

  target_emotion = np.asscalar(target_emotion.numpy()[0])
  target_speaker = np.asscalar(target_speaker.numpy()[0])
  target_gender = np.asscalar(target_gender.numpy()[0])
  pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])
  pred_labels_speaker = np.asscalar(pred_labels_speaker.cpu().numpy()[0])
  pred_labels_gender = np.asscalar(pred_labels_gender.cpu().numpy()[0])


In [20]:
metrics = {
    'accuracy': {
        'emotion': accuracy_score(e_true, e_pred),
        'speaker': accuracy_score(s_true, s_pred),
        'gender': accuracy_score(g_true, g_pred)
    },
    'precision': {
        'emotion': precision_score(e_true, e_pred, average='macro'),
        'speaker': precision_score(s_true, s_pred, average='macro'),
        'gender': precision_score(g_true, g_pred, average='macro')
    },
    'recall': {
        'emotion': recall_score(e_true, e_pred, average='macro'),
        'speaker': recall_score(s_true, s_pred, average='macro'),
        'gender': recall_score(g_true, g_pred, average='macro')
    },
    'f1': {
        'emotion': f1_score(e_true, e_pred, average='macro'),
        'speaker': f1_score(s_true, s_pred, average='macro'),
        'gender': f1_score(g_true, g_pred, average='macro')
    }
}

In [21]:
metrics

{'accuracy': {'emotion': 0.7705479452054794,
  'speaker': 0.839041095890411,
  'gender': 0.928082191780822},
 'precision': {'emotion': 0.7637545669460564,
  'speaker': 0.8314524087422526,
  'gender': 0.9310606060606061},
 'recall': {'emotion': 0.7426954071290849,
  'speaker': 0.8380314534584783,
  'gender': 0.9272774205660113},
 'f1': {'emotion': 0.7493048347621061,
  'speaker': 0.8325344151028249,
  'gender': 0.92783759929391}}

In [22]:
print(classification_report(e_true, e_pred))

              precision    recall  f1-score   support

       Angry       0.78      0.87      0.82       179
   Not Angry       0.74      0.62      0.68       113

    accuracy                           0.77       292
   macro avg       0.76      0.74      0.75       292
weighted avg       0.77      0.77      0.77       292



In [23]:
print(classification_report(s_true, s_pred))

              precision    recall  f1-score   support

           0       0.63      0.75      0.69        16
           1       0.83      0.90      0.86        21
           2       0.94      0.89      0.91        18
           3       0.89      0.89      0.89        28
           4       0.78      0.75      0.76        28
           5       0.71      0.85      0.77        20
           6       0.83      0.80      0.82        25
           7       0.81      0.85      0.83        20
           8       0.90      0.82      0.86        22
           9       0.76      0.86      0.81        22
          10       0.97      0.86      0.91        43
          11       0.92      0.83      0.87        29

    accuracy                           0.84       292
   macro avg       0.83      0.84      0.83       292
weighted avg       0.85      0.84      0.84       292



In [24]:
print(classification_report(g_true, g_pred))

              precision    recall  f1-score   support

      Female       0.96      0.89      0.92       143
        Male       0.90      0.97      0.93       149

    accuracy                           0.93       292
   macro avg       0.93      0.93      0.93       292
weighted avg       0.93      0.93      0.93       292



## -----------------------------

# МНОГОЗАДАЧНАЯ МОДЕЛЬ, ДИСКРЕТНЫЙ РАМАС

In [3]:
a = torch.load('models/best_models/VGGAverageWeighting__Ramas224DescreteMulti_224_train__best_model.pt')
state_dict = a['state_dict']
model = md_multi.vgg(num_emotions=8, num_speakers=12, num_genders=2, type=11, bn=True)
model.load_state_dict(state_dict=state_dict)
device = torch.device('cuda:1')
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [5]:
ramas_224_test = RamasDataset(RAMAS_PATH_TO_WAVS, 'Ramas224DescreteMulti',
                 spectrogram_shape=224,
                 augmentation=False, padding='repeat', mode='test',  tasks='multi', type='descrete')



In [6]:
testloader = DataLoader(ramas_224_test, batch_size=1, shuffle=True, num_workers=4)

In [7]:
emotions = {
    0: 'Angry',
    1: 'Disgusted',
    2: 'Happy',
    3: 'Neutral',
    4: 'Sad',
    5: 'Scared',
    6: 'Shame',
    7: 'Surprised'
}
genders = {
    0: 'Male',
    1: 'Female'
}

In [10]:
predictions = get_predictions_multi(model=model, testloader=testloader, device=torch.device('cuda:1'))
e_true = predictions['e_true']
e_true = [emotions[_] for _ in e_true]
e_pred = predictions['e_pred']
e_pred = [emotions[_] for _ in e_pred]

s_true = predictions['s_true']
s_pred = predictions['s_pred']

g_true = predictions['g_true']
g_true = [genders[_] for _ in g_true]
g_pred = predictions['g_pred']
g_pred = [genders[_] for _ in g_pred]

  target_emotion = np.asscalar(target_emotion.numpy()[0])
  target_speaker = np.asscalar(target_speaker.numpy()[0])
  target_gender = np.asscalar(target_gender.numpy()[0])
  pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])
  pred_labels_speaker = np.asscalar(pred_labels_speaker.cpu().numpy()[0])
  pred_labels_gender = np.asscalar(pred_labels_gender.cpu().numpy()[0])


In [11]:
metrics = {
    'accuracy': {
        'emotion': accuracy_score(e_true, e_pred),
        'speaker': accuracy_score(s_true, s_pred),
        'gender': accuracy_score(g_true, g_pred)
    },
    'precision': {
        'emotion': precision_score(e_true, e_pred, average='macro'),
        'speaker': precision_score(s_true, s_pred, average='macro'),
        'gender': precision_score(g_true, g_pred, average='macro')
    },
    'recall': {
        'emotion': recall_score(e_true, e_pred, average='macro'),
        'speaker': recall_score(s_true, s_pred, average='macro'),
        'gender': recall_score(g_true, g_pred, average='macro')
    },
    'f1': {
        'emotion': f1_score(e_true, e_pred, average='macro'),
        'speaker': f1_score(s_true, s_pred, average='macro'),
        'gender': f1_score(g_true, g_pred, average='macro')
    }
}
metrics

{'accuracy': {'emotion': 0.4672131147540984,
  'speaker': 0.8176229508196722,
  'gender': 0.9364754098360656},
 'precision': {'emotion': 0.5483558954441596,
  'speaker': 0.810842651693496,
  'gender': 0.9373272665992585},
 'recall': {'emotion': 0.4238572838034187,
  'speaker': 0.804147882111432,
  'gender': 0.9360706424022449},
 'f1': {'emotion': 0.4354711201257131,
  'speaker': 0.8017041963597779,
  'gender': 0.9363789673778193}}

In [12]:
print(classification_report(e_true, e_pred))

              precision    recall  f1-score   support

       Angry       0.49      0.54      0.52       166
   Disgusted       0.31      0.34      0.33       100
       Happy       0.42      0.84      0.56       221
     Neutral       0.72      0.71      0.72        69
         Sad       0.69      0.34      0.45       119
      Scared       0.51      0.20      0.28       193
       Shame       0.86      0.27      0.41        22
   Surprised       0.38      0.15      0.22        86

    accuracy                           0.47       976
   macro avg       0.55      0.42      0.44       976
weighted avg       0.50      0.47      0.44       976



In [13]:
print(classification_report(s_true, s_pred))

              precision    recall  f1-score   support

           0       0.57      0.70      0.63        40
           1       0.71      0.82      0.76        71
           2       0.91      0.87      0.89        71
           3       0.84      0.88      0.86        97
           4       0.77      0.72      0.75        69
           5       0.75      0.73      0.74        74
           6       0.84      0.89      0.87        95
           7       0.85      0.71      0.78        70
           8       0.98      0.64      0.77        80
           9       0.73      0.93      0.82        75
          10       0.91      0.84      0.87       112
          11       0.87      0.91      0.89       122

    accuracy                           0.82       976
   macro avg       0.81      0.80      0.80       976
weighted avg       0.83      0.82      0.82       976



In [14]:
print(classification_report(g_true, g_pred))

              precision    recall  f1-score   support

      Female       0.95      0.92      0.93       478
        Male       0.92      0.96      0.94       498

    accuracy                           0.94       976
   macro avg       0.94      0.94      0.94       976
weighted avg       0.94      0.94      0.94       976



# ---------------------------------

# VGG, ДИСКРЕТНЫЙ РАМАС

In [16]:
with open('models/best_models/VGGNet--Ramas224Descrete_224_train_augmentation-true.md', 'rb') as f:
    net = pickle.load(f)

In [19]:
model = net.module_
ramas_224_test = RamasDataset(RAMAS_PATH_TO_WAVS, 'Ramas224DescreteMulti',
                 spectrogram_shape=224,
                 augmentation=False, padding='repeat', mode='test',  tasks='emotion', type='descrete')
testloader = DataLoader(ramas_224_test, batch_size=1, shuffle=True, num_workers=4)
emotions = {
    0: 'Angry',
    1: 'Disgusted',
    2: 'Happy',
    3: 'Neutral',
    4: 'Sad',
    5: 'Scared',
    6: 'Shame',
    7: 'Surprised'
}
genders = {
    0: 'Male',
    1: 'Female'
}



In [6]:
def get_predictions_one(model, testloader, device):
    predictions = {
        'e_true': [],
        'e_pred': [],
    }
    dataset_size = len(testloader.dataset)
    model.to(device)
    model.eval()
    for i, (data, target) in enumerate(testloader):
        data = data.to(device)
        target_emotion = target
        target_emotion = np.asscalar(target_emotion.numpy()[0])
        predictions['e_true'].append(target_emotion)
        with torch.no_grad():
            predicted_emotion = model(data)
        _, pred_labels_emotion = torch.max(predicted_emotion.data, 1)
        pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])
        predictions['e_pred'].append(pred_labels_emotion)
    return predictions

In [24]:
predictions = get_predictions_one(model=model, testloader=testloader, device=torch.device('cuda:1'))
e_true = predictions['e_true']
e_true = [emotions[_] for _ in e_true]
e_pred = predictions['e_pred']
e_pred = [emotions[_] for _ in e_pred]

  target_emotion = np.asscalar(target_emotion.numpy()[0])
  pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])


In [25]:
metrics = {
    'accuracy': accuracy_score(e_true, e_pred),
    'precision': precision_score(e_true, e_pred, average='macro'),
    'recall': recall_score(e_true, e_pred, average='macro'),
    'f1': f1_score(e_true, e_pred, average='macro'),
}
metrics

{'accuracy': 0.4774590163934426,
 'precision': 0.43051105490958685,
 'recall': 0.42669564893143114,
 'f1': 0.38814970531461634}

In [26]:
print(classification_report(e_true, e_pred))

              precision    recall  f1-score   support

       Angry       0.48      0.55      0.52       166
   Disgusted       0.30      0.60      0.40       100
       Happy       0.53      0.57      0.55       221
     Neutral       0.51      0.80      0.62        69
         Sad       0.57      0.50      0.53       119
      Scared       0.55      0.37      0.45       193
       Shame       0.00      0.00      0.00        22
   Surprised       0.50      0.02      0.04        86

    accuracy                           0.48       976
   macro avg       0.43      0.43      0.39       976
weighted avg       0.49      0.48      0.45       976



# ----------------------------------------------

# VGG, БИНАРНЫЙ РАМАС

In [3]:
with open('models/best_models/VGGNet--Ramas224Binary_224_train_augmentation-true.md', 'rb') as f:
    net = pickle.load(f)

In [4]:
model = net.module_
ramas_224_test = RamasDataset(RAMAS_PATH_TO_WAVS_BINARY, 'Ramas224BinaryMulti',
                 spectrogram_shape=224,
                 augmentation=False, padding='repeat', mode='test',  tasks='emotion', type='binary')
testloader = DataLoader(ramas_224_test, batch_size=1, shuffle=True, num_workers=4)
emotions = {
    0: 'Angry',
    1: 'Not Angry'
}
genders = {
    0: 'Male',
    1: 'Female'
}



In [7]:
predictions = get_predictions_one(model=model, testloader=testloader, device=torch.device('cuda:1'))
e_true = predictions['e_true']
e_true = [emotions[_] for _ in e_true]
e_pred = predictions['e_pred']
e_pred = [emotions[_] for _ in e_pred]
metrics = {
    'accuracy': accuracy_score(e_true, e_pred),
    'precision': precision_score(e_true, e_pred, average='macro'),
    'recall': recall_score(e_true, e_pred, average='macro'),
    'f1': f1_score(e_true, e_pred, average='macro'),
}
metrics

  target_emotion = np.asscalar(target_emotion.numpy()[0])
  pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])


{'accuracy': 0.7294520547945206,
 'precision': 0.7147094926350246,
 'recall': 0.7075443713847827,
 'f1': 0.7103429224374993}

In [8]:
print(classification_report(e_true, e_pred))

              precision    recall  f1-score   support

       Angry       0.77      0.80      0.78       179
   Not Angry       0.66      0.61      0.64       113

    accuracy                           0.73       292
   macro avg       0.71      0.71      0.71       292
weighted avg       0.73      0.73      0.73       292



# ----------------------------------------------------------

# VGG, IEMOCAP

In [2]:
with open('models/best_models/VggNet--IEMOCAP-4_four_prep-false_224_train_augmentation-true.md', 'rb') as f:
    net = pickle.load(f)

In [3]:
test_ds = IemocapDataset(  # Без препроцессинга, тестовый
    PATH_TO_PICKLE, IEMOCAP_PATH_TO_WAVS, IEMOCAP_PATH_TO_EGEMAPS, IEMOCAP_PATH_FOR_PARSER, 
    base_name='IEMOCAP-4', label_type='four', mode='test', preprocessing=False, 
    augmentation=True, padding='repeat', spectrogram_shape=224, spectrogram_type='melspec', tasks='emotion'
)



In [5]:
model = net.module_
testloader = DataLoader(test_ds, batch_size=1, shuffle=True, num_workers=4)
emotions = {
    0: 'Anger',
    1: 'Happiness',
    2: 'Neutral',
    3: 'Sadness'
}
genders = {
    0: 'Female',
    1: 'Male'
}

In [8]:
predictions = get_predictions_one(model=model, testloader=testloader, device=torch.device('cuda:1'))
e_true = predictions['e_true']
e_true = [emotions[_] for _ in e_true]
e_pred = predictions['e_pred']
e_pred = [emotions[_] for _ in e_pred]
metrics = {
    'accuracy': accuracy_score(e_true, e_pred),
    'precision': precision_score(e_true, e_pred, average='macro'),
    'recall': recall_score(e_true, e_pred, average='macro'),
    'f1': f1_score(e_true, e_pred, average='macro'),
}
metrics

  target_emotion = np.asscalar(target_emotion.numpy()[0])
  pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])


{'accuracy': 0.6954087346024636,
 'precision': 0.6742877063071562,
 'recall': 0.6311857240118967,
 'f1': 0.6307661952876085}

In [9]:
print(classification_report(e_true, e_pred))

              precision    recall  f1-score   support

       Anger       0.82      0.83      0.83       220
   Happiness       0.56      0.20      0.30       118
     Neutral       0.66      0.75      0.70       340
     Sadness       0.65      0.74      0.69       215

    accuracy                           0.70       893
   macro avg       0.67      0.63      0.63       893
weighted avg       0.69      0.70      0.68       893



# -----------------------------------------------------------------

# ALEXNET, ДИСКРЕТНЫЙ РАМАС

In [13]:
with open('models/best_models/AlexNet--Ramas224Descrete_224_train_augmentation-true.md', 'rb') as f:
    net = pickle.load(f)

In [14]:
model = net.module_
ramas_224_test = RamasDataset(RAMAS_PATH_TO_WAVS, 'Ramas224DescreteMulti',
                 spectrogram_shape=224,
                 augmentation=False, padding='repeat', mode='test',  tasks='emotion', type='descrete')
testloader = DataLoader(ramas_224_test, batch_size=1, shuffle=True, num_workers=4)
emotions = {
    0: 'Angry',
    1: 'Disgusted',
    2: 'Happy',
    3: 'Neutral',
    4: 'Sad',
    5: 'Scared',
    6: 'Shame',
    7: 'Surprised'
}
genders = {
    0: 'Male',
    1: 'Female'
}



In [15]:
predictions = get_predictions_one(model=model, testloader=testloader, device=torch.device('cuda:1'))
e_true = predictions['e_true']
e_true = [emotions[_] for _ in e_true]
e_pred = predictions['e_pred']
e_pred = [emotions[_] for _ in e_pred]
metrics = {
    'accuracy': accuracy_score(e_true, e_pred),
    'precision': precision_score(e_true, e_pred, average='macro'),
    'recall': recall_score(e_true, e_pred, average='macro'),
    'f1': f1_score(e_true, e_pred, average='macro'),
}
metrics

  target_emotion = np.asscalar(target_emotion.numpy()[0])
  pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])


{'accuracy': 0.41598360655737704,
 'precision': 0.29468686972631836,
 'recall': 0.3533301378754125,
 'f1': 0.31536628357622454}

In [16]:
print(classification_report(e_true, e_pred))

              precision    recall  f1-score   support

       Angry       0.48      0.54      0.51       166
   Disgusted       0.00      0.00      0.00       100
       Happy       0.44      0.47      0.45       221
     Neutral       0.74      0.65      0.69        69
         Sad       0.40      0.77      0.53       119
      Scared       0.30      0.39      0.34       193
       Shame       0.00      0.00      0.00        22
   Surprised       0.00      0.00      0.00        86

    accuracy                           0.42       976
   macro avg       0.29      0.35      0.32       976
weighted avg       0.34      0.42      0.37       976



  _warn_prf(average, modifier, msg_start, len(result))


# ----------------------------------------------

# ALEXNET, БИНАРНЫЙ РАМАС

In [9]:
with open('models/best_models/AlexNet--Ramas224Binary_224_train_augmentation-true.md', 'rb') as f:
    net = pickle.load(f)

In [10]:
model = net.module_
ramas_224_test = RamasDataset(RAMAS_PATH_TO_WAVS_BINARY, 'Ramas224DescreteMulti',
                 spectrogram_shape=224,
                 augmentation=False, padding='repeat', mode='test',  tasks='emotion', type='binary')
testloader = DataLoader(ramas_224_test, batch_size=1, shuffle=True, num_workers=4)
emotions = {
    0: 'Angry',
    1: 'Not Angry'
}
genders = {
    0: 'Male',
    1: 'Female'
}



In [11]:
predictions = get_predictions_one(model=model, testloader=testloader, device=torch.device('cuda:1'))
e_true = predictions['e_true']
e_true = [emotions[_] for _ in e_true]
e_pred = predictions['e_pred']
e_pred = [emotions[_] for _ in e_pred]
metrics = {
    'accuracy': accuracy_score(e_true, e_pred),
    'precision': precision_score(e_true, e_pred, average='macro'),
    'recall': recall_score(e_true, e_pred, average='macro'),
    'f1': f1_score(e_true, e_pred, average='macro'),
}
metrics

  target_emotion = np.asscalar(target_emotion.numpy()[0])
  pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])


{'accuracy': 0.7123287671232876,
 'precision': 0.6972242692213215,
 'recall': 0.6984723389528847,
 'f1': 0.6978119455943228}

In [12]:
print(classification_report(e_true, e_pred))

              precision    recall  f1-score   support

       Angry       0.77      0.76      0.76       179
   Not Angry       0.63      0.64      0.63       113

    accuracy                           0.71       292
   macro avg       0.70      0.70      0.70       292
weighted avg       0.71      0.71      0.71       292



# -----------------------------------------------

# ALEXNET, IEMOCAP

In [21]:
with open('models/best_models/AlexNet--IEMOCAP-4_four_prep-false_224_train_augmentation-true.md', 'rb') as f:
    net = pickle.load(f)

In [22]:
test_ds = IemocapDataset(  # Без препроцессинга, тестовый
    PATH_TO_PICKLE, IEMOCAP_PATH_TO_WAVS, IEMOCAP_PATH_TO_EGEMAPS, IEMOCAP_PATH_FOR_PARSER, 
    base_name='IEMOCAP-4', label_type='four', mode='test', preprocessing=False, 
    augmentation=True, padding='repeat', spectrogram_shape=224, spectrogram_type='melspec', tasks='emotion'
)



In [23]:
model = net.module_
testloader = DataLoader(test_ds, batch_size=1, shuffle=True, num_workers=4)
emotions = {
    0: 'Anger',
    1: 'Happiness',
    2: 'Neutral',
    3: 'Sadness'
}
genders = {
    0: 'Female',
    1: 'Male'
}

In [24]:
predictions = get_predictions_one(model=model, testloader=testloader, device=torch.device('cuda:1'))
e_true = predictions['e_true']
e_true = [emotions[_] for _ in e_true]
e_pred = predictions['e_pred']
e_pred = [emotions[_] for _ in e_pred]
metrics = {
    'accuracy': accuracy_score(e_true, e_pred),
    'precision': precision_score(e_true, e_pred, average='macro'),
    'recall': recall_score(e_true, e_pred, average='macro'),
    'f1': f1_score(e_true, e_pred, average='macro'),
}
metrics

  target_emotion = np.asscalar(target_emotion.numpy()[0])
  pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])


{'accuracy': 0.6875699888017918,
 'precision': 0.6697203123673713,
 'recall': 0.6239613611596501,
 'f1': 0.613368479215771}

In [25]:
print(classification_report(e_true, e_pred))

              precision    recall  f1-score   support

       Anger       0.85      0.79      0.82       220
   Happiness       0.55      0.15      0.24       118
     Neutral       0.69      0.71      0.70       340
     Sadness       0.59      0.85      0.70       215

    accuracy                           0.69       893
   macro avg       0.67      0.62      0.61       893
weighted avg       0.69      0.69      0.67       893



# -------------------------------------------------------

# МНОГОЗАДАЧНАЯ, IEMOCAP

In [3]:
a = torch.load('models/training_sessions/VGGAverageWeighting__IEMOCAP-4_four_prep-false_224_train.pt')
state_dict = a['state_dict']
model = md_multi.vgg(num_emotions=4, num_speakers=10, num_genders=2, type=11, bn=True)
model.load_state_dict(state_dict=state_dict)
device = torch.device('cuda:1')
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [6]:
test_ds = IemocapDataset(  # Без препроцессинга, тестовый
    PATH_TO_PICKLE, IEMOCAP_PATH_TO_WAVS, IEMOCAP_PATH_TO_EGEMAPS, IEMOCAP_PATH_FOR_PARSER, 
    base_name='IEMOCAP-4', label_type='four', mode='test', preprocessing=False, 
    augmentation=True, padding='repeat', spectrogram_shape=224, spectrogram_type='melspec', tasks=('emotion', 'speaker', 'gender')
)
testloader = DataLoader(test_ds, batch_size=1, shuffle=True, num_workers=4)
emotions = {
    0: 'Anger',
    1: 'Happiness',
    2: 'Neutral',
    3: 'Sadness'
}
genders = {
    0: 'Female',
    1: 'Male'
}



In [8]:
predictions = get_predictions_multi(model=model, testloader=testloader, device=torch.device('cuda:1'))
e_true = predictions['e_true']
e_true = [emotions[_] for _ in e_true]
e_pred = predictions['e_pred']
e_pred = [emotions[_] for _ in e_pred]

s_true = predictions['s_true']
s_pred = predictions['s_pred']

g_true = predictions['g_true']
g_true = [genders[_] for _ in g_true]
g_pred = predictions['g_pred']
g_pred = [genders[_] for _ in g_pred]
metrics = {
    'accuracy': {
        'emotion': accuracy_score(e_true, e_pred),
        'speaker': accuracy_score(s_true, s_pred),
        'gender': accuracy_score(g_true, g_pred)
    },
    'precision': {
        'emotion': precision_score(e_true, e_pred, average='macro'),
        'speaker': precision_score(s_true, s_pred, average='macro'),
        'gender': precision_score(g_true, g_pred, average='macro')
    },
    'recall': {
        'emotion': recall_score(e_true, e_pred, average='macro'),
        'speaker': recall_score(s_true, s_pred, average='macro'),
        'gender': recall_score(g_true, g_pred, average='macro')
    },
    'f1': {
        'emotion': f1_score(e_true, e_pred, average='macro'),
        'speaker': f1_score(s_true, s_pred, average='macro'),
        'gender': f1_score(g_true, g_pred, average='macro')
    }
}
metrics

  target_emotion = np.asscalar(target_emotion.numpy()[0])
  target_speaker = np.asscalar(target_speaker.numpy()[0])
  target_gender = np.asscalar(target_gender.numpy()[0])
  pred_labels_emotion = np.asscalar(pred_labels_emotion.cpu().numpy()[0])
  pred_labels_speaker = np.asscalar(pred_labels_speaker.cpu().numpy()[0])
  pred_labels_gender = np.asscalar(pred_labels_gender.cpu().numpy()[0])


{'accuracy': {'emotion': 0.7122060470324748,
  'speaker': 0.7816349384098544,
  'gender': 0.9686450167973124},
 'precision': {'emotion': 0.6846588096760864,
  'speaker': 0.7737766685940236,
  'gender': 0.9689705365657783},
 'recall': {'emotion': 0.6657777987390894,
  'speaker': 0.7713062396222561,
  'gender': 0.9686600254812853},
 'f1': {'emotion': 0.6727979791355514,
  'speaker': 0.7674540844137657,
  'gender': 0.9686402584632673}}

In [9]:
print(classification_report(e_true, e_pred))

              precision    recall  f1-score   support

       Anger       0.82      0.76      0.79       220
   Happiness       0.50      0.39      0.44       118
     Neutral       0.70      0.78      0.74       340
     Sadness       0.72      0.73      0.73       215

    accuracy                           0.71       893
   macro avg       0.68      0.67      0.67       893
weighted avg       0.71      0.71      0.71       893



In [10]:
print(classification_report(s_true, s_pred))

              precision    recall  f1-score   support

           0       0.81      0.68      0.74        90
           1       0.78      0.90      0.84        71
           2       0.61      0.70      0.65        63
           3       0.78      0.71      0.75        70
           4       0.82      0.85      0.84       114
           5       0.72      0.52      0.60        66
           6       0.74      0.91      0.82        94
           7       0.80      0.84      0.82        97
           8       0.81      0.89      0.85       103
           9       0.86      0.71      0.78       125

    accuracy                           0.78       893
   macro avg       0.77      0.77      0.77       893
weighted avg       0.78      0.78      0.78       893



In [11]:
print(classification_report(g_true, g_pred))

              precision    recall  f1-score   support

      Female       0.96      0.98      0.97       446
        Male       0.98      0.96      0.97       447

    accuracy                           0.97       893
   macro avg       0.97      0.97      0.97       893
weighted avg       0.97      0.97      0.97       893

