<a href="https://colab.research.google.com/github/dr-antimonious/GRU-Emotion-Classification/blob/main/RUSU_Projekt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Treniranje

In [None]:
##### IMPORTS, CONSTANTS, UTILITIES

%%capture

import os
import gc
import sys
import shutil
import math
import numpy as np
import tarfile
import librosa
import transformers
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from scipy.io import loadmat
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
from random import sample
import seaborn as sns

##### PyTorch

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.nn.utils.rnn import pad_sequence

import torchvision
from torchvision import transforms, utils

!pip install torchinfo
from torchinfo import summary

%matplotlib inline
%load_ext tensorboard

DRIVE =                 '/content/drive/MyDrive/LSSED/'   # DRIVE FOLDER WHERE EVERYTHING FOR THE PROJECT IS STORED
WAV2VEC2_LAST =         '/content/Wav2Vec2_LAST/'         # FOLDER WHERE THE FINAL-LAYER WAV2VEC2 FEATURES ARE UNPACKED
WAV2VEC2_LAST_TRAIN =   f'{WAV2VEC2_LAST}TRAIN/'          # FOR TRAINING
WAV2VEC2_LAST_EVAL =    f'{WAV2VEC2_LAST}EVAL/'           # FOR EVALUATION
WAV2VEC2_SAMPLE =       '/content/content/SAMPLE/'        # FOLDER WHERE THE SAMPLE WAV2VEC2 FEATURES ARE UNPACKED
TRAIN_METADATA =        f'{DRIVE}train_metadata.csv'      # TRAIN DATA FILE
EVAL_METADATA =         f'{DRIVE}eval_metadata.csv'       # EVALUATION DATA FILE
SAMPLE_METADATA =       f'{DRIVE}sample_metadata.csv'     # SAMPLE DATA FILE
WAV2VEC2_LAST_PACKED =  f'{DRIVE}Wav2Vec2_LAST.tar.gz'    # PACKED FINAL-LAYER WAV2VEC2 FEATURES
WAV2VEC2_SAM_PACKED =   f'{DRIVE}Wav2Vec2_SAMPLE.tar.gz'  # PACKED SAMPLE WAV2VEC2 FEATURES
WAV2VEC2_NAME =         'facebook/wav2vec2-large-xlsr-53' # NAME OF THE PRE-TRAINED WAV2VEC2 MODEL
SAMPLING_RATE =         48_000                            # SAMPLING RATE OF THE RECORDINGS
BATCH_SIZE =            128                               # BATCH SIZE
DEVICE =                torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Each tuple is ordered as:
# (root folder, train folder, eval folder, packed archive, train metadata, eval metadata)
WAV2VEC2_LAST_TUPLE = (
    WAV2VEC2_LAST,
    WAV2VEC2_LAST_TRAIN,
    WAV2VEC2_LAST_EVAL,
    WAV2VEC2_LAST_PACKED,
    TRAIN_METADATA,
    EVAL_METADATA,
)
WAV2VEC2_SAM_TUPLE = (
    WAV2VEC2_SAMPLE,
    WAV2VEC2_SAMPLE,
    WAV2VEC2_SAMPLE,
    WAV2VEC2_SAM_PACKED,
    SAMPLE_METADATA,
    SAMPLE_METADATA,
)

#torch.backends.cudnn.deterministic = True
#torch.backends.cudnn.benchmark = False

In [None]:
##### DELETING THE GOOGLE COLAB DEFAULT FILES

# Best-effort cleanup: a missing directory is fine, and any other filesystem
# problem is reported instead of being silently swallowed.
try:
  shutil.rmtree('/content/sample_data/')
except FileNotFoundError:
  pass
except OSError as err:
  print("Failed to remove sample_data directory:", err)

In [None]:
# Mount Google Drive under /content/drive; force_remount re-mounts it even if
# a mount is already present in this session.
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

In [None]:
##### CHOOSING THE DATASET

# Accepted answers (compared after upper-casing) and the path tuple each selects
_DATASET_OPTIONS = {
    'LAST': WAV2VEC2_LAST_TUPLE,
    'TEST': WAV2VEC2_SAM_TUPLE,
}

for _line in (
    '---------- PLEASE CHOOSE A DATASET TO BE USED IN THIS SESSION ----------',
    'Type "LAST" for LSSED dataset of features extracted from the final layer',
    'Type "TEST" for 1.2k sample LSSED dataset - ONLY FOR TESTING PURPOSES',
    '---------- PLEASE CHOOSE A DATASET TO BE USED IN THIS SESSION ----------',
):
  print(_line)

# Keep asking until one of the accepted answers is typed
choice = input('Type in your choice: ').upper()
while choice not in _DATASET_OPTIONS:
  print(choice, 'is not a correct input.')
  choice = input('Type in your choice: ').upper()

WAV2VEC2_TUPLE = _DATASET_OPTIONS[choice]

print('You chose:', choice)
print(WAV2VEC2_TUPLE)

(WAV2VEC2, WAV2VEC2_TRAIN, WAV2VEC2_EVAL,
 WAV2VEC2_PACKED, TRAIN_METADATA_PATH, EVAL_METADATA_PATH) = WAV2VEC2_TUPLE

In [None]:
##### UNPACKING THE DATASET FOR USE IN COLAB

# The context manager closes the archive even if extraction fails part-way.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only use it on archives you created yourself.
with tarfile.open(WAV2VEC2_PACKED, 'r:gz') as archive:
  archive.extractall('/content/')

del archive

In [None]:
##### INITIALIZING THE DATAFRAMES THAT LINK FILE VVIDS TO EMOTIONS (LABELS)

def _read_metadata(csv_path):
  """Read a metadata CSV, drop its 'Unnamed: 0' column and rebuild the row index."""
  frame = pd.read_csv(csv_path)
  frame = frame.drop(columns = "Unnamed: 0")
  frame = frame.reset_index()
  return frame.drop(columns = "index")

train_metadata = _read_metadata(TRAIN_METADATA_PATH)
eval_metadata =  _read_metadata(EVAL_METADATA_PATH)

In [None]:
##### DEFINING THE CATEGORICAL VALUES

# Cast the same three columns to pandas' category dtype in both frames
for _frame in (train_metadata, eval_metadata):
  for _column in ('Age', 'Gender', 'Emotion'):
    _frame[_column] = _frame[_column].astype('category')

In [None]:
##### ENCODING THE CATEGORICAL VALUES

# One indicator column per emotion, computed separately for each split
encoded_train, encoded_eval = (
    pd.get_dummies(frame['Emotion']) for frame in (train_metadata, eval_metadata)
)

# Append the indicator columns to the right of the existing metadata columns
train_metadata = pd.concat([train_metadata, encoded_train], axis = 'columns')
eval_metadata = pd.concat([eval_metadata, encoded_eval], axis = 'columns')

In [None]:
##### CONVERTING FROM THE MAT FORMAT TO AN ARRAY

def mat_to_array(mat_format):
  """Return the 'w2v2' entry of a loaded .mat mapping as a list of row lists."""
  rows = []
  for row in mat_format['w2v2']:
    rows.append(list(row))
  return rows

In [None]:
##### LOADING A FEATURE ARRAY

def load_feature_array(mat_file):
  """Load the 'w2v2' array stored in a .mat file as a float32 tensor.

  Arguments:
      mat_file (string): Path to the .mat file.

  Returns:
      torch.Tensor: float32 tensor with the same shape as the stored array.

  Raises:
      KeyError: if the file has no 'w2v2' variable.
  """
  mat_format = loadmat(mat_file)
  # Convert the NumPy array directly; going through nested Python lists of
  # NumPy scalars is far slower and gives the same values.
  features = np.asarray(mat_format['w2v2'], dtype = np.float32)
  return torch.tensor(features)

In [None]:
##### DEFINING THE CUSTOM DATASET

class LSSED_Dataset(Dataset):
  """Dataset that maps a metadata row to a feature-file path and its emotion flags."""

  # Metadata columns read for the 'emotion' list, in output order
  _EMOTION_COLUMNS = ('Angry', 'Happy', 'Neutral', 'Sad', 'Disgusted')

  def __init__(self, metadata, directory, transform = None):
    """
    Arguments:
        metadata (DataFrame):             Pandas DataFrame containing dataset information.
        directory (string):               Path to the directory with the feature array files.
        transform (class | list | None):  Stored on the instance; not applied by this class.
    """
    self.metadata = metadata
    self.directory = directory
    self.transform = transform

  def __len__(self):
    """Number of metadata rows."""
    return len(self.metadata)

  def __getitem__(self, idx):
    """Return {'path': ..., 'emotion': [...]} for the row labelled `idx`."""
    # Tensor indices are converted to plain Python values before the lookup
    key = idx.tolist() if torch.is_tensor(idx) else idx

    flags = [self.metadata[column][key] for column in self._EMOTION_COLUMNS]
    location = self.directory + self.metadata['Vvid'][key]

    return {'path': location, 'emotion': flags}

In [None]:
##### INITIALIZING THE DATASETS

# One dataset per split, each pairing its metadata frame with its feature folder
train_dataset = LSSED_Dataset(metadata = train_metadata, directory = WAV2VEC2_TRAIN)
eval_dataset = LSSED_Dataset(metadata = eval_metadata, directory = WAV2VEC2_EVAL)

In [None]:
##### CUSTOM COLLATE FUNCTION THAT PADS THE BATCH

def collate_fn(batch):
  """Load the feature arrays of a batch and zero-pad them to a common length.

  Arguments:
      batch (list): Samples as produced by the dataset ('path' and 'emotion' keys).

  Returns:
      tuple: (padded features with the batch dimension first, target tensor,
              list of the unpadded first-dimension sizes).
  """
  features = [load_feature_array(item['path']) for item in batch]
  emotions = [item['emotion'] for item in batch]
  sizes = [tensor.shape[0] for tensor in features]

  padded = pad_sequence(features, batch_first = True, padding_value = 0)
  labels = torch.as_tensor(emotions)

  return (padded, labels, sizes)

In [None]:
##### INITIALIZING THE DATALOADERS

# Only the training loader shuffles; both pad their batches through collate_fn
train_dataloader = DataLoader(dataset = train_dataset, batch_size = BATCH_SIZE,
                              shuffle = True, num_workers = 12,
                              collate_fn = collate_fn)
eval_dataloader = DataLoader(dataset = eval_dataset, batch_size = BATCH_SIZE,
                             shuffle = False, num_workers = 12,
                             collate_fn = collate_fn)

In [None]:
##### DEFINING THE CLASSIFIER

class EmotionClassifier(nn.Module):
  """Many-to-one LSTM classifier: LayerNorm -> LSTM -> Linear on the last valid step.

  Arguments:
      input_size (int):   Size of one input feature vector.
      hidden_size (int):  Size of the LSTM hidden state.
      num_layers (int):   Number of stacked LSTM layers.
      num_classes (int):  Number of output logits.

  The defaults reproduce the original fixed architecture (1024 / 1024 / 3 / 5).
  """

  def __init__(self, input_size = 1024, hidden_size = 1024, num_layers = 3, num_classes = 5):
    super().__init__()

    self.norm = nn.LayerNorm(normalized_shape = input_size)
    self.rnn1 = nn.LSTM(input_size = input_size, hidden_size = hidden_size,
                        num_layers = num_layers, batch_first = True,
                        bidirectional = False)
    self.linear1 = nn.Linear(hidden_size, num_classes)

  def forward(self, x, length):
    """Return the logits computed from the LSTM output at step `length - 1`.

    Arguments:
        x (Tensor):                  Batched (batch, time, features) or unbatched
                                     (time, features) input.
        length (int | list | Tensor): Number of valid steps, per sequence for
                                     batched input.

    Returns:
        Tensor: One row of logits per selected step.
    """
    out = self.norm(x)
    out, _ = self.rnn1(out)

    # Many-to-one: keep only the output of the last valid (unpadded) step
    last_step = torch.as_tensor(length, dtype = torch.long, device = out.device) - 1

    # Explicit rank test instead of a bare try/except, which also hid genuine
    # indexing errors by retrying with the unbatched indexing.
    if out.dim() == 3:
      rows = torch.arange(out.shape[0], device = out.device)
      out = out[rows, last_step, :]
    else:
      out = out[last_step, :]

    out = self.linear1(out)
    return out

In [None]:
##### INITIALIZING THE CLASSIFIER

# Build the classifier with its default arguments and move it to DEVICE
model = EmotionClassifier().to(DEVICE)

In [None]:
def _eval_scores(confusion):
  """Summarize a (true class, predicted class) count matrix.

  Returns (precision, recall, f1, predicted_counts): the three scores are
  per-class values averaged with each class weighted by its share of the true
  labels; predicted_counts lists how often each class was predicted.
  """
  counts = confusion.to(torch.float64)
  true_pos = counts.diagonal()
  predicted = counts.sum(dim = 0)   # TP + FP per class
  support = counts.sum(dim = 1)     # TP + FN per class

  prec = true_pos / (predicted + 1e-10)
  rec = true_pos / (support + 1e-10)
  f1 = (2 * prec * rec) / (prec + rec + 1e-10)

  # Weights come from the labels actually seen, not from hard-coded class sizes
  weights = support / support.sum().clamp(min = 1.0)

  return ((weights * prec).sum().item(), (weights * rec).sum().item(),
          (weights * f1).sum().item(), predicted.tolist())


def eval_model(model, loss_module, val_data_loader):
  """Evaluate a 5-class model on a validation loader.

  Arguments:
      model (nn.Module):  Called as model(inputs, lengths); returns logits.
      loss_module:        Called as loss_module(logits, class_indices).
      val_data_loader:    Iterable of (inputs, one-hot labels, lengths) with a
                          `dataset` attribute supporting len().

  Returns:
      tuple: (loss, accuracy, weighted precision, weighted recall, weighted F1)
             as Python floats.

  Raises:
      ZeroDivisionError: if the dataset is empty.

  The model is left in eval mode. How often each class was predicted is printed.
  """
  num_classes = 5

  ##### Set model to eval mode
  model.eval()

  ##### Run on the device the model lives on (CPU for parameter-less models)
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  ##### Initializing necessary variables; confusion[true class, predicted class]
  confusion = torch.zeros(num_classes, num_classes, dtype = torch.long)
  eval_loss = 0.0

  ##### Deactivate gradients
  with torch.no_grad():
    for data_inputs, labels, lengths in val_data_loader:

      ##### Moving data to device; one-hot labels become class indices
      data_inputs = data_inputs.to(device)
      labels = labels.to(device)
      _, labels = torch.max(labels, dim = 1)

      ##### Running the model on the input data
      output = model(data_inputs, lengths)

      ##### Calculating loss
      loss = loss_module(output, labels)

      ##### Counting every (true, predicted) pair of the batch in one pass
      _, pred_labels = torch.max(output, dim = 1)
      pairs = labels * num_classes + pred_labels
      confusion += torch.bincount(pairs, minlength = num_classes ** 2).view(num_classes, num_classes).cpu()

      eval_loss += loss.item() * len(data_inputs)

      ##### Cleaning up
      del data_inputs, labels, output, loss, pred_labels, pairs

  ##### Various processing
  eval_loss /= len(val_data_loader.dataset)
  eval_acc = int(confusion.diagonal().sum()) / int(confusion.sum())
  eval_prec, eval_rec, eval_f1, predicted_counts = _eval_scores(confusion)

  for word, count in zip(('zeros', 'ones', 'twos', 'threes', 'fours'), predicted_counts):
    print("Eval " + word + ":", str(count))

  return eval_loss, eval_acc, eval_prec, eval_rec, eval_f1

In [None]:
def _train_scores(confusion):
  """Summarize a (true class, predicted class) count matrix.

  Returns (precision, recall, f1, predicted_counts): the three scores are
  per-class values averaged with each class weighted by its share of the true
  labels; predicted_counts lists how often each class was predicted.
  """
  counts = confusion.to(torch.float64)
  true_pos = counts.diagonal()
  predicted = counts.sum(dim = 0)   # TP + FP per class
  support = counts.sum(dim = 1)     # TP + FN per class

  prec = true_pos / (predicted + 1e-10)
  rec = true_pos / (support + 1e-10)
  f1 = (2 * prec * rec) / (prec + rec + 1e-10)

  # Weights come from the labels actually seen, not from hard-coded class sizes
  weights = support / support.sum().clamp(min = 1.0)

  return ((weights * prec).sum().item(), (weights * rec).sum().item(),
          (weights * f1).sum().item(), predicted.tolist())


def train_model_with_logger(model, loss_module, optimizer, scheduler, train_data_loader, val_data_loader, num_epochs=100, start_epoch=0, logging_dir='runs/our_experiment', name='task_1'):
  """Train a 5-class model, evaluating, logging and checkpointing after every epoch.

  Arguments:
      model (nn.Module):    Called as model(inputs, lengths); returns logits.
      loss_module:          Called as loss_module(logits, class_indices).
      optimizer:            Optimizer stepped once per batch.
      scheduler:            Learning-rate scheduler stepped once per epoch.
      train_data_loader:    Yields (inputs, one-hot labels, lengths); must have
                            a `dataset` attribute supporting len().
      val_data_loader:      Passed to eval_model after every epoch.
      num_epochs (int):     Epoch index at which training stops (exclusive).
      start_epoch (int):    Epoch index at which training starts.
      logging_dir (string): TensorBoard log directory.
      name (string):        Experiment name; checkpoints are written to
                            /content/<name>/model_<name>_<epoch>.pt

  Scalars are written to TensorBoard under the tags training_* and eval_*.
  """
  num_classes = 5
  class_words = ('zeros', 'ones', 'twos', 'threes', 'fours')
  device = next(model.parameters()).device

  writer = SummaryWriter(logging_dir)

  ##### Show TensorBoard for the directory actually being written to.
  ##### get_ipython only exists inside IPython/Colab; elsewhere this is skipped.
  try:
    shell = get_ipython()
  except NameError:
    shell = None
  if shell is not None:
    shell.run_line_magic('tensorboard', '--logdir ' + logging_dir)

  ##### Saving each epoch as checkpoint
  checkpoint_dir = '/content/' + name + '/'
  os.makedirs(checkpoint_dir, exist_ok = True)

  try:
    ##### Training loop
    for epoch in range(start_epoch, num_epochs):

      ##### Set model to train mode (eval_model leaves it in eval mode)
      model.train()

      ##### Initializing necessary variables; confusion[true class, predicted class]
      confusion = torch.zeros(num_classes, num_classes, dtype = torch.long)
      epoch_loss = 0.0

      for data_inputs, labels, lengths in tqdm(train_data_loader, 'Epoch %d'%(epoch + 1)):

        ##### Moving data to device; one-hot labels become class indices
        data_inputs = data_inputs.to(device)
        labels = labels.to(device)
        _, labels = torch.max(labels, dim = 1)

        ##### Running the model on the input data
        output = model(data_inputs, lengths)

        ##### Calculating loss
        loss = loss_module(output, labels)

        ##### Counting every (true, predicted) pair of the batch in one pass
        _, pred_labels = torch.max(output.detach(), dim = 1)
        pairs = labels * num_classes + pred_labels
        confusion += torch.bincount(pairs, minlength = num_classes ** 2).view(num_classes, num_classes).cpu()

        ##### Propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() * len(data_inputs)

        ##### Cleaning up
        del data_inputs, labels, output, loss, pred_labels, pairs

      ##### Metrics
      epoch_loss /= len(train_data_loader.dataset)
      epoch_acc = int(confusion.diagonal().sum()) / int(confusion.sum())
      epoch_prec, epoch_rec, epoch_f1, predicted_counts = _train_scores(confusion)

      eval_loss, eval_acc, eval_prec, eval_rec, eval_f1 = eval_model(model, loss_module, val_data_loader)

      print(f"Training loss: {epoch_loss:4.5f}")
      print(f"Training accuracy: {100.0*epoch_acc:4.5f}%")
      print(f"Training precision: {100.0*epoch_prec:4.5f}%")
      print(f"Training recall: {100.0*epoch_rec:4.5f}%")
      print(f"Training F1-score: {100.0*epoch_f1:4.5f}%")
      print(f"Eval loss: {eval_loss:4.5f}")
      print(f"Eval accuracy: {100.0*eval_acc:4.5f}%")
      print(f"Eval precision: {100.0*eval_prec:4.5f}%")
      print(f"Eval recall: {100.0*eval_rec:4.5f}%")
      print(f"Eval F1-score: {100.0*eval_f1:4.5f}%")

      for word, count in zip(class_words, predicted_counts):
        print("Train " + word + ":", str(count))

      step = epoch + 1
      writer.add_scalar('training_loss', epoch_loss, global_step = step)
      writer.add_scalar('training_acc', epoch_acc, global_step = step)
      writer.add_scalar('training_prec', epoch_prec, global_step = step)
      writer.add_scalar('training_rec', epoch_rec, global_step = step)
      writer.add_scalar('training_f1_score', epoch_f1, global_step = step)
      writer.add_scalar('eval_loss', eval_loss, global_step = step)
      writer.add_scalar('eval_acc', eval_acc, global_step = step)
      writer.add_scalar('eval_prec', eval_prec, global_step = step)
      writer.add_scalar('eval_rec', eval_rec, global_step = step)
      writer.add_scalar('eval_f1_score', eval_f1, global_step = step)

      torch.save(model.state_dict(), checkpoint_dir + "model_" + name + '_' + str(step) + ".pt")
      scheduler.step()
  finally:
    ##### Flush and close the log even when training is interrupted
    writer.close()

In [None]:
# Number of training samples per class (21545 in total).
# NOTE(review): presumably in the same order as the one-hot label columns - confirm.
_TRAIN_CLASS_COUNTS = (4663., 4724., 4800., 4822., 2536.)

# Inverse-frequency class weights: rarer classes contribute more to the loss
loss_module = nn.CrossEntropyLoss(
    weight = torch.tensor([sum(_TRAIN_CLASS_COUNTS) / count for count in _TRAIN_CLASS_COUNTS],
                          dtype = torch.float32, device = DEVICE),
    label_smoothing = 0.1)

# The fused Adam kernel is only requested on CUDA: PyTorch releases whose fused
# implementation is CUDA-only raise an error for CPU parameters.
optimizer = torch.optim.Adam(model.parameters(), lr = 0.00005,
                             amsgrad = True, fused = (DEVICE.type == 'cuda'))

# Halve the learning rate every 25 scheduler steps
scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                            step_size = 25, gamma = 0.5)

In [None]:
# Run training up to epoch 150; `name` selects the checkpoint folder and file prefix
train_model_with_logger(model, loss_module, optimizer, scheduler, train_dataloader, eval_dataloader, num_epochs = 150, name = 'LSSED_RNN_experiment')

# Svaštara

In [None]:
# Drop the reference to the model, release PyTorch's cached CUDA memory,
# then run the Python garbage collector
del model
torch.cuda.empty_cache()
gc.collect()

501

# Za aplikaciju

In [None]:
##### SETTING UP WAV2VEC2

# Both objects are loaded from the pre-trained checkpoint named by WAV2VEC2_NAME;
# the model is additionally moved to DEVICE
FEATURE_EXTRACTOR = transformers.Wav2Vec2FeatureExtractor.from_pretrained(WAV2VEC2_NAME)
WAV2VEC2_MODEL =    transformers.Wav2Vec2Model.from_pretrained(WAV2VEC2_NAME).to(DEVICE)

In [None]:
##### DEFINING THE CLASSIFIER

class EmotionClassifier(nn.Module):
  """Many-to-one RNN classifier: RNN -> Linear on the last valid step.

  Arguments:
      input_size (int):   Size of one input feature vector.
      hidden_size (int):  Size of the RNN hidden state.
      num_layers (int):   Number of stacked RNN layers.
      num_classes (int):  Number of output logits.

  The defaults reproduce the original fixed architecture (1024 / 1024 / 8 / 4).
  """

  def __init__(self, input_size = 1024, hidden_size = 1024, num_layers = 8, num_classes = 4):
    super().__init__()

    self.rnn1 = nn.RNN(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, batch_first = True, bidirectional = False)
    self.linear1 = nn.Linear(hidden_size, num_classes)

  def forward(self, x, length):
    """Return the logits computed from the RNN output at step `length - 1`.

    Arguments:
        x (Tensor):                  Batched (batch, time, features) or unbatched
                                     (time, features) input.
        length (int | list | Tensor): Number of valid steps, per sequence for
                                     batched input.

    Returns:
        Tensor: One row of logits per selected step.
    """
    out, _ = self.rnn1(x)

    # Many-to-one: keep only the output of the last valid (unpadded) step
    last_step = torch.as_tensor(length, dtype = torch.long, device = out.device) - 1

    # Explicit rank test instead of a bare try/except, which also hid genuine
    # indexing errors by retrying with the unbatched indexing.
    if out.dim() == 3:
      rows = torch.arange(out.shape[0], device = out.device)
      out = out[rows, last_step, :]
    else:
      out = out[last_step, :]

    out = self.linear1(out)
    return out

In [None]:
##### DEFINING THE WHOLE MODEL

class Emotioner(nn.Module):
  """End-to-end pipeline: feature extractor -> Wav2Vec2 model -> emotion classifier.

  Arguments:
      feature_extractor:    Callable invoked with raw_speech, sampling_rate,
                            do_normalize and return_tensors keyword arguments;
                            its result must support .to(device) and ** unpacking.
      wav2vec2_model:       Model whose output exposes 'last_hidden_state'.
      emotion_classifier:   Called as emotion_classifier(features, lengths).
      sampling_rate (int):  Sampling rate passed to the feature extractor.
  """

  def __init__(self, feature_extractor, wav2vec2_model, emotion_classifier, sampling_rate = 16000):
    super().__init__()
    self.feature_extractor = feature_extractor
    self.wav2vec2_model = wav2vec2_model
    self.emotion_classifier = emotion_classifier
    self.sampling_rate = sampling_rate
    self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

  # The setters below return self so that calls can be chained

  def set_feature_extractor(self, feature_extractor):
    self.feature_extractor = feature_extractor
    return self

  def set_wav2vec2_model(self, wav2vec2_model):
    self.wav2vec2_model = wav2vec2_model
    return self

  def set_emotion_classifier(self, emotion_classifier):
    self.emotion_classifier = emotion_classifier
    return self

  def set_sampling_rate(self, sampling_rate):
    self.sampling_rate = sampling_rate
    return self

  def set_device(self, device):
    self.device = device
    return self

  def extract_features(self, wav_array, sampling_rate, device):
    """Return the last hidden state for one waveform as a float32 tensor on `device`.

    Only the leading batch dimension (of size one) is removed, so a
    single-frame result keeps its time dimension.
    """
    wavs_token = self.feature_extractor(raw_speech = [wav_array], sampling_rate = sampling_rate, do_normalize = True, return_tensors = 'pt').to(device)
    outputs = self.wav2vec2_model(**wavs_token, output_hidden_states = True)
    w2vlastfeat = outputs['last_hidden_state'].squeeze(0)
    # .float() works on any device, unlike the torch.FloatTensor constructor
    return w2vlastfeat.float().to(device)

  def forward(self, wav_array):
    """Predict the class index for one waveform.

    Returns:
        Tensor: Tensor of shape (1,) holding the predicted class index.
    """
    # Inference only: the returned index carries no gradient
    with torch.no_grad():
      features = self.extract_features(wav_array, self.sampling_rate, self.device)
      # The classifier gets a batch of one together with its sequence length
      output = self.emotion_classifier(features.unsqueeze(0), [features.shape[0]])
      _, pred_label = torch.max(output, dim = 1)
    return pred_label

# Pre-processing stuff

In [None]:
##### NE KORISTI SE
#
##### THRESHOLDING PREDUGIH I PADDINGANJE PREKRATKIH ARRAYA FEATURA, KONVERTIRANJE ARRAYA U TENSOR
#
#def feature_array_padding(feature_array):
#
#  if np.shape(feature_array)[0] <= THRESHOLD:
#    feature_array = np.pad(feature_array, ((0, THRESHOLD - np.shape(feature_array)[0]), (0, 0)), 'constant', constant_values=(0, 0))
#  else:
#    feature_array = feature_array[:THRESHOLD]
#
#  feature_array = torch.FloatTensor(feature_array)
#  feature_array = torch.unsqueeze(feature_array, 0)
#
#  return feature_array

In [None]:
##### PAKIRANJE SAMPLE DATASET-A
#
#DIR = WAV2VEC2_LAST_TEST
#files = os.listdir(DIR)
#tar_SAMPLE = tarfile.open('/content/Wav2Vec2_SAMPLE.tar.gz', 'x:gz')
#for file in tqdm(files):
#   tar_SAMPLE.add(DIR + file, file)
#tar_SAMPLE.close()

In [None]:
#import shutil
#
#shutil.copy('/content/Wav2Vec2_SAMPLE.tar.gz', WAV2VEC2_SAM_PACKED)
#shutil.copy('/content/sample_metadata.csv', SAMPLE_METADATA)

In [None]:
#import shutil
#shutil.copy('/content/LSSED_Experiment/model_LSSED_Experiment_1.pt', DRIVE + 'model_LSSED_Experiment_1.pt')

In [None]:
##### NE POKRETATI - SPAJANJE DIJELOVA PREUZETOG DATASET-A NAZAD U CJELINU
#
# NIJE PYTHON NEGO CMD NAREDBA
#
#copy /B wav2vec2.tar.gz.0 + wav2vec2.tar.gz.1 + wav2vec2.tar.gz.2 D:\wav2vec2.tar.gz

In [None]:
##### NE POKRETATI - OTPAKIRAVANJE ORIGINALNOG DATASETA NA RAČUNALU
#
#file = tarfile.open("D:\\wav2vec2.tar.gz")
#members = file.getmembers()
#progress = tqdm(members)
#
#for member in progress:
#    file.extract(member, "D:\\wav2vec2")
#    progress.set_description(f"Extracting {member.name}")
#
#file.close()

In [None]:
##### NE POKRETATI - ČIŠĆENJE DATASETA OD EMOCIJA 'OTHER', 'FEAR', I 'SURPRISE'
#
#BASE_PATH = "D:\\wav2vec2\\148Dataset\\data-fan.weiquan\\datasets\\1003\\feature\\wav2vec2\\"
#
#df = pd.read_csv("D:\\wav2vec2_metadata.csv")
#removal = df[(df['Emotion'] == 'Other') | (df['Emotion'] == 'Fear') | (df['Emotion'] == 'Surprise')]['Vvid'].to_list()
#print(removal.__len__())
#
#for item in tqdm(removal):
#    os.remove(BASE_PATH+item)
#    os.remove(BASE_PATH+item+"_12")
#
##### PROČIŠĆENO I OD 'unknown' DOBI - SADA POSTOJE SAMO 'Midlife', 'Younger', 'Old'

In [None]:
##### NE POKRETATI - NORMALIZACIJA DATASETA I PODJELA NA TRAIN, EVAL, TEST
#
#BASE_PATH = "D:\\wav2vec2\\148Dataset\\data-fan.weiquan\\datasets\\1003\\feature\\wav2vec2\\"
#EMOTIONS = ['Angry', 'Bored', 'Disappointed', 'Disgusted', 'Excited', 'Happy', 'Neutral', 'Sad']
#GENDERS = ['Female', 'Male']
#AGES = ['Midlife', 'Old', 'Younger']
#TRAIN_COUNTS = [1016, 344, 1015, 782, 176, 747, 420, 125, 555, 354, 67, 515, 1015, 542, 1015, 1015, 292, 1015, 508, 109, 735, 398, 75, 394, 782, 156, 1015, 561, 108, 767, 1015, 945, 1015, 1015, 634, 1015, 1015, 1015, 1015, 1015, 1015, 1015, 1015, 822, 1015, 1015, 369, 1015]
#EVAL_COUNTS = [290, 98, 290, 223, 50, 213, 120, 35, 158, 101, 19, 147, 290, 155, 290, 290, 83, 290, 145, 31, 210, 113, 21, 112, 223, 44, 290, 160, 31, 219, 290, 270, 290, 290, 181, 290, 290, 290, 290, 290, 290, 290, 290, 235, 290, 290, 105, 290]
#TEST_COUNTS = [145, 49, 145, 112, 25, 107, 60, 18, 79, 50, 9, 73, 145, 77, 145, 145, 42, 145, 72, 16, 105, 57, 11, 56, 112, 22, 145, 80, 15, 110, 145, 135, 145, 145, 90, 145, 145, 145, 145, 145, 145, 145, 145, 117, 145, 145, 53, 145]
#
#files = os.listdir(BASE_PATH)
#df = pd.read_csv("D:\\wav2vec2_metadata.csv").drop("Unnamed: 0", axis = 1)
#
#i = 1
#training_vvids = np.array(sample(df[(df['Emotion'] == 'Angry') & (df['Gender'] == 'Female') & (df['Age'] == 'Midlife')]['Vvid'].to_list(), TRAIN_COUNTS[0]))
#for emotion in EMOTIONS:
#    for gender in GENDERS:
#        for age in AGES:
#            if not (emotion == 'Angry' and gender == 'Female' and age == 'Midlife'):
#                training_vvids = np.append(training_vvids, np.array(sample(df[(df['Emotion'] == emotion) & (df['Gender'] == gender) & (df['Age'] == age)]['Vvid'].to_list(), TRAIN_COUNTS[i])))
#                i += 1
#training_df = df.loc[df['Vvid'].isin(training_vvids)].reset_index().drop("index", axis = 1)
#print(training_df)
#print(training_df.describe())
#training_df.to_csv("D:\\train_metadata.csv")
#
#no_train_df = df.loc[df['Vvid'].isin([vvid for vvid in files if vvid not in training_vvids])]
#eval_vvids = np.array(sample(no_train_df[(no_train_df['Emotion'] == 'Angry') & (no_train_df['Gender'] == 'Female') & (no_train_df['Age'] == 'Midlife')]['Vvid'].to_list(), EVAL_COUNTS[0]))
#
#i = 1
#for emotion in EMOTIONS:
#    for gender in GENDERS:
#        for age in AGES:
#            if not (emotion == 'Angry' and gender == 'Female' and age == 'Midlife'):
#                eval_vvids = np.append(eval_vvids, np.array(sample(no_train_df[(no_train_df['Emotion'] == emotion) & (no_train_df['Gender'] == gender) & (no_train_df['Age'] == age)]['Vvid'].to_list(), EVAL_COUNTS[i])))
#                i += 1
#eval_df = no_train_df.loc[no_train_df['Vvid'].isin(eval_vvids)].reset_index().drop("index", axis = 1)
#print(eval_df)
#print(eval_df.describe())
#val_df.to_csv("D:\\eval_metadata.csv")
#
#no_test_train_df = no_train_df.loc[no_train_df['Vvid'].isin([vvid for vvid in files if vvid not in eval_vvids])].reset_index()
#test_vvids = np.array(sample(no_test_train_df[(no_test_train_df['Emotion'] == 'Angry') & (no_test_train_df['Gender'] == 'Female') & (no_test_train_df['Age'] == 'Midlife')]['Vvid'].to_list(), TEST_COUNTS[0]))
#
#i = 1
#for emotion in EMOTIONS:
#    for gender in GENDERS:
#        for age in AGES:
#            if not (emotion == 'Angry' and gender == 'Female' and age == 'Midlife'):
#               test_vvids = np.append(test_vvids, np.array(sample(no_test_train_df[(no_test_train_df['Emotion'] == emotion) & (no_test_train_df['Gender'] == gender) & (no_test_train_df['Age'] == age)]['Vvid'].to_list(), TEST_COUNTS[i])))
#               i += 1
#test_df = no_test_train_df.loc[no_test_train_df['Vvid'].isin(test_vvids)].reset_index().drop("index", axis = 1).drop("level_0", axis = 1)
#print(test_df)
#print(test_df.describe())
#test_df.to_csv("D:\\test_metadata.csv")
#
#all_vvids = np.append(training_vvids, eval_vvids)
#all_vvids = np.append(all_vvids, test_vvids)
#print(np.shape(all_vvids))
#
#unused_vvids = df['Vvid'].to_numpy()
#unused_vvids = np.setdiff1d(unused_vvids, all_vvids)
#print(np.shape(unused_vvids))
#
#for item in tqdm(unused_vvids):
#    os.remove(BASE_PATH+item)
#    os.remove(BASE_PATH+item+"_12")

In [None]:
##### NE POKRETATI - ANALIZIRANJE DATASETA
#
#BASE_PATH = "D:\\wav2vec2\\148Dataset\\data-fan.weiquan\\datasets\\1003\\feature\\wav2vec2\\"
#
#df = pd.concat([pd.read_csv("D:\\train_metadata.csv"), pd.read_csv("D:\\eval_metadata.csv"), pd.read_csv("D:\\test_metadata.csv")]).reset_index()
#print(df['Vvid'][0])
#
#new_el = loadmat(BASE_PATH + df['Vvid'][0])
#new_el = [[element for element in upperElement] for upperElement in new_el['w2v2']]
#lengths = np.array([new_el.__len__()])
#
#for i in tqdm(range(1, df.__len__())):
#    new_el = loadmat(BASE_PATH + df['Vvid'][i])
#    new_el = [[element for element in upperElement] for upperElement in new_el['w2v2']]
#    lengths = np.append(lengths, new_el.__len__())
#
#np.save("D:\\filtered_dataset_feature_counts.npy", lengths)
#plt.hist(lengths, np.unique(lengths).__len__())
#plt.show()

In [None]:
##### NE POKRETATI - PAKIRANJE OBRAĐENOG DATASETA ZA UPLOADANJE
#
#TRAIN = "TRAIN\\"
#EVAL = "EVAL\\"
#TEST = "TEST\\"
#
#WAV2VEC2_LAST = "D:\\Wav2Vec2_LAST\\"
#W2V2_LAST_TRAIN = WAV2VEC2_LAST + TRAIN
#W2V2_LAST_EVAL = WAV2VEC2_LAST + EVAL
#W2V2_LAST_TEST = WAV2VEC2_LAST + TEST
#W2V2_LAST_DIRECTORIES = [W2V2_LAST_TRAIN, W2V2_LAST_EVAL, W2V2_LAST_TEST]
#
#WAV2VEC2_12 = "D:\\Wav2Vec2_12\\"
#W2V2_12_TRAIN = WAV2VEC2_12 + TRAIN
#W2V2_12_EVAL = WAV2VEC2_12 + EVAL
#W2V2_12_TEST = WAV2VEC2_12 + TEST
#W2V2_12_DIRECTORIES = [W2V2_12_TRAIN, W2V2_12_EVAL, W2V2_12_TEST]
#
#tar_12 = tar.open("D:\\Wav2Vec2_12.tar.gz", 'x:gz')
#for dir in W2V2_12_DIRECTORIES:
#    files = os.listdir(dir)
#    print("Processing " + dir + " ...")
#    for file in tqdm(files):
#        tar_12.add(dir + file)
#tar_12.close()
#
#tar_LAST = tar.open("D:\\Wav2Vec2_LAST.tar.gz", 'x:gz')
#for dir in W2V2_LAST_DIRECTORIES:
#    files = os.listdir(dir)
#    print("Processing " + dir + " ...")
#    for file in tqdm(files):
#        tar_LAST.add(dir + file)
#tar_LAST.close()