In [1]:
import os
import torch

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


emotions_dict = {
    '0': 'neutral',
    '1': 'calm',
    '2': 'happy',
    '3': 'sad',
    '4': 'angry',
    '5': 'fearful',
    '6': 'disgust',
    '7': 'surprised'
}


sub_name = 'MLT-DNet'
data_name = 'RAVDESS'

minibatch = 32
num_epochs = 500
EARLY_CNT = 500

learning_rate = 4e-3

In [2]:
from cv2 import transform
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
from einops import rearrange
import torch.nn.functional as F
from torchinfo import summary
from tqdm.auto import tqdm
import math

In [3]:
def get_padding(kernel_size: int, stride: int, dilation: int) -> int:
    if stride > 1 and dilation > 1:
        raise ValueError('Only stride OR dilation may be greater than 1')
    return (kernel_size // 2) * dilation

In [4]:
class Model(nn.Module):
    """Some Information about Model"""
    def __init__(self, num_emotions):
        super(Model, self).__init__()

        self.dcnn = nn.Sequential(
            nn.Conv1d(1, 1, kernel_size=3, stride=1, dilation=3, padding=get_padding(kernel_size=3, stride=1, dilation=3)),
            nn.Dropout(0.25)
        )
        
        self.rbsc = nn.Sequential(
            nn.Conv1d(1, 1, kernel_size=3, stride=1, dilation=3, padding=get_padding(kernel_size=3, stride=1, dilation=3)),
            nn.BatchNorm1d(1),
            nn.LeakyReLU(),
            
            nn.Conv1d(1, 1, kernel_size=3, stride=1, dilation=3, padding=get_padding(kernel_size=3, stride=1, dilation=3)),
            nn.BatchNorm1d(1),
            nn.LeakyReLU(),
            
            nn.Conv1d(1, 1, kernel_size=3, stride=1, dilation=3, padding=get_padding(kernel_size=3, stride=1, dilation=3)),
            nn.BatchNorm1d(1),
            nn.LeakyReLU(),
        )
        
        self.gru1 = nn.GRU(input_size=15000, hidden_size=1024, num_layers=4, batch_first=True, bidirectional=True, dropout=0.25)
        self.gru2 = nn.GRU(input_size=2048, hidden_size=1024, num_layers=4, batch_first=True, bidirectional=True, dropout=0.25)
        self.gru3 = nn.GRU(input_size=2048, hidden_size=1024, num_layers=4, batch_first=True, bidirectional=True, dropout=0.25)
        
        
        self.fcn = nn.Sequential(
            nn.Linear(17048, num_emotions),
            nn.LeakyReLU(),
            nn.Dropout(0.25)
        )
        
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        dcnn_out = self.dcnn(x)
        
        rbsc_out = self.rbsc(dcnn_out)
        rbsc_out += dcnn_out
        
        gru_out, _ = self.gru1(dcnn_out)
        gru_out, _ = self.gru2(gru_out)
        gru_out, _ = self.gru3(gru_out)
        
        rbsc_out = rbsc_out.squeeze(1)
        gru_out = gru_out.squeeze(1)
        
        complete_embd = torch.cat([rbsc_out, gru_out], dim=1)
        
        output_logits = self.fcn(complete_embd)
        output_softmax = self.softmax(output_logits)
        
        return output_logits, output_softmax

In [5]:
def make_train_step(model, criterion, optimizer):

    # define the training step of the training phase
    def train_step(X, Y):

        # forward pass
        output_logits, output_softmax = model(X)
        predictions = torch.argmax(output_softmax, dim=1)
        accuracy = torch.sum(Y == predictions)/float(len(Y))

        # compute loss on logits because nn.CrossEntropyLoss implements log softmax
        loss = criterion(output_logits, Y)

        # compute gradients for the optimizer to use
        loss.backward()

        # update network parameters based on gradient stored (by calling loss.backward())
        optimizer.step()

        # zero out gradients for next pass
        # pytorch accumulates gradients from backwards passes (convenient for RNNs)
        optimizer.zero_grad()

        return loss.item(), accuracy*100
    return train_step

In [6]:
def make_validate_fnc(model, criterion):
    def validate(X, Y):

        # don't want to update any network parameters on validation passes: don't need gradient
        # wrap in torch.no_grad to save memory and compute in validation phase:
        with torch.no_grad():

            # set model to validation phase i.e. turn off dropout and batchnorm layers
            model.eval()
            
            # get the model's predictions on the validation set
            output_logits, output_softmax = model(X)
            predictions = torch.argmax(output_softmax, dim=1)

            # calculate the mean accuracy over the entire validation set
            accuracy = torch.sum(Y == predictions)/float(len(Y))

            # compute error from logits (nn.crossentropy implements softmax)
            loss = criterion(output_logits, Y)

        return loss.item(), accuracy*100, predictions
    return validate

In [7]:
def make_save_checkpoint():
    def save_checkpoint(optimizer, model, epoch, filename):
        checkpoint_dict = {
            'optimizer': optimizer.state_dict(),
            'model': model.state_dict(),
            'epoch': epoch
        }
        torch.save(checkpoint_dict, filename)
    return save_checkpoint


def load_checkpoint(optimizer, model, filename):
    checkpoint_dict = torch.load(filename)
    epoch = checkpoint_dict['epoch']
    model.load_state_dict(checkpoint_dict['model'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint_dict['optimizer'])
    return epoch

In [8]:
def criterion(predictions, targets):
    return nn.CrossEntropyLoss()(input=predictions, target=targets)

In [9]:
from einops import rearrange

def train(model, num_epochs, X_train, Y_train, X_valid, Y_valid, pkl_name, fold, optimizer):
    
    # instantiate lists to hold scalar performance metrics to plot later
    train_losses = []
    valid_losses = []

    # get training set size to calculate # iterations and minibatch indices
    train_size = X_train.shape[0]

    # instantiate the checkpoint save function
    save_checkpoint = make_save_checkpoint()

    # instantiate the training step function
    train_step = make_train_step(model, criterion, optimizer=optimizer)
    # instantiate the validation loop function
    validate = make_validate_fnc(model, criterion)

    cur_valid_loss = 0x3f3f3f
    early_stop_cnt = 0
    
    for epoch in tqdm(range(num_epochs), desc=f'{fold}'):

        if early_stop_cnt >= EARLY_CNT:
            break
        
        # set model to train phase
        model.train()

        # shuffle entire training set in each epoch to randomize minibatch order
        train_indices = np.random.permutation(train_size)

        # shuffle the training set for each epoch:
        X_train = X_train[train_indices, :, :]
        Y_train = Y_train[train_indices]

        # instantiate scalar values to keep track of progress after each epoch so we can stop training when appropriate
        epoch_acc = 0
        epoch_loss = 0
        num_iterations = int(train_size / minibatch)

        # create a loop for each minibatch of 32 samples:
        for i in range(num_iterations):

            # we have to track and update minibatch position for the current minibatch
            # if we take a random batch position from a set, we almost certainly will skip some of the data in that set
            # track minibatch position based on iteration number:
            batch_start = i * minibatch
            # ensure we don't go out of the bounds of our training set:
            batch_end = min(batch_start + minibatch, train_size)
            # ensure we don't have an index error
            actual_batch_size = batch_end-batch_start

            # get training minibatch with all channnels and 2D feature dims
            X = X_train[batch_start:batch_end, :, :]
            # get training minibatch labels
            Y = Y_train[batch_start:batch_end]
            
            # instantiate training tensors
            X_tensor = torch.tensor(X, device=device).float()
            Y_tensor = torch.tensor(Y, dtype=torch.long, device=device)
            
            # Pass input tensors thru 1 training step (fwd+backwards pass)
            loss, acc = train_step(X_tensor, Y_tensor)

            # aggregate batch accuracy to measure progress of entire epoch
            epoch_acc += acc * actual_batch_size / train_size
            epoch_loss += loss * actual_batch_size / train_size

            # keep track of the iteration to see if the model's too slow
            print('\r'+f'Epoch {epoch}: iteration {i}/{num_iterations}', end='')

        X_valid_tensor = torch.tensor(X_valid, device=device).float()
        Y_valid_tensor = torch.tensor(Y_valid, dtype=torch.long, device=device)
        
        # calculate validation metrics to keep track of progress; don't need predictions now
        valid_loss, valid_acc, _ = validate(X_valid_tensor, Y_valid_tensor)
        
        # accumulate scalar performance metrics at each epoch to track and plot later
        train_losses.append(epoch_loss)
        valid_losses.append(valid_loss)
        
        if cur_valid_loss > valid_loss:
            save_checkpoint(optimizer, model, epoch, pkl_name)
            cur_valid_loss = valid_loss
        else:
            early_stop_cnt += 1

        # keep track of each epoch's progress
        print(f'\nEpoch {epoch} --- loss:{epoch_loss:.3f}, Epoch accuracy:{epoch_acc:.2f}%, Validation loss:{valid_loss:.3f}, Validation accuracy:{valid_acc:.2f}%')

        
    print(f"\n\n[*] done !")
    print(f'[*] Best training loss - {min(train_losses)}')
    print(f'[*] Best validation loss - {min(valid_losses)}')
    
    return train_losses, valid_losses

In [10]:
import csv
import numpy as np
from PIL import Image
from skimage.io import imread
from skimage.transform import resize

def get_fold_dataset(npy_name):
    with open(npy_name, 'rb') as f:
        X_train = np.load(f)
        y_train = np.load(f)
        X_test = np.load(f)
        y_test = np.load(f)
    
    return X_train, y_train, X_test, y_test

In [11]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd

train_losses, valid_losses = [], []

print(f'[*] model training - {sub_name}')

fold_path = 'PCNSE-SADRN-CTC dataset/1d_'

for fold in range(5):
    cur_fold_path = fold_path + 'fold'+str(fold)+'.npy'
    X_train, y_train, X_test, y_test = get_fold_dataset(cur_fold_path)
    
    X_train = np.expand_dims(X_train, axis=1)
    X_test = np.expand_dims(X_test, axis=1)
    
    pkl_name = f'./model/{data_name}-{sub_name}-{fold}.pkl'
    model = Model(num_emotions=len(emotions_dict)).to('cuda')
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)
    
    train_losses, valid_losses = train(model, num_epochs, X_train, y_train, X_test, y_test, pkl_name, fold, optimizer)
    
    plt.title('Loss Curve')
    plt.ylabel('Loss', fontsize=16)
    plt.xlabel('Epoch', fontsize=16)
    plt.plot(train_losses[:],'b')
    plt.plot(valid_losses[:],'r')
    plt.legend(['Training loss','Validation loss'])
    plt.show()
    
    break

[*] model training - MLT-DNet


0:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch 0: iteration 34/35
Epoch 0 --- loss:269.815, Epoch accuracy:12.81%, Validation loss:197.940, Validation accuracy:14.67%
Epoch 1: iteration 34/35
Epoch 1 --- loss:97.466, Epoch accuracy:26.93%, Validation loss:52.770, Validation accuracy:13.67%
Epoch 2: iteration 34/35
Epoch 2 --- loss:12.034, Epoch accuracy:28.77%, Validation loss:11.909, Validation accuracy:13.33%
Epoch 3: iteration 34/35
Epoch 3 --- loss:6.027, Epoch accuracy:28.42%, Validation loss:5.900, Validation accuracy:14.00%
Epoch 4: iteration 34/35
Epoch 4 --- loss:3.872, Epoch accuracy:26.84%, Validation loss:4.769, Validation accuracy:13.33%
Epoch 5: iteration 34/35
Epoch 5 --- loss:3.533, Epoch accuracy:27.28%, Validation loss:4.381, Validation accuracy:13.67%
Epoch 6: iteration 34/35
Epoch 6 --- loss:3.524, Epoch accuracy:26.32%, Validation loss:4.134, Validation accuracy:14.00%
Epoch 7: iteration 34/35
Epoch 7 --- loss:3.312, Epoch accuracy:27.72%, Validation loss:4.302, Validation accuracy:13.00%
Epoch 8: iterati

Epoch 66: iteration 34/35
Epoch 66 --- loss:0.978, Epoch accuracy:52.37%, Validation loss:6.375, Validation accuracy:13.67%
Epoch 67: iteration 34/35
Epoch 67 --- loss:1.095, Epoch accuracy:51.14%, Validation loss:5.002, Validation accuracy:13.67%
Epoch 68: iteration 34/35
Epoch 68 --- loss:1.041, Epoch accuracy:49.30%, Validation loss:4.592, Validation accuracy:14.00%
Epoch 69: iteration 34/35
Epoch 69 --- loss:0.995, Epoch accuracy:51.40%, Validation loss:5.341, Validation accuracy:14.67%
Epoch 70: iteration 34/35
Epoch 70 --- loss:1.104, Epoch accuracy:54.65%, Validation loss:4.961, Validation accuracy:14.00%
Epoch 71: iteration 34/35
Epoch 71 --- loss:1.082, Epoch accuracy:48.77%, Validation loss:4.193, Validation accuracy:14.33%
Epoch 72: iteration 34/35
Epoch 72 --- loss:1.039, Epoch accuracy:53.77%, Validation loss:5.352, Validation accuracy:15.00%
Epoch 73: iteration 34/35
Epoch 73 --- loss:0.944, Epoch accuracy:53.95%, Validation loss:5.132, Validation accuracy:13.33%
Epoch 74

Epoch 132: iteration 34/35
Epoch 132 --- loss:0.487, Epoch accuracy:71.32%, Validation loss:6.345, Validation accuracy:12.00%
Epoch 133: iteration 34/35
Epoch 133 --- loss:0.646, Epoch accuracy:73.77%, Validation loss:7.097, Validation accuracy:12.67%
Epoch 134: iteration 34/35
Epoch 134 --- loss:0.536, Epoch accuracy:72.19%, Validation loss:6.962, Validation accuracy:12.00%
Epoch 135: iteration 34/35
Epoch 135 --- loss:0.493, Epoch accuracy:73.33%, Validation loss:7.364, Validation accuracy:12.67%
Epoch 136: iteration 34/35
Epoch 136 --- loss:0.435, Epoch accuracy:74.82%, Validation loss:8.117, Validation accuracy:12.00%
Epoch 137: iteration 34/35
Epoch 137 --- loss:0.480, Epoch accuracy:74.65%, Validation loss:7.657, Validation accuracy:12.33%
Epoch 138: iteration 34/35
Epoch 138 --- loss:0.398, Epoch accuracy:76.40%, Validation loss:7.932, Validation accuracy:12.33%
Epoch 139: iteration 34/35
Epoch 139 --- loss:0.438, Epoch accuracy:74.91%, Validation loss:8.681, Validation accuracy

Epoch 197: iteration 34/35
Epoch 197 --- loss:0.287, Epoch accuracy:81.67%, Validation loss:9.442, Validation accuracy:13.67%
Epoch 198: iteration 34/35
Epoch 198 --- loss:0.357, Epoch accuracy:81.32%, Validation loss:9.598, Validation accuracy:13.00%
Epoch 199: iteration 34/35
Epoch 199 --- loss:0.422, Epoch accuracy:81.67%, Validation loss:8.232, Validation accuracy:13.67%
Epoch 200: iteration 34/35
Epoch 200 --- loss:0.328, Epoch accuracy:78.25%, Validation loss:8.157, Validation accuracy:13.33%
Epoch 201: iteration 34/35
Epoch 201 --- loss:0.356, Epoch accuracy:79.56%, Validation loss:9.341, Validation accuracy:13.67%
Epoch 202: iteration 34/35
Epoch 202 --- loss:0.316, Epoch accuracy:80.79%, Validation loss:8.528, Validation accuracy:14.67%
Epoch 203: iteration 34/35
Epoch 203 --- loss:0.337, Epoch accuracy:80.44%, Validation loss:9.311, Validation accuracy:12.00%
Epoch 204: iteration 34/35
Epoch 204 --- loss:0.356, Epoch accuracy:80.35%, Validation loss:8.255, Validation accuracy

Epoch 262: iteration 34/35
Epoch 262 --- loss:0.279, Epoch accuracy:81.05%, Validation loss:6.808, Validation accuracy:16.00%
Epoch 263: iteration 34/35
Epoch 263 --- loss:0.271, Epoch accuracy:82.89%, Validation loss:7.140, Validation accuracy:15.67%
Epoch 264: iteration 34/35
Epoch 264 --- loss:0.299, Epoch accuracy:81.14%, Validation loss:8.548, Validation accuracy:15.67%
Epoch 265: iteration 34/35
Epoch 265 --- loss:0.284, Epoch accuracy:81.58%, Validation loss:8.714, Validation accuracy:15.67%
Epoch 266: iteration 34/35
Epoch 266 --- loss:0.268, Epoch accuracy:82.72%, Validation loss:7.671, Validation accuracy:16.00%
Epoch 267: iteration 34/35
Epoch 267 --- loss:0.350, Epoch accuracy:81.23%, Validation loss:7.860, Validation accuracy:15.00%
Epoch 268: iteration 34/35
Epoch 268 --- loss:0.277, Epoch accuracy:82.11%, Validation loss:6.883, Validation accuracy:14.33%
Epoch 269: iteration 34/35
Epoch 269 --- loss:0.470, Epoch accuracy:81.40%, Validation loss:5.403, Validation accuracy

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd

device = 'cuda'
model = Model(num_emotions=len(emotions_dict)).to('cuda')

test_accs = []

for fold in range(5):
    pkl_name = f'./model/{data_name}-{sub_name}-{fold}.pkl'
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)
    
    load_checkpoint(optimizer, model, pkl_name)
    
    validate = make_validate_fnc(model, criterion)
    
    cur_fold_path = os.path.join(fold_path, 'fold'+str(fold)+'.npy')
    X_train, y_train, X_test, y_test = get_fold_dataset(cur_fold_path)
    
    X_test_tensor = torch.tensor(X_test,device=device).float()
    # X_test_tensor = X_test_tensor.permute(0, 3, 1, 2)
    y_test_tensor = torch.tensor(y_test,dtype=torch.long,device=device)
    
    test_loss, test_acc, predicted_emotions = validate(X_test_tensor, y_test_tensor)
    print(f'[*] {fold} Test accuracy is {test_acc:.2f}%')
    test_accs.append(test_acc)
    
    predicted_emotions = predicted_emotions.cpu().numpy()
    # use labels from test set
    emotions_groundtruth = y_test_tensor.cpu().numpy()
    
    # build confusion matrix and normalized confusion matrix
    conf_matrix = confusion_matrix(emotions_groundtruth, predicted_emotions)
    conf_matrix_norm = confusion_matrix(emotions_groundtruth, predicted_emotions,normalize='true')

    # set labels for matrix axes from emotions
    emotion_names = [emotion for emotion in emotions_dict.values()]

    # make a confusion matrix with labels using a DataFrame
    confmatrix_df = pd.DataFrame(conf_matrix, index=emotion_names, columns=emotion_names)
    confmatrix_df_norm = pd.DataFrame(conf_matrix_norm, index=emotion_names, columns=emotion_names)

    # plot confusion matrices
    plt.figure(figsize=(16,6))
    sn.set(font_scale=1.8) # emotion label and title size
    plt.subplot(1,2,1)
    plt.title('Confusion Matrix')
    sn.heatmap(confmatrix_df, annot=True, annot_kws={"size": 18}) #annot_kws is value font
    plt.subplot(1,2,2)
    plt.title('Normalized Confusion Matrix')
    sn.heatmap(confmatrix_df_norm, annot=True, annot_kws={"size": 13}) #annot_kws is value font

    plt.show()
    
    break

In [None]:
def Average(lst):
    return sum(lst) / len(lst)

print('[*] K-Fold training done !')
print(f'[*] Best fold acc : {max(test_accs):.2f}%')
print(f'[*] Worst fold acc : {min(test_accs):.2f}%')
print(f'[*] Average fold acc : {Average(test_accs):.2f}%')

In [None]:
from sklearn.metrics import classification_report

device = torch.device('cpu')
model = model.to(device)

test_accs = []

for fold in range(5):
    pkl_name = f'./model/{data_name}-{sub_name}-{fold}.pkl'
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)
    
    load_checkpoint(optimizer, model, pkl_name)
    
    validate = make_validate_fnc(model, criterion)
    
    cur_fold_path = os.path.join(fold_path, 'fold'+str(fold)+'.npy')
    X_train, y_train, X_test, y_test = get_fold_dataset(cur_fold_path)
    
    X_test_tensor = torch.tensor(X_test).float()
    # X_test_tensor = X_test_tensor.permute(0, 3, 1, 2)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)
    
    test_loss, test_acc, predicted_emotions = validate(X_test_tensor, y_test_tensor)
    print(f'[*] {fold} Test accuracy is {test_acc:.2f}%')
    test_accs.append(test_acc)
    
    predicted_emotions = predicted_emotions.cpu().numpy()
    # use labels from test set
    emotions_groundtruth = y_test_tensor

    print(classification_report(emotions_groundtruth, predicted_emotions))
    
    break