In [None]:
import os
import torch

# --- CUDA runtime configuration ---------------------------------------------
# All three variables are written into the process environment in one go,
# before `device` is resolved below.
os.environ.update({
    'CUDA_LAUNCH_BLOCKING': "1",
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)


# --- Label table ------------------------------------------------------------
# Maps the class index (as a string key) to the emotion name; insertion order
# follows the class index.
emotions_dict = {
    str(class_idx): emotion
    for class_idx, emotion in enumerate((
        'neutral',
        'calm',
        'happy',
        'sad',
        'angry',
        'fearful',
        'disgust',
        'surprised',
    ))
}


# --- Experiment identifiers (used to build dataset / checkpoint / result paths)
sub_name = 'GM-TCNet'
data_name = 'RAVDESS'
fold_path = f'{sub_name} dataset'

loss_name = 'CE loss'

# --- Training hyper-parameters ----------------------------------------------
minibatch = 32          # samples per optimisation step
num_epochs = 300        # upper bound on epochs per fold
EARLY_CNT = 99999       # early-stop threshold compared against the non-improving epoch counter

learning_rate = 0.001   # initial learning rate handed to the optimizer

In [None]:
from cv2 import transform
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
from einops import rearrange
import torch.nn.functional as F
from torchinfo import summary
from tqdm.auto import tqdm
import math
from sklearn.metrics import classification_report
import random
from torchaudio.functional import compute_deltas

In [None]:
def set_seed(seed_value):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, plus every CUDA device when CUDA is available).  When cuDNN is
    available it is also switched to deterministic mode with benchmarking
    disabled.

    Args:
        seed_value: integer seed applied to all generators.
    """
    random.seed(seed_value)        # Python
    np.random.seed(seed_value)     # NumPy global generator
    torch.manual_seed(seed_value)  # PyTorch CPU generator
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)  # every visible GPU
    # `is_available` is a function and must be *called*; testing the bare
    # function object is always truthy and made this check meaningless.
    if torch.backends.cudnn.is_available():
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    print(f'# SEEDING DONE : seed is {seed_value} ')

set_seed(2022)

In [None]:
def get_padding(kernel_size: int, stride: int, dilation: int) -> int:
    """Return the padding ``(kernel_size // 2) * dilation``.

    Args:
        kernel_size: width of the convolution kernel.
        stride: convolution stride; only used for the validity check.
        dilation: convolution dilation factor.

    Returns:
        Half the kernel size (floor division) multiplied by the dilation.

    Raises:
        ValueError: if both ``stride`` and ``dilation`` are greater than 1.
    """
    is_strided = stride > 1
    is_dilated = dilation > 1
    if is_strided and is_dilated:
        raise ValueError('Only stride OR dilation may be greater than 1')

    half_kernel = kernel_size // 2
    return half_kernel * dilation

In [None]:
class CausalConv1d(nn.Module):
    """Dilated 1-D convolution padded by ``(kernel_size - 1) * dilation``.

    The padding amount is passed straight to ``nn.Conv1d`` and ``forward``
    returns the convolution result unmodified, i.e. nothing is cropped here.
    With the default stride of 1 the output is therefore
    ``(kernel_size - 1) * dilation`` samples longer than the input; callers
    that need the input length back must slice the result themselves (as
    ``residual_block.forward`` does).

    Args:
        in_chan: number of input channels.
        out_chan: number of output channels.
        kernel_size: width of the convolution kernel.
        dilation: spacing between kernel taps (default 1).
    """

    def __init__(self, in_chan, out_chan, kernel_size, dilation=1):
        super().__init__()

        # Enough padding to cover the full dilated receptive field.
        receptive_pad = dilation * (kernel_size - 1)
        self.conv = nn.Conv1d(
            in_chan,
            out_chan,
            kernel_size=kernel_size,
            dilation=dilation,
            padding=receptive_pad,
        )

    def forward(self, x):
        """Apply the padded convolution to ``x`` and return the raw result."""
        return self.conv(x)
    
# model = CausalConv1d(39, 39, kernel_size=3, dilation=2)
# print(summary(model, (32, 39, 188)))

In [None]:
class residual_block(nn.Module):
    """Residual block made of two stacked stages of gated convolutions.

    Each stage owns three branch pairs.  A pair consists of a feature path
    (``CausalConv1d`` + ReLU) and a gate path (``CausalConv1d`` + ReLU +
    Sigmoid) whose outputs are multiplied element-wise.  The three products of
    a stage are summed and that sum is multiplied by its own mean
    (``sum * sum / 3``).  Stage 1 uses dilation ``i``; stage 2 consumes the
    stage-1 output and uses dilation ``2 * i``.

    Sub-modules are registered as ``conv_<stage>_<branch>`` (feature path) and
    ``conv_s<stage>_<branch>`` (gate path), stage in {1, 2}, branch in {1, 2, 3}.

    Args:
        i: base dilation of the block.
        filters: channel count of every convolution (inputs and outputs).
        kernel_size: kernel width of every convolution.
        dropout_rate: accepted for API compatibility; not used by this block.
    """

    _BRANCHES = (1, 2, 3)

    def __init__(self, i, filters, kernel_size, dropout_rate=0):
        super().__init__()

        self.filters = filters
        self.kernel_size = kernel_size
        self.dilatation = i

        # Build the sub-modules stage by stage, feature path before gate path,
        # so creation order (and therefore seeded initialisation) is fixed.
        for stage, stage_dilation in ((1, i), (2, i * 2)):
            for branch in self._BRANCHES:
                feature_path = nn.Sequential(
                    CausalConv1d(filters, filters, kernel_size=kernel_size, dilation=stage_dilation),
                    nn.ReLU(),
                )
                setattr(self, f'conv_{stage}_{branch}', feature_path)

                gate_path = nn.Sequential(
                    CausalConv1d(filters, filters, kernel_size=kernel_size, dilation=stage_dilation),
                    nn.ReLU(),
                    nn.Sigmoid(),
                )
                setattr(self, f'conv_s{stage}_{branch}', gate_path)

    def _gated_stage(self, inputs, stage):
        """Run the three gated branch pairs of ``stage`` on ``inputs`` and fuse them."""
        gated = []
        for branch in self._BRANCHES:
            features = getattr(self, f'conv_{stage}_{branch}')(inputs)
            gate = getattr(self, f'conv_s{stage}_{branch}')(inputs)
            gated.append(features * gate)

        branch_sum = gated[0] + gated[1] + gated[2]
        branch_mean = torch.div(branch_sum, 3)
        return branch_sum * branch_mean

    def forward(self, x):
        """Return ``(x + skip, skip)`` where ``skip`` is the cropped stage-2 output."""
        stage_1 = self._gated_stage(x, 1)
        stage_2 = self._gated_stage(stage_1, 2)

        # The padded convolutions lengthen the sequence; keep only as many
        # leading time steps as the input has so the residual sum lines up.
        skip = stage_2[:, :, :x.shape[-1]]

        return x + skip, skip

In [None]:
class GTCM(nn.Module):
    """Stack of gated residual blocks preceded by a pointwise convolution.

    The input first goes through a kernel-size-1 ``CausalConv1d``.  It is then
    passed through one ``residual_block`` per entry of ``self.dilations``
    (``[1, 2]``), each with kernel size 2.  The skip outputs of all blocks are
    summed and passed through a LeakyReLU (negative slope 0.05); the residual
    stream coming out of the last block is not part of the returned value.

    The blocks are held in an ``nn.ModuleList`` so that they are registered
    sub-modules.  A plain Python list would leave their weights out of
    ``parameters()`` (never optimised), out of ``state_dict()`` (never
    checkpointed) and unaffected by ``.to()``, ``.train()`` and ``.eval()``.

    NOTE: checkpoints written while the blocks were kept in a plain list do
    not contain any ``residual_blocks.*`` entries and cannot be loaded with
    strict ``load_state_dict`` into this class; such models need retraining.

    Args:
        filters: channel count used by every layer (default 39).
    """

    def __init__(self, filters=39):
        super().__init__()

        self.dilations = [2 ** i for i in range(2)]

        self.conv = CausalConv1d(filters, filters, 1)

        # Registered container: parameters are visible to optimizers and
        # checkpoints, and follow the parent module across devices and modes.
        self.residual_blocks = nn.ModuleList([
            residual_block(i=dilation, filters=filters, kernel_size=2, dropout_rate=0.0)
            for dilation in self.dilations
        ])

        self.leaky_relu = nn.LeakyReLU(negative_slope=0.05)

    def forward(self, x):
        """Return the LeakyReLU of the summed skip outputs of all blocks."""
        x = self.conv(x)

        skip_sum = None
        for block in self.residual_blocks:
            x, skip_out = block(x)
            skip_sum = skip_out if skip_sum is None else skip_sum + skip_out

        return self.leaky_relu(skip_sum)
    

class Model(nn.Module):
    """Emotion classifier: GTCM encoder, global average pooling, linear head.

    Args:
        num_emotions: number of output classes.
        num_features: channel count of the input features; it is used both as
            the GTCM width and as the input size of the linear head.  Defaults
            to 39, the value that was previously hard-coded.
    """

    def __init__(self, num_emotions, num_features=39):
        super().__init__()

        self.tcn = GTCM(num_features)

        self.fc = nn.Linear(num_features, num_emotions)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Classify a batch of feature sequences.

        Args:
            x: input tensor; it is fed to 1-D convolutions, so it is expected
                to be laid out as (batch, channels, time).

        Returns:
            Tuple ``(logits, probabilities)``.  ``logits`` is the raw output of
            the linear head and ``probabilities`` is its softmax over dim 1.
        """
        tcn_embd = self.tcn(x)
        # Global average pooling: one pooling window spanning the whole last axis.
        gap_embd = F.avg_pool1d(tcn_embd, tcn_embd.size()[2:])
        out = torch.flatten(gap_embd, start_dim=1)

        output_logits = self.fc(out)
        output_softmax = self.softmax(output_logits)

        return output_logits, output_softmax
    
# model = Model(8)
# print(summary(model, (32, 39, 188)))

In [None]:
def make_train_step(model, criterion, optimizer):
    """Build a closure that performs one optimisation step on a minibatch.

    Args:
        model: callable returning ``(logits, softmax)`` for an input batch.
        criterion: loss function called as ``criterion(logits, targets)``.
        optimizer: optimizer that updates the parameters of ``model``.

    Returns:
        ``train_step(X, Y) -> (loss, accuracy)`` where ``loss`` is a Python
        float and ``accuracy`` is the batch accuracy in percent (a tensor).
    """

    def train_step(X, Y):
        logits, probabilities = model(X)

        # Batch accuracy from the arg-max class of the softmax output.
        predicted = probabilities.argmax(dim=1)
        hit_ratio = (Y == predicted).sum() / float(len(Y))

        # The loss is computed on the logits: the criterion applies its own
        # log-softmax.
        batch_loss = criterion(logits, Y)

        # backward -> parameter update -> clear gradients for the next call
        # (PyTorch accumulates gradients across backward passes).
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        return batch_loss.item(), hit_ratio * 100

    return train_step

In [None]:
def make_validate_fnc(model, criterion):
    """Build a closure that evaluates ``model`` on a labelled batch.

    Args:
        model: callable returning ``(logits, softmax)`` for an input batch.
        criterion: loss function called as ``criterion(logits, targets)``.

    Returns:
        ``validate(X, Y) -> (loss, accuracy, predictions)`` where ``loss`` is
        a Python float, ``accuracy`` is in percent (a tensor) and
        ``predictions`` holds the arg-max class per sample.
    """

    def validate(X, Y):
        # Evaluation mode (disables dropout / batch-norm updates); the model is
        # left in this mode when the call returns.
        model.eval()

        # No parameter update happens here, so gradient tracking is skipped.
        with torch.no_grad():
            logits, probabilities = model(X)
            predictions = probabilities.argmax(dim=1)

            # Mean accuracy over everything passed in.
            hit_ratio = (Y == predictions).sum() / float(len(Y))

            # Loss from the logits (the criterion applies its own softmax).
            batch_loss = criterion(logits, Y)

        return batch_loss.item(), hit_ratio * 100, predictions

    return validate

In [None]:
def make_save_checkpoint():
    """Return a function that serialises a training checkpoint with ``torch.save``.

    The returned ``save_checkpoint(optimizer, model, epoch, filename)`` writes
    a dict with the keys ``'optimizer'`` and ``'model'`` (their state dicts)
    and ``'epoch'`` to ``filename``.
    """

    def save_checkpoint(optimizer, model, epoch, filename):
        snapshot = dict(
            optimizer=optimizer.state_dict(),
            model=model.state_dict(),
            epoch=epoch,
        )
        torch.save(snapshot, filename)

    return save_checkpoint


def load_checkpoint(optimizer, model, filename):
    """Restore model (and optionally optimizer) state from a checkpoint.

    The checkpoint must be a dict with the keys ``'epoch'``, ``'model'`` and
    ``'optimizer'``.  Tensors are deserialised onto the CPU so that a
    checkpoint written on a GPU machine can also be opened on a CPU-only one;
    ``load_state_dict`` then copies the values into the existing parameters
    on whatever device they live on.

    SECURITY: ``torch.load`` unpickles the file.  Only load checkpoints from
    a trusted source.

    Args:
        optimizer: optimizer to restore, or ``None`` to skip optimizer state.
        model: module whose ``state_dict`` is restored in place.
        filename: path (or file-like object) of the checkpoint.

    Returns:
        The epoch number stored in the checkpoint.
    """
    checkpoint_dict = torch.load(filename, map_location='cpu')
    epoch = checkpoint_dict['epoch']
    model.load_state_dict(checkpoint_dict['model'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint_dict['optimizer'])
    return epoch

In [None]:
def criterion(predictions, targets):
    """Cross-entropy loss between raw logits and integer class targets.

    Uses the default settings of PyTorch's cross-entropy (mean reduction, no
    class weights).

    Args:
        predictions: unnormalised class scores (logits).
        targets: ground-truth class indices.

    Returns:
        Scalar loss tensor.
    """
    return nn.functional.cross_entropy(input=predictions, target=targets)

In [None]:
def scheduler(epoch, lr):
    """Step-wise exponential learning-rate decay.

    The rate is left untouched for epochs below 50.  From epoch 50 on it is
    multiplied by ``exp(-0.15)`` on every epoch that is a multiple of 20
    (so the first decay happens at epoch 60).

    Args:
        epoch: zero-based epoch index.
        lr: current learning rate.

    Returns:
        The learning rate to use next.
    """
    decay_due = epoch >= 50 and epoch % 20 == 0
    return lr * math.exp(-0.15) if decay_due else lr

In [None]:
from einops import rearrange

def train(model, num_epochs, X_train, Y_train, X_valid, Y_valid, pkl_name, fold, optimizer):
    """Train ``model`` for up to ``num_epochs`` epochs, checkpointing the best epoch.

    Every epoch the training arrays are reshuffled and one optimisation step
    is run per minibatch of ``minibatch`` samples (module-level constant).
    Then ``scheduler`` is applied to the learning rate of the first parameter
    group and the model is evaluated on the whole validation set in a single
    forward pass.  Whenever the validation loss improves on the best value
    seen so far, a checkpoint is written to ``pkl_name``.

    Changes compared with the previous implementation:
      * the trailing partial minibatch is trained on instead of being dropped
        (epoch metrics were already normalised by the full training-set size);
      * the directory of ``pkl_name`` is created if missing;
      * the early-stop counter counts *consecutive* non-improving epochs
        (it is reset on improvement) and is compared with ``EARLY_CNT``;
      * validation tensors are built once rather than once per epoch.

    Args:
        model: network returning ``(logits, softmax)``.
        num_epochs: maximum number of epochs.
        X_train: training inputs, indexed as ``X_train[idx, :, :]``.
        Y_train: training labels, indexed along the first axis.
        X_valid: validation inputs.
        Y_valid: validation labels.
        pkl_name: checkpoint file path.
        fold: zero-based fold index (used only for the progress-bar label).
        optimizer: optimizer bound to the parameters of ``model``.

    Returns:
        ``(train_losses, valid_losses)``: per-epoch loss histories as lists of
        floats.
    """
    # scalar performance metrics per epoch, returned for plotting
    train_losses = []
    valid_losses = []

    train_size = X_train.shape[0]
    # Ceil division: the last, possibly smaller, minibatch is included.
    num_iterations = (train_size + minibatch - 1) // minibatch

    save_checkpoint = make_save_checkpoint()
    train_step = make_train_step(model, criterion, optimizer=optimizer)
    validate = make_validate_fnc(model, criterion)

    # Make sure the checkpoint can actually be written.
    checkpoint_dir = os.path.dirname(pkl_name)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    # The validation set does not change between epochs: build it once.
    X_valid_tensor = torch.tensor(X_valid, device=device).float()
    Y_valid_tensor = torch.tensor(Y_valid, dtype=torch.long, device=device)

    best_valid_loss = float('inf')
    early_stop_cnt = 0

    for epoch in tqdm(range(num_epochs), desc=f'Fold-{fold+1}'):

        if early_stop_cnt >= EARLY_CNT:
            break

        # validate() leaves the model in eval mode, so switch back each epoch
        model.train()

        # shuffle the whole training set to randomise minibatch composition
        train_indices = np.random.permutation(train_size)
        X_train = X_train[train_indices, :, :]
        Y_train = Y_train[train_indices]

        epoch_acc = 0
        epoch_loss = 0

        for i in range(num_iterations):
            batch_start = i * minibatch
            # clamp so the final minibatch never runs past the data
            batch_end = min(batch_start + minibatch, train_size)
            actual_batch_size = batch_end - batch_start

            X = X_train[batch_start:batch_end, :, :]
            Y = Y_train[batch_start:batch_end]

            X_tensor = torch.tensor(X, device=device).float()
            Y_tensor = torch.tensor(Y, dtype=torch.long, device=device)

            # one forward + backward pass and parameter update
            loss, acc = train_step(X_tensor, Y_tensor)

            # size-weighted running averages over the epoch
            epoch_acc += acc * actual_batch_size / train_size
            epoch_loss += loss * actual_batch_size / train_size

            print('\r'+f'Epoch {epoch}: iteration {i+1}/{num_iterations}', end='')

        # learning rate schedule (applied to the first parameter group only)
        new_lr = scheduler(epoch=epoch, lr=optimizer.param_groups[0]['lr'])
        optimizer.param_groups[0]['lr'] = new_lr

        # validation metrics; the predictions are not needed here
        valid_loss, valid_acc, _ = validate(X_valid_tensor, Y_valid_tensor)

        train_losses.append(epoch_loss)
        valid_losses.append(valid_loss)

        if best_valid_loss > valid_loss:
            save_checkpoint(optimizer, model, epoch, pkl_name)
            best_valid_loss = valid_loss
            early_stop_cnt = 0  # improvement: restart the patience window
        else:
            early_stop_cnt += 1

        print(f'\nEpoch {epoch}, lr={new_lr} --- loss:{epoch_loss:.3f}, Epoch accuracy:{epoch_acc:.2f}%, Validation loss:{valid_loss:.3f}, Validation accuracy:{valid_acc:.2f}%')


    print(f"\n\n[*] done !")
    # min() raises on an empty list, e.g. when num_epochs is 0
    if train_losses:
        print(f'[*] Best training loss - {min(train_losses)}')
        print(f'[*] Best validation loss - {min(valid_losses)}')

    return train_losses, valid_losses

In [None]:
import csv
import numpy as np
from PIL import Image
from skimage.io import imread
from skimage.transform import resize

def get_fold_dataset(npy_name):
    """Load one cross-validation fold from a file holding four stacked arrays.

    The file is expected to contain four arrays written back to back with
    ``np.save``, in the order train inputs, train labels, test inputs, test
    labels.

    Args:
        npy_name: path of the fold file.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.
    """
    with open(npy_name, 'rb') as fold_file:
        # Successive np.load calls read the arrays in the order they were saved.
        X_train, y_train, X_test, y_test = (np.load(fold_file) for _ in range(4))

    return X_train, y_train, X_test, y_test

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd

from discord_notice import start, end

# Loss histories of the most recently trained fold (overwritten on every fold).
train_losses, valid_losses = [], []

print(f'[*] model training - {sub_name}')

# start()/end() are imported from discord_notice; presumably they send a
# notification when the run begins and finishes (not visible in this notebook).
start()
for fold in tqdm(range(5)):
    # Each fold lives in its own file: "<fold_path>/fold<k>.npy".
    cur_fold_path = os.path.join(fold_path, 'fold'+str(fold)+'.npy')
    X_train, y_train, X_test, y_test = get_fold_dataset(cur_fold_path)

    pkl_name = f'./model/{data_name}-{sub_name}-{fold}.pkl'
    # Put the model on the same `device` that train() uses for its tensors
    # (hard-coding 'cuda' breaks the CPU fallback selected in the config cell).
    model = Model(num_emotions=len(emotions_dict)).to(device)

    # A fresh model and optimizer per fold so no state leaks between folds.
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-07, amsgrad=False, weight_decay=0)

    # The fold's held-out split is used as the validation set during training.
    train_losses, valid_losses = train(model, num_epochs, X_train, y_train, X_test, y_test, pkl_name, fold, optimizer)

    # Loss curves of this fold: training in blue, validation in red.
    plt.title('Loss Curve')
    plt.ylabel('Loss', fontsize=16)
    plt.xlabel('Epoch', fontsize=16)
    plt.plot(train_losses[:],'b')
    plt.plot(valid_losses[:],'r')
    plt.legend(['Training loss','Validation loss'])
    plt.show()
end()

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sn
import pandas as pd

# Evaluate the best checkpoint of every fold on that fold's held-out split and
# pool labels / predictions across folds for the report cell below.
#
# Uses the `device` chosen in the configuration cell instead of forcing 'cuda',
# so the evaluation also runs on a CPU-only machine.
model = Model(num_emotions=len(emotions_dict)).to(device)
validate = make_validate_fnc(model, criterion)

# axis labels for the confusion matrices, in class-index order
emotion_names = list(emotions_dict.values())

test_accs = []

groundtruth_per_fold = []
predicted_per_fold = []

for fold in range(5):
    pkl_name = f'./model/{data_name}-{sub_name}-{fold}.pkl'

    # Only the model weights are needed for evaluation; passing None makes
    # load_checkpoint skip the optimizer state.
    load_checkpoint(None, model, pkl_name)

    cur_fold_path = os.path.join(fold_path, f'fold{fold}.npy')
    X_train, y_train, X_test, y_test = get_fold_dataset(cur_fold_path)

    X_test_tensor = torch.tensor(X_test,device=device).float()
    y_test_tensor = torch.tensor(y_test,dtype=torch.long,device=device)

    test_loss, test_acc, predicted_emotions = validate(X_test_tensor, y_test_tensor)

    # store a plain Python float (percent) rather than a device tensor
    test_accs.append(float(test_acc))

    predicted_emotions = predicted_emotions.cpu().numpy()
    # use labels from test set
    emotions_groundtruth = y_test_tensor.cpu().numpy()

    groundtruth_per_fold.append(emotions_groundtruth)
    predicted_per_fold.append(predicted_emotions)

# One concatenation instead of repeated np.append; the pooled arrays keep
# their integer dtype, so class labels are reported as 0..7 rather than 0.0..7.0.
emotions_groundtruth_list = np.concatenate(groundtruth_per_fold)
predicted_emotions_list = np.concatenate(predicted_per_fold)

In [None]:
# Write the classification report and confusion matrices for the predictions
# pooled over all folds into the 'result' directory.

# create the output directory (no error if it already exists)
os.makedirs('result', exist_ok=True)

# text report computed from the pooled ground-truth labels and predictions
Report = classification_report(emotions_groundtruth_list, predicted_emotions_list)
print(Report)

# persist the same report; the file name is built from the model and loss names
with open(f"result/{sub_name}_{loss_name}_Report.txt", "w") as f:
    f.write(Report)

# build confusion matrix and normalized confusion matrix
# (normalize='true' normalises over the true labels, i.e. per row)
conf_matrix = confusion_matrix(emotions_groundtruth_list, predicted_emotions_list)
conf_matrix_norm = confusion_matrix(emotions_groundtruth_list, predicted_emotions_list,normalize='true')

# make a confusion matrix with labels using a DataFrame
# (emotion_names is defined in the evaluation cell above)
confmatrix_df = pd.DataFrame(conf_matrix, index=emotion_names, columns=emotion_names)
confmatrix_df_norm = pd.DataFrame(conf_matrix_norm, index=emotion_names, columns=emotion_names)

# plot confusion matrices side by side: raw counts on the left, normalised on the right
plt.figure(figsize=(16,6))
sn.set(font_scale=1.8) # emotion label and title size
plt.subplot(1,2,1)
plt.title('Confusion Matrix')
sn.heatmap(confmatrix_df, annot=True, annot_kws={"size": 13}, fmt='g') # annot_kws sets the font size of the cell values
plt.subplot(1,2,2)
plt.title('Normalized Confusion Matrix')
sn.heatmap(confmatrix_df_norm, annot=True, annot_kws={"size": 13}) # annot_kws sets the font size of the cell values
# save the figure before plt.show()
plt.savefig(f"result/{sub_name}_{loss_name}_confmatrix.pdf")
plt.show()

In [None]:
def Average(lst):
    """Return the arithmetic mean of the values in ``lst``.

    Args:
        lst: non-empty sequence of numbers (or objects supporting ``+`` with
            ``0`` and ``/`` with an int).

    Raises:
        ZeroDivisionError: if ``lst`` is empty.
    """
    running_total = 0
    for value in lst:
        running_total = running_total + value
    return running_total / len(lst)

# Summarise the per-fold test accuracies collected in test_accs
# (values are percentages): best fold, worst fold and the mean over folds.
print('[*] K-Fold training done !')
print(f'[*] Best fold acc : {max(test_accs):.2f}%')
print(f'[*] Worst fold acc : {min(test_accs):.2f}%')
print(f'[*] Average fold acc : {Average(test_accs):.2f}%')