# Transfer Learning From English to Spanish
In this notebook we are going to check how is the pre-trained network working over spanish data on english pretrained models. 

In [1]:
from sklearn.model_selection import KFold
from data_Loader import dataLoaderPickle, justDatasetLoaderPickle, dataCreator, dataSaver, dataMLTCreator
from networks_v2 import AttentionModel, AnxietyFromDepression, HydraNetTL, Scripted2Unscripted
from optimizer import train2, test2, optimizerNet, test, train, testMLT, trainMLT
from optimizer import IterMeter

import torch
import torch.nn as nn
from comet_ml import Experiment

import os
import numpy as np

from torch.utils.data import DataLoader, random_split, SubsetRandomSampler

  from .autonotebook import tqdm as notebook_tqdm


## Building dataset

In [None]:
train_loader = "C:\\Users\\ericq\\OneDrive\\Escriptori\\TELECOM\\MSC MATT\\TFM\\The research question\\The Solution\\CNN+RNN\\Raw_Data_v2\\RADAR-MDD-CIBER-s1\\RADAR-MDD-CIBER-s1"
test_loader = ""

name =[]
data = []
audio = []

name = [x[0] for x in os.walk(train_loader)]

name.pop(0)
dataset_esp, spectrograms, labels_dep, labels_anx = dataMLTCreator(train_loader, pickleName="Scripted_Esp_PHQ8_MLT1", task="scripted")

In [None]:
train_loader = "C:\\Users\\ericq\\OneDrive\\Escriptori\\TELECOM\\MSC MATT\\TFM\\The research question\\The Solution\\CNN+RNN\\Raw_Data_v2\\RADAR-MDD-IISPV-s1\\RADAR-MDD-IISPV-s1" 

dataset_esp2, spectrograms2, labels_dep2, labels_anx2 = dataMLTCreator(train_loader, pickleName="Scripted_Esp_PHQ8_MLT2", task="scripted")


In [None]:
import numpy as np
from torch.utils.data import TensorDataset

spectrograms = np.concatenate((spectrograms, spectrograms2))
labels_dep = np.concatenate((labels_dep, labels_dep2))
labels_anx = np.concatenate((labels_anx, labels_anx2))

dataset_esp_tot = TensorDataset(
        torch.from_numpy(spectrograms),
        torch.from_numpy(labels_dep),
        torch.from_numpy(labels_anx)
    )

dataSaver(dataset_esp_tot, "Scripted_Esp_MLT_Tot")

## Analyze data

In [2]:
dataset_esp_tot = justDatasetLoaderPickle("Scripted_Esp_MLT_Tot")

In [3]:
tar = []
import matplotlib.pyplot as plt
for x, y,z in dataset_esp_tot:
    tar.append(y)

n, bins, patches = plt.hist(x=tar,bins='auto',color='#0504aa',alpha=0.7, rwidth=0.85)
plt.grid(axis='y',alpha=0.75)
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('Is my data balanced?')
plt.text(23,45,r'$\mu15, b=3$')
maxfreq = n.max()
plt.ylim(ymax=np.ceil(maxfreq/10)*10 if maxfreq % 10 else maxfreq + 10)
plt.show()

## Building The Model

In [3]:
#Constants
learning_Rate = 0.0005
batch_size = 128
epochs = 200
experiment = Experiment(api_key='dummy_key', disabled=True)

hparams = {
    "n_cnn_layers": 6,
    "n_rnn_layers": 1,
    "rnn_dim": 256,
    "h_rnn_layers": 128,
    "n_class": 5,
    "n_feats": 64,
    "stride": 2,
    "dropout": 0.3,
    "learning_rate": learning_Rate,
    "batch_size": batch_size,
    "epochs": epochs
}

experiment.log_parameters(hparams)
use_cuda = torch.cuda.is_available()
torch.manual_seed(7)
device = torch.device("cuda" if use_cuda else "cpu")



In [15]:
filename = "C:\\Users\\ericq\\OneDrive\\Escriptori\\TELECOM\\MSC MATT\\TFM\\The research question\\The Solution\\CNN+RNN\\model_PHQ8_Scripted_Eng.pt"
infile = open(filename, 'rb')
model_pretrained = torch.load(infile)
infile.close()

In [16]:
k=5
splits=KFold(n_splits=k,shuffle=True,random_state=42)
foldperf={}

#Optimizing Model
weights = [1.0, 0.95, 0.9, 0.75, 0.95]
class_weights = torch.FloatTensor(weights)
criterion_dep = nn.CrossEntropyLoss(weight=class_weights).to(device)
criterion_anx = nn.CrossEntropyLoss().to(device)
iter_meter = IterMeter()

In [5]:
from sklearn.metrics import confusion_matrix
import seaborn as sn 
import pandas as pd
import torch.nn.functional as F 
import matplotlib.pyplot as plt

def trainXD(epochs, model_mlt, train_loader, optimizer, criterion, iter_meter):
    acc_dep_list = []
    loss_list = []
    label_dep_list = []
    predicted_list_dep = []
    loss_tot_list = []
    accuracy_dep = []

    #Training
    for epoch in range(epochs):
        model_mlt.train()
        total_training_loss = 0

        for x,y,z in train_loader:
            inputs = x
            label_dep = y.long()
            label_dep_list.extend(label_dep.detach().numpy())

            optimizer.zero_grad()
            phq8 = model_mlt(inputs.float())
            phq8 = phq8.squeeze(0)

            predicted_list_dep.extend((torch.max(torch.exp(F.log_softmax(phq8,dim=1)),1)[1]).detach().numpy())

            loss = criterion_dep(phq8,label_dep)

            loss.backward()
            optimizer.step()
            iter_meter.step()
            total_training_loss += loss

            loss_list.append(loss.item())
            #Track accuracy
            total_dep = label_dep.size(0)
            _, predicted = torch.max(phq8.data,1)
            correct_dep = (predicted == label_dep).sum().item()
            acc_dep_list.append(correct_dep/total_dep)  

        print('Train Epoch: {} \tLoss: {:.4f}\tDepression Accuracy: {:.4f}'.format(
            epoch,
            np.mean(loss_list),
            np.mean(acc_dep_list)
        ))      
        loss_tot_list.append(np.mean(loss_list))
        accuracy_dep.append(np.mean(acc_dep_list))
        
    #Printing Confusion Matrix
    classes_dep = ('0','1','2','3','4')
    cf_matrix_dep = confusion_matrix(label_dep_list, predicted_list_dep)
    df_cm = pd.DataFrame(cf_matrix_dep/np.sum(cf_matrix_dep)*10,index = [i for i in classes_dep], columns=[i for i in classes_dep])
    plt.figure(figsize=(12,7))
    sn.heatmap(df_cm, annot=True)
    plt.savefig('Confusion Matrix Depression Training')

    fig, axs = plt.subplots(2)
    axs[0].plot(range(epochs), loss_tot_list)
    axs[0].set_title('Training Loss')
    axs[0].set(xlabel= 'Epoch', ylabel='Loss')
    axs[1].plot(range(epochs), accuracy_dep)
    axs[1].set_title('Training Depression Accuracy')
    axs[1].set(xlabel= 'Epoch', ylabel='Accuracy')
    plt.show()

In [6]:
def testXD(epochs, model_mlt, train_loader, optimizer, criterion, iter_meter):
    acc_dep_list = []
    loss_list = []
    label_dep_list = []
    predicted_list_dep = []
    loss_tot_list = []
    accuracy_dep = []

    #Training
    for epoch in range(epochs):
        model_mlt.train()
        total_training_loss = 0

        for x,y,z in train_loader:
            inputs = x
            label_dep = y.long()
            label_dep_list.extend(label_dep.detach().numpy())

            optimizer.zero_grad()
            phq8 = model_mlt(inputs.float())
            phq8 = phq8.squeeze(0)

            predicted_list_dep.extend((torch.max(torch.exp(F.log_softmax(phq8,dim=1)),1)[1]).detach().numpy())

            loss = criterion_dep(phq8,label_dep)

            loss.backward()
            optimizer.step()
            iter_meter.step()
            total_training_loss += loss

            loss_list.append(loss.item())
            #Track accuracy
            total_dep = label_dep.size(0)
            _, predicted = torch.max(phq8.data,1)
            correct_dep = (predicted == label_dep).sum().item()
            acc_dep_list.append(correct_dep/total_dep)  

        print('Test Epoch: {} \tLoss: {:.4f}\tDepression Accuracy: {:.4f}'.format(
            epoch,
            np.mean(loss_list),
            np.mean(acc_dep_list)
        ))      
        loss_tot_list.append(np.mean(loss_list))
        accuracy_dep.append(np.mean(acc_dep_list))
        
    #Printing Confusion Matrix
    classes_dep = ('0','1','2','3','4')
    cf_matrix_dep = confusion_matrix(label_dep_list, predicted_list_dep)
    df_cm = pd.DataFrame(cf_matrix_dep/np.sum(cf_matrix_dep)*10,index = [i for i in classes_dep], columns=[i for i in classes_dep])
    plt.figure(figsize=(12,7))
    sn.heatmap(df_cm, annot=True)
    plt.savefig('Confusion Matrix Depression Testing')

    fig, axs = plt.subplots(2)
    axs[0].plot(range(epochs), loss_tot_list)
    axs[0].set_title('Test Loss')
    axs[0].set(xlabel= 'Epoch', ylabel='Loss')
    axs[1].plot(range(epochs), accuracy_dep)
    axs[1].set_title('Test Depression Accuracy')
    axs[1].set(xlabel= 'Epoch', ylabel='Accuracy')
    plt.show()

In [17]:
epochs = 300

for fold, (train_idx, val_idx) in enumerate(splits.split(np.arange(len(dataset_esp_tot)))):
    print('Fold {}'.format(fold+1))

    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(val_idx)
    train_loaderUns = DataLoader(dataset_esp_tot, batch_size=batch_size, sampler=train_sampler)
    test_loaderUns = DataLoader(dataset_esp_tot, batch_size=batch_size, sampler=test_sampler)

    model_TF = Scripted2Unscripted(model_pretrained)

    optimizer, scheduler = optimizerNet(model_TF, hparams)

    trainXD(epochs, model_TF, train_loaderUns, optimizer, criterion_dep, iter_meter)
    testXD(epochs, model_TF, test_loaderUns, optimizer, criterion_anx, iter_meter)

Fold 1
Train Epoch: 0 	Loss: 1.7341	Depression Accuracy: 0.2599
Train Epoch: 1 	Loss: 1.6473	Depression Accuracy: 0.2753
Train Epoch: 2 	Loss: 1.6010	Depression Accuracy: 0.2836
Train Epoch: 3 	Loss: 1.5710	Depression Accuracy: 0.2927
Train Epoch: 4 	Loss: 1.5483	Depression Accuracy: 0.3019
Train Epoch: 5 	Loss: 1.5302	Depression Accuracy: 0.3093
Train Epoch: 6 	Loss: 1.5161	Depression Accuracy: 0.3160
Train Epoch: 7 	Loss: 1.5058	Depression Accuracy: 0.3202
Train Epoch: 8 	Loss: 1.4957	Depression Accuracy: 0.3249
Train Epoch: 9 	Loss: 1.4868	Depression Accuracy: 0.3297
Train Epoch: 10 	Loss: 1.4783	Depression Accuracy: 0.3339
Train Epoch: 11 	Loss: 1.4709	Depression Accuracy: 0.3382
Train Epoch: 12 	Loss: 1.4637	Depression Accuracy: 0.3422
Train Epoch: 13 	Loss: 1.4572	Depression Accuracy: 0.3460
Train Epoch: 14 	Loss: 1.4513	Depression Accuracy: 0.3495
Train Epoch: 15 	Loss: 1.4458	Depression Accuracy: 0.3530
Train Epoch: 16 	Loss: 1.4406	Depression Accuracy: 0.3565
Train Epoch: 17 	

In [12]:
acc_dep_list = []
loss_list = []
label_dep_list = []
predicted_list_dep = []
loss_tot_list = []
accuracy_dep = []

for x,y,z in test_loaderUns:
    inputs = x
    label_dep = y.long()
    label_dep_list.extend(label_dep.detach().numpy())

    #optimizer.zero_grad()
    phq8 = model_TF(inputs.float())
    phq8 = phq8.squeeze(0)

    predicted_list_dep.extend((torch.max(torch.exp(F.log_softmax(phq8,dim=1)),1)[1]).detach().numpy())

    loss = criterion_dep(phq8,label_dep)

    #loss.backward()
    #optimizer.step()
    iter_meter.step()
    #total_training_loss += loss

    loss_list.append(loss.item())
    #Track accuracy
    total_dep = label_dep.size(0)
    _, predicted = torch.max(phq8.data,1)
    correct_dep = (predicted == label_dep).sum().item()
    acc_dep_list.append(correct_dep/total_dep)  

print('Loss: {:.4f}\tDepression Accuracy: {:.4f}'.format(
    np.mean(loss_list),
    np.mean(acc_dep_list)
))      
loss_tot_list.append(np.mean(loss_list))
accuracy_dep.append(np.mean(acc_dep_list))

#Printing Confusion Matrix
classes_dep = ('0','1','2','3','4')
cf_matrix_dep = confusion_matrix(label_dep_list, predicted_list_dep)
df_cm = pd.DataFrame(cf_matrix_dep/np.sum(cf_matrix_dep)*10,index = [i for i in classes_dep], columns=[i for i in classes_dep])
plt.figure(figsize=(12,7))
sn.heatmap(df_cm, annot=True)
plt.savefig('Confusion Matrix Depression Testing')
plt.show()

Loss: 0.0992	Depression Accuracy: 0.9977


In [10]:
from networks_v2 import HydraNet

for fold, (train_idx, val_idx) in enumerate(splits.split(np.arange(len(dataset_esp_tot)))):
    print('Fold {}'.format(fold+1))

    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(val_idx)
    train_loader = DataLoader(dataset_esp_tot, batch_size=batch_size, sampler=train_sampler)
    test_loader = DataLoader(dataset_esp_tot, batch_size=batch_size, sampler=test_sampler)

    model_mlt = HydraNet(
        hparams['n_cnn_layers'],
        hparams['rnn_dim'],
        hparams['h_rnn_layers'],
        hparams['n_rnn_layers'],
        hparams['n_class'],
        hparams['stride'],
        hparams['dropout']
    ).to(device).float()
    optimizer, scheduler = optimizerNet(model_mlt, hparams)

    acc_dep_list = []
    acc_anx_list = []
    loss_list = []
    label_dep_list = []
    label_anx_list = []
    predicted_list_anx = []
    predicted_list_dep = []
    loss_tot_list = []
    accuracy_dep = []
    accuracy_anx = []

    #Training
    for epoch in range(epochs):
        model_mlt.train()
        total_training_loss = 0

        for x,y,z in train_loader:
            inputs = x
            label_dep = y.long()
            label_anx = z.long()
            label_dep_list.extend(label_dep.detach().numpy())
            label_anx_list.extend(label_anx.detach().numpy())

            optimizer.zero_grad()
            phq8, gad7 = model_mlt(inputs.float())
            phq8 = phq8.squeeze(0)
            gad7 = gad7.squeeze(0)

            predicted_list_dep.extend((torch.max(torch.exp(F.log_softmax(phq8,dim=1)),1)[1]).detach().numpy())
            predicted_list_anx.extend((torch.max(torch.exp(F.log_softmax(gad7,dim=1)),1)[1]).detach().numpy())

            loss_dep = criterion_dep(phq8,label_dep)
            loss_anx = criterion_anx(gad7, label_anx)

            loss = loss_dep + loss_anx
            loss.backward()
            optimizer.step()
            iter_meter.step()
            total_training_loss += loss

            loss_list.append(loss.item())
            #Track accuracy
            total_dep = label_dep.size(0)
            _, predicted = torch.max(phq8.data,1)
            correct_dep = (predicted == label_dep).sum().item()
            acc_dep_list.append(correct_dep/total_dep)  

            total_anx = label_anx.size(0)
            _, predicted = torch.max(gad7.data,1)
            correct_anx = (predicted==label_anx).sum().item()
            acc_anx_list.append(correct_anx/total_anx)

        print('Train Epoch: {} \tLoss: {:.4f}\tDepression Accuracy: {:.4f}\tAnxiety Accuracy: {:.4f}'.format(
            epoch,
            np.mean(loss_list),
            np.mean(acc_dep_list),
            np.mean(acc_anx_list)
        ))      
        loss_tot_list.append(np.mean(loss_list))
        accuracy_dep.append(np.mean(acc_dep_list))
        accuracy_anx.append(np.mean(acc_anx_list))
        
    #Printing Confusion Matrix
    classes_dep = ('0','1','2','3','4')
    cf_matrix_dep = confusion_matrix(label_dep_list, predicted_list_dep)
    df_cm = pd.DataFrame(cf_matrix_dep/np.sum(cf_matrix_dep)*10,index = [i for i in classes_dep], columns=[i for i in classes_dep])
    plt.figure(figsize=(12,7))
    sn.heatmap(df_cm, annot=True)
    plt.savefig('Confusion Matrix Depression Training')

    classes_anx = ('0','1','2','3')
    cf_matrix_anx = confusion_matrix(label_anx_list, predicted_list_anx)
    df_cm = pd.DataFrame(cf_matrix_anx/np.sum(cf_matrix_anx)*10,index = [i for i in classes_anx], columns=[i for i in classes_anx])
    plt.figure(figsize=(12,7))
    sn.heatmap(df_cm, annot=True)
    plt.savefig('Confusion Matrix Anxiety Training')

    fig, axs = plt.subplots(3)
    axs[0].plot(range(epochs), loss_tot_list)
    axs[0].set_title('Training Loss')
    axs[0].set(xlabel= 'Epoch', ylabel='Loss')
    axs[1].plot(range(epochs), accuracy_dep)
    axs[1].set_title('Training Depression Accuracy')
    axs[1].set(xlabel= 'Epoch', ylabel='Accuracy')
    axs[2].plot(range(epochs), accuracy_anx)
    axs[2].set_title('Training Anxiety Accuracy')
    axs[2].set(xlabel='Epoch', ylabel='Accuracy')
    plt.show()

    #Test
    acc_dep_list = []
    acc_anx_list = []
    loss_list = []
    label_dep_list = []
    label_anx_list = []
    predicted_list_anx = []
    predicted_list_dep = []
    loss_tot_list = []
    accuracy_dep = []
    accuracy_anx = []
    for epoch in range(epochs):
        model_mlt.train()
        total_training_loss = 0

        for x,y,z in test_loader:
            inputs = x
            label_dep = y.long()
            label_anx = z.long()
            label_dep_list.extend(label_dep.detach().numpy())
            label_anx_list.extend(label_anx.detach().numpy())

            optimizer.zero_grad()
            phq8, gad7 = model_mlt(inputs.float())
            phq8 = phq8.squeeze(0)
            gad7 = gad7.squeeze(0)

            predicted_list_dep.extend((torch.max(torch.exp(F.log_softmax(phq8,dim=1)),1)[1]).detach().numpy())
            predicted_list_anx.extend((torch.max(torch.exp(F.log_softmax(gad7,dim=1)),1)[1]).detach().numpy())

            loss_dep = criterion_dep(phq8,label_dep)
            loss_anx = criterion_anx(gad7, label_anx)

            loss = loss_dep + loss_anx
            #loss.backward()
            #optimizer.step()
            #iter_meter.step()
            total_training_loss += loss

            loss_list.append(loss.item())
            #Track accuracy
            total_dep = label_dep.size(0)
            _, predicted = torch.max(phq8.data,1)
            correct_dep = (predicted == label_dep).sum().item()
            acc_dep_list.append(correct_dep/total_dep)  

            total_anx = label_anx.size(0)
            _, predicted = torch.max(gad7.data,1)
            correct_anx = (predicted==label_anx).sum().item()
            acc_anx_list.append(correct_anx/total_anx)

        print('Test Epoch: {} \tLoss: {:.4f}\tDepression Accuracy: {:.4f}\tAnxiety Accuracy: {:.4f}'.format(
            epoch,
            np.mean(loss_list),
            np.mean(acc_dep_list),
            np.mean(acc_anx_list)
        ))      
        loss_tot_list.append(np.mean(loss_list))
        accuracy_dep.append(np.mean(acc_dep_list))
        accuracy_anx.append(np.mean(acc_anx_list))
        
    #Printing Confusion Matrix
    classes_dep = ('0','1','2','3','4')
    cf_matrix_dep = confusion_matrix(label_dep_list, predicted_list_dep)
    df_cm = pd.DataFrame(cf_matrix_dep/np.sum(cf_matrix_dep)*10,index = [i for i in classes_dep], columns=[i for i in classes_dep])
    plt.figure(figsize=(12,7))
    sn.heatmap(df_cm, annot=True)
    plt.savefig('Confusion Matrix Depression Test')

    classes_anx = ('0','1','2','3')
    cf_matrix_anx = confusion_matrix(label_anx_list, predicted_list_anx)
    df_cm = pd.DataFrame(cf_matrix_anx/np.sum(cf_matrix_anx)*10,index = [i for i in classes_anx], columns=[i for i in classes_anx])
    plt.figure(figsize=(12,7))
    sn.heatmap(df_cm, annot=True)
    plt.savefig('Confusion Matrix Anxiety Test')

    fig, axs = plt.subplots(3)
    axs[0].plot(range(epochs), loss_tot_list)
    axs[0].set_title('Test Loss')
    axs[0].set(xlabel= 'Epoch', ylabel='Loss')
    axs[1].plot(range(epochs), accuracy_dep)
    axs[1].set_title('Test Depression Accuracy')
    axs[1].set(xlabel= 'Epoch', ylabel='Accuracy')
    axs[2].plot(range(epochs), accuracy_anx)
    axs[2].set_title('Test Anxiety Accuracy')
    axs[2].set(xlabel='Epoch', ylabel='Accuracy')
    plt.show()   

Fold 1
Train Epoch: 0 	Loss: 2.9652	Depression Accuracy: 0.2291	Anxiety Accuracy: 0.3232
Train Epoch: 1 	Loss: 2.9418	Depression Accuracy: 0.2453	Anxiety Accuracy: 0.3377
Train Epoch: 2 	Loss: 2.9263	Depression Accuracy: 0.2583	Anxiety Accuracy: 0.3436
Train Epoch: 3 	Loss: 2.9137	Depression Accuracy: 0.2665	Anxiety Accuracy: 0.3460
Train Epoch: 4 	Loss: 2.9030	Depression Accuracy: 0.2721	Anxiety Accuracy: 0.3500
Train Epoch: 5 	Loss: 2.8927	Depression Accuracy: 0.2783	Anxiety Accuracy: 0.3529
Train Epoch: 6 	Loss: 2.8845	Depression Accuracy: 0.2821	Anxiety Accuracy: 0.3542
Train Epoch: 7 	Loss: 2.8771	Depression Accuracy: 0.2857	Anxiety Accuracy: 0.3560
Train Epoch: 8 	Loss: 2.8700	Depression Accuracy: 0.2899	Anxiety Accuracy: 0.3571
Train Epoch: 9 	Loss: 2.8636	Depression Accuracy: 0.2925	Anxiety Accuracy: 0.3587
Train Epoch: 10 	Loss: 2.8581	Depression Accuracy: 0.2952	Anxiety Accuracy: 0.3602
Train Epoch: 11 	Loss: 2.8527	Depression Accuracy: 0.2977	Anxiety Accuracy: 0.3616
Train E

KeyboardInterrupt: 