In [1]:
import os
import glob
import time
import wandb
import imageio
import torch as T
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from models.models import RNN
from utilities.util import TrTstSplit, GetInputOutputSplit
from utilities.MocapDataset import MocapDatasetLand

# Initializing Data Loader

In [2]:
base_dir = '../../IEMOCAP_full_release/'
os.chdir(base_dir)
dirs = glob.glob('*.csv')
train, test = TrTstSplit(dirs)
head, aud, land, emo = GetInputOutputSplit(train)
dataset = MocapDatasetLand(head, aud, land, emo)
dataset_size = len(aud)
dataloader = T.utils.data.DataLoader(dataset, batch_size=1, num_workers=2)
wandb.init(
    # set the wandb project where this run will be logged
    project="head_nods",
    
    # track hyperparameters and run metadata
    config={
    "learning_rate": 0.001,
    "architecture": "LSTM",
    "dataset": "IEMOCAP",
    "epochs": 1000,
    }
)
device = T.device("cuda:0" if T.cuda.is_available() else "cpu")
rnn = RNN(28,256,1,106).to(device)
criterion = T.nn.MSELoss()
optimizer = T.optim.Adam(rnn.parameters(), lr=0.0001)
scheduler = T.optim.lr_scheduler.StepLR(optimizer, step_size=200, gamma=0.1)
min_loss = 100

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


['Session4.csv', 'Session3.csv', 'Session5.csv', 'Session2.csv']


[34m[1mwandb[0m: Currently logged in as: [33mghosh-bishal[0m ([33mbigzen0[0m). Use [1m`wandb login --relogin`[0m to force relogin




# Training

In [3]:
for epoch in range(500):
    loop = tqdm(dataloader)
    rnn.train()
    running_loss = 0.0
    running_corrects = 0.0
    for idx, (audio, pose, name) in enumerate(loop):
        audio = audio.to(device)
        pose = pose.to(device)
        optimizer.zero_grad()
        with T.set_grad_enabled(True):
            outputs = rnn(T.squeeze(audio))
            loss = criterion(outputs, T.squeeze(pose))
            loss.backward()
            optimizer.step()

        # statistics
        running_loss += loss.item()
        loop.set_description(f"Epoch [{epoch}/{500}]")
        loop.set_postfix(loss=loss.item())
    scheduler.step()

    epoch_loss = running_loss / dataset_size
    wandb.log({ "loss": epoch_loss})
    if min_loss>epoch_loss:
        min_loss=epoch_loss
        T.save({'epoch': epoch,
                'model_state_dict': rnn.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss}, '../MSc_Thesis/MSc_Thesis/models/face_bestmodel.pth'.format(epoch))

  y = T.Tensor(vals)
  y = T.Tensor(vals)
Epoch [0/500]: 100%|██████████| 4009/4009 [00:35<00:00, 112.52it/s, loss=18.1] 
  y = T.Tensor(vals)
  y = T.Tensor(vals)
Epoch [1/500]: 100%|██████████| 4009/4009 [00:32<00:00, 123.14it/s, loss=9.99] 
  y = T.Tensor(vals)
  y = T.Tensor(vals)
Epoch [2/500]: 100%|██████████| 4009/4009 [00:33<00:00, 120.15it/s, loss=8.1]  
  y = T.Tensor(vals)
  y = T.Tensor(vals)
Epoch [3/500]: 100%|██████████| 4009/4009 [00:33<00:00, 118.80it/s, loss=8.65] 
  y = T.Tensor(vals)
  y = T.Tensor(vals)
Epoch [4/500]: 100%|██████████| 4009/4009 [00:33<00:00, 119.89it/s, loss=8.43] 
  y = T.Tensor(vals)
  y = T.Tensor(vals)
Epoch [5/500]: 100%|██████████| 4009/4009 [00:33<00:00, 118.23it/s, loss=9.26] 
  y = T.Tensor(vals)
  y = T.Tensor(vals)
Epoch [6/500]: 100%|██████████| 4009/4009 [00:34<00:00, 116.97it/s, loss=7.31] 
  y = T.Tensor(vals)
  y = T.Tensor(vals)
Epoch [7/500]: 100%|██████████| 4009/4009 [00:34<00:00, 116.14it/s, loss=7.67] 
  y = T.Tensor(vals)
  y

# Testing

In [1]:

rnn = RNN(28,256,1,106).to(device)
rnn.load_state_dict(T.load('../MSc_Thesis/MSc_Thesis/models/face_bestmodel.pth')['model_state_dict'])
rnn.eval()
for audio, pose, name in dataloader:
        audio = audio.to(device)
        pose = pose.to(device)
        outputs = rnn(T.squeeze(audio))
        loss = criterion(outputs, T.squeeze(pose))
        running_loss += loss.item()
        outputs = outputs.to('cpu').detach().numpy()
        pose = np.squeeze(pose.to('cpu').detach().numpy())
        if count<5:
            name = name[0].split('/')
            name = name[-1].split('.')[0]
            with imageio.get_writer('../MSc_Thesis/MSc_Thesis/'+name+'.gif', mode='I') as writer:
                for i in range(pose.shape[0]):
                    plt.scatter(pose[i,0::2], pose[i,1::2], c='r',label='ground_truth')
                    plt.scatter(outputs[i,0::2], outputs[i,1::2],c='b', label='prediction')
                    ax=plt.gca()
                    ax.set_aspect('equal', adjustable='box')
                    plt.savefig(name+'.png')
                    plt.close()
                    image = imageio.imread(name+'.png')
                    writer.append_data(image)
            writer.close()
        count=count+1

test_loss = running_loss / dataset_size
print(test_loss)

NameError: name 'RNN' is not defined