In [1]:
import os
import random
from datetime import datetime
import torch
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
from data.datasets import AMIGOS, series_collate
from architecture.MainNetwork import MainNetwork
from utils import *

In [2]:
# ---------------------------------------------------------------------------
# Run configuration: device selection, paths and hyper-parameters.
# ---------------------------------------------------------------------------

# Restrict which GPUs this process may see; set before any CUDA call below.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
torch.backends.cudnn.enabled = False

# Seed the RNGs used by this notebook so the per-subject subsampling
# (random.sample in the training cell) is repeatable across kernel restarts.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

# Keyword arguments shared by every DataLoader built in this notebook.
loader_kwargs = {'num_workers': 4, 'pin_memory': True, 'shuffle': True, 'drop_last': True}

# Only touch the CUDA runtime when a GPU is actually present, so the CPU
# fallback is reachable instead of crashing in torch.cuda.set_device.
if torch.cuda.is_available():
    torch.cuda.set_device(0)
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Directory that receives the per-subject checkpoints.
savemodel = '/scratch/ec22150/rmse/models/'
os.makedirs(savemodel, exist_ok=True)

# Dataset locations (forwarded to the AMIGOS dataset constructor).
root_path = 'data/face_segments'
labels_path = 'data/Data_Preprocessed_segmented.json'
vids_dir = 'data/vids_segments'
remove_mov = 'data/ignore_mov.json'

# Model / optimisation hyper-parameters.
num_class = 4096        # passed to MainNetwork(num_class)
batch_size = 4
learning_rate = 1e-05
epochs = 20
alpha = 2               # forwarded to val_log
beta = 1                # weight of losses[0] (AR) in the total loss
scale_factor = 1        # forwarded to val_log
gamma = scale_factor    # weight of losses[1] (ECG) in the total loss
downsample = 8          # forwarded to the AMIGOS dataset constructor

# Per-target min and (max - min) range, forwarded to the AMIGOS dataset.
# The extrema equal the values printed by the label-statistics cell.
normalize_val = {
    'AR': {'min': -0.42818420244970845, 'range': 0.40530026133943436 - -0.42818420244970845},
    'ECG': {'min': -2281.0594032292756, 'range': 2340.911172156569 - -2281.0594032292756},
}



In [3]:
# TensorBoard writer for the training run; each run gets its own
# timestamped sub-directory under <log_dir>/AMIGOS/Train.
log_dir = '/scratch/ec22150/rmse/log'
run_stamp = datetime.now().strftime('%b%d_%H-%M-%S')
log_writer = SummaryWriter(os.path.join(log_dir, 'AMIGOS', 'Train', run_stamp))

# Targets are only converted to float tensors.
y_transform = transforms.Compose([torch.FloatTensor])

# Constructor arguments that are identical for both datasets; only the
# input transform differs between training and validation.
dataset_kwargs = dict(
    root_path=root_path,
    labels_path=labels_path,
    vids_dir=vids_dir,
    remove_mov=remove_mov,
    y_transform=y_transform,
    downsample=downsample,
    normalize_val=normalize_val,
)

# Training inputs: colour jitter + random horizontal flip, then normalisation.
x_transform = transforms.Compose([
    transforms.ColorJitter(0.2, 0.2, 0.2),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize([0.4168, 0.3074, 0.2607], [0.2426, 0.1997, 0.1870]),
])
train_dataset = AMIGOS(x_transform=x_transform, **dataset_kwargs)

# Validation inputs: normalisation only (no augmentation).
# `x_transform` is deliberately rebound here, as before, so it ends up
# referring to the validation transform.
x_transform = transforms.Compose([
    transforms.Normalize([0.4168, 0.3074, 0.2607], [0.2426, 0.1997, 0.1870]),
])
val_dataset = AMIGOS(x_transform=x_transform, **dataset_kwargs)

In [4]:
from IPython.display import clear_output

# Names of the two model heads, in the order the model returns them.
output_names = ['AR', 'ECG']

# Subject ids that are skipped in this run.
# NOTE(review): the notebook does not record why these are excluded.
skip_uids = {11, 29, 34, 37, 9, 35, 5, 12, 40, 30, 26, 17, 6, 25, 15, 4, 38, 22, 14, 27, 13, 23, 21, 36, 2, 20}

for uid in train_dataset.data.keys():
    if uid in skip_uids:
        continue

    # Hold out every sample whose second index field equals `uid`; train on
    # all the others.
    # NOTE(review): val_idx positions are read from train_dataset.idxs but
    # index into val_dataset — assumes both datasets build identical `idxs`.
    train_idx = [idx[0] for idx in train_dataset.idxs if idx[1] != uid]
    val_idx = [idx[0] for idx in train_dataset.idxs if idx[1] == uid]

    # Train on a random fifth of the remaining samples.
    actual_train = random.sample(train_idx, len(train_idx) // 5)
    print('Training UID {} with {} samples'.format(uid, len(actual_train)))

    # Use the fully qualified name, consistent with the DataLoader calls,
    # instead of relying on a `data` name leaking in from a star-import.
    train_set = torch.utils.data.Subset(train_dataset, actual_train)
    val_set = torch.utils.data.Subset(val_dataset, val_idx)
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=batch_size,
        collate_fn=series_collate,
        **loader_kwargs
    )
    # NOTE(review): loader_kwargs also shuffles and drops the last partial
    # batch for validation; confirm run_val requires full batches before
    # changing that.
    test_loader = torch.utils.data.DataLoader(
        val_set,
        batch_size=batch_size,
        collate_fn=series_collate,
        **loader_kwargs
    )

    # ---- training -------------------------------------------------------
    lr = learning_rate
    model = MainNetwork(num_class).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-3)
    scaler = torch.cuda.amp.GradScaler()
    loss_hist = []  # validation total loss per epoch, stored as floats
    num_iter = len(train_loader)

    for epoch in range(epochs):
        model.train()

        # Step decay: divide the learning rate by 10 every 5 epochs.
        if (epoch % 5 == 0) and (epoch != 0):
            lr *= 0.1
            for group in optimizer.param_groups:
                group['lr'] = lr

        for batch_idx, (inputs, labels1, labels2, _) in enumerate(train_loader):
            iter_idx = (epoch * num_iter) + batch_idx
            # Follow the configured device instead of hard-coding .cuda().
            inputs = inputs.to(device)
            labels1 = labels1.to(device)
            labels2 = labels2.to(device)

            losses = []
            optimizer.zero_grad()
            with torch.autocast(device.type):
                outputs = model(inputs)
                labels = [labels1, labels2]
                for i in range(len(labels)):
                    if output_names[i] == 'ECG':
                        # Flatten the first and last axes into rows of 2
                        # values; -1 replaces the hard-coded
                        # batch_size * 2560 row count.
                        mae, mse, rmse, pcc, ccc = eval_metrics(
                            outputs[i].permute(0, 2, 1).reshape(-1, 2),
                            labels[i].permute(0, 2, 1).reshape(-1, 2),
                        )
                    else:
                        mae, mse, rmse, pcc, ccc = eval_metrics(outputs[i], labels[i])
                    loss = rmse  # RMSE is the per-head training objective
                    losses.append(loss)

                    logging('Train-{}'.format(uid), output_names[i], log_writer, loss, mae, mse, rmse, pcc, ccc, iter_idx)
                    print('\n Epoch [{}/{}] Iter[{}/{}] Mode: {} \t loss: {:.2f} \t MAE: {:.2f} \t MSE: {:.2f} \t RMSE: {:.2f} \t PCC:{} \t CCC:{} '.format(
                        epoch,
                        epochs,
                        batch_idx + 1,
                        num_iter,
                        output_names[i],
                        loss.item(),
                        mae.item(),
                        mse.item(),
                        rmse.item(),
                        ['%.2f' % elem for elem in pcc.tolist()],
                        ['%.2f' % elem for elem in ccc.tolist()],
                    ))

            # Weighted sum of the two head losses.
            loss = beta * losses[0] + gamma * losses[1]
            log_writer.add_scalar('TotalLoss/{}-{}'.format('Train', uid), loss, iter_idx)
            print('\n Epoch [{}/{}] Iter[{}/{}]\t TotalLoss: {:.2f}'.format(
                epoch,
                epochs,
                batch_idx + 1,
                num_iter,
                loss.item()
            ))
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        # ---- validation -------------------------------------------------
        y_pred_AR, y_true_AR, y_pred_ECG, y_true_ECG = run_val(model, test_loader, batch_size)
        losses = val_log(log_writer, alpha, scale_factor, y_pred_AR, y_true_AR, y_pred_ECG, y_true_ECG, epoch, uid=uid)
        loss = beta * losses[0] + gamma * losses[1]
        log_writer.add_scalar('TotalLoss/{}-{}'.format('Validation', uid), loss, epoch)
        print('\n Epoch [{}/{}] \t TotalLoss: {:.2f}'.format(
            epoch,
            epochs,
            loss.item()
        ))
        print('\n')

        # Keep plain floats in the history so no tensors are retained
        # across epochs.
        val_loss = float(loss)
        loss_hist.append(val_loss)

        # Checkpoint whenever this epoch is at least as good as the best so far.
        if val_loss <= min(loss_hist):
            torch.save({
                'epoch': epoch,
                'model': model.state_dict(),
            },
                os.path.join(savemodel, 'pid_{}.pth.tar'.format(uid))
            )
        log_writer.flush()

    # Release this subject's model and optimiser before the next subject's
    # model is built, so two models are never alive at the same time.
    # NOTE(review): the recorded run ended with "OSError: [Errno 12] Cannot
    # allocate memory"; the cause is not visible here. If it recurs, try
    # lowering loader_kwargs['num_workers'].
    del model, optimizer, scaler
    torch.cuda.empty_cache()
    clear_output()

log_writer.close()

OSError: [Errno 12] Cannot allocate memory

In [7]:
# Evaluate saved checkpoints on the held-out subject.
# NOTE: this rebinds `log_dir` and `log_writer` from the training cell.
log_dir = 'log'
log_writer = SummaryWriter(os.path.join(log_dir, 'AMIGOS', 'Test', datetime.now().strftime('%b%d_%H-%M-%S_eval')))

# os.listdir returns entries in arbitrary order; sort so the evaluation
# order (and therefore the order of the collected results) is deterministic.
model_lst = [os.path.join(savemodel, fname) for fname in sorted(os.listdir(savemodel))]
model = MainNetwork(num_class).to(device)

# Per-checkpoint predictions / targets, moved to CPU as they are collected.
y_pred_ARs = []
y_true_ARs = []
y_pred_ECGs = []
y_true_ECGs = []
for model_path in model_lst:
    # NOTE(review): uid is hard-coded to 37; eval_dataloader presumably
    # returns an empty loader for checkpoints of other subjects — confirm.
    test_loader = eval_dataloader(model_path, val_dataset, batch_size, loader_kwargs, uid=37)

    if len(test_loader) == 0:
        continue
    # map_location lets checkpoints saved on a GPU load on whatever device
    # was selected in the config cell (including the CPU fallback).
    state_dicts = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dicts['model'])

    y_pred_AR, y_true_AR, y_pred_ECG, y_true_ECG = run_val(model, test_loader, batch_size)
    y_pred_ARs.append(y_pred_AR.cpu())
    y_true_ARs.append(y_true_AR.cpu())
    y_pred_ECGs.append(y_pred_ECG.cpu())
    y_true_ECGs.append(y_true_ECG.cpu())
    torch.cuda.empty_cache()

# TODO: the aggregation below is disabled, so the collected lists are not
# consumed and `log_writer` is never flushed or closed by this cell.
# del model

# y_pred_ARs = torch.cat(y_pred_ARs)
# y_true_ARs = torch.cat(y_true_ARs)
# y_pred_ECGs = torch.cat(y_pred_ECGs)
# y_true_ECGs = torch.cat(y_true_ECGs)
# val_log(log_writer, alpha, scale_factor, y_pred_ARs, y_true_ARs, y_pred_ECGs, y_true_ECGs)

# log_writer.flush()
# log_writer.close()

AMIGO 37: Test samples: 85


In [5]:

# Clear this cell's displayed output. `clear_output` is imported from
# IPython.display in the training cell, so that cell must have run first.
clear_output()

In [8]:
import numpy as np

# Report the extrema of both label streams in the validation dataset.
# Each entry of val_dataset.labels is indexed as (AR, ECG); every stream is
# flattened to 1-D before taking min/max.
AR = np.array([entry[0] for entry in val_dataset.labels]).ravel()
ECG = np.array([entry[1] for entry in val_dataset.labels]).ravel()

label_extrema = (AR.min(), AR.max(), ECG.min(), ECG.max())
print('AR-min:{}\tAR-max:{}\tECG-min:{}\tECG-max:{}'.format(*label_extrema))

AR-min:-0.42818420244970845	AR-max:0.40530026133943436	ECG-min:-2281.0594032292756	ECG-max:2340.911172156569


In [10]:
# Debug inspection: `outputs` is whatever the training loop last assigned
# from model(inputs); index 1 pairs with output_names[1] == 'ECG'.
# This cell only works after the training cell has run in the same kernel.
outputs[1]

tensor([[[-0.1125, -0.1279, -0.0298,  ..., -0.1556,  0.1115, -0.1545],
         [-0.0381, -0.1077,  0.0009,  ..., -0.0934, -0.0747,  0.0953]],

        [[-0.0829, -0.1417, -0.0419,  ..., -0.1501,  0.0850, -0.1196],
         [-0.0145, -0.0513,  0.0121,  ..., -0.0623, -0.0785,  0.0334]],

        [[-0.0684, -0.1164, -0.0248,  ..., -0.1279,  0.0399, -0.0859],
         [ 0.0229, -0.0487,  0.0524,  ..., -0.0443, -0.0660,  0.0345]],

        [[-0.0796, -0.1238, -0.0317,  ..., -0.1335,  0.0815, -0.1087],
         [ 0.0019, -0.0670,  0.0169,  ..., -0.0695, -0.0766,  0.0553]]],
       device='cuda:1', dtype=torch.float16, grad_fn=<CatBackward0>)

In [12]:
# Debug inspection: shift the leftover ECG-head output by 10 and divide by 5.
# NOTE(review): the constants 10 and 5 are not explained anywhere in the
# notebook — presumably a scaling experiment; confirm or remove.
(outputs[1] - 10)/5

tensor([[[-2.0215, -2.0254, -2.0059,  ..., -2.0312, -1.9785, -2.0312],
         [-2.0078, -2.0215, -2.0000,  ..., -2.0195, -2.0156, -1.9814]],

        [[-2.0176, -2.0273, -2.0078,  ..., -2.0293, -1.9824, -2.0234],
         [-2.0039, -2.0117, -1.9971,  ..., -2.0117, -2.0156, -1.9941]],

        [[-2.0137, -2.0234, -2.0039,  ..., -2.0254, -1.9922, -2.0176],
         [-1.9951, -2.0098, -1.9893,  ..., -2.0098, -2.0117, -1.9941]],

        [[-2.0156, -2.0254, -2.0059,  ..., -2.0273, -1.9844, -2.0215],
         [-2.0000, -2.0137, -1.9971,  ..., -2.0137, -2.0156, -1.9893]]],
       device='cuda:1', dtype=torch.float16, grad_fn=<DivBackward0>)