In [1]:
import pandas as pd
from PIL import Image
import cv2
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import torch
# from transformers import VideoMAEFeatureExtractor, VideoMAEForVideoClassification
from sklearn.metrics import f1_score
from torch.nn import DataParallel

import matplotlib.pyplot as plt
from IPython.display import clear_output
import time

import random
import numpy as np

import torch.nn as nn
import torch.nn.functional as F

In [2]:
import wandb

# Authenticate this session with Weights & Biases before any sweep or run is created.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33myashkens[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

### Data

In [3]:
class ImageEmotionDataset(Dataset):
    """Dataset that turns video files into fixed-length clips of resized frames.

    Every row of ``data`` must provide a ``file_path`` column (path relative to
    ``root_dir``) and an ``emotion`` column whose value is a key of ``label2id``.

    Args:
        data: pandas DataFrame with ``file_path`` and ``emotion`` columns.
        label2id: mapping from emotion name to integer class id.
        gray: if True frames are converted to grayscale, otherwise to RGB.
        clip_len: number of frames in every returned clip.
        frame_sample_rate: a clip is sampled from a window of
            ``clip_len * frame_sample_rate`` consecutive frames.
        root_dir: directory prepended to every ``file_path``.
        frame_size: ``(width, height)`` handed to ``cv2.resize`` as ``dsize``.
    """

    def __init__(self, data, label2id, gray=False, clip_len=16, frame_sample_rate=4,
                 root_dir='CREMA/VideoFlash', frame_size=(240, 180)):
        self.label_dict = label2id
        self.data = data
        self.clip_len = clip_len
        self.frame_sample_rate = frame_sample_rate
        self.gray = gray
        self.root_dir = root_dir
        self.frame_size = frame_size

    def __getitem__(self, idx):
        """Return ``(clip, label_id)`` for row ``idx`` of the DataFrame."""
        row = self.data.iloc[idx]
        file_path = f"{self.root_dir}/{row['file_path']}"
        video = self.get_frames(file_path)
        label = self.label_dict[row['emotion']]
        return video, label

    def __len__(self):
        return len(self.data)

    def sample_frame_indices(self, seg_len):
        """Pick ``clip_len`` frame indices from a random window of the video.

        The window spans ``clip_len * frame_sample_rate`` frames (shrunk to
        ``seg_len - 1`` for short videos) and ends at a random position drawn
        from ``np.random``. Indices may repeat when the window is shorter
        than ``clip_len``.

        Args:
            seg_len: number of frames in the video.

        Returns:
            ``np.int64`` array of length ``clip_len``.
        """
        converted_len = int(self.clip_len * self.frame_sample_rate)
        converted_len = min(converted_len, seg_len - 1)
        end_idx = np.random.randint(converted_len, seg_len)
        start_idx = end_idx - converted_len
        indices = np.linspace(start_idx, end_idx, num=self.clip_len)
        indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
        return indices

    def get_frames(self, file_path):
        """Decode, colour-convert and resize the sampled frames of one video.

        Frames that fail to decode are skipped. If fewer than ``clip_len``
        frames were collected (duplicate indices or decode failures) the last
        collected frame is repeated to pad the clip.

        Args:
            file_path: path of the video file to read.

        Returns:
            ``np.ndarray`` stacking ``clip_len`` frames; for RGB input each
            frame is ``(height, width, 3)``.

        Raises:
            RuntimeError: if not a single sampled frame could be decoded
                (missing/corrupt file or a reported frame count of 0).
        """
        cap = cv2.VideoCapture(file_path)
        try:
            v_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # A set gives O(1) membership tests inside the decode loop.
            wanted = set(self.sample_frame_indices(v_len).tolist())
            last_wanted = max(wanted, default=-1)

            frames = []
            for fn in range(v_len):
                if fn > last_wanted:
                    # Nothing after the last sampled index is needed.
                    break
                success, frame = cap.read()
                if not success or fn not in wanted:
                    continue
                if not self.gray:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                else:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    frame = np.reshape(frame, (frame.shape[0], frame.shape[1], 1))
                res = cv2.resize(frame, dsize=self.frame_size, interpolation=cv2.INTER_CUBIC)
                frames.append(np.array(res))
        finally:
            # Always free the decoder handle, even if decoding raised.
            cap.release()

        if not frames:
            raise RuntimeError(
                f'No frames could be decoded from {file_path!r} '
                f'(reported frame count: {v_len})'
            )

        missing = self.clip_len - len(frames)
        if missing > 0:
            frames.extend([frames[-1]] * missing)

        return np.array(frames)

In [4]:
def prepare_data(bs, gray=False, head_folder='CREMA/CSV'):
    """Load the train/val/test splits and wrap them in DataLoaders.

    The label vocabulary is taken from the ``emotion`` column of the training
    split and sorted, so the label-to-id mapping is identical on every run
    (a plain ``set`` of strings has no stable iteration order across
    interpreter runs).

    Args:
        bs: batch size used for all three loaders.
        gray: forwarded to ``ImageEmotionDataset``.
        head_folder: directory containing ``train.csv``, ``val.csv`` and
            ``test.csv``.

    Returns:
        Tuple ``(num_labels, label2id, id2label, train_dataloader,
        test_dataloader, val_dataloader, test_dataset)``.
    """
    train = pd.read_csv(f'{head_folder}/train.csv')
    val = pd.read_csv(f'{head_folder}/val.csv')
    test = pd.read_csv(f'{head_folder}/test.csv')

    # key=str keeps the sort total even if a label is not a string (e.g. NaN).
    labels = sorted(set(train['emotion']), key=str)
    label2id = {label: i for i, label in enumerate(labels)}
    id2label = {i: label for label, i in label2id.items()}

    train_dataset = ImageEmotionDataset(train, label2id, gray=gray)
    test_dataset = ImageEmotionDataset(test, label2id, gray=gray)
    val_dataset = ImageEmotionDataset(val, label2id, gray=gray)

    # Only the training split is shuffled; evaluation order stays fixed so
    # batch-averaged metrics do not depend on a random batch composition.
    train_dataloader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=bs, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=bs, shuffle=False)

    return len(labels), label2id, id2label, train_dataloader, test_dataloader, val_dataloader, test_dataset

### Model

In [5]:
class CNNModel(nn.Module):
    """3D-convolutional clip classifier.

    Two ``Conv3d -> ReLU -> MaxPool3d`` stages, each followed by batch
    normalisation, feed two further convolutions. The resulting feature map
    is flattened, max-pooled to 512 values and classified by two linear
    layers.

    Each ``BatchNorm3d`` is registered under exactly one attribute name
    (``batch_norm1``, ``batch_norm2``, ``batch_norm4``), so ``state_dict()``
    carries no duplicated entries.

    Args:
        num_classes: number of output classes.
        in_channels: channel count of the input clip; 3 (default) for RGB,
            1 for single-channel clips.
    """

    def __init__(self, num_classes, in_channels=3):
        super(CNNModel, self).__init__()

        self.num_labels = num_classes
        self.loss_func = nn.CrossEntropyLoss()

        self.conv1 = self._conv_layer_set(in_channels, 64)
        self.batch_norm1 = nn.BatchNorm3d(64)
        self.conv2 = self._conv_layer_set(64, 128)
        self.batch_norm2 = nn.BatchNorm3d(128)

        # The kernel covers 6 positions of the last input dimension only.
        self.conv3 = nn.Conv3d(128, 256, kernel_size=(1, 1, 6), padding=0)
        self.relu3 = nn.ReLU()
        self.conv4 = nn.Conv3d(256, 512, kernel_size=(1, 1, 1), padding=0)
        self.relu4 = nn.ReLU()
        # A (1, 1, 1) pooling window leaves the tensor unchanged.
        self.pool4 = nn.MaxPool3d((1, 1, 1))
        self.batch_norm4 = nn.BatchNorm3d(512)

        # Brings the flattened features to the 512 inputs that fc1 expects.
        self.adap_pool = nn.AdaptiveMaxPool1d(512)
        self.fc1 = nn.Linear(512, 256)
        self.relu_fc = nn.ReLU()
        self.fc2 = nn.Linear(256, num_classes)

    def _conv_layer_set(self, in_c, out_c):
        """Build one ``Conv3d(3x3x3) -> ReLU -> MaxPool3d(2x2x2)`` stage."""
        conv_layer = nn.Sequential(
            nn.Conv3d(in_c, out_c, kernel_size=(3, 3, 3), padding=0),
            nn.ReLU(),
            nn.MaxPool3d((2, 2, 2)),
        )
        return conv_layer

    def forward(self, x, labels=None):
        """Compute class logits and, when labels are given, the loss.

        Args:
            x: float tensor laid out as ``nn.Conv3d`` expects, i.e.
                ``(batch, in_channels, D, H, W)``.
            labels: optional integer class ids, one per clip.

        Returns:
            Dict with ``'logits'`` (``(batch, num_classes)``) and ``'loss'``
            (cross-entropy tensor, or ``None`` when ``labels`` is ``None``).
        """
        out = self.conv1(x)
        out = self.batch_norm1(out)
        out = self.conv2(out)
        out = self.batch_norm2(out)

        out = self.conv3(out)
        out = self.relu3(out)
        out = self.conv4(out)
        out = self.relu4(out)
        out = self.pool4(out)
        out = self.batch_norm4(out)

        # Flatten everything but the batch dimension before the pooling/FC head.
        out = out.reshape(out.size(0), -1)
        out = self.adap_pool(out)
        out = self.fc1(out)
        out = self.relu_fc(out)
        out = self.fc2(out)

        loss = None
        if labels is not None:
            loss = self.loss_func(out.view(-1, self.num_labels), labels.view(-1))

        return {'logits': out, 'loss': loss}

In [6]:
class EmotionClassificationModel():
    """Training / inference wrapper around a clip-classification network.

    The wrapped network must accept ``(inputs, labels=None)``, return a dict
    with ``'logits'`` and ``'loss'``, and expose ``num_labels``.

    Args:
        model: the ``nn.Module`` to wrap; it is placed inside ``DataParallel``
            and moved to ``device``.
        device: device handed to ``.to(...)`` for the model and every batch.
        gray: if True, batches lack a channel axis and one is appended
            before the layout change.
    """

    def __init__(self, model, device, gray=False):
        self.device = device
        self.model = DataParallel(model).to(device)
        self.gray = gray

    def _prepare_inputs(self, pixel_values):
        """Move a channels-last clip batch to the device as channels-first floats."""
        if self.gray:
            pixel_values = pixel_values.unsqueeze(-1)
        # Transfer first: the raw batch is usually smaller than its float copy.
        pixel_values = pixel_values.to(self.device)
        # (batch, T, H, W, C) -> (batch, C, T, H, W)
        return pixel_values.permute(0, 4, 1, 2, 3).float()

    def _predict(self, logits):
        """Return the arg-max class id for every row of ``logits``."""
        active_logits = logits.view(-1, self.model.module.num_labels)
        return torch.argmax(active_logits, dim=1)

    def _score_batch(self, batch):
        """Run one labelled batch; return ``(loss_tensor, weighted_f1)``."""
        inputs, labels = batch
        inputs = self._prepare_inputs(inputs)
        labels = labels.to(self.device)

        output = self.model(inputs, labels=labels)
        # With several replicas DataParallel returns one loss per replica;
        # mean() collapses them and is a no-op on a scalar.
        loss = output['loss'].mean()

        pred = self._predict(output['logits'])
        gold = labels.view(-1)
        fscore = f1_score(gold.cpu().numpy(), pred.cpu().numpy(), average='weighted')
        return loss, fscore

    def __call__(self, pixel_values):
        """Predict class ids for a batch of clips (eval mode, no gradients)."""
        self.model.eval()

        with torch.no_grad():
            output = self.model(self._prepare_inputs(pixel_values))
            pred = self._predict(output['logits'])
        return pred

    def validate(self, val_dataloader):
        """Evaluate on ``val_dataloader``.

        Returns:
            ``(avg_loss, avg_f1)`` where both are means over batches of the
            per-batch loss and per-batch weighted F1.
        """
        self.model.eval()

        val_loss, val_fscore = 0, 0

        with torch.no_grad():
            for batch in val_dataloader:
                loss, fscore = self._score_batch(batch)
                val_loss += loss.item()
                val_fscore += fscore

        avg_val_loss = val_loss / len(val_dataloader)
        avg_val_f1 = val_fscore / len(val_dataloader)
        return avg_val_loss, avg_val_f1

    def train(self, train_dataloader, val_dataloader, n_epoch, optimizer, checkpoint_step, model_save_name, patience):
        """Train with early stopping on validation F1 and log to wandb.

        After every epoch the averaged train/validation metrics are logged.
        Training stops once ``patience`` consecutive epochs fail to beat the
        best validation F1 seen so far; the metrics of the stopping epoch
        are logged before returning.

        Args:
            train_dataloader: iterable of ``(inputs, labels)`` training batches.
            val_dataloader: iterable of ``(inputs, labels)`` validation batches.
            n_epoch: maximum number of epochs.
            optimizer: optimizer over the model's parameters.
            checkpoint_step: unused; kept for interface compatibility.
            model_save_name: unused; kept for interface compatibility.
            patience: number of consecutive non-improving epochs tolerated.

        Returns:
            None.
        """
        best_val_f1 = float('-inf')
        no_improv_epochs = 0

        for epoch in range(n_epoch):

            self.model.train()

            train_loss, train_fscore = 0, 0

            for batch in train_dataloader:
                self.model.zero_grad()
                loss, fscore = self._score_batch(batch)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                train_fscore += fscore

                wandb.log({"batch train loss": loss.item()})

            avg_train_loss = train_loss / len(train_dataloader)
            avg_train_f1 = train_fscore / len(train_dataloader)

            avg_val_loss, avg_val_f1 = self.validate(val_dataloader)

            # Log before the early-stopping check so the last epoch is recorded.
            wandb.log({"train loss": avg_train_loss, "val loss": avg_val_loss,
                       "train F1": avg_train_f1, "val F1": avg_val_f1,
                       "epoch": epoch})

            # EARLY STOPPING: count consecutive epochs without a new best score.
            if avg_val_f1 > best_val_f1:
                best_val_f1 = avg_val_f1
                no_improv_epochs = 0
            else:
                no_improv_epochs += 1
                if no_improv_epochs >= patience:
                    break

        return None

In [7]:
# num_labels, label2id, id2label, train_dataloader, test_dataloader, dev_dataloader, test_dataset = prepare_data(16)
                
# model = CNNModel(num_labels)

# optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-05)
# device = 'cuda'
# answer_model = EmotionClassificationModel(model, device=device)

# patience = 3
# answer_model.train(train_dataloader, dev_dataloader, 10, optimizer, 200, 'test.pt', patience)

### Testing

In [8]:
def test(test_dataloader, answer_model):
    gold, pred = [], []
    for batch in test_dataloader:
        inputs, labels = batch
        res = answer_model(inputs)
        pred.extend(res.tolist())
        gold.extend(labels.tolist())
    fscore = f1_score(gold, pred, average='weighted')
    return fscore

### Wandb config

In [9]:
# Sweep objective: maximise "val F1", the key logged once per epoch during training.
metric = dict(name='val F1', goal='maximize')

# Grid search over every combination of the swept parameter values.
sweep_config = dict(method='grid', metric=metric)

In [10]:
# variable parameters
# Previously swept architectures: '3_blocks_1_layer', '2_blocks_2_layers'.
parameters_dict = dict(
    architecture=dict(values=['experiment_1']),
)

# The sweep config keeps a reference to this dict, so later additions to
# parameters_dict are visible through sweep_config as well.
sweep_config['parameters'] = parameters_dict

In [11]:
# set parameters
# Fixed (non-swept) settings; each is a single 'value' rather than a list of 'values'.
parameters_dict.update(
    epochs={'value': 15},
    batch_size={'value': 16},
    seed={'value': 42},
    lr={'value': 0.001},
)

In [12]:
# Register the sweep under the "3d-cnn" project; the returned id is later handed to wandb.agent.
sweep_id = wandb.sweep(sweep_config, project="3d-cnn")

Create sweep with ID: w093ytgu
Sweep URL: https://wandb.ai/yashkens/3d-cnn/sweeps/w093ytgu


### Training

In [13]:
def train_net(config=None):
    """Run one sweep trial: seed, build data and model, train, then test.

    Reads ``seed``, ``architecture``, ``lr``, ``batch_size`` and ``epochs``
    from ``wandb.config``, names the run after them, trains a ``CNNModel``
    with early stopping and logs the resulting ``"test F1"``.

    Args:
        config: optional configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config) as run:

        config = wandb.config

        # Seed every RNG the pipeline draws from.
        torch.manual_seed(config.seed)
        random.seed(config.seed)
        np.random.seed(config.seed)

        run.name = f"3DCNN-{config.architecture}-{config.seed}-{config.lr}"

        (num_labels, label2id, id2label,
         train_dataloader, test_dataloader, dev_dataloader,
         test_dataset) = prepare_data(config.batch_size)

        model = CNNModel(num_labels)

        optimizer = torch.optim.Adam(params=model.parameters(), lr=config.lr)
        # Use the GPU when present, but keep the notebook runnable on CPU-only hosts.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        answer_model = EmotionClassificationModel(model, device=device)

        patience = 3
        answer_model.train(train_dataloader, dev_dataloader, config.epochs, optimizer, 200, 'test.pt', patience)

        test_fscore = test(test_dataloader, answer_model)
        wandb.log({"test F1": test_fscore})

In [14]:
# Start the sweep agent, which invokes train_net for the sweep identified by sweep_id.
wandb.agent(sweep_id, train_net)

[34m[1mwandb[0m: Agent Starting Run: dgz058gc with config:
[34m[1mwandb[0m: 	architecture: experiment_1
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	seed: 42
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
















































[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
