In [None]:
import os
import sys
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import cv2
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from torch.utils.data import DataLoader
import torchvision

# Repository root: the parent of the directory the notebook is run from.
WORK_DIR = Path.cwd().parent
# Make the project's `src` package importable. Guarded so that re-running this
# cell does not keep appending the same entry to sys.path.
if str(WORK_DIR) not in sys.path:
    sys.path.append(str(WORK_DIR))
from src.datasets import get_dataset, get_dataloader, EK_TSN_Labels
from src.utils import parse_data_cfg, IMG, TSN, EK, DATA_DIR
from src.models import get_model

In [None]:
# --- Experiment configuration -------------------------------------------
# The config file is looked up at WORK_DIR/data_cfg/<exp_fold>/<cfgname>.cfg.
exp_fold = ''
cfgname = 'tsn_ek_base'
cfg = parse_data_cfg(WORK_DIR / 'data_cfg' / exp_fold / f'{cfgname}.cfg')
epoch = 80                              # forwarded to get_model() in later cells
exp_dir = cfg["exp_dir"]
data_split = 'train'
data_list = cfg[f'{data_split}_set']    # config entry for the chosen split

# --- EPIC-Kitchens action annotations -----------------------------------
epic_root = Path(DATA_DIR) / 'EPIC_KITCHENS_2018'
epic_action_data = pd.read_csv(
    epic_root / 'annotations' / 'EPIC_train_action_labels.csv')

# Sorted unique verb / noun class ids that occur in the annotation table.
action_verb_class = np.sort(epic_action_data['verb_class'].unique())
action_noun_class = np.sort(epic_action_data['noun_class'].unique())

# Lookup tables used by later cells to turn a loader target into a class id.
# NOTE(review): presumably maps a linear index back to the original class id;
# confirm against EK.rev_lin_id in src.utils.
verb_dict = EK.rev_lin_id(action_verb_class)
noun_dict = EK.rev_lin_id(action_noun_class)

In [None]:
# Build the model wrapper from the parsed config via src.models.get_model.
# The next cell reads `model.train_loader` from the returned object.
# NOTE(review): the positional `True` presumably selects training mode and
# `epoch` the checkpoint to load; the two `None` arguments are not explained
# by anything visible here — confirm against get_model's signature.
model = get_model(cfg, True, epoch, None, None)

# Train dataloader

In [None]:
# Take the first batch the training loader yields: stacked frames plus the
# verb and noun targets.
frames, target_verb, target_noun = next(iter(model.train_loader))
# Move axis 1 to the end so the stacked image channels are last, which is the
# layout the slicing and plotting cells below expect.
# NOTE(review): `input` shadows the Python builtin; the name is kept because
# later cells read it.
input = frames.permute(0, 2, 3, 1)
print(input.shape)

In [None]:
def to_uint8_image(frame):
    """Min-max rescale ``frame`` to 0-255 and return it as an 8-bit array.

    ``frame`` is passed through ``np.asarray`` first, so it must be something
    NumPy can view as an array (below: 3-channel slices of ``cur_input``).
    """
    return cv2.normalize(np.asarray(frame), None, alpha=0, beta=255,
                         norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC3)


# Sample to inspect. Clamped to the last item so that a batch holding fewer
# than 11 samples does not raise an IndexError.
idx = min(10, input.shape[0] - 1)
cur_input = input[idx]

# Translate the loader targets into class ids via the lookup tables, then into
# readable class names.
# NOTE(review): assumes verb_dict / noun_dict are keyed by the integer targets
# the loader emits — confirm against EK.rev_lin_id.
cur_verb = verb_dict[target_verb[idx].item()]
cur_noun = noun_dict[target_noun[idx].item()]
verb = EK.get_class_name(int(cur_verb), 'verb')
noun = EK.get_class_name(int(cur_noun), 'noun')

# Split the channel-stacked sample into consecutive 3-channel images and
# rescale each one for display.
img1 = to_uint8_image(cur_input[:, :, :3])
img2 = to_uint8_image(cur_input[:, :, 3:6])
img3 = to_uint8_image(cur_input[:, :, 6:])

In [None]:
print(verb, noun)
fig, ax = plt.subplots(3, 1, figsize=(10, 10))
# Put the action label on the figure so it can be read without the printed line.
fig.suptitle(f'{verb} {noun}')
for axis, frame in zip(ax, (img1, img2, img3)):
    # Reverse the channel axis before drawing.
    # NOTE(review): presumably BGR -> RGB; confirm the loader's channel order.
    axis.imshow(frame[:, :, ::-1])

# Test dataloader

In [None]:
# Rebuild the model wrapper, this time passing `False` as the second argument;
# this rebinds `model`, replacing the instance created for the train-loader cells.
# The next cell reads `model.net` (input_size, scale_size, input_mean, input_std).
# NOTE(review): `False` presumably selects evaluation mode — confirm against
# get_model's signature.
model = get_model(cfg, False, epoch, None, None)

In [None]:
# Name of the config entry for the chosen split, and the split it points to.
data_set = f'{data_split}_set'
dataset_kwargs = dict(split_set=cfg[data_set])
# Clear the 'aug' entry of the shared config (mutates `cfg` in place).
cfg['aug'] = None

# Both Stack and ToTorchFormatTensor switch on whether the backbone is BNInception.
is_bninception = cfg['base_model'] == 'BNInception'

# Transform pipeline: over-sampled crops -> stack -> tensor -> normalise,
# using the sizes and statistics exposed by the network.
cropping = torchvision.transforms.Compose([
    TSN.GroupOverSample(model.net.input_size, model.net.scale_size),
])
pred_tfm = torchvision.transforms.Compose([
    cropping,
    TSN.Stack(roll=is_bninception),
    TSN.ToTorchFormatTensor(div=not is_bninception),
    TSN.GroupNormalize(model.net.input_mean, model.net.input_std),
])

# NOTE(review): random_shift=True together with test_mode=True looks
# contradictory — confirm which flag EK_TSN_Labels honours.
pred_dataset = EK_TSN_Labels(
    cfg, cfg[data_set], pred_tfm, random_shift=True, test_mode=True)

# One sample per batch, drawn in shuffled order.
pred_kwargs = dict(
    batch_size=1,
    shuffle=True,
    num_workers=int(cfg['num_workers']),
    pin_memory=True,
)
data_loader = DataLoader(pred_dataset, sampler=None, **pred_kwargs)


In [None]:
# Take the first batch from the prediction loader built above: stacked crops
# plus the verb and noun targets.
frames, target_verb, target_noun = next(iter(data_loader))
# Move axis 1 to the end so the stacked image channels are last, which is the
# layout the plotting cell below slices.
# NOTE(review): `input` shadows the Python builtin; the name is kept because
# later cells read it.
input = frames.permute(0, 2, 3, 1)
print(input.shape)

In [None]:
# Work on the first sample of the fetched batch.
cur_input = input[0]

# Raw targets as plain Python numbers.
verb_target = target_verb[0].item()
noun_target = target_noun[0].item()

# Targets -> class ids via the lookup tables, then class ids -> names.
cur_verb = verb_dict[verb_target]
cur_noun = noun_dict[noun_target]
verb = EK.get_class_name(int(cur_verb), 'verb')
noun = EK.get_class_name(int(cur_noun), 'noun')
print(verb, noun)

In [None]:
# Show every IMG_STEP-th 3-channel image of the stacked sample.
# NOTE(review): a step of 10 presumably matches the number of crops that
# GroupOverSample produces per frame — confirm against the TSN transform.
IMG_STEP = 10
# Number of complete 3-channel images available along the last axis.
n_images = cur_input.shape[-1] // 3

fig, ax = plt.subplots(2, 5, figsize=(10, 10))
# ax.flat walks the 2x5 grid row by row, the same order as nested row/column loops.
for panel, axis in enumerate(ax.flat):
    idx = panel * IMG_STEP
    if idx >= n_images:
        # Slicing past the last channel would give an empty array that can be
        # neither normalised nor drawn; leave this panel blank instead.
        axis.axis('off')
        continue
    img = cur_input[:, :, idx*3:idx*3 + 3]
    # Min-max rescale to 0-255 as 8-bit so imshow renders it as an image.
    img = cv2.normalize(np.asarray(img), None, alpha = 0, beta = 255, norm_type = cv2.NORM_MINMAX, dtype = cv2.CV_8UC3)
    # Reverse the channel axis before drawing.
    # NOTE(review): presumably BGR -> RGB; this only holds if the loader yields BGR.
    axis.imshow(img[:, :, ::-1])