### Obtain data

In [None]:
import os
import torch
import numpy as np
from turbojpeg import TurboJPEG, TJPF_GRAY

In [None]:
# Which word and dataset split to inspect, and the directory the sample files live under.
phase = 'train'
label = 'CHIEF'
ROOT = '/home/iwawiwi/research/whispering-fairies/data/lrw_cropped/'

# Single decoder instance, reused for every frame decoded further down.
jpeg = TurboJPEG()

In [None]:
# Load one sample file: <ROOT>/<label>/<phase>/<label>_00001.pkl
# NOTE(review): torch.load unpickles the file, so only point it at trusted data.
videos = torch.load(os.path.join(ROOT, label, phase, label+'_00001.pkl'))

In [None]:
# The loaded object is used as a mapping; .get() yields None for a missing key.
print(videos.keys())
video = videos.get('video')         # encoded JPEG data, 29 frames
# NOTE: this rebinds `label`, which the loading cell uses to build the file path.
# Re-running that cell after this one will build the path from the loaded value.
label = videos.get('label')
duration = videos.get('duration')

In [None]:
# Show the Python type and the value of each metadata field, label first.
for value in (label, duration):
    print(type(value))
    print(value)

In [None]:
# Report how many encoded frames the sample holds, then decode each one
# with the TJPF_GRAY pixel format (single-channel output).
print(f'{len(video)} frames')
frames = [jpeg.decode(frame, TJPF_GRAY) for frame in video]

In [None]:
# Keep the first decoded frame around for inspection / plotting.
img = frames[0]
print(type(img))  # NOTE(review): decoded arrays are presumably (height, width, channels) — confirm
print(img.shape)

In [None]:
# plot img
import matplotlib.pyplot as plt
#plt.imshow(img)


In [None]:
# Stack the per-frame arrays along a new leading axis, then divide by 255.
# NOTE: `frames` is rebound in place, so re-running this cell divides by 255 again.
frames = np.stack(frames, axis=0)
frames = frames / 255.0
print(len(frames))

In [None]:
# Shape of the stacked array; the leading axis indexes the frames (np.stack with axis=0).
print(frames.shape)

### Load Dataset

In [None]:
# Make the project root importable so that the `src.*` package resolves.
import sys

project_root = '/home/iwawiwi/research/whispering-fairies/'
# Guard the append: re-running this cell would otherwise add a duplicate entry each time.
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
# Project-local dataset class; needs the project root on sys.path.
from src.datamodules.components.lrw_dataset import LRWDataset
# Directory and label file handed to LRWDataset below.
# DATA_ROOT holds the same path as ROOT defined at the top of the notebook.
DATA_ROOT = '/home/iwawiwi/research/whispering-fairies/data/lrw_cropped/'
LABEL = '/home/iwawiwi/research/whispering-fairies/data/lrw_labels.txt'

In [None]:
# One dataset object per split; only the `phase` argument differs between them.
data_train = LRWDataset(DATA_ROOT, LABEL, phase='train')
data_test = LRWDataset(DATA_ROOT, LABEL, phase='test')
data_val = LRWDataset(DATA_ROOT, LABEL, phase='val')

In [None]:
# Number of samples per split, printed in the order train, val, test.
for dataset in (data_train, data_val, data_test):
    print(len(dataset))

In [None]:
# Items are indexed by key; take the 'video' entry of the first training item.
sample = data_train[0]['video']
print(sample.shape)

### Define network

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class SimpleConvNet(nn.Module):
    """Minimal 2D network: one 3x3 convolution (1 -> 4 channels) followed by ReLU."""

    def __init__(self):
        super().__init__()
        # A 3x3 kernel with stride 1 and padding 1 leaves the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """Return ``relu(conv1(x))``."""
        features = self.conv1(x)
        return torch.relu(features)

In [None]:
# random tensor
# 4-D input whose second dimension is 1, matching a convolution with one input channel.
# Statement order matters for reproducibility: both randn and the model's
# weight initialisation draw from the global RNG.
x = torch.randn(1, 1, 4, 4)
model = SimpleConvNet()
y = model(x)
print(y.shape)

In [None]:
# Inspect the raw output values of the forward pass above.
print(y)

In [None]:
# Scratch 3x5 tensor for trying out weight initialisers.
# torch.empty does not initialise its memory, so the printed values are arbitrary.
w = torch.empty((3, 5))
print(w)

In [None]:
# Fill `w` in place (trailing underscore) with Kaiming-normal values.
# mode='fan_out' scales the variance by the tensor's fan-out and
# nonlinearity='relu' selects the gain recommended for ReLU.
nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu')
print(w)

In [None]:
# Walk every module in the network.
# modules() yields the model itself first, then each nested submodule.
for submodule in model.modules():
    print(submodule)

### Try simple network

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# simple 3d convolutional network
class SimpleConvNet(nn.Module):
    """Single-layer 3D convolutional network used to probe output shapes.

    Only ``in_channel`` is used at present. ``out_channel``, ``kernel_size``,
    ``stride``, ``dilation`` and ``padding`` are accepted but ignored, because
    the one active layer hard-codes its own settings.
    """

    def __init__(self, in_channel=1, out_channel=64, kernel_size=3, stride=(1, 1, 1), dilation=(1, 2, 2), padding=1):
        super().__init__()
        # --- disabled earlier experiment (deeper stack + linear head), kept for reference ---
        # self.conv1 = nn.Conv3d(in_channel, 8, kernel_size=kernel_size, stride=(2, 1, 1), padding=padding, dilation=dilation)
        # self.relu = nn.ReLU()
        # self.pool1 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(1, 2, 2))
        # self.conv2 = nn.Conv3d(8, 16, kernel_size, stride, padding, dilation=dilation)
        # self.pool2 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        # self.conv3 = nn.Conv3d(16, 32, kernel_size, stride, padding, dilation=1)
        # self.conv4 = nn.Conv3d(32, out_channel, kernel_size, stride, padding, dilation=1)
        # # reduce dimension
        # self.conv5 = nn.Conv3d(out_channel, 32, kernel_size=(1, 1, 1), stride=(1, 1, 1), padding=0)
        # self.conv6 = nn.Conv3d(32, 16, kernel_size=(1, 1, 1), stride=(1, 1, 1), padding=0)      # torch.Size([4, 16, 14, 5, 5])
        # self.fc1 = nn.Linear(5 * 5 * 14 * 16, 500)

        # Active layer: 3x3x3 kernel, unit stride and dilation, padding 1 on every
        # axis, so the three trailing dimensions keep their sizes.
        self.conv1 = nn.Conv3d(
            in_channel,
            8,
            kernel_size=3,
            stride=(1, 1, 1),
            padding=(1, 1, 1),
            dilation=1,
        )
        #self.pool1 = nn.MaxPool3d(kernel_size=(1, 4, 4), stride=(1, 4, 4))
        #self.fc1 = nn.Linear(2 * 15 * 11 * 11, 500)

    def forward(self, x):
        """Swap dimensions 1 and 2 of ``x``, then apply ``conv1``.

        The swap moves the channel dimension to index 1, where ``nn.Conv3d``
        expects it, e.g. (N, T, C, H, W) -> (N, C, T, H, W).
        """
        # --- disabled earlier forward pass, kept for reference ---
        # x = self.pool1(self.relu(self.conv1(x)))
        # x = self.pool2(self.relu(self.conv2(x)))
        # x = self.pool2(self.relu(self.conv3(x)))
        # x = self.pool2(self.relu(self.conv4(x)))
        # x = self.conv6(self.conv5(x))
        # # flatten
        # x = x.view(x.size(0), -1)
        # x = self.fc1(x).mean(dim=1)
        #x = self.pool1(x)
        #x = x.view(x.size(0), -1)
        #x = self.fc1(x)
        return self.conv1(x.transpose(1, 2))

    def __check_output_shape(self, module, input_shape):
        """Return the shape ``module`` produces for a random input of ``input_shape``."""
        probe = torch.rand(input_shape)
        # Shape probing needs no gradient tracking.
        with torch.no_grad():
            return module(probe).shape


In [3]:
# Random stand-in for a video batch, uniform in [0, 1).
# Dimension 2 has size 1 so that, after the model swaps dims 1 and 2,
# it lines up with a convolution that takes a single input channel.
vid = torch.rand(4, 29, 1, 88, 88)

In [None]:
# import dataloader
from torch.utils.data import DataLoader

# Batches of 4 training items; shuffle=True reorders the items on each pass,
# so the first batch differs between runs.
dl_train = DataLoader(data_train, batch_size=4, shuffle=True)

In [None]:
# Pull a single batch from a fresh iterator over the loader.
batch = next(iter(dl_train))
# NOTE: this overwrites the random `vid` tensor created earlier with real data.
vid = batch['video']
tgt = batch['label']

In [None]:
# Shapes of the batch just drawn from the loader: the video tensor and its targets.
# (`vid`, not `img` — `img` is a single decoded frame from the first section.)
print(vid.shape)
print(tgt.shape)

In [4]:
# init model
# Constructed with default arguments; the forward pass runs under no_grad
# because only the output shape is of interest here.
model = SimpleConvNet()
with torch.no_grad():
    out = model(vid)

In [5]:
# Output shape of the forward pass above.
print(out.shape)
# Disabled: product of the last four output dims times 500.
#print(out.shape[-1] * out.shape[-2] * out.shape[-3] * out.shape[-4] * 500)

torch.Size([4, 8, 29, 88, 88])
