Testing

In [1]:
# imports
import torch
from torch.utils.data import Dataset, DataLoader, random_split, SequentialSampler, RandomSampler, BatchSampler
from torchvision import models, transforms
from torch import nn, optim

from PIL import Image
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from utils.clean_labels import clean_labels
from utils.prepare_images import prepare_images
from utils.build_dataset import SVRCDataset

import os
from datetime import datetime
import random
import time

%load_ext autoreload
%autoreload 2
print(torch.version.cuda)

11.3


In [2]:
# put videos here!
# video_base = 'data/videos'
video_base = 'D:/e6691/6691_assignment2/videos'
videos = os.listdir(video_base)
# images will be output to here
# image_base = 'data/images'
image_base = 'D:/e6691/6691_assignment2/images'
if not os.path.exists(image_base):
    os.mkdir(image_base)

In [3]:
# command line: 
# ffmpeg -i {input_video} -r {frame_rate} [-f {force_format} (not needed)] {output_images}
# doc: https://ffmpeg.org/ffmpeg.html
for video in videos:
    input_path = os.path.join(video_base, video)
    # make dirs
    output_base = image_base + '/{}'.format(video.split('.')[0])
    if not os.path.exists(output_base):
        os.mkdir(output_base)
    output_path = os.path.join(output_base, '%d.png')
    # # command
    # print('Frames extracted from {} to {}'.format(input_path, output_path))
    # !ffmpeg -i {input_path} -r 1 {output_path}

In [4]:
# labels_path = 'data/labels/video.phase.trainingData.clean.StudentVersion.csv'
labels_path = 'D:/e6691/6691_assignment2/labels/video.phase.trainingData.clean.StudentVersion.csv'
# names_path = 'data/labels/names.csv'
names_path = 'D:/e6691/6691_assignment2/labels/names.csv'
labels_df = pd.read_csv(labels_path)
names_df = pd.DataFrame({'Name': list(set(labels_df['PhaseName'].to_list()))})

In [5]:
prepare_images(video_base, image_base, labels_df, names_df, 'png')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [6]:
# get all images and labels
image_paths = []
labels = []

for video in videos[:]:
    base = os.path.join(image_base, video.split('.')[0])
    # image_paths += list(map(
    #     lambda img: os.path.join(base, img), 
    #     os.listdir(base)
    # ))
    image_paths += list(map(
        lambda img: base + '/' + img,
        os.listdir(base)
    ))
    labels += list(map(
        lambda img: int(img.split('.')[0].split('-')[1]), 
        os.listdir(base)
    ))
    

In [7]:
# get 2 images and labels
image_paths = []
labels = []

for video in videos[:2]:
    base = os.path.join(image_base, video.split('.')[0])
    # image_paths += list(map(
    #     lambda img: os.path.join(base, img), 
    #     os.listdir(base)
    # ))
    image_paths += list(map(
        lambda img: base + '/' + img,
        os.listdir(base)
    ))
    labels += list(map(
        lambda img: int(img.split('.')[0].split('-')[1]), 
        os.listdir(base)
    ))
    

In [8]:
def sort_images(x):
    vid = int(x[0].split('_')[-1].split('/')[0])
    frame = int(x[0].split('/')[-1].split('-')[0])
    return vid*7200 + frame

image_paths_lstm = []
labels_lstm = []
for path,label in sorted(zip(image_paths, labels), key=sort_images):
    image_paths_lstm.append(path)
    labels_lstm.append(label)

In [45]:
# Training parameters LSTM
LR = 1e-3
BATCH_SIZE = 3
EPOCHS = 5
TRAIN_SIZE = int(0.7 * len(image_paths))
TEST_SIZE = len(image_paths) - TRAIN_SIZE

In [10]:
# number of labels
num_labels = 14

# define transforms
data_transform = {
    "train": transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485], std=[0.229])
    ])
}

In [50]:
class SVRC(nn.Module):
    def __init__(self):
        super(SVRC,self).__init__()
        # ResNet-18
        self.resnet18 = nn.Sequential(*(
            list(
                models.resnet18(pretrained=True).children()
            )[:-1]
        ))
        #self.resnet18.eval()
        self.pretrain = True
        # LSTM
        self.lstm = nn.LSTM(512,512)
        self.lstm_states = None
        # FC
        self.full = nn.Linear(512,num_labels)
        
    def forward(self,x):
        x = self.resnet18(x)
        # Reshape
        #print(x.shape)
        if not self.pretrain:
            x = x.view(3,1,-1) # time step, batch size
            x,s = self.lstm(x, self.lstm_states)
            # save lstm states
            self.lstm_states = (s[0].detach(), s[1].detach())
            
        x = self.full(x.view(-1,512))
        return x #if self.pretrain else nn.Softmax(1)(x).view(30,-1)

In [25]:
class SVRCDataset(Dataset):
    def __init__(self, image_path: list, image_class: list, transform=None):
        self.image_path = image_path
        self.image_class = image_class
        self.transform = transform

    def __len__(self):
        return len(self.image_path)

    def __getitem__(self, item): #can add more rules to pick data
        img = Image.open(self.image_path[item])
        label = self.image_class[item]
        if self.transform is not None:
            img = self.transform(img)

        return {'feature': img, 'label': label}

In [53]:
class ResnetTrainVal(object):
    def __init__(self, model, device) -> None:
        self.model = model
        self.device = device
        self.optimizer = optim.Adam(self.model.parameters(), lr=LR)
        self.criterion = nn.CrossEntropyLoss()

    def train(self, labels, features, transform):
        print('Training ResNet: ')

        dataset = SVRCDataset(features, labels, transform)
        train, test = random_split(dataset, [TRAIN_SIZE, TEST_SIZE])
        print(len(train))
        train_loader = DataLoader(train, BATCH_SIZE, shuffle=True)
        test_loader = DataLoader(test, BATCH_SIZE, shuffle=True)

        self.model.pretrain = True

        for epoch in range(EPOCHS):
            self.model.train()

            train_loss = 0.0
            train_acc = 0.0

            for i, data in enumerate(train_loader):
            
                features = data['feature'].float()
                labels = data['label']
            
                # features  = data['feature'].float()
                # labels = data['label']
                features, labels = features.to(self.device), labels.to(self.device)
                self.optimizer.zero_grad()
                predictions = self.model(features)
                loss = self.criterion(predictions, labels)
                loss.backward()
                self.optimizer.step()

                train_loss += loss.item()
                preds = torch.max(predictions.data, 1)[1]
                train_acc += (preds==labels).sum().item()
            
            train_loss /= len(train)
            train_acc /= len(train)

            valid_loss = 0.0
            valid_acc = 0.0
            total = 0
            self.model.eval()
            for i, data in enumerate(test_loader):
                features = data['feature']
                labels = data['label']

                features, labels = features.to(self.device), labels.to(self.device)
                predictions = self.model(features)
                loss = self.criterion(predictions,labels)
                valid_loss += loss.item()

                preds = torch.max(predictions.data, 1)[1]
                valid_acc += (preds==labels).sum().item()
                total += features.size(0)

            valid_loss /= len(test)
            valid_acc /= len(test)

            print(
                f'Epoch {epoch+1} Training Loss: {train_loss} Train_acc: {train_acc}'
                f'|| Validation Loss: {valid_loss} Valid_acc: {valid_acc}'
            )

In [54]:
class LstmTrainVal(object):
    def __init__(self, model,device) -> None:
        self.model = model
        self.device = device
        self.optimizer = optim.Adam(self.model.parameters(), lr=LR)
        self.criterion = nn.CrossEntropyLoss()

    def train(self, labels, features, transform, eval_intval=5):
        dataset = SVRCDataset(features, labels, transform)
        data_loader = DataLoader(
            dataset, batch_sampler=BatchSampler(
                SequentialSampler(dataset), 
                BATCH_SIZE, 
                drop_last=True
            )
        )

        self.model.pretrain = False

        for epoch in range(EPOCHS):
            if (epoch + 1) % eval_intval == 0:
                self.model.eval()
            else:
                self.model.lstm.train()
                self.model.full.train()

            train_loss = 0.0
            train_acc = 0.0

            for i, data in enumerate(data_loader):
                features  = data['feature'].float()
                
                labels = data['label']
                features, labels = features.to(self.device), labels.to(self.device)
                predictions = self.model(features)
                loss = self.criterion(predictions, labels)

                if not (epoch + 1) % eval_intval == 0:
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

                train_loss += loss.item()
                preds = torch.max(predictions.data, 1)[1]
                train_acc += (preds==labels).sum().item()

            train_loss /= len(dataset)
            train_acc /= len(dataset)

            print('Epoch {} - {} Loss: {} Acc: {} LSTM'.format(
                epoch+1, 'Train' if not (epoch + 1) % eval_intval == 0 else 'Valid', 
                train_loss, train_acc
            ))

In [48]:
# Weights
WeightsPath = './models/weights_resnet18'

In [55]:
# Data
X = image_paths
y = labels

# resnet18 Model
device = torch.device('cuda:0' if torch.cuda.is_available else 'cpu')
model = SVRC()
if torch.cuda.is_available:
    model.to(device)

start_time = time.time()

model.pretrain = True
trainer = ResnetTrainVal(model, device)
trainer.train(y, X, data_transform['train'])

torch.save(model.state_dict(),WeightsPath+'1')

end_time = time.time()
print('Time:{:.2}min'.format((end_time-start_time)/60.0))

Training ResNet: 
3615
Epoch 1 Training Loss: 0.8077436924640239 Train_acc: 0.244536652835408|| Validation Loss: 0.7426833113547294 Valid_acc: 0.26064516129032256
Epoch 2 Training Loss: 0.7773627837663865 Train_acc: 0.22461964038727525|| Validation Loss: 0.7359198786750917 Valid_acc: 0.2496774193548387
Epoch 3 Training Loss: 0.760970380873436 Train_acc: 0.2359612724757953|| Validation Loss: 1.3449061864422214 Valid_acc: 0.24129032258064517


KeyboardInterrupt: 

In [78]:
# Data
X = image_paths_lstm
y = labels_lstm
device = torch.device('cuda:0' if torch.cuda.is_available else 'cpu')
# SVRC Model
model = SVRC()
if torch.cuda.is_available:
    model.to(device)
#print(model)

model.pretrain = False
model.load_state_dict(torch.load(WeightsPath+'1'))

start_time = time.time()

trainer = LstmTrainVal(model, device)
trainer.train(y, X, data_transform['train'])

end_time = time.time()
print('Time:{:.2}min'.format((end_time-start_time)/60.0))

Epoch 1 - Train Loss: 0.09230332366694154 Acc: 0.3035078800203355
Epoch 2 - Train Loss: 0.09115437841318438 Acc: 0.17386883579054396
Epoch 3 - Train Loss: 0.082175728386286 Acc: 0.13726487036095578
Epoch 4 - Train Loss: 0.07780717367076244 Acc: 0.13726487036095578
Epoch 5 - Valid Loss: 0.06773507595062256 Acc: 0.2846975088967972
Time:3.0min
