Testing

In [38]:
# imports
import torch
from torch.utils.data import Dataset, DataLoader, random_split, SequentialSampler, RandomSampler, BatchSampler
from torchvision import models, transforms
from torch import nn, optim

from PIL import Image
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from utils.clean_labels import clean_labels
from utils.prepare_images import prepare_images
from utils.build_dataset import SVRCDataset

import os
from datetime import datetime
import random
import time

%load_ext autoreload
%autoreload 2
print(torch.version.cuda)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
11.3


In [39]:
# put videos here!
# video_base = 'data/videos'
video_base = 'D:/e6691/6691_assignment2/videos'
videos = os.listdir(video_base)
# images will be output to here
# image_base = 'data/images'
image_base = 'D:/e6691/6691_assignment2/images'
if not os.path.exists(image_base):
    os.mkdir(image_base)

In [40]:
# command line: 
# ffmpeg -i {input_video} -r {frame_rate} [-f {force_format} (not needed)] {output_images}
# doc: https://ffmpeg.org/ffmpeg.html
for video in videos:
    input_path = os.path.join(video_base, video)
    # make dirs
    output_base = image_base + '/{}'.format(video.split('.')[0])
    if not os.path.exists(output_base):
        os.mkdir(output_base)
    output_path = os.path.join(output_base, '%d.png')
    # # command
    # print('Frames extracted from {} to {}'.format(input_path, output_path))
    # !ffmpeg -i {input_path} -r 1 {output_path}

In [41]:
# labels_path = 'data/labels/video.phase.trainingData.clean.StudentVersion.csv'
labels_path = 'D:/e6691/6691_assignment2/labels/video.phase.trainingData.clean.StudentVersion.csv'
# names_path = 'data/labels/names.csv'
names_path = 'D:/e6691/6691_assignment2/labels/names.csv'
labels_df = pd.read_csv(labels_path)
names_df = pd.DataFrame({'Name': list(set(labels_df['PhaseName'].to_list()))})

In [5]:
prepare_images(video_base, image_base, labels_df, names_df, 'png')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [46]:
# get all images and labels
image_paths = []
labels = []

for video in videos[:]:
    base = os.path.join(image_base, video.split('.')[0])
    # image_paths += list(map(
    #     lambda img: os.path.join(base, img), 
    #     os.listdir(base)
    # ))
    image_paths += list(map(
        lambda img: base + '/' + img,
        os.listdir(base)
    ))
    labels += list(map(
        lambda img: int(img.split('.')[0].split('-')[1]), 
        os.listdir(base)
    ))
    

In [45]:
# get 2 images and labels
image_paths = []
labels = []

for video in videos[:2]:
    base = os.path.join(image_base, video.split('.')[0])
    # image_paths += list(map(
    #     lambda img: os.path.join(base, img), 
    #     os.listdir(base)
    # ))
    image_paths += list(map(
        lambda img: base + '/' + img,
        os.listdir(base)
    ))
    labels += list(map(
        lambda img: int(img.split('.')[0].split('-')[1]), 
        os.listdir(base)
    ))
    

In [51]:
def sort_images(x):
    vid = int(x[0].split('_')[-1].split('/')[0])
    frame = int(x[0].split('/')[-1].split('-')[0])
    return vid*7200 + frame

image_paths_lstm = []
labels_lstm = []
for path,label in sorted(zip(image_paths, labels), key=sort_images):
    image_paths_lstm.append(path)
    
    labels_lstm.append(label)

In [52]:
# Training parameters LSTM

TRAIN_SIZE = int(0.7 * len(image_paths))
TEST_SIZE = len(image_paths) - TRAIN_SIZE

In [53]:
# number of labels
num_labels = 14

# define transforms
data_transform = {
    "train": transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485], std=[0.229])
    ])
    }

In [54]:
class SVRC(nn.Module):
    def __init__(self):
        super(SVRC,self).__init__()
        # ResNet-18
        self.resnet18 = nn.Sequential(*(
            list(
                models.resnet18(pretrained=True).children()
            )[:-1]
        ))
        #self.resnet18.eval()
        self.pretrain = True
        # LSTM
        self.lstm = nn.LSTM(512,512)
        self.lstm_states = None
        # FC
        self.full = nn.Linear(512,num_labels)
        
    def forward(self,x):
        x = self.resnet18(x)
        # Reshape
        #print(x.shape)
        if not self.pretrain:
            x = x.view(3,1,-1) # time step, batch size
            x,s = self.lstm(x, self.lstm_states)
            # save lstm states
            self.lstm_states = (s[0].detach(), s[1].detach())
            
        x = self.full(x.view(-1,512))
        return x #if self.pretrain else nn.Softmax(1)(x).view(30,-1)
    def predict(self, features, labels, BATCH_SIZE, transform):
        self.eval()
        dataset = SVRCDataset(features, labels, transform)
        loader = DataLoader(
            dataset, batch_sampler=BatchSampler(
                SequentialSampler(dataset), 
                BATCH_SIZE, 
                drop_last=True
            )
        )
        
        test_acc = 0.0
        predicts = []
        for i, data in enumerate(loader):
            features = data['feature'].float()
            labels = data['label']
            predictions = self.forward(features)
            preds = torch.max(predictions.data, 1)[1]
            predicts.append(preds)
            test_acc += (preds == labels).sum().item()
        test_acc /= len(dataset)
        print(f'test_acc:{test_acc}')
        return predicts

In [73]:
class SVRCDataset(Dataset):
    def __init__(self, image_path: list, image_class: list=None, transform=None):
        self.image_path = image_path
        self.image_class = image_class
        self.transform = transform

    def __len__(self):
        return len(self.image_path)

    def __getitem__(self, item): #can add more rules to pick data
        img = Image.open(self.image_path[item])
        if self.image_class is not None:
            label = self.image_class[item]
        if self.transform is not None:
            img = self.transform(img)

        return {'feature': img, 'label': label} if self.image_class is not None else {'feature': img}

In [56]:
class ResnetTrainVal(object):
    def __init__(self, model, device, EPOCH, BATCH_SIZE, LR) -> None:
        self.model = model
        self.device = device
        self.EPOCH = EPOCH
        self.BATCH_SIZE = BATCH_SIZE
        self.LR = LR
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.LR)
        self.criterion = nn.CrossEntropyLoss()

    def train(self, labels, features, transform):
        print('Training ResNet: ')

        dataset = SVRCDataset(features, labels, transform)
        train, test = random_split(dataset, [TRAIN_SIZE, TEST_SIZE])
        print(len(train))
        train_loader = DataLoader(train, self.BATCH_SIZE, shuffle=True)
        test_loader = DataLoader(test, self.BATCH_SIZE, shuffle=True)

        self.model.pretrain = True

        for epoch in range(self.EPOCH):
            self.model.train()

            train_loss = 0.0
            train_acc = 0.0

            for i, data in enumerate(train_loader):
            
                features = data['feature'].float()
                labels = data['label']
            
                # features  = data['feature'].float()
                # labels = data['label']
                features, labels = features.to(self.device), labels.to(self.device)
                self.optimizer.zero_grad()
                predictions = self.model(features)
                loss = self.criterion(predictions, labels)
                loss.backward()
                self.optimizer.step()

                train_loss += loss.item()
                preds = torch.max(predictions.data, 1)[1]
                train_acc += (preds==labels).sum().item()
            
            train_loss /= len(train)
            train_acc /= len(train)

            valid_loss = 0.0
            valid_acc = 0.0
            total = 0
            self.model.eval()
            for i, data in enumerate(test_loader):
                features = data['feature']
                labels = data['label']

                features, labels = features.to(self.device), labels.to(self.device)
                predictions = self.model(features)
                loss = self.criterion(predictions,labels)
                valid_loss += loss.item()

                preds = torch.max(predictions.data, 1)[1]
                valid_acc += (preds==labels).sum().item()
                total += features.size(0)

            valid_loss /= len(test)
            valid_acc /= len(test)

            print(
                f'Epoch {epoch+1} Training Loss: {train_loss} Train_acc: {train_acc}'
                f'|| Validation Loss: {valid_loss} Valid_acc: {valid_acc}'
            )

In [57]:
class LstmTrainVal(object):
    def __init__(self, model,device, EPOCH, BATCH_SIZE, LR) -> None:
        self.model = model
        self.device = device
        self.EPOCH = EPOCH
        self.BATCH_SIZE = BATCH_SIZE
        self.LR = LR
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.LR)
        self.criterion = nn.CrossEntropyLoss()

    def train(self, labels, features, transform, eval_intval=3):
        dataset = SVRCDataset(features, labels, transform)
        data_loader = DataLoader(
            dataset, batch_sampler=BatchSampler(
                SequentialSampler(dataset), 
                self.BATCH_SIZE, 
                drop_last=True
            )
        )

        self.model.pretrain = False

        for epoch in range(self.EPOCH):
            if (epoch + 1) % eval_intval == 0:
                self.model.eval()
            else:
                self.model.lstm.train()
                self.model.full.train()

            train_loss = 0.0
            train_acc = 0.0

            for i, data in enumerate(data_loader):
                features  = data['feature'].float()
                
                labels = data['label']
                features, labels = features.to(self.device), labels.to(self.device)
                predictions = self.model(features)
                loss = self.criterion(predictions, labels)

                if not (epoch + 1) % eval_intval == 0:
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

                train_loss += loss.item()
                preds = torch.max(predictions.data, 1)[1]
                train_acc += (preds==labels).sum().item()

            train_loss /= len(dataset)
            train_acc /= len(dataset)

            print('Epoch {} - {} Loss: {} Acc: {} LSTM'.format(
                epoch+1, 'Train' if not (epoch + 1) % eval_intval == 0 else 'Valid', 
                train_loss, train_acc
            ))

In [58]:
# Weights
WeightsPath = './models/weights_resnet18'

In [59]:
# Data
X = image_paths
y = labels

# resnet18 Model
device = torch.device('cuda:0' if torch.cuda.is_available else 'cpu')
model = SVRC()
if torch.cuda.is_available:
    model.to(device)

start_time = time.time()

model.pretrain = True
trainer = ResnetTrainVal(model, device, 5, 30, 1e-3)
trainer.train(y, X, data_transform['train'])

torch.save(model.state_dict(),WeightsPath+'_70')

end_time = time.time()
print('Time:{:.2}min'.format((end_time-start_time)/60.0))

Training ResNet: 
150539
Epoch 1 Training Loss: 0.024475088597899253 Train_acc: 0.7605404579544172|| Validation Loss: 0.01592618384509064 Valid_acc: 0.8422455748783285
Epoch 2 Training Loss: 0.013247700232216423 Train_acc: 0.8697613243079866|| Validation Loss: 0.010955648369718301 Valid_acc: 0.8920766297777365
Epoch 3 Training Loss: 0.009097111920864289 Train_acc: 0.9110928065152551|| Validation Loss: 0.010903670767681848 Valid_acc: 0.9068167023156328
Epoch 4 Training Loss: 0.00716468797997593 Train_acc: 0.9294601398973024|| Validation Loss: 0.007357738582379434 Valid_acc: 0.9282215815741344
Epoch 5 Training Loss: 0.005694102683291168 Train_acc: 0.9441473638060569|| Validation Loss: 0.007043369710279112 Valid_acc: 0.9330574413342013
Time:2.2e+02min


In [64]:
#dataset for test
image_paths = []
labels = []

for video in videos[50:70]:
    base = os.path.join(image_base, video.split('.')[0])
    # image_paths += list(map(
    #     lambda img: os.path.join(base, img), 
    #     os.listdir(base)
    # ))
    image_paths += list(map(
        lambda img: base + '/' + img,
        os.listdir(base)
    ))
    labels += list(map(
        lambda img: int(img.split('.')[0].split('-')[1]), 
        os.listdir(base)
    ))

In [65]:
# dataset for test of ResNet
def sort_images(x):
    vid = int(x[0].split('_')[-1].split('/')[0])
    frame = int(x[0].split('/')[-1].split('-')[0])
    return vid*7200 + frame

image_paths_test = []
labels_test = []
for path,label in sorted(zip(image_paths, labels), key=sort_images):
    image_paths_test.append(path)
    labels_test.append(label)

In [66]:
WeightsPath = './models/weights_resnet18_70'
X = image_paths_test
y = labels_test
model = SVRC()
model.pretrain = True
model.load_state_dict(torch.load(WeightsPath))
a = model.predict(X, y, 30, transform = data_transform['train'])


test_acc:0.9575570223076281


In [69]:
# Data
WeightsPath = './models/weights_resnet18_70'
X = image_paths_lstm
y = labels_lstm
device = torch.device('cuda:0' if torch.cuda.is_available else 'cpu')
# SVRC Model
model = SVRC()
if torch.cuda.is_available():
    model.to(device)

model.pretrain = False
model.load_state_dict(torch.load(WeightsPath))

start_time = time.time()

trainer = LstmTrainVal(model, device, 20, 3, 1e-5)
trainer.train(y, X, data_transform['train'])
torch.save(model.state_dict(),WeightsPath+'_LSTM')
end_time = time.time()
print('Time:{:.2}min'.format((end_time-start_time)/60.0))

Epoch 1 - Train Loss: 0.3513061202120706 Acc: 0.6672370580822758 LSTM
Epoch 2 - Train Loss: 0.22994594864438234 Acc: 0.7781425389547887 LSTM
Epoch 3 - Train Loss: 0.17711631583863016 Acc: 0.8305472502638835 LSTM
Epoch 4 - Train Loss: 0.14286621200371366 Acc: 0.8634966543753516 LSTM
Epoch 5 - Valid Loss: 1.6568060865719982 Acc: 0.06767043156000502 LSTM
Epoch 6 - Train Loss: 0.13071372540655407 Acc: 0.8784601291750559 LSTM
Epoch 7 - Train Loss: 0.02568127496087503 Acc: 0.9765132034762877 LSTM
Epoch 8 - Train Loss: 0.01870487627525775 Acc: 0.9830696048024478 LSTM
Epoch 9 - Train Loss: 0.014655133675175227 Acc: 0.9870452949683107 LSTM
Epoch 10 - Valid Loss: 0.013645206177535833 Acc: 0.9879659811119843 LSTM
Epoch 11 - Train Loss: 0.011677250751410293 Acc: 0.9900026504601106 LSTM
Epoch 12 - Train Loss: 0.009359580709360157 Acc: 0.9922857660992203 LSTM
Epoch 13 - Train Loss: 0.007874114600798314 Acc: 0.9938062932152871 LSTM
Epoch 14 - Train Loss: 0.006684266908701929 Acc: 0.9946665302687194 L

In [71]:
WeightsPath = './models/weights_resnet18_70_LSTM'
X = image_paths_test
y = labels_test
model = SVRC()
model.pretrain = False
model.load_state_dict(torch.load(WeightsPath))
a = model.predict(X, y, 3, transform = data_transform['train'])

test_acc:0.9969922299273122


In [74]:
class ResnetEvaluator:
    def __init__(self, model, device):
        self.model = model
        self.device = device

    def predict(self, images, transform):
        dataset = SVRCDataset(images, None, transform)
        loader = DataLoader(dataset)
        preds = []
        self.model.eval()
        for i,data in enumerate(loader):
            feature = data['feature'].float().to(self.device)
            pred = torch.max(self.model(feature).data, 1)[1]
            preds.append(pred)
        return preds

    def eval(self, preds, labels):
        acc = sum([p.item() == l for p,l in zip(preds, labels)]) / len(labels)
        print('Accuracy: {}'.format(acc))
        return acc

In [75]:
# Data
WeightsPath = './models/weights_resnet18_70'
X = image_paths_test
y = labels_test
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# SVRC Model
model = SVRC()
if torch.cuda.is_available():
    model.to(device)
#print(model)

model.pretrain = True
model.load_state_dict(torch.load(WeightsPath), strict=False)

start_time = time.time()

evaluator = ResnetEvaluator(model, device)
preds = evaluator.predict(X, data_transform['train'])
acc = evaluator.eval(preds, y)

end_time = time.time()
print('Time:{:.2}min'.format((end_time-start_time)/60.0))

Accuracy: 0.9578494443980282
Time:1.8e+01min
