## Import libraries

In [2]:
import pytorchvideo
from torchvision.datasets.video_utils import VideoClips
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, WeightedRandomSampler, TensorDataset
# from torchsampler import ImbalancedDatasetSampler

# import ResNet

import cv2

import pandas as pd
import numpy as np
import os
import time
import pickle
import json
import random
import urllib
import copy
import matplotlib.pyplot as plt

from data_loader import VideoDataset
# from Utils import do_epoch
from Utils.metric import *
# from Utils.utility import *

import myutils

from torchvideotransforms import video_transforms, volume_transforms

In [3]:
import gc
# Run a garbage-collection pass, then ask PyTorch's CUDA caching allocator to
# release the cached memory it is no longer using, before the heavy cells below.
gc.collect()
torch.cuda.empty_cache()

## Import data and transformation

In [4]:
# from pytorchvideo.data.encoded_video import EncodedVideo
from torchvision import models

# from torchvision.transforms._transforms_video import (
#     CenterCropVideo,
#     NormalizeVideo,
# )

from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    ShortSideScale,
    UniformTemporalSubsample,
    AugMix,
    create_video_transform
)

from torchvision.transforms import (
    Compose,
    RandomCrop,
    RandomHorizontalFlip,
    ToTensor
)

In [5]:
import random
import numpy as np

# Seed every random number generator used in this notebook with one value:
# PyTorch (CPU), PyTorch CUDA (current device, then all devices), NumPy and
# Python's built-in `random`.
seed = 42
for _seed_rng in (
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
    np.random.seed,
    random.seed,
):
    _seed_rng(seed)
# torch.backends.cudnn.benchmark = False
# torch.backends.cudnn.deterministic = True #turning this on might result in CNN underfitting?

In [6]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device  # last expression of the cell: displays the selected device

device(type='cuda', index=0)

In [7]:
# Settings shared by the transform, data-loader and training cells below.
side_size = 256  # passed to video_transforms.Resize as (side_size, side_size)
crop_size = 224  # passed to video_transforms.CenterCrop
num_frames = 20  # frames kept by UniformTemporalSubsample; the model cells also hard-code 20
frames_per_second = 30  # not referenced by any other cell visible in this notebook
epochs = 500  # forwarded to myutils.train_val as "num_epochs"
batch_size = 4  # batch size of the train / test / robust-test DataLoaders

In [None]:
# NOT IN USE
# train_transform = create_video_transform(num_samples=num_frames, max_size=side_size, crop_size=crop_size, mode='train')
# test_transform = create_video_transform(num_samples=num_frames, max_size=side_size, crop_size=crop_size, mode='val')

In [8]:
'''
video_transforms.Resize kind of perform scaling based on shorter side.
'''
# NOTE(review): Resize is given a (side_size, side_size) tuple below, which
# presumably resizes to exactly that size rather than scaling the shorter
# side — confirm against torchvideotransforms.


def _build_clip_transform():
    """Return a new resize -> centre-crop -> to-tensor -> temporal-subsample pipeline."""
    steps = [
        video_transforms.Resize((side_size, side_size)),
        video_transforms.CenterCrop(crop_size),
        volume_transforms.ClipToTensor(),
        UniformTemporalSubsample(num_frames),
    ]
    return Compose(steps)


# Training and evaluation currently use the same deterministic pipeline, but
# each split gets its own Compose instance.
train_transform = _build_clip_transform()
test_transform = _build_clip_transform()


In [9]:
# Load the per-split metadata tables (paths are relative to the working directory)
# and report how many rows each split has.
train_df = pd.read_csv('train_final.csv')
test_df = pd.read_csv('test1_final.csv')
robust_df = pd.read_csv('test2_final.csv')
print("train size:", len(train_df))
print("test size:", len(test_df))
print("robust test size:", len(robust_df))

train size: 3304
test size: 826
robust test size: 203


In [10]:
'''
Weighted sampling
'''
# Inverse-frequency weight per class: total number of rows / rows in that class.
labels_unique, counts = np.unique(train_df['label'], return_counts=True)
total_count = sum(counts)
class_weights = [total_count / class_count for class_count in counts]

# One weight per training row. The row's label is used directly as an index
# into class_weights, so labels are expected to be the integers 0..K-1.
sample_weights = [class_weights[label] for label in train_df['label']]


# Dedicated seeded generator so the sampler's draws are repeatable.
g = torch.Generator()
g.manual_seed(42)


# # weighted sampling without oversample/downsample
# train_sampler = WeightedRandomSampler(sample_weights, len(train_df))


# oversampling: draw len(train_df) samples plus the surplus of label 0 over label 1
num_label0 = len(train_df[train_df['label']==0])
num_label1 = len(train_df[train_df['label']==1])
num_draws = len(train_df['label']) + (num_label0 - num_label1)
train_sampler = WeightedRandomSampler(sample_weights, num_draws, generator=g)

# # downsampling
# train_sampler = WeightedRandomSampler(sample_weights, len(train_df[train_df['label']==1])*2)

'\nfor downsampling\n'

In [11]:
# Build one VideoDataset per split from the .pkl clip files under clip_base_path,
# pairing each with its metadata table and transform.
# NOTE(review): clip_base_path is an absolute, machine-specific path; it must be
# edited before the notebook can run on another machine.
clip_base_path = 'D:/Dataset_BENG/short_clips/all_short_clips'
train_dataset = VideoDataset(video_clip_path=os.path.join(clip_base_path, 'train_final.pkl'), dataframe=train_df, video_transform=train_transform)
test_dataset = VideoDataset(video_clip_path=os.path.join(clip_base_path, 'test1_final.pkl'), dataframe=test_df, video_transform=test_transform)
robust_dataset = VideoDataset(video_clip_path=os.path.join(clip_base_path, 'test2_final.pkl'), dataframe=robust_df, video_transform=test_transform)

In [12]:
# Options common to all three loaders.
_loader_options = dict(batch_size=batch_size, pin_memory=True, num_workers=6, drop_last=False)

# oversampling only for train data loader: batches are drawn through train_sampler
train_loader = DataLoader(train_dataset, sampler=train_sampler, **_loader_options)
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=6, drop_last=False)

# Evaluation loaders walk their datasets in order.
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)
robust_test_loader = DataLoader(robust_dataset, shuffle=False, **_loader_options)

In [None]:
'''
view the video frame
'''
# Show element [0][0] of dataset item 831, i.e. the first slice of that item's
# first field. The permute moves axis 0 to the end — presumably (C, H, W) ->
# (H, W, C) as imshow expects; TODO confirm against VideoDataset's output layout.
plt.imshow(train_loader.dataset[831][0][0].permute(1,2,0))

# Models

### ResNet34-LSTM: Predict at every frame
**flattening vs. global avg pooling vs. global max pooling**

In [None]:
class Resnt34LSTM(nn.Module):
    """ResNet34 + LSTM clip classifier that makes a prediction at every frame.

    Each frame is passed through ResNet34 (classification head removed), the
    frame features are fed to the LSTM one time step at a time, every LSTM
    output is reduced to a single scalar by ``fc1`` and the per-frame scalars
    are combined into class logits by ``fc2``.

    Args:
        params_model (dict): model settings.
            "num_classes" (int): number of output logits.
            "pretrained" (bool): forwarded to ``models.resnet34``.
            "rnn_hidden_size" (int): LSTM hidden size.
            "rnn_num_layers" (int): number of stacked LSTM layers.
            "num_frames" (int, optional): frames per clip, default 20.
            An "attention" key may be present; this variant ignores it.
    """

    def __init__(self, params_model):
        super(Resnt34LSTM, self).__init__()
        num_classes = params_model["num_classes"]
        # dr_rate = params_model["dr_rate"]
        pretrained = params_model["pretrained"]
        self.rnn_hidden_size = params_model["rnn_hidden_size"]
        rnn_num_layers = params_model["rnn_num_layers"]
        # Optional so that existing params_model dicts keep working unchanged.
        num_frames = params_model.get("num_frames", 20)

        baseModel = models.resnet34(pretrained=pretrained)

        # To use the backbone as a frozen feature extractor (no fine-tuning):
        # for param in baseModel.parameters():
        #     param.requires_grad = False

        num_features = baseModel.fc.in_features

        # Spatial reduction before the LSTM. The avgpool choice and the LSTM
        # input size must be switched together:
        #   flattening (active):  avgpool = Identity()           -> LSTM input 25088
        #   global avg pooling:   keep the backbone's avgpool    -> LSTM input num_features
        #   global max pooling:   avgpool = AdaptiveMaxPool2d    -> LSTM input num_features
        # baseModel.avgpool = nn.AdaptiveMaxPool2d(output_size=(1,1))
        baseModel.avgpool = Identity()
        baseModel.fc = Identity()
        self.baseModel = baseModel
        # self.dropout = nn.Dropout(dr_rate)
        # self.rnn = nn.LSTM(num_features, self.rnn_hidden_size, rnn_num_layers)
        # 25088 = 512 * 7 * 7: the flattened final feature map for 224x224 frames.
        self.rnn = nn.LSTM(25088, self.rnn_hidden_size, rnn_num_layers)

        # prediction at every frame: one scalar per frame, then frames -> classes
        self.fc1 = nn.Linear(self.rnn_hidden_size, 1)
        self.fc2 = nn.Linear(num_frames, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a 5-D clip batch.

        ``x`` is unpacked as (batch, time, channels, height, width).
        """
        b_z, ts, c, h, w = x.shape

        # state=None makes the LSTM start from zero hidden/cell states, which
        # is what a call without an explicit state does.
        state = None
        frame_preds = []
        for ii in range(ts):
            y = self.baseModel(x[:, ii])
            out, state = self.rnn(y.unsqueeze(0), state)
            # (1, batch, hidden) -> (batch, hidden) -> (batch, 1)
            frame_preds.append(self.fc1(out.view(b_z, -1)))

        rnn_pred_cat = torch.cat(frame_preds, dim=1)  # (batch, time)
        final_out = self.fc2(rnn_pred_cat.view(b_z, -1))

        return final_out
    
class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands back its input unchanged.

    Assigned over backbone layers (such as ``fc``) to disable them.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x

### ResNet34-LSTM: Predict using LAST HIDDEN STATE
**flattening vs. global avg pooling vs. global max pooling**

In [None]:
class Resnt34LSTM(nn.Module):
    """ResNet34 + LSTM clip classifier that predicts from the last LSTM output.

    Each frame is passed through ResNet34 (classification head removed) and
    the frame features are fed to the LSTM one time step at a time; the LSTM
    output for the final frame is mapped to class logits by ``fc1``.

    Args:
        params_model (dict): model settings.
            "num_classes" (int): number of output logits.
            "pretrained" (bool): forwarded to ``models.resnet34``.
            "rnn_hidden_size" (int): LSTM hidden size.
            "rnn_num_layers" (int): number of stacked LSTM layers.
            An "attention" key may be present; this variant ignores it.
    """

    def __init__(self, params_model):
        super(Resnt34LSTM, self).__init__()
        num_classes = params_model["num_classes"]
        # dr_rate = params_model["dr_rate"]
        pretrained = params_model["pretrained"]
        self.rnn_hidden_size = params_model["rnn_hidden_size"]
        rnn_num_layers = params_model["rnn_num_layers"]

        baseModel = models.resnet34(pretrained=pretrained)

        num_features = baseModel.fc.in_features

        # Spatial reduction before the LSTM. The avgpool choice and the LSTM
        # input size must be switched together:
        #   flattening (active):  avgpool = Identity()           -> LSTM input 25088
        #   global avg pooling:   keep the backbone's avgpool    -> LSTM input num_features
        #   global max pooling:   avgpool = AdaptiveMaxPool2d    -> LSTM input num_features
        # baseModel.avgpool = nn.AdaptiveMaxPool2d(output_size=(1,1))
        baseModel.avgpool = Identity()
        baseModel.fc = Identity()
        self.baseModel = baseModel
        # self.dropout = nn.Dropout(dr_rate)
        # self.rnn = nn.LSTM(num_features, self.rnn_hidden_size, rnn_num_layers)
        # 25088 = 512 * 7 * 7: the flattened final feature map for 224x224 frames.
        self.rnn = nn.LSTM(25088, self.rnn_hidden_size, rnn_num_layers)
        self.fc1 = nn.Linear(self.rnn_hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a 5-D clip batch.

        ``x`` is unpacked as (batch, time, channels, height, width).
        """
        b_z, ts, c, h, w = x.shape

        # state=None makes the LSTM start from zero hidden/cell states.
        state = None
        for ii in range(ts):
            y = self.baseModel(x[:, ii])
            out, state = self.rnn(y.unsqueeze(0), state)

        # `out` now holds the LSTM output for the last frame, (1, batch, hidden).
        # fc1 is applied exactly once: applying it a second time to its own
        # output would fail because that output no longer has `hidden` features.
        final_out = self.fc1(out.view(b_z, -1))

        return final_out
    
class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands back its input unchanged.

    Assigned over backbone layers (such as ``fc``) to disable them.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x

### ResNet34-LSTM: Predict using ALL HIDDEN STATES (no attention)
**flattening vs. global avg pooling vs. global max pooling**

In [None]:
class Resnt34LSTM(nn.Module):
    """ResNet34 + LSTM clip classifier that predicts from all LSTM outputs (no attention).

    Each frame is passed through ResNet34 (classification head removed) and
    the frame features are fed to the LSTM one time step at a time; the LSTM
    outputs of all frames are concatenated and mapped to class logits by
    ``fc1``.

    Args:
        params_model (dict): model settings.
            "num_classes" (int): number of output logits.
            "pretrained" (bool): forwarded to ``models.resnet34``.
            "rnn_hidden_size" (int): LSTM hidden size.
            "rnn_num_layers" (int): number of stacked LSTM layers.
            "num_frames" (int, optional): frames per clip, default 20.
            An "attention" key may be present; this variant ignores it.
    """

    def __init__(self, params_model):
        super(Resnt34LSTM, self).__init__()
        num_classes = params_model["num_classes"]
        # dr_rate = params_model["dr_rate"]
        pretrained = params_model["pretrained"]
        self.rnn_hidden_size = params_model["rnn_hidden_size"]
        rnn_num_layers = params_model["rnn_num_layers"]
        # Optional so that existing params_model dicts keep working unchanged.
        num_frames = params_model.get("num_frames", 20)

        baseModel = models.resnet34(pretrained=pretrained)

        num_features = baseModel.fc.in_features

        # Spatial reduction before the LSTM. The avgpool choice and the LSTM
        # input size must be switched together:
        #   flattening (active):  avgpool = Identity()           -> LSTM input 25088
        #   global avg pooling:   keep the backbone's avgpool    -> LSTM input num_features
        #   global max pooling:   avgpool = AdaptiveMaxPool2d    -> LSTM input num_features
        # baseModel.avgpool = nn.AdaptiveMaxPool2d(output_size=(1,1))
        baseModel.avgpool = Identity()
        baseModel.fc = Identity()
        self.baseModel = baseModel
        # self.dropout = nn.Dropout(dr_rate)
        # self.rnn = nn.LSTM(num_features, self.rnn_hidden_size, rnn_num_layers)
        # 25088 = 512 * 7 * 7: the flattened final feature map for 224x224 frames.
        self.rnn = nn.LSTM(25088, self.rnn_hidden_size, rnn_num_layers)
        self.fc1 = nn.Linear(self.rnn_hidden_size * num_frames, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a 5-D clip batch.

        ``x`` is unpacked as (batch, time, channels, height, width).
        """
        b_z, ts, c, h, w = x.shape

        # state=None makes the LSTM start from zero hidden/cell states.
        state = None
        hidden_states = []
        for ii in range(ts):
            y = self.baseModel(x[:, ii])
            out, state = self.rnn(y.unsqueeze(0), state)
            # (1, batch, hidden) -> (batch, 1, hidden)
            hidden_states.append(out.view(b_z, -1, self.rnn_hidden_size))

        # all hidden states: (batch, time, hidden)
        output = torch.cat(hidden_states, dim=1)

        # output = self.dropout(output.view(b_z, -1))

        # prediction using all HS
        final_out = self.fc1(output.view(b_z, -1))

        return final_out
    
class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands back its input unchanged.

    Assigned over backbone layers (such as ``fc``) to disable them.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x

### ResNet34-LSTM: Predict using ALL HIDDEN STATES (WITH attention)
**flattening vs. global avg pooling vs. global max pooling**

In [13]:
'''
Attention module used in LSTM
'''
class Attention(nn.Module):
    """Scoring head that produces one raw attention score per feature vector.

    Computes ``attn_2(tanh(attn_1(x)))``, mapping the last dimension from
    ``feature_dim`` to 1. The scores are returned unnormalised; any softmax
    is left to the caller.

    Args:
        feature_dim: size of the last dimension of the input.
    """

    def __init__(self, feature_dim):
        super().__init__()

        self.attn_1 = nn.Linear(feature_dim, feature_dim)
        self.attn_2 = nn.Linear(feature_dim, 1)

        # Xavier-uniform weights and zero biases for both projections.
        for layer in (self.attn_1, self.attn_2):
            nn.init.xavier_uniform_(layer.weight)
            layer.bias.data.fill_(0.0)

    def forward(self, x):
        """Return the raw scores: same leading dimensions as ``x``, last dimension 1."""
        projected = torch.tanh(self.attn_1(x))
        return self.attn_2(projected)

In [None]:
class Resnt34LSTM(nn.Module):
    """ResNet34 + LSTM clip classifier over all LSTM outputs, with optional temporal attention.

    Each frame is passed through ResNet34 (pooling replaced by flattening,
    classification head removed) and the frame features are fed to the LSTM
    one time step at a time. When attention is enabled, every LSTM output is
    scaled by its softmax-normalised attention score; the (scaled) outputs of
    all frames are then flattened and mapped to class logits by ``fc1``.

    Args:
        params_model (dict): model settings.
            "num_classes" (int): number of output logits.
            "pretrained" (bool): forwarded to ``models.resnet34``.
            "rnn_hidden_size" (int): LSTM hidden size.
            "rnn_num_layers" (int): number of stacked LSTM layers.
            "attention" (bool): whether to apply temporal attention.
            "num_frames" (int, optional): frames per clip, default 20.
    """

    def __init__(self, params_model):
        super(Resnt34LSTM, self).__init__()
        num_classes = params_model["num_classes"]
        # dr_rate = params_model["dr_rate"]
        pretrained = params_model["pretrained"]
        self.rnn_hidden_size = params_model["rnn_hidden_size"]
        rnn_num_layers = params_model["rnn_num_layers"]
        attention = params_model["attention"]
        # Optional so that existing params_model dicts keep working unchanged.
        num_frames = params_model.get("num_frames", 20)

        baseModel = models.resnet34(pretrained=pretrained)
        num_features = baseModel.fc.in_features
        # baseModel.avgpool = nn.AdaptiveMaxPool2d(output_size=(1,1))
        baseModel.avgpool = Identity() #flattening
        baseModel.fc = Identity()
        self.baseModel = baseModel
        # self.dropout = nn.Dropout(dr_rate)

        # Always define the attribute so forward() can test it safely when
        # attention is disabled.
        self.attention = Attention(self.rnn_hidden_size) if attention else None

        # self.rnn = nn.LSTM(num_features, self.rnn_hidden_size, rnn_num_layers) # for global max / avg pooling
        # 25088 = 512 * 7 * 7: the flattened final feature map for 224x224 frames.
        self.rnn = nn.LSTM(25088, self.rnn_hidden_size, rnn_num_layers)
        self.fc1 = nn.Linear(self.rnn_hidden_size * num_frames, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a 5-D clip batch.

        ``x`` is unpacked as (batch, time, channels, height, width).
        """
        b_z, ts, c, h, w = x.shape

        # state=None makes the LSTM start from zero hidden/cell states.
        state = None
        hidden_states = []
        for ii in range(ts):
            y = self.baseModel(x[:, ii])
            out, state = self.rnn(y.unsqueeze(0), state)
            # (1, batch, hidden) -> (batch, 1, hidden)
            hidden_states.append(out.view(b_z, -1, self.rnn_hidden_size))

        output = torch.cat(hidden_states, dim=1)  # (batch, time, hidden)

        # temporal attention to all hidden states
        if self.attention is not None:
            out_att_score = self.attention(output)
            # normalise the scores over the time axis
            out_att_score_sftmx = F.softmax(out_att_score, dim=1)
            output = out_att_score_sftmx * output

        # Uses all attended hidden states, or the plain hidden states when
        # attention is disabled, so final_out is always defined.
        final_out = self.fc1(output.view(b_z, -1))

        return final_out
    
class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands back its input unchanged.

    Assigned over backbone layers (such as ``avgpool`` and ``fc``) to disable them.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x

### VGG16-LSTM (baseline)

In [None]:
# class VGG16LSTM(nn.Module):
#     def __init__(self, params_model):
#         super(VGG16LSTM, self).__init__()
#         num_classes = params_model["num_classes"]
# #         dr_rate= params_model["dr_rate"]
#         pretrained = params_model["pretrained"]
#         self.rnn_hidden_size = params_model["rnn_hidden_size"]
#         rnn_num_layers = params_model["rnn_num_layers"]
#         attention = params_model["attention"]
        
#         baseModel = models.vgg16(pretrained=pretrained)
#         num_features = baseModel.classifier[0].in_features
#         baseModel.avgpool = Identity()
#         baseModel.classifier = Identity()
#         self.baseModel = baseModel
# #         self.dropout= nn.Dropout(dr_rate)
#         self.rnn = nn.LSTM(num_features, self.rnn_hidden_size, rnn_num_layers)
#         self.fc1 = nn.Linear(self.rnn_hidden_size, 1)
#         self.fc2 = nn.Linear(20, 2)
        
#     def forward(self, x):
#         b_z, ts, c, h, w = x.shape
        
#         ii = 0
#         y = self.baseModel((x[:,ii]))
#         output, (hn, cn) = self.rnn(y.unsqueeze(0))
        
#         # prediction for every frame
#         rnn_pred_cat = self.fc1(output.view(b_z, -1))
        
#         for ii in range(1, ts):
#             y = self.baseModel((x[:,ii]))
#             out, (hn, cn) = self.rnn(y.unsqueeze(0), (hn, cn))
            
#             # prediction for every frame
#             rnn_pred = self.fc1(out.view(b_z, -1))
#             rnn_pred_cat = torch.cat((rnn_pred_cat, rnn_pred), dim=1)

#         final_out = self.fc2(rnn_pred_cat.view(b_z, -1))
            
#         return final_out

    
# class Identity(nn.Module):
#     def __init__(self):
#         super(Identity, self).__init__()
#     def forward(self, x):
#         return x    

### InceptionV3-LSTM (baseline)

In [None]:
# class InceptionV3(nn.Module):
#     def __init__(self, params_model):
#         super(InceptionV3, self).__init__()
#         num_classes = params_model["num_classes"]
# #         dr_rate= params_model["dr_rate"]
#         pretrained = params_model["pretrained"]
#         self.rnn_hidden_size = params_model["rnn_hidden_size"]
#         rnn_num_layers = params_model["rnn_num_layers"]
#         attention = params_model["attention"]
        
#         baseModel = models.inception_v3(pretrained=pretrained)
#         num_features = baseModel.fc.in_features
#         baseModel.avgpool = Identity()
#         baseModel.dropout = nn.Dropout(0.0)
#         baseModel.fc = Identity()
#         baseModel.aux_logits = False
#         self.baseModel = baseModel
# #         self.dropout= nn.Dropout(dr_rate)
#         self.rnn = nn.LSTM(131072, self.rnn_hidden_size, rnn_num_layers)
#         self.fc1 = nn.Linear(self.rnn_hidden_size, 1)
#         self.fc2 = nn.Linear(20, 2)
        
#     def forward(self, x, coord_seq):
#         b_z, ts, c, h, w = x.shape
        
#         ii = 0
#         y = self.baseModel((x[:,ii]))
#         output, (hn, cn) = self.rnn(y.unsqueeze(0))
        
#         # prediction for every frame
#         rnn_pred_cat = self.fc1(output.view(b_z, -1))
        
#         for ii in range(1, ts):
#             y = self.baseModel((x[:,ii]))
#             out, (hn, cn) = self.rnn(y.unsqueeze(0), (hn, cn))
            
#             # prediction for every frame
#             rnn_pred = self.fc1(out.view(b_z, -1))
#             rnn_pred_cat = torch.cat((rnn_pred_cat, rnn_pred), dim=1)

#         final_out = self.fc2(rnn_pred_cat.view(b_z, -1))
            
#         return final_out

    
# class Identity(nn.Module):
#     def __init__(self):
#         super(Identity, self).__init__()
#     def forward(self, x):
#         return x    

<br>

<br>

### Model hyperparameters settings

In [None]:
# Settings read by Resnt34LSTM.__init__ (each key is looked up by name there).
params_model={
        "num_classes": 2,
#         "dr_rate": 0.5,
        "pretrained" : True,
        "rnn_num_layers": 1,
        "rnn_hidden_size": 60,
        "attention": True}
# Resnt34LSTM is defined in several cells above; this instantiates whichever
# definition was executed most recently in the kernel.
model = Resnt34LSTM(params_model)  

In [None]:
# Move the model's parameters and buffers to the selected device.
model = model.to(device)
model  # last expression of the cell: displays the module structure

In [None]:
lr = 0.01  # initial learning rate handed to SGD below
# momentum = 0.9

# criterion
criterion = nn.CrossEntropyLoss()
# criterion = nn.CrossEntropyLoss(reduction='sum')

# optimizer & learning rate schedulers
# SGD is created with a learning rate only. ReduceLROnPlateau is configured with
# factor 0.2 and patience 5; the quantity it monitors is whatever is passed to its
# step() call, which is not visible in this notebook (presumably in myutils.train_val).
optimizer = optim.SGD(model.parameters(), lr=lr)
lr_schedulers = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor = 0.2, patience = 5)

In [None]:
# Run label; it is embedded in the training-log file name and in the weight /
# checkpoint paths built further down.
model_type = "ResNet34_LSTM60_WSOS_flattening_attention"

### Log

In [None]:
from datetime import date, datetime

# Date stamp formatted as YYYYMMDD; reused in the log file name and in the
# weight / checkpoint paths.
today = date.today().strftime("%Y%m%d")
print("Today's date:"+today)

In [None]:
def log(message):
    """Append ``message`` followed by a newline to this run's training-log file.

    The file name is built from the notebook-level ``today`` and ``model_type``
    values at call time.
    """
#     with open(os.path.join(args.log_path, args.exp)+'.txt', 'a+') as logger:
    log_file = "D:/0_Graduation Thesis/training_log/"+ today + model_type + '.txt'
    with open(log_file, 'a+') as logger:
        logger.write(f'{message}\n')

In [None]:
# Write the run header to the training log: date, model label, dataset sizes,
# sampler size, preprocessing settings, device and optimisation settings.
log(f"Date:{today}")
log(f"Model: {model_type}")
log(f"train set with len: {len(train_dataset)}")
log(f"test set with len: {len(test_dataset)}")
# log(f"ROBUST test set with len: {len(robust_dataset)}\n")

# num_samples is the number of draws the training sampler makes per epoch.
log(f"Oversample/Undersample training set with len: {train_loader.sampler.num_samples}\n")

# log(f'Explanation: {args.exp}')
# log(f"Lambda: {args.ld}")
log(f"Transformation applied on training set: {train_transform}")
log(f"Resize: {side_size}")
log(f"Centre crop size: {crop_size}")
# log(f"Uniform temporal sampled frames: {num_frames}\n")

log(f"Device: {device}")
log(f'n_epochs: {epochs}')
log(f'batch_size: {batch_size}\n')
log(f"Optimizer : {optimizer}")

# log(f'num_workers: {args.num_workers}')
# log(f'Early Stop Count: {esct}')
# log(f"Optimizer: SGD")
# log(f"Learning Rate: lr")

## Train & test

In [None]:
# Everything myutils.train_val needs for the run, bundled in one dict.
# NOTE(review): the meaning of each key is defined inside myutils.train_val, which
# is not visible here; "esct" is presumably the early-stop count — confirm.
params_train={
    "num_epochs": epochs,
    "optimizer": optimizer,
    "loss_func": criterion,
    "train_dl": train_loader,
    "val_dl": test_loader,
    "robust_dl": robust_test_loader,
    "sanity_check": False,
    "lr_scheduler": lr_schedulers,
    "path2weights": "D:/0_Graduation Thesis/models/weights/"+model_type+"_"+today,
    "esct": 15,
    "path2cp": "D:/0_Graduation Thesis/models/checkpoints/"+model_type+"_"+today
    }
# model, loss_hist, metric_hist, pred_label_epoch= myutils.train_val(model,params_train,log)
# Run training; `log` is passed in so train_val can write to the same log file.
model, loss_hist, metric_hist, pred_label_epoch, robust_pred_label = myutils.train_val(model,params_train,log)

In [None]:
'''
plot the loss & acc
'''
# Plot the histories returned by myutils.train_val in the previous cell.
myutils.plot_loss(loss_hist, metric_hist)

In [None]:
'''
sort the metrics and print
'''

import math


def _sorted_without_nan(values):
    """Return the non-NaN entries of ``values`` in ascending order."""
    return sorted(v for v in values if not math.isnan(v))


# get sorted accuracy list
sorted_acc = _sorted_without_nan(metric_hist['val_acc'])
sorted_acc_r = _sorted_without_nan(metric_hist['robust_acc'])

# get sorted loss list
sorted_loss = _sorted_without_nan(loss_hist['val'])

# get sorted MCC list
sorted_mcc = _sorted_without_nan(metric_hist['val_mcc'])
sorted_mcc_r = _sorted_without_nan(metric_hist['robust_mcc'])

# smallest / largest validation loss
print('%.4G / %.4G' % (sorted_loss[0], sorted_loss[-1]))

In [None]:
# get the original index of the element before sorting
# (position i holds the index, within the history list, of the i-th smallest value)
# NOTE(review): np.argsort keeps NaN entries (sorted to the end), whereas the
# sorted_* lists in the previous cell drop them, so positions in the two only
# line up when a history contains no NaN.
sorted_acc_index = np.argsort(metric_hist['val_acc'])
sorted_loss_index = np.argsort(loss_hist['val'])
sorted_auc_index = np.argsort(metric_hist['val_auc'])
sorted_mcc_index = np.argsort(metric_hist['val_mcc'])

<br>

<br>

<br>

<br>

## Load and run the checkpoint / saved models
Reload the saved weights and run them on the robust test set to inspect the wrongly predicted cases.

In [None]:
from sklearn.metrics import recall_score, roc_auc_score, accuracy_score, precision_score, confusion_matrix, average_precision_score

In [43]:
class Attention(nn.Module):
    """Scoring head that produces one raw attention score per feature vector.

    Computes ``attn_2(tanh(attn_1(x)))``, mapping the last dimension from
    ``feature_dim`` to 1. The scores are returned unnormalised; any softmax
    is left to the caller.

    Args:
        feature_dim: size of the last dimension of the input.
    """

    def __init__(self, feature_dim):
        super().__init__()

        self.attn_1 = nn.Linear(feature_dim, feature_dim)
        self.attn_2 = nn.Linear(feature_dim, 1)

        # Xavier-uniform weights and zero biases for both projections.
        for layer in (self.attn_1, self.attn_2):
            nn.init.xavier_uniform_(layer.weight)
            layer.bias.data.fill_(0.0)

    def forward(self, x):
        """Return the raw scores: same leading dimensions as ``x``, last dimension 1."""
        projected = torch.tanh(self.attn_1(x))
        return self.attn_2(projected)

In [56]:
class Resnt34LSTM(nn.Module):
    """Fixed-configuration ResNet34 + LSTM + temporal-attention model for reloading saved weights.

    Settings are hard-coded: 2 classes, hidden size 60, one LSTM layer, global
    max pooling on the backbone and 20 frames per clip. Sub-module names and
    their creation order are kept as-is so saved state dicts still match.
    """

    def __init__(self):
        super().__init__()
        self.rnn_hidden_size = 60

        backbone = models.resnet34(pretrained=True)
        backbone_features = backbone.fc.in_features
        # Global max pooling in place of the backbone's own pooling layer.
        backbone.avgpool = nn.AdaptiveMaxPool2d(output_size=(1,1))
        backbone.fc = Identity()

        self.baseModel = backbone
        self.attention = Attention(self.rnn_hidden_size)
        self.rnn = nn.LSTM(backbone_features, self.rnn_hidden_size, 1)
        self.fc1 = nn.Linear(self.rnn_hidden_size*20, 2)

    def forward(self, x):
        """Return class logits for a 5-D batch unpacked as (batch, time, channels, height, width)."""
        batch, n_frames, _, _, _ = x.shape

        # None starts the LSTM from zero hidden/cell states on the first frame.
        lstm_state = None
        hidden_per_frame = []
        for t in range(n_frames):
            frame_feat = self.baseModel(x[:, t])
            step_out, lstm_state = self.rnn(frame_feat.unsqueeze(0), lstm_state)
            hidden_per_frame.append(step_out.view(batch, -1, self.rnn_hidden_size))
        hidden_seq = torch.cat(hidden_per_frame, dim=1)

        # Scale every hidden state by its softmax-normalised (over time) score.
        scores = self.attention(hidden_seq)
        weights = F.softmax(scores, dim=1)
        attended = weights * hidden_seq

        return self.fc1(attended.view(batch, -1))
        
class Identity(nn.Module):
    """Pass-through layer: ``forward`` hands back its input unchanged.

    Assigned over backbone layers (such as ``fc``) to disable them.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x

In [57]:
# Rebuild the architecture and move it to the selected device.
model = Resnt34LSTM()  
model = model.to(device)

# criterion / loss function
criterion = nn.CrossEntropyLoss()

# map_location remaps the stored tensors onto `device`, so weights saved from a
# GPU run also load on a machine without that GPU (e.g. CPU only).
checkpoint = torch.load("D:/0_Graduation Thesis/models/weights/Resnet34_LSTM60_MAXpool_WSOS_allattendedHS_20220604_best_accuracy.pt", map_location=device)
model.load_state_dict(checkpoint)
# Switch to evaluation mode; as the last expression this also displays the model.
model.eval()

Resnt34LSTM(
  (baseModel): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tra

In [18]:
def metrics_batch(output, target):
    """Summarise one batch of logits.

    Args:
        output: logits with the classes along dim 1.
        target: ground-truth class indices, one per sample.

    Returns:
        (number of correct predictions as an int,
         softmax probability of class 1 per sample,
         predicted class index per sample)
    """
    pred_label = output.argmax(dim=1)
    class1_prob = F.softmax(output, dim=1)[:, 1]
    n_correct = (pred_label == target.view_as(pred_label)).sum().item()
    return n_correct, class1_prob, pred_label

def loss_batch(loss_func, output, target, opt=None):
    """Compute the loss and metrics for one batch, optionally taking an optimiser step.

    Args:
        loss_func: callable applied as ``loss_func(output, target)``.
        output: model output for the batch.
        target: ground-truth labels for the batch.
        opt: optimiser; when given, gradients are zeroed, the loss is
            back-propagated and one step is taken.

    Returns:
        (loss value as a float, then the three values returned by metrics_batch)
    """
    batch_loss = loss_func(output, target)

    # Metrics are bookkeeping only, so they are computed without gradient tracking.
    with torch.no_grad():
        n_correct, probs, labels = metrics_batch(output, target)

    if opt is None:
        return batch_loss.item(), n_correct, probs, labels

    opt.zero_grad()
    batch_loss.backward()
    opt.step()
    return batch_loss.item(), n_correct, probs, labels

In [58]:
import torch.nn.functional as F
# Evaluate the loaded model on the robust test set without tracking gradients.
with torch.no_grad():
    running_loss=0.0
    running_acc=0.0
    len_data = len(robust_test_loader.dataset)

    # Per-batch results are collected in lists and joined once after the loop.
    true_batches = []
    prob_batches = []
    pred_batches = []

    # With reduction='mean' the criterion returns a per-batch average. It is
    # scaled back to a batch sum before accumulating; otherwise dividing by the
    # dataset size below would understate the loss by roughly the batch size.
    loss_is_batch_mean = getattr(criterion, 'reduction', 'mean') == 'mean'

    for xb, yb in robust_test_loader:
        true_batches.append(yb.numpy().ravel())

        xb=xb.to(device)
        yb=yb.to(device)
        output=model(xb)
        loss_b,acc_b,prob_b,pred_b=loss_batch(criterion, output, yb)
        running_loss += (loss_b * yb.size(0)) if loss_is_batch_mean else loss_b

        prob_batches.append(prob_b.cpu().numpy().ravel())
        pred_batches.append(pred_b.cpu().numpy().ravel())

        if acc_b is not None:
            running_acc+=acc_b

    # Per-sample averages over the whole robust test set.
    loss=running_loss/float(len_data)
    acc=running_acc/float(len_data)

    # Joined as float64 1-D arrays, the dtype this cell produced before.
    true_label = np.concatenate(true_batches).astype(np.float64)
    predict_prob = np.concatenate(prob_batches).astype(np.float64)
    pred_label = np.concatenate(pred_batches).astype(np.float64)

In [59]:
# Element-wise comparison: True where the predicted label equals the ground-truth label.
label_bool = true_label == pred_label

In [60]:
pd.set_option('display.max_colwidth', 100)
# label_bool is True where the prediction matches the ground truth, so the
# wrongly predicted clips are the rows where it is False.
wrong_df = robust_df[~label_bool]
correct_df = robust_df[label_bool]

<br>
<br>
<br>