In [1]:
import torch
import torchvision.models as models
import h5py 
from logger import Logger
from torchvision.transforms import transforms 
import torch.utils.data as data
import numpy as np 
import pdb
import matplotlib.pyplot as plt
import torch.nn as nn 
import torch.optim as optim 
from torch.autograd import Variable
import shutil
import os 
import random
import torch.nn.functional as F

class FrameDataset(data.Dataset):
    """Dataset of frame sequences and their labels read from an indexable store.

    ``f`` must provide ``f["rgb"]`` and ``f["labels"]``, both indexable by an
    integer sample index (the notebook passes an open ``h5py.File``). Each
    ``f["rgb"][index]`` is treated as a stack of frames along its first axis.

    Args:
        f: the backing store described above.
        transform: callable applied to every single frame; its result is
            written into a ``3 x 224 x 224`` slot of the output tensor.
            When ``None`` no frame is processed and the returned tensor stays
            all zeros.
        test: when true, the random flip / colour-jitter augmentation is
            disabled.

    NOTE(review): the augmentation path feeds each frame to ``ToPILImage``,
    so frames are presumably ``H x W x 3`` uint8 images — confirm against the
    data files.
    """

    def __init__(self, f, transform=None, test = False):
        self.f = f
        self.transform = transform
        self.test = test
        # Augmentation pipelines are built lazily (only if augmentation is
        # actually used) and then reused for every frame and every sample.
        self._augmentations = None

    def _augmentation_pipelines(self):
        """Return the cached ``(flip, color_jitter)`` pipelines, building them once."""
        if self._augmentations is None:
            flip_transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.RandomHorizontalFlip(1.0),
            ])
            color_jitter_transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.ColorJitter(brightness = 0.5, contrast = 0.5, saturation = 0.5, hue = 0.2),
            ])
            self._augmentations = (flip_transform, color_jitter_transform)
        return self._augmentations

    def __getitem__(self, index):
        """Return ``(frames, label)`` for sample ``index``.

        ``frames`` is a float tensor of shape ``(num_frames, 3, 224, 224)`` and
        ``label`` is a ``uint8`` numpy array built from ``f["labels"][index]``.
        """
        rgb = np.array(self.f["rgb"][index])
        label = np.array(self.f["labels"][index], dtype=np.uint8)

        t_rgb = torch.zeros(rgb.shape[0], 3, 224, 224)

        # Drawn once per sample, so the flip / jitter decision is the same for
        # every frame of the sequence.
        prob = random.uniform(0, 1)
        prob2 = random.uniform(0, 1)

        if self.transform is None:
            # Nothing to apply: the zero-initialised tensor is returned as is.
            return t_rgb, label

        augment = not self.test
        do_flip = augment and prob > 0.5
        do_jitter = augment and prob2 > 0.5

        flip_transform = color_jitter_transform = None
        if do_flip or do_jitter:
            flip_transform, color_jitter_transform = self._augmentation_pipelines()

        for i in range(rgb.shape[0]):
            if do_flip:
                # The PIL result is written back into the uint8 frame buffer.
                rgb[i,:,:,:] = flip_transform(rgb[i,:,:,:])
            if do_jitter:
                # ColorJitter is invoked once per frame.
                rgb[i,:,:,:] = color_jitter_transform(rgb[i,:,:,:])
            t_rgb[i,:,:,:] = self.transform(rgb[i,:,:,:])

        return t_rgb, label

    def __len__(self):
        """Number of samples, i.e. the length of ``f["rgb"]``."""
        return len(self.f["rgb"])

def load_vgg_voc_weights(MODEL_PATH, model=None):
    """Load a saved state dict from ``MODEL_PATH`` into a model, in place.

    Args:
        MODEL_PATH: path of a file written with ``torch.save(state_dict, ...)``.
        model: module that receives the weights. Defaults to the module-level
            ``vgg_model`` (looked up at call time), which is what existing
            single-argument callers rely on.

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise, e.g. for a missing
        file or for keys that do not match the model.
    """
    if model is None:
        model = vgg_model
    # Deserialise onto the CPU so a checkpoint written from a GPU can be read
    # on any host; load_state_dict then copies into the model's own parameters.
    checkpoint_dict = torch.load(MODEL_PATH, map_location='cpu')
    model.load_state_dict(checkpoint_dict)

# Backbone: a torchvision VGG-16 whose last classifier layer is replaced by a
# NUM_CLASSES-way linear layer, after which the whole network is overwritten
# with the weights stored at model_path.
NUM_CLASSES = 20 ## in VOC

vgg_model = models.vgg16(pretrained=True)
num_final_in = vgg_model.classifier[-1].in_features
replacement_head = nn.Linear(num_final_in, NUM_CLASSES)
vgg_model.classifier[-1] = replacement_head

model_path = ''.join(['/home/aashi/the_conclusion/model_files/', 'vgg_on_voc', str(800)])
load_vgg_voc_weights(model_path)

class VGGNet(nn.Module):
    """Scores a sequence of frames with a shared VGG feature extractor.

    Every frame goes through ``rgb_net``, then a 3x3 convolution that reduces
    512 channels to 16, then batch norm. The flattened per-frame features
    (``16*7*7`` values each) are concatenated and passed through two linear
    layers that produce 4 raw scores (no sigmoid / softmax is applied here).
    """

    def __init__(self):
        super(VGGNet, self).__init__()
        self.rgb_net = self.get_vgg_features()

        # "Same" padding for an odd kernel, so the 7x7 map keeps its size.
        kernel_size = 3
        padding = int((kernel_size - 1)/2)
        self.conv_layer = nn.Conv2d(
            in_channels=512,
            out_channels=16,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
            bias=True,
        )
        self.conv_bn = nn.BatchNorm2d(16)

        # 16 channels * 7 * 7 positions per frame, times 4 frames per sequence.
        self.feature_size = 16*7*7*4
        hidden_layer = nn.Linear(self.feature_size, 256)
        score_layer = nn.Linear(256, 4)  ## 4 classes instead of 2
        self.final_layer = nn.Sequential(hidden_layer, score_layer)

    def forward(self, rgb):
        """Map ``rgb`` (indexed as ``rgb[:, frame, :, :, :]``) to 4 scores per sample.

        The frame axis is iterated in order; the last index is the latest frame.
        """
        per_frame_features = []
        for frame_idx in range(rgb.shape[1]):
            frame_batch = rgb[:, frame_idx, :, :, :]
            backbone_out = self.rgb_net(frame_batch)
            reduced = self.conv_bn(self.conv_layer(backbone_out))
            per_frame_features.append(reduced.view((-1, 16*7*7)))

        stacked = torch.cat(per_frame_features, dim = 1)
        return self.final_layer(stacked)

    def get_vgg_features(self):
        """Return the module-level ``vgg_model`` without its last child module.

        The parameters are left trainable (nothing is frozen here), so the
        backbone can be fine-tuned together with the rest of the network.
        """
        backbone_children = list(vgg_model.children())
        vgg16 = nn.Sequential(*backbone_children[:-1])
        return vgg16.type(torch.Tensor)

# Both splits are opened read-only and wrapped with test=True, i.e. without
# random augmentation, because this notebook only evaluates a trained model.
hfp_train = h5py.File('/mnt/hdd1/aashi/cmu_data/threeSecsTrain.h5', 'r')
hfp_test = h5py.File('/mnt/hdd1/aashi/cmu_data/threeSecsTest.h5', 'r')

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
batch_size = 1

test_dataset = FrameDataset(
    f = hfp_test,
    transform = transforms.Compose([transforms.ToTensor(), normalize]),
    test = True,
)
test_loader = data.DataLoader(test_dataset, batch_size=batch_size)

train_dataset = FrameDataset(
    f = hfp_train,
    transform = transforms.Compose([transforms.ToTensor(), normalize]),
    test = True,
)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size)

model = VGGNet().cuda()

def save_model_weights(epoch_num):
    """Save the global ``model``'s state dict to the checkpoint for ``epoch_num``.

    The file name is the fixed prefix followed by ``epoch_num`` zero-padded to
    at least three characters.
    """
    epoch_tag = str(epoch_num).zfill(3)
    torch.save(model.state_dict(), '/mnt/hdd1/aashi/3sW_' + epoch_tag)


def load_model_weights(epoch_num):
    """Load the checkpoint for ``epoch_num`` into the global ``model``.

    The file name is the fixed prefix followed by ``epoch_num`` zero-padded to
    at least three characters.
    """
    epoch_tag = str(epoch_num).zfill(3)
    saved_state = torch.load('/mnt/hdd1/aashi/3sW_' + epoch_tag)
    model.load_state_dict(saved_state)
    
# Restore the checkpoint written for epoch 50.
CHECKPOINT_EPOCH = 50
load_model_weights(CHECKPOINT_EPOCH)

# Switch to evaluation mode; as the last expression this also displays the model.
model.eval()

  from ._conv import register_converters as _register_converters


VGGNet(
  (rgb_net): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace)
      (16): MaxPool2d(kernel_size=2, stride=2,

In [2]:
## Training Accuracy
# One 2x2 confusion matrix per model output, indexed [prediction][label].
confusionMatrix1s = np.zeros((2,2))
confusionMatrix2s = np.zeros((2,2))
confusionMatrix3s = np.zeros((2,2))
confusionMatrixNone = np.zeros((2,2))
thresh = 0.5

# Output k of the model is tallied into the k-th matrix of this tuple.
per_output_matrices = (
    confusionMatrix1s,
    confusionMatrix2s,
    confusionMatrix3s,
    confusionMatrixNone,
)

# Pure inference: no autograd graph is needed.
with torch.no_grad():
    for rgb, label in train_loader:
        rgb = rgb.float().cuda()
        outputs = torch.sigmoid(model(rgb))

        # Threshold on the CPU copy; each row is one sample of the batch.
        predictions = outputs.cpu().numpy() > thresh
        targets = label.squeeze(-1).int().numpy()

        # Count every sample in the batch, not only the first one.
        for predicted_row, target_row in zip(predictions, targets):
            for k, matrix in enumerate(per_output_matrices):
                matrix[int(predicted_row[k])][target_row[k]] += 1

In [3]:
# Training-set confusion matrix for the fourth ("None") output, indexed [prediction][label].
confusionMatrixNone

array([[17945.,   178.],
       [  100.,  3751.]])

In [4]:
# F1 score of every output, computed from its 2x2 confusion matrix.
# The matrices are indexed [prediction][label], so
# [1][1] = true positives, [1][0] = false positives, [0][1] = false negatives.
for window_name, matrix in (
    ('0-1 sec', confusionMatrix1s),
    ('1-2 sec', confusionMatrix2s),
    ('2-3 sec', confusionMatrix3s),
    ('None', confusionMatrixNone),
):
    tp = matrix[1][1]
    fp = matrix[1][0]
    fn = matrix[0][1]
    # An output with no positive predictions or no positive labels has an
    # empty denominator; report 0.0 for it instead of NaN.
    precision = tp/(tp+fp) if (tp+fp) > 0 else 0.0
    recall = tp/(tp+fn) if (tp+fn) > 0 else 0.0
    if (precision + recall) > 0:
        f1Score = 2*precision*recall/(precision + recall)
    else:
        f1Score = 0.0
    print('F1 score for ' + window_name + ': ' + str(f1Score))

F1 score for 0-1 sec: 0.8286627335299902
F1 score for 1-2 sec: 0.7649453065377766
F1 score for 2-3 sec: 0.9916045291355978
F1 score for None: 0.96426735218509


In [5]:
## Test Accuracy
# One 2x2 confusion matrix per model output, indexed [prediction][label].
confusionMatrix1s = np.zeros((2,2))
confusionMatrix2s = np.zeros((2,2))
confusionMatrix3s = np.zeros((2,2))
confusionMatrixNone = np.zeros((2,2))
# NOTE(review): 0.4 here versus 0.5 in the training-set evaluation cell —
# confirm the difference is intentional and was not tuned on this split.
thresh = 0.4

# Output k of the model is tallied into the k-th matrix of this tuple.
per_output_matrices = (
    confusionMatrix1s,
    confusionMatrix2s,
    confusionMatrix3s,
    confusionMatrixNone,
)

# Pure inference: no autograd graph is needed.
with torch.no_grad():
    for rgb, label in test_loader:
        rgb = rgb.float().cuda()
        outputs = torch.sigmoid(model(rgb))

        # Threshold on the CPU copy; each row is one sample of the batch.
        predictions = outputs.cpu().numpy() > thresh
        targets = label.squeeze(-1).int().numpy()

        # Count every sample in the batch, not only the first one.
        for predicted_row, target_row in zip(predictions, targets):
            for k, matrix in enumerate(per_output_matrices):
                matrix[int(predicted_row[k])][target_row[k]] += 1

In [6]:
# Test-set confusion matrix for the first (0-1 sec) output, indexed [prediction][label].
confusionMatrix1s

array([[848.,  85.],
       [ 35., 180.]])

In [7]:
# F1 score of every output, computed from its 2x2 confusion matrix.
# The matrices are indexed [prediction][label], so
# [1][1] = true positives, [1][0] = false positives, [0][1] = false negatives.
for window_name, matrix in (
    ('0-1 sec', confusionMatrix1s),
    ('1-2 sec', confusionMatrix2s),
    ('2-3 sec', confusionMatrix3s),
    ('None', confusionMatrixNone),
):
    tp = matrix[1][1]
    fp = matrix[1][0]
    fn = matrix[0][1]
    # An output with no positive predictions or no positive labels has an
    # empty denominator; report 0.0 for it instead of NaN.
    precision = tp/(tp+fp) if (tp+fp) > 0 else 0.0
    recall = tp/(tp+fn) if (tp+fn) > 0 else 0.0
    if (precision + recall) > 0:
        f1Score = 2*precision*recall/(precision + recall)
    else:
        f1Score = 0.0
    print('F1 score for ' + window_name + ': ' + str(f1Score))

F1 score for 0-1 sec: 0.7499999999999999
F1 score for 1-2 sec: 0.5958333333333333
F1 score for 2-3 sec: 0.9544159544159544
F1 score for None: 0.6431718061674009


In [8]:
# Test-set confusion matrix for the second (1-2 sec) output, indexed [prediction][label].
confusionMatrix2s

array([[811., 126.],
       [ 68., 143.]])

In [9]:
# Test-set confusion matrix for the first (0-1 sec) output again, indexed [prediction][label].
confusionMatrix1s

array([[848.,  85.],
       [ 35., 180.]])

In [10]:
# Test-set confusion matrix for the third (2-3 sec) output, indexed [prediction][label].
confusionMatrix3s

array([[  47.,   21.],
       [  75., 1005.]])

In [11]:
# Test-set confusion matrix for the fourth ("None") output, indexed [prediction][label].
confusionMatrixNone


array([[994.,  49.],
       [ 32.,  73.]])

In [12]:
# Close the read-only HDF5 handle for the test split; test_loader cannot be used afterwards.
hfp_test.close()

In [13]:
# Close the read-only HDF5 handle for the training split; train_loader cannot be used afterwards.
hfp_train.close()