In [1]:
import os
import numpy as np
import random
import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn.functional as F
import torchvision.transforms.functional as F1
from torch import optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch import nn
from tqdm import tqdm
from torchvision.models import resnet18, resnet101

In [2]:
class customDataset(Dataset):
    def __init__(self, videoList, classInd, subsample, size=(240, 320)):
        with open(videoList) as f:
            self.videoList = f.read().splitlines()
        with open(classInd) as f:
            classList = f.read().splitlines()
            self.encodeClass = {x.split(" ")[1] : int(x.split(" ")[0]) - 1 for x in classList}
            self.decodeClass = {int(x.split(" ")[0]) -1 : x.split(" ")[1] for x in classList}
        self.subsample = subsample
        self.n = len(classList)
        self.size = size

    def __len__(self):
        return len(self.videoList)

    def __getitem__(self, idx):
        videoPath = self.videoList[idx].split(" ")[0]
        label = self.encodeClass[videoPath.split("/")[0]]
        video, _, _ = torchvision.io.read_video("./UCF-101/"+videoPath, pts_unit='sec', output_format="TCHW")
        video = video[np.linspace(0, len(video)-1, self.subsample, dtype="int")]
        if video.shape[2:] != self.size:
            video = F1.resize(video, size=self.size, antialias=False)
#         video = F1.rgb_to_grayscale(video).transpose(0,1)
        return video/255, label

In [3]:
trainingData = customDataset("trainlist01.txt", "classInd.txt", 8)
testData = customDataset("testlist01.txt", "classInd.txt", 8)

trainDataloader = DataLoader(trainingData, batch_size=32, shuffle=True, pin_memory=True)
validationDataloader = DataLoader(testData, batch_size=32, shuffle=False, pin_memory=True)

In [13]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        weights = torchvision.models.ResNet101_Weights.IMAGENET1K_V1
        self.resnet = resnet101(weights=weights)
        for parameter in self.resnet.parameters():
            parameter.requires_grad = False
        self.preprocess = weights.transforms()
        self.resnet.fc = nn.Sequential(nn.Linear(self.resnet.fc.in_features, 300))
        self.lstm = nn.LSTM(input_size=300, hidden_size=256, num_layers=3)
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, trainingData.n)

    def forward(self, x_3d):
        hidden = None
        for t in range(x_3d.size(1)):
            x = x_3d[:, t, :, :]
            x = self.preprocess(x)
            x = self.resnet(x)
            out, hidden = self.lstm(x.unsqueeze(0), hidden)         
        x = self.fc1(out[-1, :, :])
        x = F.relu(x)
        x = self.fc2(x)
        return x
    
def accuracy(outputs, labels):
    with torch.no_grad():
        eq = outputs.argmax(1).to("cpu") == labels
        return (eq.sum() / eq.numel()).numpy()

In [14]:
net = Net().to("cuda")
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters())

In [None]:
lossTrainHist = []
lossValidationHist = []
accuracyTrainHist = []
accuracyValidationHist = []
minLoss = float('inf')

for epoch in range(300):
    print("epoch",epoch+1)
    
    runningTrainLoss = 0.0
    runningValidationLoss = 0.0
    
    net.train()
    print("train")
    for i, data in enumerate(tqdm(trainDataloader), 0):
        inputs, labels = data
        optimizer.zero_grad()

        outputs = net(inputs.to("cuda"))
        
        lossTrain = criterion(outputs, labels.to("cuda"))
        lossTrain.backward()
        optimizer.step()
        
        runningTrainLoss += lossTrain.item()
        accuracyTrainHist.append(accuracy(outputs, labels))
        
    lossTrainHist.append(runningTrainLoss/(i+1))
    print(lossTrainHist[-1], accuracyTrainHist[-1])

    net.eval()
    print("eval")
    for i, data in enumerate(tqdm(validationDataloader), 0):
        inputs, labels = data
        with torch.no_grad():
            outputs = net(inputs.to("cuda"))

        lossValidation = criterion(outputs, labels.to("cuda"))
        
        runningValidationLoss += lossValidation.item()
        accuracyValidationHist.append(accuracy(outputs, labels))
        
    lossValidationHist.append(runningValidationLoss/(i+1))
    print(lossValidationHist[-1], accuracyValidationHist[-1])
    
    if minLoss>lossValidationHist[epoch]:
        minLoss = lossValidationHist[epoch]
        bestWeights1 = net.state_dict().copy()
        epochSave = epoch

epoch 1
train


100%|████████████████████████████████████████████████████████████████████████████████| 299/299 [40:29<00:00,  8.13s/it]


4.615953241303613 0.0
eval


100%|████████████████████████████████████████████████████████████████████████████████| 119/119 [11:11<00:00,  5.64s/it]


4.612613173092113 0.0
epoch 2
train


100%|████████████████████████████████████████████████████████████████████████████████| 299/299 [40:01<00:00,  8.03s/it]


4.611458781570894 0.0
eval


100%|████████████████████████████████████████████████████████████████████████████████| 119/119 [10:52<00:00,  5.48s/it]


4.60995415679547 0.0
epoch 3
train


100%|████████████████████████████████████████████████████████████████████████████████| 299/299 [39:43<00:00,  7.97s/it]


4.603698441814818 0.0
eval


100%|████████████████████████████████████████████████████████████████████████████████| 119/119 [10:39<00:00,  5.38s/it]


4.613977267962544 0.0
epoch 4
train


 33%|██████████████████████████▌                                                      | 98/299 [12:59<27:39,  8.26s/it]