In [1]:
import numpy as np
import cv2

import os
import time
import copy

from PIL import Image

import torch
from torch import optim, nn
from torch.autograd import Variable
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader

In [2]:
def create_pathlist(root='/content/drive/My Drive/data'):
    pathlist = {'train' : [],
                'val' : [],
                'test' : []}

    for r, d, f in os.walk(root):
        for path in f:
            if '.avi' not in path:
                continue
            group = int(path.split('_')[-2])
            full_path = os.path.join(r, path)

            if group <= 20:
                split = 'train'
            elif group <= 22:
                split = 'val'
            else:
                split = 'test'

            pathlist[split].append(full_path)
    
    return pathlist

In [3]:
pathlist = create_pathlist()

In [4]:
transform = {'train' : transforms.Compose([transforms.RandomResizedCrop(196, scale=[0.8, 1.0]),
                                           transforms.RandomHorizontalFlip(),
                                           transforms.RandomRotation(24),
                                           transforms.CenterCrop(144),
                                           transforms.ToTensor(),
                                           transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
             
             'val' : transforms.Compose([transforms.RandomResizedCrop(196, scale=[0.8, 1.0]),
                                         transforms.CenterCrop(144),
                                         transforms.ToTensor(),
                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
             
             'test' : transforms.Compose([transforms.RandomResizedCrop(196, scale=[0.8, 1.0]),
                                          transforms.CenterCrop(144),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

In [5]:
class VideoFolder(Dataset):
    def __init__(self, root, split, transform=None):
        self.pathlist = create_pathlist(root)[split]
        self.split = split
        self.transform = transform
        self.batch_size = 16
        self.num = 0
        self.classes = {'shooting' : 0,
                        'biking' : 1,
                        'diving' : 2,
                        'golf' : 3,
                        'riding' : 4,
                        'juggle' : 5,
                        'swing' : 6,
                        'tennis' : 7,
                        'jumping' : 8,
                        'spiking' : 9,
                        'walk' : 10}
        
    
    def __len__(self):
        return len(self.pathlist)
    

    def video_read(self, j):
        path = self.pathlist[j]

        label = self.classes[path.split('/')[-1].split('_')[1]]

        capture = cv2.VideoCapture(path)

        num_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

        frame_list = np.random.choice(num_frames, self.batch_size, replace=False)

        image_frames = []
        labels = [label for _ in range(self.batch_size)]

        for i in range(num_frames):
            if i not in frame_list:
                continue

            running, frame = capture.read()
            if not running:
                break
                
            image = Image.fromarray(frame)
            image = self.transform(image)

            image_frames.append(image)

        return image_frames, label
    
    
    def __getitem__(self, index):
        image_frames, label = self.video_read(index)
        
        images = torch.stack([frame for frame in image_frames])
        labels = torch.stack([torch.LongTensor([label for _ in range(self.batch_size)])]).reshape((-1))
        
        return (images, labels)
    
    
    def __iter__(self):
        return self
    
    
    def __next__(self):
        try:
            num = self.num
            self.num += 1
            return self[num]
        except:
            self.num = 0
            raise StopIteration

In [6]:
datasets = {x : VideoFolder('/content/drive/My Drive/data', x, transform[x]) for x in ['train', 'val', 'test']}

In [7]:
gpu = torch.cuda.is_available()

In [8]:
model = models.vgg16(pretrained=True)

torch.cuda.empty_cache()

model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 11)
        )

model.cuda()

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /tmp/.cache/torch/checkpoints/vgg16-397923af.pth
100%|██████████| 553433881/553433881 [00:04<00:00, 111792245.14it/s]


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.classifier.parameters(), lr=0.01)

In [10]:
def train(model, optimizer, criterion, num_epochs=10, lr=0.01):
    start = time.time()
    best_model_state = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        for images, labels in datasets['train']:
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())

            optimizer.zero_grad()

            outputs = model(images)

            loss = criterion(outputs, labels)
            loss.backward()

            optimizer.step()
        print("Train:               Epoch: {}, Loss: {}".format(epoch+1, loss.item()))

        correct = 0

        for images, labels in datasets['val']:
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())

            with torch.set_grad_enabled(False):
                outputs = model(images)
                loss = criterion(outputs, labels)
            
            _, predicted = torch.max(outputs.data, 1) 
            correct += torch.sum(predicted.cpu() == labels.cpu()).item()
        
        accuracy = correct / len(datasets['val'])
        accuracy /= 16
        print("Validation:          Epoch: {}, Loss: {}, Accuracy: {}".format(epoch+1, str(loss.item()), str(accuracy)))

        if accuracy > best_acc:
            best_model_state = copy.deepcopy(model.state_dict)
            best_acc = accuracy

    stop = time.time()

    print("Time taken: {:.4f}".format(stop-start))
    print("Best accuracy: {:.4f} %".format(100 * best_acc))
    
    model = model.load_state_dict(best_model_state)
    return model

In [11]:
model = train(model, optimizer, criterion)

Train:               Epoch: 1, Loss: 0.001557469367980957
Validation:          Epoch: 1, Loss: 11.155577659606934, Accuracy: 0.10687022900763359
Train:               Epoch: 2, Loss: 0.0005014538764953613
Validation:          Epoch: 2, Loss: 10.972640991210938, Accuracy: 0.10687022900763359
Train:               Epoch: 3, Loss: 5.97834587097168e-05
Validation:          Epoch: 3, Loss: 12.392721176147461, Accuracy: 0.10687022900763359
Train:               Epoch: 4, Loss: 0.00010114908218383789
Validation:          Epoch: 4, Loss: 12.91894245147705, Accuracy: 0.10782442748091603
Train:               Epoch: 5, Loss: 1.7523765563964844e-05
Validation:          Epoch: 5, Loss: 13.316166877746582, Accuracy: 0.11211832061068702
Train:               Epoch: 6, Loss: 3.8743019104003906e-05
Validation:          Epoch: 6, Loss: 12.719279289245605, Accuracy: 0.125
Train:               Epoch: 7, Loss: 0.0
Validation:          Epoch: 7, Loss: 19.84551429748535, Accuracy: 0.10687022900763359
Train:     

AttributeError: 'function' object has no attribute 'copy'