In [0]:
import numpy as np
import cv2

import os
import time
import copy

from PIL import Image

import torch
from torch import optim, nn
from torch.autograd import Variable
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
def create_pathlist(root='/content/drive/My Drive/data'):
    pathlist = {'train' : [],
                'val' : [],
                'test' : []}

    for r, d, f in os.walk(root):
        for path in f:
            if '.avi' not in path:
                continue
            group = int(path.split('_')[-2])
            full_path = os.path.join(r, path)

            if group <= 20:
                split = 'train'
            elif group <= 22:
                split = 'val'
            else:
                split = 'test'

            pathlist[split].append(full_path)
    
    return pathlist

In [0]:
transform = {'train' : transforms.Compose([transforms.RandomResizedCrop(196, scale=[0.8, 1.0]),
                                           transforms.RandomHorizontalFlip(),
                                           transforms.RandomRotation(24),
                                           transforms.CenterCrop(144),
                                           transforms.ToTensor(),
                                           transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
             
             'val' : transforms.Compose([transforms.RandomResizedCrop(196, scale=[0.8, 1.0]),
                                         transforms.CenterCrop(144),
                                         transforms.ToTensor(),
                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
             
             'test' : transforms.Compose([transforms.RandomResizedCrop(196, scale=[0.8, 1.0]),
                                          transforms.CenterCrop(144),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

In [0]:
class VideoFolder(Dataset):
    def __init__(self, root, split, transform=None):
        self.pathlist = create_pathlist(root)[split]
        self.split = split
        self.transform = transform
        self.batch_size = 4
        self.num = 0
        self.classes = {'shooting' : 0,
                        'biking' : 1,
                        'diving' : 2,
                        'golf' : 3,
                        'riding' : 4,
                        'juggle' : 5,
                        'swing' : 6,
                        'tennis' : 7,
                        'jumping' : 8,
                        'spiking' : 9,
                        'walk' : 10}
        
    
    def __len__(self):
        return len(self.pathlist)
    

    def video_read(self, j):
        path = self.pathlist[j]
        label = self.classes[path.split('/')[-1].split('_')[1]]

        capture = cv2.VideoCapture(path)

        num_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_list = np.random.choice(num_frames, self.batch_size, replace=False)

        image_frames = []

        for i in range(num_frames):
            if i not in frame_list:
                continue

            running, frame = capture.read()
            if not running:
                break
                
            image = Image.fromarray(frame)
            image = self.transform(image)

            image_frames.append(image)

        return image_frames, label
    
    
    def __getitem__(self, index):
        image_frames, label = self.video_read(index)
        
        images = torch.stack([frame for frame in image_frames])
        label = torch.LongTensor(label)
        
        return (images, label)
    
    
    def __iter__(self):
        return self
    
    
    def __next__(self):
        try:
            num = self.num
            self.num += 1
            return self[num]
        except:
            self.num = 0
            raise StopIteration

In [0]:
datasets = {x : VideoFolder('/content/drive/My Drive/data', x, transform[x]) for x in ['train', 'val', 'test']}

In [0]:
gpu = torch.cuda.is_available()

In [0]:
model = models.resnet50(pretrained=True)

torch.cuda.empty_cache()

num_ft = model.fc.in_features
model.fc = nn.Sequential(nn.Linear(num_ft, 512),
                         nn.ReLU(),
                         nn.Dropout(),
                         nn.Linear(512, 512),
                         nn.ReLU(),
                         nn.Dropout(),
                         nn.Linear(512, 11))

model.cuda()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [0]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.fc.parameters(), lr=0.01)

In [0]:
def train(model, optimizer, criterion, num_epochs=10, lr=0.01):
    start = time.time()
    best_model_state = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        for images, label in datasets['train']:
            images = Variable(images.cuda())
            label = Variable(label.cuda())

            optimizer.zero_grad()

            outputs = model(images)

            outputs = torch.mean(outputs)

            loss = criterion(outputs, label)
            loss.backward()

            optimizer.step()
        print("Train:               Epoch: {}, Loss: {}".format(epoch+1, loss.item()))

        correct = 0

        for images, label in datasets['val']:
            images = Variable(images.cuda())
            labels = Variable(label.cuda())

            with torch.set_grad_enabled(False):
                outputs = model(images)
                outputs = torch.mean(outputs)
                loss = criterion(outputs, label)
            
            _, predicted = torch.max(outputs.data, 1)
            correct += torch.sum(predicted.cpu() == labels.cpu)
        
        accuracy = correct.item() / len(datasets['val'])
        print("Validation:          Epoch: {}, Loss: {:.4f}, Accuracy: {:.4f}".format(epoch+1, loss.item(), accuracy))

        if accuracy > best_acc:
            best_model_state = model.state_dict
            best_acc = accuracy

    stop = time.time()

    print("Time taken: {:.4f}".format(stop-start))
    print("Best accuracy: {:.4f} %".format(100 * best_acc))
    
    model = model.load_state_dict(best_model_state)
    return model

In [0]:
model = train(model, optimizer, criterion)

IndexError: ignored