In [None]:
# ! cp './drive/MyDrive/Colab Notebooks/UCF101.rar' .
# ! unrar x './UCF101.rar'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Extracting  UCF-101/PlayingGuitar/v_PlayingGuitar_g06_c05.avi             62%  OK 
Extracting  UCF-101/PlayingGuitar/v_PlayingGuitar_g06_c06.avi             62%  OK 
Extracting  UCF-101/PlayingGuitar/v_PlayingGuitar_g06_c07.avi             62%  OK 
Extracting  UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c01.avi             62%  OK 
Extracting  UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c02.avi             62%  OK 
Extracting  UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c03.avi             62%  OK 
Extracting  UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c04.avi             62%  OK 
Extracting  UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c05.avi             62%  OK 
Extracting  UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c06.avi             62%  OK 
Extracting  UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c07.avi             62%  OK 
Extracting  UCF

# RNN Model

In [2]:
# ! pip install av

import torch
from torch import nn
from torch.nn.utils.rnn import pack_sequence
from torch.utils.data import DataLoader
import torchvision
from torchvision import io, models
from torchvision.datasets.folder import make_dataset
from torchvision.datasets.utils import list_dir

from sklearn.model_selection import train_test_split
import os
import re
import time
from tqdm import tqdm

## PyTorch Dataset

In [53]:
class UCF101(torchvision.datasets.VisionDataset):
    """Video dataset over a UCF-101 style tree (one sub-directory per class).

    Every ``.avi`` file under ``root`` is indexed, but only the files whose
    path is listed in ``annotation`` are exposed through ``len()`` and
    indexing.

    Args:
        root: Directory that contains one sub-directory per action class.
        annotation: Iterable of video paths to keep. Paths are compared after
            ``os.path.normpath`` so ``./a/b.avi`` and ``a/b.avi`` are equal.
        transform: Optional callable applied to the frame tensor.
    """

    def __init__(self, root, annotation, transform=None):
        super(UCF101, self).__init__(root)

        extensions = ('avi',)

        classes = list(sorted(list_dir(root)))
        class_to_idx = {name: position for position, name in enumerate(classes)}
        self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
        self.classes = classes

        # A set gives O(1) membership tests; normalising both sides keeps the
        # match independent of how the caller spelled the path.
        wanted = {os.path.normpath(path) for path in annotation}
        self.indices = [
            position
            for position, (path, _) in enumerate(self.samples)
            if os.path.normpath(path) in wanted
        ]
        self.transform = transform

    def __len__(self):
        """Number of videos selected by ``annotation``."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Decode one video.

        Returns:
            ``(video, audio, label)`` where ``video`` is the frame tensor
            rearranged from (T, H, W, C) to (T, C, H, W), ``audio`` is the
            audio tensor returned by ``io.read_video`` and ``label`` is the
            class index shifted by one (see note below).
        """
        path, class_index = self.samples[self.indices[idx]]
        video, audio, _ = io.read_video(path, pts_unit='sec')
        # NOTE(review): the shift by one only produces 0-based labels when the
        # selected classes start at sorted position 1 (true for
        # ApplyLipstick/Archery). Any selection that includes the first class
        # yields -1 here; confirm before widening the class list.
        label = class_index - 1
        # Move channels ahead of the spatial axes: (T, H, W, C) -> (T, C, H, W).
        video = video.permute(0, 3, 1, 2)

        if self.transform is not None:
            video = self.transform(video)

        return video, audio, label

## Set up Training/Testing Datasets

In [54]:
file_list = {}
unique_list = []
# class_list = ['ApplyEyeMakeup', 'ApplyLipstick', 'Archery', 'BabyCrawling', 'BalanceBeam', 'BandMarching', 'BaseballPitch', 'Basketball', 'BasketballDunk', 'BenchPress']
class_list = ['ApplyLipstick', 'Archery']

# The dot is escaped so that only a literal ".avi" suffix is accepted.
f_re = re.compile(r'v_([A-Za-z]+)_g([0-9]+)_c([0-9]+)\.avi')
# Index the clips of the selected classes: file_list[class][group] -> [clip ids]
for class_name in class_list:
    for _, _, files in os.walk(f'./UCF-101/{class_name}'):
        # Sorted so the index does not depend on filesystem listing order.
        for file_name in sorted(files):
            m = f_re.match(file_name)
            if m is None:
                # Not a UCF-101 clip name (stray file); ignore it.
                continue
            action, group, clip = m.groups()
            file_list.setdefault(action, {}).setdefault(group, []).append(clip)
            unique_list.append(f'{action}_{group}')

if not unique_list:
    raise FileNotFoundError(f'No clips found under ./UCF-101 for classes {class_list}')

# Split by (class, group) so clips of one group never straddle train and test.
# The keys are sorted because string-set iteration order changes between
# interpreter runs, which would make the split differ despite random_state.
unique_list = sorted(set(unique_list))
# NOTE: `train` is a list of group keys here; the training function defined
# further down the notebook rebinds the same name.
train, test = train_test_split(unique_list, random_state=42)


def _group_clip_paths(group_key):
    """Expand a 'Class_group' key into the paths of all clips of that group."""
    class_name, group_id = group_key.split("_")
    return [f'./UCF-101/{class_name}/v_{class_name}_g{group_id}_c{c}.avi' for c in file_list[class_name][group_id]]


train_sublists = [_group_clip_paths(group_key) for group_key in train]
train_list = [item for sublist in train_sublists for item in sublist]

test_sublists = [_group_clip_paths(group_key) for group_key in test]
test_list = [item for sublist in test_sublists for item in sublist]

train_dataset = UCF101('./UCF-101', train_list)
val_dataset = UCF101('./UCF-101', test_list)

## Define RNN Model

In [44]:
class RNNModel(nn.Module):
    """Frozen ResNet-18 frame encoder followed by a GRU and a linear classifier.

    Args:
        hidden_dim: Size of the GRU hidden state.
        num_classes: Number of output logits.
        n_layers: Number of stacked GRU layers.
        drop_prob: Dropout applied between GRU layers.
    """

    def __init__(self, hidden_dim, num_classes, n_layers, drop_prob=0.2):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        self.conv = models.resnet18(pretrained=True)
        # Take the feature width from the backbone itself before its
        # classifier head is replaced, so the GRU input always matches what
        # the encoder emits (a hard-coded 4096 does not match ResNet-18).
        feature_dim = self.conv.fc.in_features
        self.conv.fc = nn.Identity()
        for param in self.conv.parameters():
            param.requires_grad = False
        self.gru = nn.GRU(feature_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def train(self, mode=True):
        """Switch train/eval mode while keeping the frozen backbone in eval mode.

        Freezing the weights does not stop BatchNorm layers from updating
        their running statistics in train mode, so the encoder is pinned to
        eval mode regardless of ``mode``.
        """
        super(RNNModel, self).train(mode)
        self.conv.eval()
        return self

    def forward(self, x, h=None):
        """Classify one video.

        Args:
            x: Frame tensor with a leading batch axis of size 1, i.e.
                (1, T, C, H, W); the batch axis is squeezed away.
            h: Optional initial GRU hidden state.

        Returns:
            Logits of shape (1, num_classes) computed from the GRU output at
            the last frame.

        Raises:
            ValueError: If the video contains no frames.
        """
        frames = x.squeeze(0)
        if frames.shape[0] == 0:
            raise ValueError('RNNModel.forward received a video with no frames')
        features = self.conv(frames)
        # Feed the whole frame sequence in one call; the recurrence is the
        # same as stepping frame by frame with the hidden state carried over.
        out, h = self.gru(features.unsqueeze(0), h)
        return self.fc(out[:, -1, :])

## Define Evaluation Function

In [31]:
def evaluate(val_loader, model, criterion):
    """Compute the mean loss and accuracy of ``model`` over ``val_loader``.

    The model is put in eval mode and gradients are disabled for the duration
    of the pass; the model's previous train/eval mode is restored afterwards.

    Args:
        val_loader: Iterable of ``(video, audio, label)`` batches with a length.
        model: Module called as ``model(video.float())``.
        criterion: Loss callable taking ``(output, label)``.

    Returns:
        ``(average_loss, accuracy)``: loss averaged over batches and the
        fraction of correctly classified samples. ``(0.0, 0.0)`` when the
        loader yields no samples.
    """
    correct_count = 0
    sample_count = 0
    total_loss = 0.0
    print('Evaluating...')
    was_training = model.training
    model.eval()
    loop = tqdm(total=len(val_loader), position=0, leave=True)
    try:
        with torch.no_grad():
            for video, audio, label in val_loader:
                out = model(video.float())
                total_loss += criterion(out, label).item()
                # Arg-max over the class axis so batches larger than one are
                # scored per sample.
                predictions = out.argmax(dim=-1)
                correct_count += (predictions == label).sum().item()
                sample_count += label.numel()
                loop.update(1)
    finally:
        loop.close()
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)
    if sample_count == 0:
        return 0.0, 0.0
    return total_loss/len(val_loader), correct_count/sample_count

## Define Training Function

In [55]:
def train(train_loader, val_loader, learn_rate, hidden_dim=256, EPOCHS=5, state_file=None, num_classes=2, n_layers=2):
    """Train an ``RNNModel`` and validate it after every epoch.

    Args:
        train_loader: Iterable of ``(video, audio, label)`` training batches.
        val_loader: Iterable of validation batches passed to ``evaluate``.
        learn_rate: Learning rate for the Adam optimizer.
        hidden_dim: GRU hidden size.
        EPOCHS: Number of passes over ``train_loader``.
        state_file: Optional path; weights are loaded from it when it exists
            and saved to it after every epoch.
        num_classes: Number of output classes of the model.
        n_layers: Number of stacked GRU layers.

    Returns:
        ``(model, train_losses, avg_train_losses, val_losses)`` where
        ``train_losses`` holds one value per step and the other two lists hold
        one value per epoch.

    Raises:
        ValueError: If ``train_loader`` is empty.
    """
    if len(train_loader) == 0:
        raise ValueError('train_loader is empty; nothing to train on')

    # Instantiating the model
    model = RNNModel(hidden_dim, num_classes, n_layers)
    # Load state_dict
    if state_file:
        if os.path.exists(state_file):
            model.load_state_dict(torch.load(state_file))
        else:
            print(f'WARNING: {state_file} does not exist. A new file will be created after first epoch.')

    # Defining loss function and optimizer; frozen parameters are left out of
    # the optimizer since they never receive gradients.
    criterion = nn.CrossEntropyLoss()
    trainable_params = [param for param in model.parameters() if param.requires_grad]
    optimizer = torch.optim.Adam(trainable_params, lr=learn_rate)

    train_losses = []
    avg_train_losses = []
    val_losses = []
    epoch_times = []

    # Start training loop
    for epoch in range(1, EPOCHS+1):
        # Re-assert train mode every epoch so a validation pass that switches
        # the model to eval mode cannot leak into the next epoch.
        model.train()
        epoch_start = time.time()
        avg_loss = 0
        loop = tqdm(total=len(train_loader), position=0, leave=True)
        try:
            for i, (video, audio, label) in enumerate(train_loader, 1):
                optimizer.zero_grad()
                out = model(video.float())
                loss = criterion(out, label)
                loss.backward()
                optimizer.step()
                loss_item = loss.item()
                avg_loss += loss_item

                train_losses.append(loss_item)
                loop.update(1)
                loop.set_description(f"Epoch {epoch}, Step: {i}/{len(train_loader)}, Loss: {loss_item}, Average Loss for Epoch: {avg_loss/i}")
        finally:
            # Close the bar even when a step fails so the output stays tidy.
            loop.close()

        avg_train_losses.append(avg_loss/len(train_loader))
        print()
        print(f"Epoch {epoch}/{EPOCHS} Done, Total Loss: {avg_train_losses[-1]}")
        print(f"Total Train Time Elapsed: {(time.time() - epoch_start):.2f} seconds")

        val_start = time.time()
        val_loss, accuracy = evaluate(val_loader, model, criterion)
        val_losses.append(val_loss)
        print()
        print(f"Total Val Time Elapsed: {(time.time() - val_start):.2f} seconds")
        print(f"Validator Loss: {val_loss}, Accuracy: {accuracy}")

        epoch_times.append(time.time()-epoch_start)

        # Save state_dict
        if state_file:
            torch.save(model.state_dict(), state_file)

    print(f"Total Training Time: {str(sum(epoch_times))} seconds")
    return model, train_losses, avg_train_losses, val_losses

In [None]:
# Build the loaders. No batch size is given, so each batch holds a single
# video (the DataLoader default); only the training set is shuffled.
train_loader = DataLoader(dataset=train_dataset, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, shuffle=False)
# Run training, resuming from / checkpointing to the given state file.
model, train_losses, avg_train_losses, val_losses = train(
    train_loader,
    val_loader,
    .001,
    state_file='binary_model_3.pth',
)

  0%|          | 0/187 [00:00<?, ?it/s]



Epoch 1, Step: 187/187, Loss: 0.004647287540137768, Average Loss for Epoch: 0.1910881095504068: 100%|██████████| 187/187 [18:31<00:00,  5.94s/it]   
  0%|          | 0/72 [00:00<?, ?it/s]


Epoch 1/5 Done, Total Loss: 0.1910881095504068
Total Train Time Elapsed: 1111.49 seconds
Evaluating...


100%|██████████| 72/72 [05:20<00:00,  4.45s/it]



Total Val Time Elapsed: 320.75 seconds
Validator Loss: 0.10989177556989994, Accuracy: 0.9444444444444444


Epoch 2, Step: 187/187, Loss: 0.009604908525943756, Average Loss for Epoch: 0.1278650614677416: 100%|██████████| 187/187 [18:17<00:00,  5.87s/it]   
  0%|          | 0/72 [00:00<?, ?it/s]


Epoch 2/5 Done, Total Loss: 0.1278650614677416
Total Train Time Elapsed: 1097.61 seconds
Evaluating...


 90%|█████████ | 65/72 [04:48<00:41,  5.88s/it]