## Video Action Classification

In [1]:
import os
import pickle

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pad_packed_sequence, pack_sequence
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
%load_ext autoreload
%autoreload 2

dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device : {}".format(dev))

Using device : cuda:0


Load dataset

In [2]:
# pickle_filename = 'data.pickle'

# if os.path.isfile(pickle_filename):
#     with open(pickle_filename, 'rb') as file:
#         train_data_feat, train_data_labels, test_data_feat = pickle.load(file)
# else:
#     from read_datasetBreakfast import load_data, read_mapping_dict

#     COMP_PATH = ''

#     ''' 
#     training to load train set
#     test to load test set
#     '''
#     train_split =  os.path.join(COMP_PATH, 'splits/train.split1.bundle') #Train Split
#     test_split  =  os.path.join(COMP_PATH, 'splits/test.split1.bundle') #Test Split
#     GT_folder   =  os.path.join(COMP_PATH, 'groundTruth/') #Ground Truth Labels for each training video 
#     DATA_folder =  os.path.join(COMP_PATH, 'data/') #Frame I3D features for all videos
#     mapping_loc =  os.path.join(COMP_PATH, 'splits/mapping_bf.txt') 

#     actions_dict = read_mapping_dict(mapping_loc)
#     train_data_feat, train_data_labels = load_data( train_split, actions_dict, GT_folder, DATA_folder, datatype = "training") #Get features and labels
#     test_data_feat = load_data( test_split, actions_dict, GT_folder, DATA_folder, datatype = "test") #Get features only
    
#     with open(pickle_filename, 'wb') as file:
#         pickle.dump((train_data_feat, train_data_labels, test_data_feat), file)

# print(len(train_data_feat))
# print(len(train_data_labels))
# print(len(test_data_feat))

```python
print(len(train_data_feat))
print(len(train_data_labels))
print(len(test_data_feat))
> 1460
> 1460
> 252
```

Flatten dataset

In [3]:
# with open("training_segment.txt", 'r') as file:
#     training_segment = [[int(a) for a in line.split()] for line in file]

# # tot_segment_len = 0
# # n_segment_len = 0
# # max_segment_len = 0
# # min_segment_len = float("inf")

# train_feat_flat = []
# train_labels_flat = []
# train_flat_idx = []
# for video_idx, video_segments in enumerate(tqdm(training_segment)):
#     for i in range(len(video_segments)-1):
#         segment_len = video_segments[i+1] - video_segments[i]
# #         tot_segment_len += segment_len
# #         n_segment_len += 1
# #         if segment_len > max_segment_len:
# #             max_segment_len = segment_len
# #         if segment_len < min_segment_len:
# #             min_segment_len = segment_len
#         train_flat_idx.append(len(train_feat_flat))
#         train_feat_flat.extend(train_data_feat[video_idx][video_segments[i]:video_segments[i+1]])
#         train_labels_flat.extend([(train_data_labels[video_idx][i])-1]*segment_len)
# train_flat_idx.append(len(train_feat_flat))

# # avg_segment_len = tot_segment_len / n_segment_len
# # print(avg_segment_len, max_segment_len, min_segment_len)

```python
print(avg_segment_len, max_segment_len, min_segment_len)
> 404.3518021201413 5791 8
```

Pickle (or load) flatten dataset

In [4]:
num_split = 10
load_split = 10

files_exist = []
for i in range(num_split):
    pickle_filename = "flatten_data_{}.pickle".format(i)
    files_exist.append(os.path.isfile(pickle_filename))

if not all(files_exist):
    max_len = len(train_feat_flat)
    split_len = max_len//num_split
    for i in tqdm(range(num_split)):
        pickle_filename = "flatten_data_{}.pickle".format(i)
        start_idx = split_len*i
        end_idx = split_len*(i+1) if i < num_split-1 else max_len
        print(i, start_idx, end_idx)
        with open(pickle_filename, 'wb') as file:
            train_feat_split_tensor = torch.stack(train_feat_flat[start_idx:end_idx]).to(dtype=torch.float32)
            train_labels_split_tensor = torch.LongTensor(train_labels_flat[start_idx:end_idx])
            pickle.dump((train_feat_split_tensor, train_labels_split_tensor), file)
    with open("flatten_data_idx.txt", 'w') as file:
        for i in train_flat_idx:
            file.write("{} ".format(i))

train_feat_split_list = []
train_labels_split_list = []
for i in tqdm(range(load_split)):
    pickle_filename = "flatten_data_{}.pickle".format(i)
    with open(pickle_filename, 'rb') as file:
        train_feat_split, train_labels_split = pickle.load(file)
        train_feat_split_list.append(train_feat_split)
        train_labels_split_list.append(train_labels_split)
with open("flatten_data_idx.txt", 'r') as file:
    train_flat_idx = [int(a) for line in file for a in line.split()]

train_feat_tensor = torch.cat(train_feat_split_list, dim=0)
train_labels_tensor = torch.cat(train_labels_split_list, dim=0)
print(train_feat_tensor.shape)
print(train_labels_tensor.shape)
print(len(train_flat_idx))
print(train_flat_idx[:10])

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


torch.Size([2860789, 400])
torch.Size([2860789])
7076
[0, 260, 465, 793, 1054, 1687, 5923, 7216, 7288, 7338]


Define model

In [5]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.lstm = nn.LSTM(400, 64, batch_first=True)
        self.linear = nn.Linear(64*3, 47)
        self.linear = nn.Linear(64, 47)
        self.padding_value = 0
    def forward(self, x):
        h, (h_last, c) = self.lstm(x)
        h_pad, lengths = pad_packed_sequence(h, batch_first=True, padding_value=self.padding_value)
        h_avg_pool = torch.sum(h_pad, 1) / torch.sum((h_pad != self.padding_value), 1)
        h_max_pool = torch.max(h_pad, 1)[0]
        x = torch.cat((h_pad[:,-1], h_avg_pool, h_max_pool), 1)
        x = h_max_pool
        x = self.linear(x)
        return x

model = Model()
for param in model.parameters():
    print(type(param.data), param.size())

<class 'torch.Tensor'> torch.Size([256, 400])
<class 'torch.Tensor'> torch.Size([256, 64])
<class 'torch.Tensor'> torch.Size([256])
<class 'torch.Tensor'> torch.Size([256])
<class 'torch.Tensor'> torch.Size([47, 64])
<class 'torch.Tensor'> torch.Size([47])


Define training routine

In [6]:
import time
import copy

import copy
def train_val(num_epoch, dataloader_dict, model, loss, optimizer, print_epoch=True, device=None):
    val_acc_history = []
    val_loss_history = []
    train_acc_history = []
    train_loss_history = []
    best_acc = 0.0
    best_model_weights = copy.deepcopy(model.state_dict())
    
    for epoch in range(num_epoch):
        since = time.time()
        if print_epoch:
            print("Epoch {}/{}".format(epoch+1, num_epoch))
            print("-----------")
        
        for phase in ["train", "val"]:
            if phase == "train":
                model.train()
            else :
                model.eval()
            total_loss = 0.0
            total_correct = 0.0
            for X, y, batch_len in dataloader_dict[phase]:
                if device is not None:
                    X = X.to(device)
                    y = y.to(device)
                with torch.set_grad_enabled(phase == "train"):
                    y_tilde = model(X)
                    L = loss(y_tilde, y)
                    if phase == "train":
                        optimizer.zero_grad()
                        L.backward()
                        optimizer.step()
                y_tilde_label = torch.argmax(y_tilde, dim=1)
                num_correct = torch.sum((y_tilde_label == y))
                
                total_loss += L.item() * batch_len
                total_correct += num_correct.item()
            
            epoch_loss = total_loss / len(dataloader_dict[phase].dataset)
            epoch_acc = (total_correct / len(dataloader_dict[phase].dataset)) * 100
            
            if phase == "val":
                val_loss_history.append(epoch_loss)
                val_acc_history.append(epoch_acc)
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_weights = copy.deepcopy(model.state_dict())
            if phase == "train":
                train_loss_history.append(epoch_loss)
                train_acc_history.append(epoch_acc)

            if print_epoch:
                print("{} | Loss : {:.4f}  Acc : {:.2f}%".format(phase.capitalize().ljust(5), epoch_loss, epoch_acc))
        
        print("Time elapsed : {:.2f} s".format(time.time() - since))
        print()
    
    model.load_state_dict(best_model_weights)
    return model, (val_acc_history, val_loss_history, train_acc_history, train_loss_history)

Create dataset

In [7]:
from torch.utils.data import Dataset, DataLoader, Subset
from torch._utils import _accumulate

class MyDataset(Dataset):
    
    def __init__(self, features, labels, indices):
        self.features = features
        self.labels = labels
        self.indices = indices
        self.len = len(self.indices) - 1

    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        return (self.features[self.indices[idx]:self.indices[idx+1]], self.labels[self.indices[idx]])

my_dataset = MyDataset(train_feat_tensor, train_labels_tensor, train_flat_idx)

Split dataset

In [8]:
tot_len = len(my_dataset)
print("Number of training data: {}".format(tot_len))

train_split_size = 0.8
lengths = [int(tot_len*train_split_size), tot_len - int(tot_len*train_split_size)]

train_dataset, val_dataset = torch.utils.data.random_split(my_dataset, lengths)

Number of training data: 7075


Create DataLoader

In [9]:
def collate(batch_data):
    features = []
    labels = []
    for feature, label in batch_data:
        features.append(feature)
        labels.append(label)
    return (pack_sequence(features, enforce_sorted=False), torch.tensor(labels), len(batch_data))

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True, collate_fn=collate)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True, pin_memory=True, collate_fn=collate)

Start training

In [10]:
my_model = Model()
my_model.to(dev)
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(my_model.parameters())

trained_model, acc = train_val(100, {"train": train_loader, "val": val_loader}, my_model, loss, optimizer, device=dev)

Epoch 1/100
-----------
Train | Loss : 3.4606  Acc : 12.58%
Val   | Loss : 3.2393  Acc : 21.84%
Time elapsed : 58.49 s

Epoch 2/100
-----------
Train | Loss : 3.0820  Acc : 23.53%
Val   | Loss : 2.9384  Acc : 26.01%
Time elapsed : 55.47 s

Epoch 3/100
-----------
Train | Loss : 2.7792  Acc : 28.23%
Val   | Loss : 2.7036  Acc : 30.88%
Time elapsed : 56.19 s

Epoch 4/100
-----------
Train | Loss : 2.5654  Acc : 31.57%
Val   | Loss : 2.5176  Acc : 34.56%
Time elapsed : 54.89 s

Epoch 5/100
-----------
Train | Loss : 2.3805  Acc : 36.54%
Val   | Loss : 2.3407  Acc : 36.54%
Time elapsed : 56.80 s

Epoch 6/100
-----------
Train | Loss : 2.2335  Acc : 38.82%
Val   | Loss : 2.2434  Acc : 39.29%
Time elapsed : 56.49 s

Epoch 7/100
-----------
Train | Loss : 2.1167  Acc : 42.17%
Val   | Loss : 2.1715  Acc : 41.27%
Time elapsed : 55.46 s

Epoch 8/100
-----------
Train | Loss : 2.0282  Acc : 44.72%
Val   | Loss : 2.0901  Acc : 41.91%
Time elapsed : 56.03 s

Epoch 9/100
-----------
Train | Loss : 1

Train | Loss : 0.4494  Acc : 88.85%
Val   | Loss : 1.6641  Acc : 51.31%
Time elapsed : 58.49 s

Epoch 70/100
-----------
Train | Loss : 0.4103  Acc : 90.48%
Val   | Loss : 1.6396  Acc : 52.93%
Time elapsed : 58.06 s

Epoch 71/100
-----------
Train | Loss : 0.4070  Acc : 90.46%
Val   | Loss : 1.6514  Acc : 52.72%
Time elapsed : 59.04 s

Epoch 72/100
-----------
Train | Loss : 0.4295  Acc : 89.10%
Val   | Loss : 1.6784  Acc : 51.24%
Time elapsed : 59.19 s

Epoch 73/100
-----------
Train | Loss : 0.4085  Acc : 89.96%
Val   | Loss : 1.6573  Acc : 52.65%
Time elapsed : 57.43 s

Epoch 74/100
-----------
Train | Loss : 0.3857  Acc : 91.20%
Val   | Loss : 1.6967  Acc : 51.24%
Time elapsed : 59.11 s

Epoch 75/100
-----------
Train | Loss : 0.3874  Acc : 90.30%
Val   | Loss : 1.6710  Acc : 53.07%
Time elapsed : 56.33 s

Epoch 76/100
-----------
Train | Loss : 0.3773  Acc : 91.33%
Val   | Loss : 1.6795  Acc : 53.00%
Time elapsed : 55.91 s

Epoch 77/100
-----------
Train | Loss : 0.4161  Acc : 88.

RuntimeError: CUDA error: unspecified launch failure