In [None]:
# default_exp core-sequence

# Actions in video

> API details.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import sys
import os
import pathlib
import time
import datetime
from tqdm.notebook import tqdm

import torch
import torchvision.transforms as transforms
import torch.nn as nn
from torch.autograd import Variable

from actions_in_videos.dataset_ucf101 import UCF101, SequenceDataset, DataLoader, ARSequenceDataset
from actions_in_videos.models import ResNet50Classifier, ConvLSTM
from actions_in_videos.dataset_ucf101 import SequenceBatchShower

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
%matplotlib inline

In [None]:
print('torch.__version__', torch.__version__)

torch.__version__ 1.4.0


Create a directory to save the models if it does not already exist:

In [None]:
# Folder for model checkpoints; created up front (no error if it already exists).
save_dir = pathlib.Path('model-checkpoints')
os.makedirs(save_dir, exist_ok=True)

Define model and run options:

In [None]:
# Model and run options shared by the cells below.
opt = {
    # Frame geometry (combined into `image_shape` further down).
    'channels': 3,
    'img_dim': 224,
    # Sequence sampling options.
    'sequence-length': 16,
    'sample-interval': 1,
    # Forwarded to ConvLSTM as `latent_dim`.
    'latent-dim': 512,
    # Data location and loader batch size.
    'dataset-path': '../data/UCF101/',
    'batch-size': 32,
    # Optimisation settings.
    'learning-rate': 0.0001,
    'num-epochs': 20,
    # A checkpoint is written whenever `epoch % checkpoint_interval == 0`.
    'checkpoint_interval': 2,
    # Path of a state dict to load before training, or False to skip loading.
    'checkpoint-model': False,
    # Forwarded to ARSequenceDataset as `smaller_dataset`.
    'smaller-dataset': False,
}

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# (channels, height, width) of a single frame.
image_shape = (opt['channels'],) + (opt['img_dim'],) * 2

# Normalization parameters for pre-trained PyTorch models
mean, std = np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225])

## Create the Training set and its associated Dataloader:

In [None]:
def sequence_collate(batch):
    """Collate dataset samples into a batch, dropping unusable samples.

    A sample is dropped when it is ``None`` or when its first element is
    empty (``len(sample[0]) == 0``). Dropped samples are not replaced, so the
    returned batch can be smaller than the requested batch size.

    Args:
        batch: list of samples as produced by the dataset.

    Returns:
        The result of PyTorch's ``default_collate`` on the remaining samples.

    Raises:
        ValueError: if no usable sample is left after filtering.
    """
    usable = [sample for sample in batch if sample is not None and len(sample[0]) > 0]

    if not usable:
        # Fail loudly instead of handing an un-collated (empty) list to the training loop.
        raise ValueError(
            f"sequence_collate: none of the {len(batch)} samples in the batch could be used"
        )

    return torch.utils.data.dataloader.default_collate(usable)

In [None]:
# # Define training set
# train_ds = SequenceDataset(
#     dataset_path=opt['dataset-path'],
#     sequence_length=opt['sequence-length'],
#     sample_interval=opt['sample-interval'],
#     training=True,
#     transform=transforms.Compose(
#     [
#         transforms.ToPILImage(),
#         transforms.RandomHorizontalFlip(p=0.5),
#         transforms.RandomCrop((224, 224)),
#         transforms.Resize((opt['img_dim'], opt['img_dim']), Image.BICUBIC), 
#         transforms.ToTensor(), 
#         transforms.Normalize(mean, std),
#     ])
# )

# train_dl = DataLoader(train_ds, batch_size=opt['batch-size'], shuffle=True, num_workers=4)

# print(f"Num of classes: {train_ds.num_classes}")

In [None]:
# Training split. Path and sequence length come from `opt` so that editing the
# options cell actually changes what is loaded.
train_ds = ARSequenceDataset(dataset_path=opt['dataset-path'],
                             sequence_length=opt['sequence-length'],
                             training=True,
                             transform=None,
                             verbose=True,
                             smaller_dataset=opt['smaller-dataset'])

print(f"Num of classes: {train_ds.num_classes}")

# `sequence_collate` drops samples that could not be loaded before batching.
train_dl = DataLoader(dataset=train_ds, batch_size=opt['batch-size'], collate_fn=sequence_collate, shuffle=True, num_workers=0)

Number of training files: 9537
Num of classes: 101


Let's make sure that we are loading the correct stuff:

In [None]:
# dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor 
# for i, data in enumerate(train_dl):             
#     data[0] = data[0].type(dtype)     

In [None]:
# sb = SequenceBatchShower(train_dl, fig_size=10, max_batch_len=4, max_seq_len=6)
# print(sb)

# sb.showBatch(3, scale=255, permuteToImg=(1,2,3,0))

# Define test set

In [None]:
# test_ds = SequenceDataset(
#     dataset_path=opt['dataset-path'],
#     sequence_length=opt['sequence-length'],
#     sample_interval=opt['sample-interval'],
#     training=False,
#     transform=transforms.Compose(
#     [
#         transforms.ToPILImage(),
#         transforms.Resize((opt['img_dim'], opt['img_dim']), Image.BICUBIC), 
#         transforms.ToTensor(), 
#         transforms.Normalize(mean, std),
#     ])
# )


# test_dl = DataLoader(test_ds, batch_size=opt['batch-size'], shuffle=False, num_workers=4)

In [None]:
# Validation split. Path and sequence length come from `opt`, matching the
# options cell instead of repeating the literals.
test_ds = ARSequenceDataset(dataset_path=opt['dataset-path'],
                            sequence_length=opt['sequence-length'],
                            training=False,
                            transform=None,
                            verbose=True,
                            smaller_dataset=opt['smaller-dataset'])

print(f"Num of classes: {test_ds.num_classes}")

Number of validation files: 3783
Num of classes: 101


In [None]:
# Evaluation loader: fixed order, same collate function as the training loader.
test_dl = DataLoader(
    dataset=test_ds,
    batch_size=opt['batch-size'],
    shuffle=False,
    num_workers=0,
    collate_fn=sequence_collate,
)

We can see what it contains. Note that in this case, there is no randomness so most likely all frames will be from the same video.

In [None]:
# sb = SequenceBatchShower(test_dl, fig_size=10, max_batch_len=1, max_seq_len=16)
# print(sb)

# sb.showBatch(2, scale=255, permuteToImg=(1,2,3,0))

## Define the model

In [None]:
# Classification criterion
criterion = nn.CrossEntropyLoss().to(device)

# Define network
model = ConvLSTM(
    num_classes=train_ds.num_classes,
    latent_dim=opt['latent-dim'],
    lstm_layers=1,
    hidden_dim=1024,
    bidirectional=True,
    attention=True,
)
model = model.to(device)
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor 

Freeze the parameters of the parts we do not want to train and set to true those of the last few layers that we are interested in updating.

The dataset is not large enough to warrant training a full ResNet-50 model. 

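To start, we will just fine-tune the output layer and the last BottleNeck block (or residual block). `model.layer4` contains three residual blocks, implying `model.layer4[2]` is the last of these three residual blocks. Fine-tuning only the top layers also reduces the amount of GPU memory needed, meaning a higher batch size can be used and the model can be trained more quickly with less overfitting. Note that for the `ConvLSTM` model used in this notebook, the cell below unfreezes only `model.lstm`, `model.output_layers` and `model.attention_layer`; all other parameters stay frozen during this first stage.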

In [None]:
#model

In [None]:
# Start from a fully frozen network ...
for weight in model.parameters():
    weight.requires_grad = False

# ... then re-enable gradients for the parts we want to train, collecting
# their parameters (in this order) for the optimizer.
params = []
for head in (model.lstm, model.output_layers, model.attention_layer):
    for weight in head.parameters():
        weight.requires_grad = True
        params.append(weight)

In [None]:
optimizer = torch.optim.Adam(params, lr=opt['learning-rate'])

In [None]:
# Optionally resume from a saved state dict (opt['checkpoint-model'] is a path or False).
if opt['checkpoint-model']:
    print(f"Loading checkpoint model: {opt['checkpoint-model']}")
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(opt['checkpoint-model'], map_location=device))

## Training

We now have everything we need to build the training loop. This is done in the next two functions: the first one evaluates the model on the validation set and the second one is the actual training loop.

In [None]:
def test_model(model,epoch):
    """ Evaluate the model on the test set """
    print("")
    model.eval()
    test_metrics = {"loss": [], "acc": []}
        
    for batch_i, (X, y) in enumerate(tqdm(test_dl, 'Test', leave=False)):
        
        X = X.permute(0,2,1,3,4)   
            
        image_sequences = Variable(X.to(device), requires_grad=True).type(dtype) 
#        image_sequences = Variable(X.to(device), requires_grad=True).float()
        labels = Variable(y.to(device), requires_grad=False)
            
        labels = labels.squeeze()
        if len(labels.size()) == 0:
            labels = torch.tensor([labels]).to(device)
        
        with torch.no_grad():
            # Reset LSTM hidden state
            model.lstm.reset_hidden_state()
            # Get sequence predictions
            preds = model(image_sequences)
                    
        # Compute metrics
        acc = 100 * (preds.detach().argmax(1) == labels).cpu().numpy().mean()
        loss = criterion(preds, labels).item()
        
        # Keep track of loss and accuracy
        test_metrics["loss"].append(loss)
        test_metrics["acc"].append(acc)
        
        # Log test performance
        sys.stdout.write(
            "\r    Testing | Batch %d/%d | Loss: %f (%f), Acc: %.2f%% (%.2f%%) | "
            % (
                batch_i,
                len(test_dl),
                np.mean(test_metrics["loss"]),
                loss,
                np.mean(test_metrics["acc"]),
                acc,                
            )
        )    
    print("")

In [None]:
def _write_train_log(epoch_no, num_epochs, batch_no, num_batches, epoch_metrics, loss_value, acc, time_left):
    """Write one training progress line (prefixed with a carriage return) to stdout."""
    sys.stdout.write(
        "\rEpoch %d/%d | Batch %d/%d | Loss: %f (%f), Acc: %.2f%% (%.2f%%) | ETA: %s"
        % (
            epoch_no,
            num_epochs,
            batch_no,
            num_batches,
            np.mean(epoch_metrics["loss"]),
            loss_value,
            np.mean(epoch_metrics["acc"]),
            acc,
            time_left,
        )
    )


def train_model(model, opt, train_dl, test_dl):
    """Train ``model``, evaluating and checkpointing after each epoch.

    Relies on the notebook-level names ``optimizer``, ``criterion``, ``device``,
    ``dtype`` and ``test_model``.

    Args:
        model: network to train; must expose ``lstm.reset_hidden_state()``.
        opt: options dict; reads ``'num-epochs'`` and ``'checkpoint_interval'``
            and requires the key ``'checkpoint-model'`` to be present.
        train_dl: loader yielding ``(X, y)`` training batches.
        test_dl: currently unused here; evaluation is delegated to
            ``test_model``, which reads the notebook-level ``test_dl``.

    Raises:
        AssertionError: if ``opt`` has no ``'checkpoint-model'`` key.
        ValueError: if ``opt['num-epochs']`` is smaller than 1.

    Side effects:
        Writes ``model-checkpoints/<ModelClass>_<epoch>.pth`` whenever
        ``epoch % opt['checkpoint_interval'] == 0`` and a final
        ``model-checkpoints/<ModelClass>_<epoch>_<random int>.pth``.
    """
    assert 'checkpoint-model' in opt.keys(), "Specify path to checkpoint model or set it to False"

    num_epochs = opt['num-epochs']
    if num_epochs < 1:
        raise ValueError("opt['num-epochs'] must be at least 1")

    num_batches = len(train_dl)
    # Arguments of the most recent progress line, replayed in the closing summary.
    # Stays None when no batch was trained on.
    last_log = None

    model.train()
    for epoch in tqdm(range(num_epochs), desc='Epoch #'):
        epoch_metrics = {"loss": [], "acc": []}
        prev_time = time.time()

        for batch_i, (X, y) in enumerate(tqdm(train_dl, 'Train', leave=False)):
            # Batches holding a single sample are skipped.
            # NOTE(review): presumably because they break batch-norm layers in
            # training mode - confirm.
            if X.size(0) == 1:
                continue

            # dataload outputs size: [batch_size, c, seq_len, h, w]
            # we need the input x to be size [batch_size, seq_length, c, h, w ].
            # The inputs do not need gradients; only model parameters do.
            image_sequences = X.permute(0, 2, 1, 3, 4).to(device).type(dtype)
            # Flatten labels to 1-D.
            # NOTE(review): assumes one label per sample, e.g. shape [batch, 1] - confirm.
            labels = y.to(device).reshape(-1)

            optimizer.zero_grad()

            # Reset LSTM hidden state
            model.lstm.reset_hidden_state()

            # forward + backward
            with torch.set_grad_enabled(True):
                # Get sequence predictions
                preds = model(image_sequences)

                # Compute metrics
                loss = criterion(preds, labels)
                acc = 100 * (preds.detach().argmax(1) == labels).cpu().numpy().mean()

                loss.backward()
                optimizer.step()

            # Keep track of epoch metrics
            loss_value = loss.item()
            epoch_metrics["loss"].append(loss_value)
            epoch_metrics["acc"].append(acc)

            # Determine approximate time left from the duration of this batch
            batches_done = epoch * num_batches + batch_i + 1
            batches_left = num_epochs * num_batches - batches_done
            time_left = datetime.timedelta(seconds=batches_left * (time.time() - prev_time))
            prev_time = time.time()

            # Print log (1-based epoch and batch counters, matching the totals)
            last_log = (epoch + 1, num_epochs, batch_i + 1, num_batches,
                        epoch_metrics, loss_value, acc, time_left)
            _write_train_log(*last_log)

            # Empty cache
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        # Evaluate the model on the test set
        test_model(model, epoch)

        # test_model switches to eval mode; go back to training mode
        model.train()

        # Save model checkpoint
        if epoch % opt['checkpoint_interval'] == 0:
            os.makedirs('model-checkpoints', exist_ok=True)
            torch.save(model.state_dict(), f"model-checkpoints/{model.__class__.__name__}_{epoch}.pth")

    # Print log: repeat the last progress line as a closing summary
    print("-------------------------------------------------------")
    if last_log is not None:
        _write_train_log(*last_log)

    # save last model
    os.makedirs('model-checkpoints', exist_ok=True)
    # Random suffix, so repeated runs do not overwrite each other's final checkpoint
    random_int = int(np.random.uniform(0,1e14))
    torch.save(model.state_dict(), f"model-checkpoints/{model.__class__.__name__}_{epoch}_{random_int}.pth")
    print("")
    print(f"Model saved as: model-checkpoints/{model.__class__.__name__}_{epoch}_{random_int}.pth")
In [None]:
opt['num-epochs'] = 3

In [None]:
train_model(model, opt, train_dl, test_dl)

HBox(children=(FloatProgress(value=0.0, description='Epoch #', max=3.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Train', max=299.0, style=ProgressStyle(description_width=…

Epoch 0/3 | Batch 297/299 | Loss: 2.191058 (1.491686), Acc: 60.56% (71.88%) | ETA: 0:21:47.723522


HBox(children=(FloatProgress(value=0.0, description='Test', max=119.0, style=ProgressStyle(description_width='…

    Testing | Batch 118/119 | Loss: 3.143807 (3.962840), Acc: 61.49% (14.29%) |  


HBox(children=(FloatProgress(value=0.0, description='Train', max=299.0, style=ProgressStyle(description_width=…

Epoch 1/3 | Batch 297/299 | Loss: 0.864903 (0.485932), Acc: 85.84% (96.88%) | ETA: 0:12:48.3221668


HBox(children=(FloatProgress(value=0.0, description='Test', max=119.0, style=ProgressStyle(description_width='…

    Testing | Batch 118/119 | Loss: 1.468541 (2.859514), Acc: 64.72% (14.29%) | 


HBox(children=(FloatProgress(value=0.0, description='Train', max=299.0, style=ProgressStyle(description_width=…

Epoch 2/3 | Batch 297/299 | Loss: 0.492307 (0.561026), Acc: 92.19% (84.38%) | ETA: 0:00:04.0418409


HBox(children=(FloatProgress(value=0.0, description='Test', max=119.0, style=ProgressStyle(description_width='…

    Testing | Batch 118/119 | Loss: 1.309576 (3.297924), Acc: 66.31% (0.00%) |   

-------------------------------------------------------
Epoch 3/3 | Batch 298/299 | Loss: 0.492307 (0.561026), Acc: 92.19% (84.38%) | ETA: 0:00:04.041840
Model saved as: model-checkpoints/ConvLSTM_2_82127395089564.pth


In [None]:
# Build the path once so the saved file and the printed message cannot drift apart.
last_layers_path = f"model-checkpoints/{model.__class__.__name__}_last-layers.pth"
torch.save(model.state_dict(), last_layers_path)
print(f"Model saved as: {last_layers_path}")

Model saved as: model-checkpoints/ConvLSTM_last-layers.pth


Unfreeze and train again:

In [None]:
# Make every parameter trainable again for the second training stage.
for weight in model.parameters():
    weight.requires_grad = True

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=opt['learning-rate'])

In [None]:
opt['num-epochs'] = 4
train_model(model, opt, train_dl, test_dl)

HBox(children=(FloatProgress(value=0.0, description='Epoch #', max=4.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='Train', max=299.0, style=ProgressStyle(description_width=…

Epoch 0/4 | Batch 297/299 | Loss: 0.413146 (0.222269), Acc: 91.52% (93.75%) | ETA: 0:31:06.1319315


HBox(children=(FloatProgress(value=0.0, description='Test', max=119.0, style=ProgressStyle(description_width='…

    Testing | Batch 118/119 | Loss: 1.563200 (3.480815), Acc: 59.28% (14.29%) |  


HBox(children=(FloatProgress(value=0.0, description='Train', max=299.0, style=ProgressStyle(description_width=…

Epoch 1/4 | Batch 297/299 | Loss: 0.278821 (0.605692), Acc: 93.93% (90.62%) | ETA: 0:19:57.2000609


HBox(children=(FloatProgress(value=0.0, description='Test', max=119.0, style=ProgressStyle(description_width='…

    Testing | Batch 118/119 | Loss: 1.384608 (1.496032), Acc: 63.14% (57.14%) |  


HBox(children=(FloatProgress(value=0.0, description='Train', max=299.0, style=ProgressStyle(description_width=…

Epoch 2/4 | Batch 297/299 | Loss: 0.193620 (0.222469), Acc: 96.12% (90.62%) | ETA: 0:10:44.6329074


HBox(children=(FloatProgress(value=0.0, description='Test', max=119.0, style=ProgressStyle(description_width='…

    Testing | Batch 118/119 | Loss: 1.449765 (3.072552), Acc: 63.39% (0.00%) |   


HBox(children=(FloatProgress(value=0.0, description='Train', max=299.0, style=ProgressStyle(description_width=…

Epoch 3/4 | Batch 24/299 | Loss: 0.138903 (0.063817), Acc: 96.62% (96.88%) | ETA: 0:09:40.1242838
Exception: ../data/UCF101/UCF-101/Basketball/v_Basketball_g16_c04.avi
Epoch 3/4 | Batch 297/299 | Loss: 0.169123 (0.156699), Acc: 96.12% (93.75%) | ETA: 0:00:04.0211294


HBox(children=(FloatProgress(value=0.0, description='Test', max=119.0, style=ProgressStyle(description_width='…

    Testing | Batch 118/119 | Loss: 1.361179 (1.363994), Acc: 64.91% (42.86%) | 

-------------------------------------------------------
Epoch 4/4 | Batch 298/299 | Loss: 0.169123 (0.156699), Acc: 96.12% (93.75%) | ETA: 0:00:04.021129
Model saved as: model-checkpoints/ConvLSTM_3_81376913231468.pth


In [None]:
# Export the notebook to its module; import only the name that is used
# instead of pulling everything from nbdev.export into the namespace.
from nbdev.export import notebook2script
notebook2script()