In [None]:
!pip install opencv-python
!pip install torch
!pip install torchvision
!pip install pytorch-ignite
from torchvision import models
from torch import nn
import torch
import json
from os import listdir
from os.path import isfile, join
import torchvision
import cv2
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn.utils.rnn as rnn_utils
from ignite.metrics import Accuracy, Recall, Precision
from ignite.engine import create_supervised_trainer, create_supervised_evaluator
from ignite.engine.events import Events
import numpy as np
import re

In [None]:
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)
!mkdir models
!mkdir data

In [1]:
class RecursiveCNN(nn.Module):
    """Frame-wise VGG16 feature extractor followed by an LSTM and a small classifier.

    Each frame of a video is turned into a 4096-d feature vector by VGG16, the
    vectors are fed one time step at a time through a single-layer LSTM, and the
    LSTM output of the last frame is mapped to ``num_classes`` softmax scores.

    Args:
        num_classes: number of output classes.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.feature_extractor = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

        # Keep the pretrained fully connected layers and only drop the final
        # 1000-way ImageNet layer. The feature extractor runs under
        # torch.no_grad() in forward(), so freshly initialised Linear layers here
        # could never be trained and would feed random projections to the LSTM.
        # The resulting layout (Linear-ReLU-Dropout-Linear-ReLU-Dropout, 4096-d
        # output) and parameter names are the same as before.
        self.feature_extractor.classifier = self.feature_extractor.classifier[:-1]

        self.rnn = nn.LSTM(input_size=4096, hidden_size=1024, num_layers=1)

        self.output_fc = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 64),
            nn.Sigmoid(),
            nn.Linear(64, num_classes),
            nn.Softmax(dim=1))

    def forward(self, x):
        """Classify a batch of videos.

        Args:
            x: tensor of shape (batch, frames, channels, height, width).

        Returns:
            Tensor of shape (batch, num_classes) with softmax scores.

        Raises:
            ValueError: if ``x`` is not 5-dimensional or contains zero frames.
        """
        b_z, length, colors, height, width = x.shape
        if length == 0:
            # Without this guard the loop never runs and lstm_out is undefined.
            raise ValueError("RecursiveCNN.forward needs at least one frame per video")

        hidden_state = None
        lstm_out = None
        for frame in range(length):  # one time step = the same frame index of every video in the batch
            with torch.no_grad():  # the CNN is used as a frozen feature extractor
                cnn_out = self.feature_extractor(x[:, frame])
            # nn.LSTM expects (sequence_length, batch_size, input_size); we feed a sequence of length 1
            lstm_out, hidden_state = self.rnn(cnn_out.unsqueeze(0), hidden_state)
        return self.output_fc(lstm_out.squeeze(0))


class HockeyDataset(Dataset):
    """Fight / no-fight video clips read from two Google Drive folders.

    The dataset holds the slice ``[start_idx:end_idx]`` of the (sorted) files of
    each folder: first the fight clips, then the no-fight clips. Items are
    ``(frames, label)`` where ``frames`` is a float tensor of shape
    (num_frames, 3, height, width) and ``label`` is a one-hot numpy array,
    ``[1, 0]`` for fight and ``[0, 1]`` for no fight.

    Args:
        device: stored on the instance; not used by the dataset itself.
        max_frames: maximum number of frames read from each video.
        image_size: (height, width) every frame is resized to.
        start_idx: first file index (per label) included in this dataset.
        end_idx: one past the last file index (per label) included.
    """

    def __init__(self, device, max_frames, image_size, start_idx=0, end_idx=500):
        super().__init__()
        self.image_size = image_size
        self.max_frames = max_frames
        self.fight_folder = '/content/drive/MyDrive/hockey_data/dataset/fight/'
        self.nofight_folder = '/content/drive/MyDrive/hockey_data/dataset/no_fight/'
        self.device = device
        # os.listdir returns entries in arbitrary order. Sorting makes the
        # [start_idx:end_idx] slices reproducible, so that datasets built with
        # different index ranges (train / test / val) never overlap.
        fight_filenames = sorted(f for f in listdir(self.fight_folder) if isfile(join(self.fight_folder, f)))
        no_fight_filenames = sorted(f for f in listdir(self.nofight_folder) if isfile(join(self.nofight_folder, f)))
        fight_slice = fight_filenames[start_idx:end_idx]
        # Number of fight clips actually present; the slice can be shorter than
        # end_idx - start_idx when the folder holds fewer files than requested.
        self.num_fight = len(fight_slice)
        self.filenames = fight_slice + no_fight_filenames[start_idx:end_idx]
        self.num_classes = 2
        self.end_idx = end_idx
        self.start_idx = start_idx

    def preprocessSample(self, video):
        """Normalise the colour channels of a (frames, 3, H, W) video tensor."""
        # NOTE(review): these mean/std values differ from the ImageNet statistics
        # usually paired with ImageNet-pretrained torchvision models; confirm.
        augmentations = torchvision.transforms.Compose([
            torchvision.transforms.Normalize([0.43216, 0.394666, 0.37645], [0.22803, 0.22145, 0.216989])
            ])
        return augmentations(video)

    def video_to_frames(self, video_path, size, max_frames):
        """Read up to ``max_frames`` frames of a video as a tensor.

        Args:
            video_path: path of the video file.
            size: (height, width) each frame is resized to.
            max_frames: maximum number of frames to keep.

        Returns:
            Float tensor of shape (num_frames, 3, size[0], size[1]) in RGB channel
            order, or ``None`` (after printing a message) if the video cannot be
            opened.
        """
        video = cv2.VideoCapture(video_path)
        if not video.isOpened():
            video.release()
            print('Could not open video. Check given path: ' + str(video_path))
            return None
        # Build the transforms once instead of once per frame.
        to_tensor = torchvision.transforms.ToTensor()
        resize = torchvision.transforms.Resize(size)
        frames = []
        try:
            while len(frames) < max_frames:  # do not store frames after our end point
                has_frame, frame = video.read()
                if not has_frame:
                    break
                # OpenCV decodes to BGR; the normalisation constants and the
                # pretrained network expect RGB channel order.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(resize(to_tensor(frame)))
        finally:
            video.release()
        if not frames:
            return torch.empty((0, 3, size[0], size[1]))
        return torch.stack(frames)

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        """Return ``(frames, one_hot_label)`` for the clip at position ``idx``.

        Raises:
            OSError: if the video file cannot be opened.
        """
        # Fallback keeps instances unpickled from an older cache file working.
        num_fight = getattr(self, "num_fight", self.end_idx - self.start_idx)
        if idx < num_fight:
            filename = self.fight_folder + self.filenames[idx]
            label = np.array([1, 0])
        else:
            filename = self.nofight_folder + self.filenames[idx]
            label = np.array([0, 1])
        sample = self.video_to_frames(filename, self.image_size, self.max_frames)
        if sample is None:
            # Fail here with the path instead of deep inside the normalisation transform.
            raise OSError('Could not open video: ' + str(filename))
        return self.preprocessSample(sample), label


def start_training(train_loader, test_loader, epochs, model, optimizer, loss, metrics, gradient_accumulation_steps):
    """Train ``model`` with ignite and evaluate it on ``test_loader`` after every epoch.

    After each epoch the metrics are printed and the model weights are saved to
    ``models/model_e<epoch>_accuracy_<accuracy>.pt``.

    Args:
        train_loader: iterable of training batches.
        test_loader: iterable of evaluation batches.
        epochs: number of epochs to train for.
        model: the module to train.
        optimizer: optimizer updating ``model``'s parameters.
        loss: loss function passed to the ignite trainer.
        metrics: dict of ignite metrics; must contain the key ``'accuracy'``.
        gradient_accumulation_steps: forwarded to ``create_supervised_trainer``.

    Returns:
        dict with the metrics of the final evaluation on ``test_loader``.
    """
    import os  # local import: only used to make sure the checkpoint folder exists

    trainer = create_supervised_trainer(model, optimizer, loss, gradient_accumulation_steps=gradient_accumulation_steps)
    evaluator = create_supervised_evaluator(model, metrics=metrics)

    def evaluate():
        print("evaluating model")
        evaluator.run(test_loader)
        epoch_metrics = dict(evaluator.state.metrics)
        print(str(epoch_metrics))
        # Do not depend on another cell having created the folder.
        os.makedirs('models', exist_ok=True)
        torch.save(model.state_dict(), 'models/model_e' + str(trainer.state.epoch) + "_accuracy_" + str(epoch_metrics['accuracy']) + ".pt")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_batch_complete():
        print(f"epoch {trainer.state.epoch} iteration {trainer.state.iteration} completed.")

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_epoch_time():
        print(f"epoch {trainer.state.epoch} completed, time elapsed : {trainer.state.times['EPOCH_COMPLETED']}")
        evaluate()

    trainer.run(train_loader, max_epochs=epochs)
    evaluator.run(test_loader)
    # Hand the final metrics back so the caller can actually use them.
    return dict(evaluator.state.metrics)
        
def custom_collate_fn(data):
    """Collate ``(video, label)`` samples into one padded batch.

    Args:
        data: list of ``batch_size`` tuples ``(video, label)``; ``video`` is a
            tensor whose first dimension is the number of frames, ``label`` is a
            one-hot vector (numpy array, list or tensor). All labels must have
            the same length.

    Returns:
        ``(videos, labels)`` on CUDA if available, else on CPU. ``videos`` has
        shape (batch, max_frames_in_batch, ...) with shorter videos zero-padded
        at the end; ``labels`` is a float32 tensor of shape (batch, num_classes).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    videos = rnn_utils.pad_sequence([sample[0] for sample in data], batch_first=True, padding_value=0.0)
    # Copy the whole label vector, so this works for any number of classes
    # rather than just the first two entries.
    labels = torch.stack([torch.as_tensor(sample[1], dtype=torch.float32) for sample in data])
    return videos.to(device), labels.to(device)

def getMetricInFilename(f):
    """Return the second number found in filename ``f``, as a string.

    For checkpoint names of the form ``model_e<epoch>_accuracy_<value>.pt`` the
    first number is the epoch and the second one is the accuracy.

    Raises:
        IndexError: if ``f`` contains fewer than two numbers.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    numbers = re.findall(r'\d*\.?\d+', f)
    if len(numbers) < 2:
        raise IndexError("no metric found in filename: " + str(f))
    return numbers[1]

def roundToLabel(output):
    """Turn two-column scores into hard one-hot predictions, in place.

    Args:
        output: pair ``(y_pred, y)``; every row of ``y_pred`` holds two scores.

    Returns:
        The same ``(y_pred, y)`` objects. Each row of ``y_pred`` has been
        overwritten with ``[1, 0]`` when its first score is strictly larger than
        its second, and with ``[0, 1]`` otherwise (ties included).
    """
    predictions, targets = output
    for row in predictions:
        first_wins = bool(row[1] < row[0])
        # Rows are written back into `predictions` itself, not into a copy.
        row[0], row[1] = (1.0, 0.0) if first_wins else (0.0, 1.0)
    return predictions, targets

def launch(data_splits, batch_sizes, max_frames, device, profile, epochs, image_size, gradient_accumulation_steps, force_generate_dataset, continuous_training, samples_per_label):
    """Build the datasets, data loaders, model and metrics, then run training.

    Args:
        data_splits: fractions ``[train, test, val]`` of the samples of each label.
        batch_sizes: batch sizes ``[train, test, val]``.
        max_frames: maximum number of frames read from each video.
        device: torch device the model and the metrics live on.
        profile: when True, training runs inside the autograd profiler and the
            profile is printed afterwards; training errors are printed, not raised.
        epochs: number of training epochs.
        image_size: (height, width) every frame is resized to.
        gradient_accumulation_steps: forwarded to the trainer.
        force_generate_dataset: when True, rebuild the dataset objects instead of
            loading them from ``data/datasets.sav``.
        continuous_training: when True, start from the checkpoint in ``models/``
            with the highest metric in its filename.
        samples_per_label: number of videos available for each label.

    Returns:
        Whatever ``start_training`` returned, or ``None`` if training failed in
        profile mode.

    Raises:
        FileNotFoundError: if ``continuous_training`` is set and ``models/``
            contains no usable checkpoint.
    """
    import os  # local import: only used to make sure the cache folder exists

    print("starting on " + str(device))
    #load dataset from file system or from scratch
    dataset_cache = "data/datasets.sav"
    datasets = None
    if not force_generate_dataset:
        try:
            # The cache holds pickled dataset objects, so it needs
            # weights_only=False on PyTorch versions where True is the default.
            # NOTE: unpickling can run arbitrary code; only load files that this
            # notebook wrote itself.
            datasets = torch.load(dataset_cache, weights_only=False)
            print("loaded datasets from file system")
        except Exception as e:
            print("could not load cached datasets: " + str(e))
    if datasets is None:
        print("generating dataset from scratch")
        #the same index range is taken from both labels, so the split is stratified
        split_per_label = [samples_per_label*x for x in data_splits]
        # round() instead of int(): float products such as 0.7 * n can land just
        # below the intended integer and would then be truncated one too low.
        train_end = round(split_per_label[0])
        test_end = round(split_per_label[0]+split_per_label[1])
        val_end = round(split_per_label[0]+split_per_label[1]+split_per_label[2])
        datasets = (
            HockeyDataset(device, max_frames, image_size, 0, train_end),
            HockeyDataset(device, max_frames, image_size, train_end, test_end),
            HockeyDataset(device, max_frames, image_size, test_end, val_end),
        )
        os.makedirs("data", exist_ok=True)
        torch.save(datasets, dataset_cache)
    train_ds, test_ds, val_ds = datasets

    print("setting up dataloaders, metrics and model")
    # split into 3 dataloaders (for train, test, and valid)
    train_dl = DataLoader(train_ds, batch_size = batch_sizes[0], shuffle=True, collate_fn= custom_collate_fn)
    test_dl = DataLoader(test_ds, batch_size = batch_sizes[1], shuffle=True, collate_fn= custom_collate_fn)
    # the validation loader is prepared but not consumed by the training loop below
    val_dl = DataLoader(val_ds, batch_size = batch_sizes[2], shuffle=True, collate_fn= custom_collate_fn)

    #load model: always build it first, a checkpoint only replaces its weights
    model = RecursiveCNN(train_ds.num_classes)
    if continuous_training:
        checkpoints = []
        for f in listdir('models/'):
            if not isfile(join('models/', f)):
                continue
            try:
                # compare metrics numerically, not as strings
                checkpoints.append((float(getMetricInFilename(f)), f))
            except (IndexError, ValueError):
                continue  # file name does not look like a saved checkpoint
        if not checkpoints:
            # An exception instead of exit(): exit() would stop the notebook kernel.
            raise FileNotFoundError("No model file to continue training from.")
        best_metric, best_file = max(checkpoints)
        model.load_state_dict(torch.load(join('models/', best_file), map_location=device))
        print("continuing from: " + best_file)
    model.to(device)
    loss = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters())
    # Every metric receives hard 0/1 predictions through its own output_transform,
    # so none of them depends on another metric having rounded the tensor first.
    metrics = {
        'accuracy': Accuracy(device=device, output_transform=roundToLabel),
        'recall': Recall(device=device, output_transform=roundToLabel),
        'precision': Precision(device=device, output_transform=roundToLabel),
    }

    torch.cuda.empty_cache()
    print("Starting training with " + str(len(train_ds)) + " out of " + str(int(2 * samples_per_label)) + " samples.")
    measured_metrics = None
    if profile:
        with torch.autograd.profiler.profile() as prof:
            try:
                measured_metrics = start_training(train_dl, test_dl, epochs, model, optimizer, loss, metrics, gradient_accumulation_steps)
            except Exception as e:
                # best effort: still print the profile collected up to the failure
                print("ended with error " + str(e))
            print(prof.key_averages())
    else:
        measured_metrics = start_training(train_dl, test_dl, epochs, model, optimizer, loss, metrics, gradient_accumulation_steps)

    print("done!")
    return measured_metrics

NameError: ignored

In [None]:
# --- data ---
samples_per_label = 500
data_splits = [0.7, 0.2, 0.1]  # fractions for train, test, val
image_size = (224, 224)
max_frames = 20 #no more than _ frames
force_generate_dataset = True #when True re-generates the dataset object instead of loading from file system

# --- training ---
batch_sizes = [16, 16, 8]  # batch sizes for train, test, val
gradient_accumulation_steps = 1
epochs = 50
continuous_training = False
# NOTE(review): with profile=True the whole training run executes inside the
# autograd profiler; confirm this is intended for a 50-epoch run.
profile = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

launch(
    data_splits=data_splits,
    batch_sizes=batch_sizes,
    max_frames=max_frames,
    device=device,
    profile=profile,
    epochs=epochs,
    image_size=image_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    force_generate_dataset=force_generate_dataset,
    continuous_training=continuous_training,
    samples_per_label=samples_per_label,
)

starting on cuda
generating dataset from scratch
setting up dataloaders, metrics and model
Starting training with 700 out of 1000 samples.
epoch 1 iteration 1 completed.
epoch 1 iteration 2 completed.
epoch 1 iteration 3 completed.
epoch 1 iteration 4 completed.
epoch 1 iteration 5 completed.
epoch 1 iteration 6 completed.
epoch 1 iteration 7 completed.
epoch 1 iteration 8 completed.
epoch 1 iteration 9 completed.
epoch 1 iteration 10 completed.
epoch 1 iteration 11 completed.
epoch 1 iteration 12 completed.
epoch 1 iteration 13 completed.
epoch 1 iteration 14 completed.
epoch 1 iteration 15 completed.
epoch 1 iteration 16 completed.
epoch 1 iteration 17 completed.
epoch 1 iteration 18 completed.
epoch 1 iteration 19 completed.
epoch 1 iteration 20 completed.
epoch 1 iteration 21 completed.
epoch 1 iteration 22 completed.
epoch 1 iteration 23 completed.
epoch 1 iteration 24 completed.
epoch 1 iteration 25 completed.
epoch 1 iteration 26 completed.
epoch 1 iteration 27 completed.
epoch 

In [None]:
import sys
import psutil
import os
import gc
import torch

# Ask PyTorch to release the cached GPU memory it is not currently using.
torch.cuda.empty_cache()

def memReport():
    """Print the type and size of every tensor known to the garbage collector.

    Walks over ``gc.get_objects()``, prints ``type(obj), obj.size()`` for each
    object that is a torch tensor, and finishes with a ``gc.collect()``.
    """
    for obj in gc.get_objects():
        try:
            if torch.is_tensor(obj):
                print(type(obj), obj.size())
        except Exception:
            # Inspecting arbitrary tracked objects can raise; skip those.
            # `except Exception` (not a bare except) lets Ctrl-C interrupt the walk.
            continue
    gc.collect()
        
def cpuStats():
    """Print the Python version, CPU load, system memory and this process's memory use."""
    print(sys.version)
    print(psutil.cpu_percent())
    print(psutil.virtual_memory())  # physical memory usage
    current_process = psutil.Process(os.getpid())
    # first field of memory_info(), scaled from bytes by 2**30
    first_field_bytes = current_process.memory_info()[0]
    print('memory GB:', first_field_bytes / 2. ** 30)

# Print the live tensors first, then the CPU and memory figures of this process.
memReport()
cpuStats()

3.8.10 (default, Nov 14 2022, 12:59:47) 
[GCC 9.4.0]
26.7
svmem(total=13616324608, available=11847000064, percent=13.0, used=1483288576, free=10924474368, active=407371776, inactive=2031546368, buffers=153669632, cached=1054892032, shared=1343488, slab=117903360)
memory GB: 0.297119140625


