# Video Classification with PyTorch

## 1. PyTorch 3D RESNET

In [None]:
# importing required components 
import torch
import torchvision

In [None]:
# importing remaining components
import json
import urllib
from pytorchvideo.data.encoded_video import EncodedVideo

from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample
)

In [None]:
# Load the pretrained `slow_r50` video classification model from the
# `facebookresearch/pytorchvideo` torch.hub repository (used below for inference).
model = torch.hub.load('facebookresearch/pytorchvideo', 'slow_r50', pretrained=True)

In [None]:
# DEVICE setup - Set the model to eval mode and move to desired device.
# Set to GPU or CPU

# Target device for the model and its inputs; change to "cuda" to use a GPU.
device = "cpu"
# Switch to inference mode and place the network on the device in one chain.
model = model.eval().to(device)

In [None]:
#Download the id to label mapping for the Kinetics 400 dataset on which the torch hub models were trained. 
#This will be used to get the category label names from the predicted class ids.

# `import urllib` alone does not guarantee the `request` submodule is loaded.
import urllib.request

json_url = "https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/kinetics_classnames.json"
json_filename = "kinetics_classnames.json"
# Fetch the class-name file into the working directory. Any download error is
# allowed to propagate instead of being hidden behind a bare `except:`.
urllib.request.urlretrieve(json_url, json_filename)

In [None]:
# Read the downloaded class-name table (label name -> class id).
with open(json_filename, "r") as classnames_file:
    kinetics_classnames = json.load(classnames_file)

# Invert it into class id -> label name, removing double quotes from the names.
kinetics_id_to_classname = {
    class_id: str(class_name).replace('"', "")
    for class_name, class_id in kinetics_classnames.items()
}

* Input Transformation

In [None]:
# Preprocessing constants used by the transform below.
side_size = 256
mean = [0.45, 0.45, 0.45]
std = [0.225, 0.225, 0.225]
crop_size = 256
num_frames = 8
sampling_rate = 8
frames_per_second = 30

# The duration of the input clip is also specific to the model.
clip_duration = (num_frames * sampling_rate)/frames_per_second

# Note that this transform is specific to the slow_R50 model.
# Steps, in order: temporal subsampling, division by 255, per-channel
# normalisation, short-side rescale, centre crop.
video_steps = [
    UniformTemporalSubsample(num_frames),
    Lambda(lambda x: x/255.0),
    NormalizeVideo(mean, std),
    ShortSideScale(size=side_size),
    CenterCropVideo(crop_size=(crop_size, crop_size)),
]
# Apply the pipeline only to the "video" entry of the clip dictionary.
transform = ApplyTransformToKey(key="video", transform=Compose(video_steps))

* Loading video data

In [None]:
# `import urllib` alone does not guarantee the `request` submodule is loaded.
import urllib.request

url_link = "https://dl.fbaipublicfiles.com/pytorchvideo/projects/archery.mp4"
video_path = 'archery.mp4'
# Download the sample video into the working directory. Any download error is
# allowed to propagate instead of being hidden behind a bare `except:`.
urllib.request.urlretrieve(url_link, video_path)

* Load the video and transform it into the input format expected by the model (for inference)

In [None]:
# Select the duration of the clip to load by specifying the start and end duration
# The start_sec should correspond to where the action occurs in the video
# Clip window in seconds: starts at 0 and spans `clip_duration`.
start_sec = 0
end_sec = start_sec + clip_duration

# Open the video file through the EncodedVideo helper.
video = EncodedVideo.from_path(video_path)

# Cut out the requested clip and run it through the preprocessing transform.
video_data = transform(video.get_clip(start_sec=start_sec, end_sec=end_sec))

# Take the transformed "video" entry and place it on the chosen device.
inputs = video_data["video"].to(device)

* Predictions on the video clip. The output is the top 5 predicted labels

In [None]:
# Pass the input clip through the model
# Inference only: disable autograd so no computation graph is kept for the
# forward pass. `inputs[None, ...]` adds a leading batch dimension.
with torch.no_grad():
    preds = model(inputs[None, ...])

# Turn the raw scores into probabilities and keep the five largest class ids
post_act = torch.nn.Softmax(dim=1)
preds = post_act(preds)
pred_classes = preds.topk(k=5).indices[0]

# Map the predicted classes to the label names
pred_class_names = [kinetics_id_to_classname[int(i)] for i in pred_classes]
print("Top 5 predicted labels: %s" % ", ".join(pred_class_names))

## 2. PyTorch - using pytorch-VideoDataset

https://github.com/YuxinZhaozyx/pytorch-VideoDataset

In [None]:
import torch
import torchvision
import os
import PIL
import collections
import random
import cv2
import numpy as np 
import pandas as pd

from torch.utils.data import Dataset

In [None]:
# NOTE(review): `datasets` and `transforms` here must be the modules shipped
# with the pytorch-VideoDataset repository linked above (they provide
# VideoDataset, VideoFilePathToTensor, ...). Presumably this cell needs
# `import datasets, transforms` with that repository on the path - confirm.
dataset = datasets.VideoDataset(
    "./data/example_video_file.csv",
    transform=torchvision.transforms.Compose([
        transforms.VideoFilePathToTensor(max_len=50, fps=10, padding_mode='last'),
        transforms.VideoRandomCrop([512, 512]),
        transforms.VideoResize([256, 256]),
    ])
)

# The loader has to be created after `dataset` exists; building it first
# raised a NameError.
data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)

# Print the shape of every batch of videos.
for videos in data_loader:
    print(videos.size())

## VC Autism

### Major differences between autism and non-autism

1. good eye contact
2. sits relatively still

* image classification approach
* Concept: train on images that show significant features of autism patients

In [None]:
import os
import time
import random
import numpy as np
import pandas as pd
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [None]:
from torch.optim import lr_scheduler
from sklearn.model_selection import train_test_split
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader
from PIL import Image
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from efficientnet_pytorch import EfficientNet

* Changing file names in directory

In [None]:
# file_path = "E:/RESEARCH/Datasets/VC/autism_test/val/nonautism"
# file_names = os.listdir(file_path)

In [None]:
# i = 1
# for name in file_names:
#     src = os.path.join(file_path, name)
#     dst = str(i) + '.png'
#     dst = os.path.join(file_path, dst)
#     os.rename(src, dst)
#     i += 1

In [None]:
class Args:
    """Run settings and hyper-parameters, stored as plain class attributes."""

    # optimisation settings
    epochs = 30
    bs = 6  # batch size handed to the DataLoaders
    lr = 0.001
    momentum = 0.9

    # data / model dimensions
    num_channels = 3
    num_classes = 2

    # miscellaneous
    verbose = 'store_true'
    seed = 712002

args = Args()

# Seed NumPy, Python's `random` and PyTorch with the same value.
for seed_fn in (np.random.seed, random.seed, torch.manual_seed):
    seed_fn(args.seed)

In [None]:
#Setting torch environment

# Use CUDA when it is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print('Using PyTorch version:', torch.__version__, ' Device: ', DEVICE)

* Data transformation for some augmentation

In [None]:
# Data Transformation
# Per-channel normalisation constants (these match the commonly used ImageNet values).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Augmentation pipeline: resize, random resized crop to 256, random
# horizontal/vertical flips, tensor conversion, then normalisation.
# Disabled steps are kept as comments at the position where they were tried.
augmentation_steps = [
    # transforms.CenterCrop(1024),
    transforms.Resize(256),
    transforms.RandomResizedCrop(256),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    # transforms.ColorJitter(contrast=(0.3, 1), saturation=(0.3, 1)),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
data_transforms = transforms.Compose(augmentation_steps)

* Setting directory

In [None]:
# Uploading image data
# data_dir = 'E:/RESEARCH/Datasets/VC/autism_test/train'
# Build an ImageFolder dataset from the training directory, applying `data_transforms`.
printer_data = datasets.ImageFolder(
    root='E:/RESEARCH/Datasets/VC/autism_test/train',
    transform=data_transforms,
)
# printer_data = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}

In [None]:
# 80 % of the samples go to training; the remainder is held out for testing.
total_size = len(printer_data)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

In [None]:
# Show both split sizes, one per line.
print(train_size, test_size, sep="\n")

In [None]:
# Randomly partition the dataset into the two subsets sized above.
split_lengths = [train_size, test_size]
train_dataset, test_dataset = torch.utils.data.random_split(printer_data, split_lengths)

In [None]:
# Both loaders share batch size and worker count; only the training one shuffles.
loader_kwargs = dict(batch_size=args.bs, num_workers=4)
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [None]:
# Pull a single batch from the training loader to inspect its labels.
# Use the builtin `next()`: current DataLoader iterators do not expose `.next()`.
dataiter = iter(train_loader)
images, labels = next(dataiter)
print(labels)

In [None]:
# model_res = models.resnet18(num_classes=2, pretrained=True)
# Pretrained EfficientNet-b3 with a 2-class output, placed on DEVICE.
model_eff3 = EfficientNet.from_pretrained(
    'efficientnet-b3',
    num_classes=2,
)
# model = model_res.to(DEVICE)
model = model_eff3.to(DEVICE)

* Model training and Accuracy Check

In [None]:
# Setting Optimizer and Objective Function

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = args.lr)
# train() steps the scheduler once per epoch, so the cycle length is taken from
# args.epochs instead of repeating the literal 30.
# NOTE(review): OneCycleLR drives the learning rate from max_lr, so args.lr is
# presumably only the optimizer's starting value - confirm this is intended.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=0.01, total_steps=args.epochs, anneal_strategy='cos')

print(model)

In [None]:
# Function that trains the model for one epoch

def train(model, train_loader, optimizer, log_interval):
    """Run one pass over `train_loader`, updating `model` with `optimizer`.

    Reads the module-level names `DEVICE`, `criterion`, `scheduler` and
    `epoch` (the latter only for the progress message). Prints the current
    learning rate once, logs the loss every `log_interval` batches, and steps
    the scheduler once at the end of the pass.
    """
    model.train()
    print(optimizer.param_groups[0]['lr'])

    for batch_idx, (image, label) in enumerate(train_loader):
        image, label = image.to(DEVICE), label.to(DEVICE)

        # Standard update: clear gradients, forward, backward, step.
        optimizer.zero_grad()
        loss = criterion(model(image), label)
        loss.backward()
        optimizer.step()

        # Only report on every `log_interval`-th batch.
        if batch_idx % log_interval != 0:
            continue

        samples_seen = batch_idx * len(image)
        percent_done = 100. * batch_idx / len(train_loader)
        print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}".format(
            epoch, samples_seen, len(train_loader.dataset), percent_done,
            loss.item()))

    # Learning-rate schedule advances once per call.
    scheduler.step()

In [None]:
# Function for checking model performance during the learning process

def evaluate(model, test_loader):
    """Return `(mean batch loss, accuracy in percent)` of `model` on `test_loader`.

    Reads the module-level names `DEVICE` and `criterion`. The loss is averaged
    over the number of batches; the accuracy over the number of samples in
    `test_loader.dataset`.
    """
    model.eval()
    loss_sum = 0
    num_correct = 0

    # No gradients are needed while scoring.
    with torch.no_grad():
        for image, label in test_loader:
            image, label = image.to(DEVICE), label.to(DEVICE)
            output = model(image)
            loss_sum += criterion(output, label).item()
            # Index of the highest score per sample, kept as a column.
            _, predicted = output.max(1, keepdim=True)
            num_correct += predicted.eq(label.view_as(predicted)).sum().item()

    mean_loss = loss_sum / len(test_loader)
    accuracy = 100. * num_correct / len(test_loader.dataset)
    return mean_loss, accuracy

In [None]:
# Checking train, val loss and accuracy

# Per-epoch history of (test_loss, test_accuracy).
total = []

# Epochs are numbered from 1, so the upper bound is args.epochs + 1; the
# previous range(1, args.epochs) stopped one epoch short of args.epochs.
# `epoch` is also read as a global inside train() for its progress message.
for epoch in range(1, args.epochs + 1):
    train(model, train_loader, optimizer, log_interval = 200)
    test_loss, test_accuracy = evaluate(model, test_loader)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
        epoch, test_loss, test_accuracy))

    total.append((test_loss, test_accuracy))

In [None]:
# Display the collected (test_loss, test_accuracy) tuples, one per epoch.
total

In [None]:
def main():
    """Predict a class for every validation image and save the results as CSV.

    Loads model weights from `model_path`, runs the model over the images in
    `data_folder_path`, and writes `<your_stu_id>.csv` to the current working
    directory with the columns `ID` (running index) and `Category` (predicted
    class id).
    """
    import csv  # local import: the notebook's import cells do not bring in csv

    ## please first init this to the path of your model parameters, e.g., './xxxxx.pth'
    model_path = 'E:/RESEARCH/Datasets/VC/models/track1_model.pt'
    # change this to your student id
    your_stu_id = '2020712002'
    # The training pipeline applies transforms.Normalize, so evaluation does too.
    normalize = True

    cuda = torch.cuda.is_available()
    device = 'cuda' if cuda else 'cpu'

    ########## Load your model #############

    # NOTE(review): assumes the checkpoint is a state_dict of the 2-class
    # EfficientNet-b3 built earlier in this notebook - confirm. The previous
    # code referenced `CNN_food` (50 classes) and `your_model_path`, neither of
    # which is defined in this file.
    your_model = EfficientNet.from_pretrained('efficientnet-b3', num_classes=2)
    your_model.load_state_dict(torch.load(model_path, map_location=device))
    your_model.to(device)
    your_model.eval()  # inference mode for dropout / batch-norm layers

    print('Model loaded')

    ########## Load evaluation dataset ##########
    # Resize + CenterCrop yields fixed 256x256 images so they can be batched.
    transform_list = [
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.ToTensor()]

    # if normalization is applied in your training, you can utilize the codes below.
    if normalize:
        transform_list.append(
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        )

    _transforms = transforms.Compose(transform_list)

    data_folder_path = 'E:/RESEARCH/Datasets/VC/autism_test/val'
    test_dataset = datasets.ImageFolder(root=data_folder_path, transform=_transforms)
    test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, pin_memory=cuda)
    print('Dataset loaded')

    ######### evaluate ###########
    print('Evaluating...')
    # The module-level evaluate() returns (loss, accuracy), not per-image
    # predictions, so the predictions are collected here directly.
    preds_list = []
    with torch.no_grad():
        for images, _ in test_dataloader:
            scores = your_model(images.to(device))
            preds_list.extend(scores.argmax(dim=1).tolist())

    filename = your_stu_id + '.csv'
    with open(filename, 'w', newline='') as csvfile:
        eval_writer = csv.writer(csvfile, delimiter=',')
        eval_writer.writerow(['ID', 'Category'])
        for index, pred in enumerate(preds_list):
            eval_writer.writerow([index, int(pred)])

    print('Done!')
    print('Results saved at : ', os.path.join(os.getcwd(), filename))


if __name__ == '__main__':
    main()


In [None]:
# Select the GPU if PyTorch can see one; otherwise run on the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print('Using PyTorch version:', torch.__version__, ' Device: ', DEVICE)

In [None]:
# Setting Optimizer and Objective Function

# Loss function for the second training run.
criterion = nn.CrossEntropyLoss()

# Disabled alternative optimiser:
# optimizer = torch.optim.SGD(model.parameters(), lr = args.lr, momentum = args.momentum)
optimizer = optim.Adam(model.parameters(), lr=args.lr)

# Disabled alternative schedules:
# scheduler = optim.lr_scheduler.LambdaLR(optimizer = optimizer,
#                                        lr_lambda = lambda epoch:0.95 ** epoch,
#                                        last_epoch = -1,
#                                        verbose = False)

# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, 
#                                                 steps_per_epoch=10, epochs=10,anneal_strategy='linear')

# Active schedule: one-cycle policy with cosine annealing over 50 steps.
scheduler = lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=0.001,
    total_steps=50,
    anneal_strategy='cos',
)

# print(model)

In [None]:
data_folder_path = 'E:/RESEARCH/Datasets/VC/autism_test/val'

# Validation must be deterministic: reusing `data_transforms` would apply the
# random crop and flips to the validation images and make the reported
# loss/accuracy vary from run to run. Only resize, centre crop, tensor
# conversion and the same normalisation as in training are applied here.
eval_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
test_dataset = datasets.ImageFolder(root=data_folder_path, transform=eval_transforms)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# Run 50 epochs (numbered 1..50) so the loop length matches total_steps=50 of
# the OneCycleLR scheduler; range(1, 50) stopped after 49 and never reached the
# end of the schedule. `epoch` is also read as a global inside train().
for epoch in range(1, 51):
    train(model, train_loader, optimizer, log_interval = 200)
    test_loss, test_accuracy = evaluate(model, test_dataloader)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
        epoch, test_loss, test_accuracy))