# Video Classification with PyTorch

## 1. PyTorch 3D RESNET

In [1]:
# importing required components 
import torch
import torchvision

In [2]:
# importing remaining components
import json
import urllib
import urllib.request

from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample
)
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)

In [3]:
# Load the `slow_r50` model from the PyTorchVideo torch.hub entry with its pretrained
# weights (pretrained=True); it is used below for video classification inference.
model = torch.hub.load('facebookresearch/pytorchvideo', 'slow_r50', pretrained=True)

Using cache found in C:\Users\user/.cache\torch\hub\facebookresearch_pytorchvideo_master
Downloading: "https://dl.fbaipublicfiles.com/pytorchvideo/model_zoo/kinetics/SLOW_8x8_R50.pyth" to C:\Users\user/.cache\torch\hub\checkpoints\SLOW_8x8_R50.pyth


  0%|          | 0.00/248M [00:00<?, ?B/s]

In [4]:
# Device setup: choose where inference runs ("cpu" here; use "cuda" for a GPU).
device = "cpu"

# Switch the model to evaluation mode, then move it to the chosen device.
model = model.eval().to(device)

In [5]:
# Download the id-to-label mapping for the Kinetics 400 dataset on which the torch hub models were trained.
# This will be used to get the category label names from the predicted class ids.

json_url = "https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/kinetics_classnames.json"
json_filename = "kinetics_classnames.json"
# urllib.URLopener does not exist on Python 3, so the old first attempt always raised
# AttributeError and the bare `except:` silently hid it. Call the Python 3 API directly so
# that a real download failure is reported. The trailing `;` keeps the returned
# (filename, headers) tuple out of the cell output.
urllib.request.urlretrieve(json_url, json_filename);

In [6]:
# Parse the downloaded class-name file.
with open(json_filename, "r") as class_file:
    kinetics_classnames = json.load(class_file)

# Invert the parsed mapping: each JSON value becomes a key, and the corresponding JSON key
# (converted to str, with any double quotes removed) becomes the label name.
kinetics_id_to_classname = {
    class_id: str(class_name).replace('"', "")
    for class_name, class_id in kinetics_classnames.items()
}

* Input Transformation

In [7]:
# Temporal sampling settings
num_frames = 8
sampling_rate = 8
frames_per_second = 30

# Spatial settings
side_size = 256
crop_size = 256

# Per-channel normalisation statistics passed to NormalizeVideo
mean = [0.45, 0.45, 0.45]
std = [0.225, 0.225, 0.225]

# The duration of the input clip (in seconds) is specific to the model.
clip_duration = (num_frames * sampling_rate) / frames_per_second

# Note that this transform is specific to the slow_R50 model.
# It is applied only to the "video" entry of the clip dictionary; the steps run in list order.
transform = ApplyTransformToKey(
    key="video",
    transform=Compose([
        UniformTemporalSubsample(num_frames),
        Lambda(lambda frames: frames / 255.0),  # rescale values by 1/255 before normalising
        NormalizeVideo(mean, std),
        ShortSideScale(size=side_size),
        CenterCropVideo(crop_size=(crop_size, crop_size)),
    ]),
)

* Loading video data

In [8]:
# Fetch the example clip used for the demo prediction and store it next to the notebook.
url_link = "https://dl.fbaipublicfiles.com/pytorchvideo/projects/archery.mp4"
video_path = 'archery.mp4'
# urllib.URLopener does not exist on Python 3, so the old first attempt always raised
# AttributeError and the bare `except:` silently hid it. Call the Python 3 API directly so
# that a real download failure is reported. The trailing `;` keeps the returned
# (filename, headers) tuple out of the cell output.
urllib.request.urlretrieve(url_link, video_path);

* Load the video and transform it into the input format expected by the model (for inference)

In [9]:
# Clip window in seconds. start_sec should correspond to where the action occurs in the video;
# the window length is the model-specific clip_duration.
start_sec = 0
end_sec = start_sec + clip_duration

# Open the video through the EncodedVideo helper class.
video = EncodedVideo.from_path(video_path)

# Extract the clip between start_sec and end_sec and run it through the preprocessing transform.
video_data = transform(video.get_clip(start_sec=start_sec, end_sec=end_sec))

# Take the transformed "video" entry and move it to the desired device.
inputs = video_data["video"].to(device)

An exception occurred in telemetry logging.Disabling telemetry to prevent further exceptions.
Traceback (most recent call last):
  File "C:\Users\user\anaconda3\envs\vc\lib\site-packages\iopath\common\file_io.py", line 946, in __log_tmetry_keys
    handler.log_event()
  File "C:\Users\user\anaconda3\envs\vc\lib\site-packages\iopath\common\event_logger.py", line 97, in log_event
    del self._evt
AttributeError: _evt


* Predictions on the video clip. The output lists the top 5 predicted labels.

In [10]:
# Pass the input clip through the model; inputs[None, ...] adds a leading axis of size 1.
# This is inference only, so run it under no_grad to stop autograd from recording the
# forward pass (less memory, same numerical result).
with torch.no_grad():
    preds = model(inputs[None, ...])

    # Apply softmax over dim 1 to the model output
    post_act = torch.nn.Softmax(dim=1)
    preds = post_act(preds)

# Get the predicted classes: indices of the 5 largest values for the first (only) clip
pred_classes = preds.topk(k=5).indices[0]

# Map the predicted classes to the label names
pred_class_names = [kinetics_id_to_classname[int(i)] for i in pred_classes]
print("Top 5 predicted labels: %s" % ", ".join(pred_class_names))

Top 5 predicted labels: archery, throwing axe, playing paintball, stretching arm, riding or walking with horse


## 2. PyTorch - using pytorch-VideoDataset

https://github.com/YuxinZhaozyx/pytorch-VideoDataset

In [13]:
import torch
import torchvision
import os
import PIL
import collections
import random
import cv2
import numpy as np 
import pandas as pd

from torch.utils.data import Dataset

In [None]:
# NOTE(review): `datasets` and `transforms` are not imported by any cell above. They are
# presumably the datasets.py / transforms.py modules of the pytorch-VideoDataset repository
# linked above — confirm and import them before running this cell.
dataset = datasets.VideoDataset(
    "./data/example_video_file.csv",
    transform=torchvision.transforms.Compose([
        transforms.VideoFilePathToTensor(max_len=50, fps=10, padding_mode='last'),
        transforms.VideoRandomCrop([512, 512]),
        transforms.VideoResize([256, 256]),
    ])
)

# Build the loader only after `dataset` exists; the original order referenced `dataset`
# before it was assigned, which raises NameError on a fresh kernel.
data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True)

# Iterate once over the loader and print the size of every batch.
for videos in data_loader:
    print(videos.size())

## VC Autism

### Major differences between autism and non-autism

1. good eye contact
2. sits relatively still

* image classification approach
* Concept: train on images that show significant features of autism patients

In [1]:
import os
import time
import random
import numpy as np
import pandas as pd
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [51]:
from torch.optim import lr_scheduler
from sklearn.model_selection import train_test_split
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader
from PIL import Image
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from efficientnet_pytorch import EfficientNet

* Changing file names in directory

In [22]:
# Directory whose files are renamed by the next cell.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
file_path = "E:/RESEARCH/Datasets/VC/autism_test/val/nonautism"
# Snapshot of the entry names currently in that directory (names only, no directory prefix).
file_names = os.listdir(file_path)

In [23]:
# Rename every listed file to "<n>.png", numbering from 1 in the order of file_names.
for i, name in enumerate(file_names, start=1):
    src = os.path.join(file_path, name)
    dst = os.path.join(file_path, str(i) + '.png')
    if src == dst:
        # The file already carries its target name (e.g. the cell was re-run); nothing to do.
        continue
    if os.path.exists(dst):
        # os.rename replaces an existing destination file on POSIX and raises on Windows.
        # Refuse explicitly on every platform so no image is lost when a numbered name
        # is already taken by a different file.
        raise FileExistsError("Refusing to overwrite existing file: {}".format(dst))
    os.rename(src, dst)

In [24]:
class Args:
    """Static container for the settings used by the image-classification cells."""

    # Optimisation settings
    epochs = 30        # upper bound of the epoch loop
    bs = 6             # batch size handed to the data loaders
    lr = 0.001         # learning rate handed to the optimizer
    momentum = 0.9     # NOTE(review): not referenced by the cells visible here

    # Data / model settings
    num_channels = 3        # NOTE(review): not referenced by the cells visible here
    num_classes = 50        # NOTE(review): the model below is built with num_classes=2 instead
    verbose = 'store_true'  # NOTE(review): looks like a leftover argparse action string
    seed = 712002           # seed shared by every random number generator below


args = Args()

# Seed Python's, NumPy's and PyTorch's random number generators with the same value.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

<torch._C.Generator at 0x1eaa212d170>

In [25]:
# Setting torch environment:
# use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print('Using PyTorch version:', torch.__version__, ' Device: ', DEVICE)

Using PyTorch version: 1.7.1  Device:  cuda


In [42]:
# Data Transformation
# Image preprocessing / augmentation pipeline handed to ImageFolder in the next cell.
# The steps run in list order, ending with tensor conversion and per-channel normalisation.
# NOTE(review): the dataset built with this pipeline is later split into train and test
# subsets, so the random crop and random flips presumably also apply to the evaluation
# images — confirm this is intended.
data_transforms = transforms.Compose([
#     transforms.CenterCrop(1024),   # disabled step, kept for reference
    transforms.Resize(256),
    transforms.RandomResizedCrop(256),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
#     transforms.ColorJitter(contrast=(0.3, 1), saturation=(0.3, 1)),   # disabled step, kept for reference
    transforms.ToTensor(),
    # Per-channel mean and std; presumably the usual ImageNet statistics — TODO confirm
    transforms.Normalize([0.485, 0.456,0.406], [0.229, 0.224, 0.225])
])

In [43]:
# Load the image dataset from the training directory, applying data_transforms to every image.
# NOTE(review): the absolute Windows path ties the notebook to one machine, and the name
# `printer_data` does not describe the autism images it holds — presumably left over from
# another project; it is kept because later cells reference it.
printer_data = datasets.ImageFolder(root = 'E:/RESEARCH/Datasets/VC/autism_test/train', transform = data_transforms)

In [44]:
# Split sizes: 80% of the samples (truncated to an integer) for training, the remainder for testing.
train_size = int(0.8 * len(printer_data))
test_size = len(printer_data)-train_size

In [45]:
# Show the two split sizes, one per line (training size first).
print(train_size, test_size, sep="\n")

800
200


In [46]:
# Randomly partition the dataset into a training subset of train_size samples and a test subset of test_size samples.
train_dataset, test_dataset = torch.utils.data.random_split(printer_data, [train_size, test_size])

In [47]:
# Batch both subsets with the configured batch size and 4 worker processes.
# Only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=args.bs, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=args.bs, shuffle=False, num_workers=4)

In [48]:
# Pull one batch from the training loader as a sanity check and print its labels.
dataiter = iter(train_loader)
# Use the built-in next(): the iterator's .next() method was only a Python 2 style alias
# and is not available on DataLoader iterators in newer PyTorch releases.
images, labels = next(dataiter)
print(labels)

tensor([0, 1, 0, 0, 1, 0])


In [52]:

# Build EfficientNet-B3 from pretrained weights with a 2-class output (num_classes=2).
# NOTE(review): Args.num_classes is 50 but is not used here — confirm which value is intended.
model_eff3 = EfficientNet.from_pretrained('efficientnet-b3', num_classes=2)
# Rebinds `model`: from here on it is the EfficientNet on DEVICE, not the slow_r50 network loaded earlier.
model = model_eff3.to(DEVICE)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth" to C:\Users\user/.cache\torch\hub\checkpoints\efficientnet-b3-5fb5a3c3.pth


  0%|          | 0.00/47.1M [00:00<?, ?B/s]

Loaded pretrained weights for efficientnet-b3


In [53]:
# Setting Optimizer and Objective Function

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
# train() advances the scheduler once per call (i.e. once per epoch), so the schedule length
# and the peak learning rate are read from the shared config instead of repeating the
# literals 30 and 0.001, which would silently go stale if Args were edited.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=args.lr,
    total_steps=args.epochs,
    anneal_strategy='cos',
)

print(model)

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 40, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(40, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        40, 40, kernel_size=(3, 3), stride=[1, 1], groups=40, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(40, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        40, 10, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        10, 40, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        40, 24, kernel_siz

In [54]:
# Function that trains the model for one epoch

def train(model, train_loader, optimizer, log_interval):
    """Run one pass over `train_loader`, updating `model` after every batch.

    Besides its parameters, this function reads four names that must exist at module level
    when it is called: `DEVICE`, `criterion`, `scheduler`, and `epoch` (used only in the
    progress message).

    Args:
        model: network to optimise; it is switched to training mode first.
        train_loader: iterable of (image, label) batches that also supports len() and
            exposes a `.dataset` attribute (both are used for the progress message).
        optimizer: optimizer whose gradients are zeroed before, and whose step() is called
            after, each backward pass.
        log_interval: a progress line is printed whenever the batch index is a multiple
            of this value.

    Returns:
        None.
    """
    model.train()
    current_lr = optimizer.param_groups[0]['lr']
    print(current_lr)

    for batch_idx, (images, labels) in enumerate(train_loader):
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval != 0:
            continue

        progress_pct = 100. * batch_idx / len(train_loader)
        print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}".format(
            epoch, batch_idx * len(images),
            len(train_loader.dataset), progress_pct,
            loss.item()))

    # Advance the learning-rate scheduler once per call.
    scheduler.step()

In [55]:
# Function that measures loss and accuracy of the model on a data loader

def evaluate(model, test_loader):
    """Compute the loss and accuracy of `model` over `test_loader` without tracking gradients.

    Reads the module-level names `DEVICE` and `criterion`.

    Args:
        model: network to evaluate; it is switched to evaluation mode first.
        test_loader: iterable of (image, label) batches that also supports len() and
            exposes a `.dataset` attribute.

    Returns:
        A `(test_loss, test_accuracy)` tuple: the sum of the per-batch criterion values
        divided by the number of batches, and the percentage of correct predictions
        relative to `len(test_loader.dataset)`.
    """
    model.eval()
    loss_sum = 0
    num_correct = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            loss_sum += criterion(outputs, labels).item()
            # Index of the largest value along dim 1, kept as a column so that the labels
            # can be reshaped to match it.
            predicted = outputs.max(1, keepdim=True)[1]
            num_correct += predicted.eq(labels.view_as(predicted)).sum().item()

    test_loss = loss_sum / len(test_loader)
    test_accuracy = 100. * num_correct / len(test_loader.dataset)

    return test_loss, test_accuracy

In [56]:
# Train for args.epochs epochs; after each epoch evaluate on the test loader and record
# the resulting (loss, accuracy) pair in `total`.

total = []

# range(1, args.epochs) stopped one epoch short of the configured count (29 instead of 30);
# the upper bound is now inclusive so the epoch numbers run from 1 to args.epochs.
# `epoch` has to stay a module-level name because train() reads it for its progress message.
for epoch in range(1, args.epochs + 1):
    train(model, train_loader, optimizer, log_interval=200)
    test_loss, test_accuracy = evaluate(model, test_loader)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
        epoch, test_loss, test_accuracy))

    total.append((test_loss, test_accuracy))

3.9999999999999996e-05

[EPOCH: 1], 	Test Loss: 0.1940, 	Test Accuracy: 97.00 % 

7.653782439458233e-05

[EPOCH: 2], 	Test Loss: 0.0380, 	Test Accuracy: 99.00 % 

0.00018058874503045722

[EPOCH: 3], 	Test Loss: 0.0383, 	Test Accuracy: 99.50 % 

0.00033631195246475686

[EPOCH: 4], 	Test Loss: 0.0827, 	Test Accuracy: 98.50 % 

0.0005200000000000001

[EPOCH: 5], 	Test Loss: 0.3405, 	Test Accuracy: 85.00 % 

0.0007036880475352432

[EPOCH: 6], 	Test Loss: 0.1795, 	Test Accuracy: 95.50 % 

0.0008594112549695428

[EPOCH: 7], 	Test Loss: 0.8230, 	Test Accuracy: 95.50 % 

0.0009634621756054177

[EPOCH: 8], 	Test Loss: 0.2683, 	Test Accuracy: 92.00 % 

0.001

[EPOCH: 9], 	Test Loss: 0.4995, 	Test Accuracy: 88.50 % 

0.0009944154354509117

[EPOCH: 10], 	Test Loss: 0.0521, 	Test Accuracy: 98.00 % 

0.0009777864917474587

[EPOCH: 11], 	Test Loss: 0.0339, 	Test Accuracy: 99.00 % 

0.0009504846320134736

[EPOCH: 12], 	Test Loss: 11.2412, 	Test Accuracy: 76.50 % 

0.0009131197346804487

[EPOCH: 13], 	