# Video Classification on CM

* Here we build a real-time generator of instrument information for classical music concert videos.
* We adopt a video classification approach, implemented in PyTorch, to classify which instrument is currently shown on the screen.
* The audience receives the streaming video with this information overlaid on it.

- - -

In [None]:
# importing required components 
import numpy as np
import pandas as pd
import torch
import os
import time
import json
import urllib
import random
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [None]:
from torch.optim import lr_scheduler
from sklearn.model_selection import train_test_split
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader
from PIL import Image
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from efficientnet_pytorch import EfficientNet

In [None]:
# # importing remaining components
# import json
# import urllib
# from pytorchvideo.data.encoded_video import EncodedVideo

# from torchvision.transforms import Compose, Lambda
# from torchvision.transforms._transforms_video import (
#     CenterCropVideo,
#     NormalizeVideo,
# )
# from pytorchvideo.transforms import (
#     ApplyTransformToKey,
#     ShortSideScale,
#     UniformTemporalSubsample
# )

In [None]:
class Args:
    """Hyper-parameters and run settings shared by the notebook cells."""

    # --- optimisation ---
    epochs = 20
    bs = 6  # mini-batch size
    lr = 0.001
    momentum = 0.9

    # --- data / model shape ---
    num_channels = 3  # RGB input images
    num_classes = 17  # number of labelled instrument classes

    # --- misc ---
    verbose = 'store_true'  # literal string, not a bool
    seed = 710674


args = Args()

# Seed NumPy, the stdlib RNG and PyTorch (in that order) with the same value
for _seed_fn in (np.random.seed, random.seed, torch.manual_seed):
    _seed_fn(args.seed)

In [None]:
# Select the compute device: CUDA when PyTorch can see a GPU, CPU otherwise

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print('Using PyTorch version:', torch.__version__, ' Device: ', DEVICE)

## Data Crawling for model training

* Here we create one folder for each classical instrument that is used in a classical music concert.
* bassdrum / bassoon / cello / clarinet / contrabass / cymbales / flute / horn / oboe / snaredrum / percussion / tamtam / timpani / trombone / trumpet / tuba / viola / violin -- total 17 instruments

### Image extraction from classical music concert video

- - -

In [None]:
import cv2
from glob import glob

We use an orchestra symphony video as the validation data.

In [None]:
## Helpers for dumping sampled video frames to disk as PNG images
def create_dir(path):
    """Create directory ``path`` (and any missing parents) if it is absent.

    Failures are reported on stdout instead of being raised, so callers
    continue even when the directory could not be created.

    Args:
        path: Directory to create.
    """
    try:
        # exist_ok=True replaces the separate exists() check, which could race
        # with another process creating the same directory in between.
        os.makedirs(path, exist_ok=True)
    except OSError:
        print(f"ERROR: creating directory with name {path}")

def save_frame(video_path, save_dir, gap=10):
    """Write every ``gap``-th frame of a video to disk as a PNG.

    Frames are stored as ``<save_dir>/<video stem>/<frame index>.png``. Frame 0
    is always written; after that a frame is written whenever its index is a
    multiple of ``gap``.

    Args:
        video_path: Path of the video file to read.
        save_dir: Directory under which the per-video folder is created.
        gap: Sampling stride in frames.
    """
    # basename/splitext follow the platform's path rules and keep dots that are
    # part of the file stem; manual split("/") / split(".") did neither.
    name = os.path.splitext(os.path.basename(video_path))[0]
    save_path = os.path.join(save_dir, name)
    create_dir(save_path)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Report unreadable inputs instead of silently producing no frames
            print(f"ERROR: opening video {video_path}")
            return

        idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                # No more frames (or a read failure): stop sampling
                break

            if idx == 0 or idx % gap == 0:
                cv2.imwrite(f"{save_path}/{idx}.png", frame)

            idx += 1
    finally:
        # Release the capture handle even if reading or writing raised
        cap.release()

Extracting images

In [None]:
if __name__ == "__main__":
    # Dump one frame out of every 150 for each file found in the INPROCESS folder
    save_dir = "save"
    video_paths = glob("E:/RESEARCH/Datasets/VC/classic/val/INPROCESS/*")

    for video_file in video_paths:
        save_frame(video_file, save_dir, gap=150)

In [None]:
## Close every OpenCV GUI window before moving on to the next step.
## NOTE(review): the visible frame-extraction code opens no window, so this is presumably a safety cleanup — confirm.
cv2.destroyAllWindows()

- - -

## Training model preparation

* Selected classical concert video: 
https://www.youtube.com/watch?v=65nvqmVhZ3g

* Our approach is to train the model on an image classification task and then apply it to video classification.
* It is therefore also possible to use pretrained image models such as ResNet or EfficientNet.

In [None]:
# Load the pretrained `slowfast_r50` model from the pytorchvideo hub repository.
## SlowFast is a video-classification network.
## NOTE(review): the following code cell rebinds `model` to an EfficientNet-b3, so when the cells run in order this model is replaced before any training — confirm this load is still needed.
model = torch.hub.load('facebookresearch/pytorchvideo', 'slowfast_r50', pretrained=True)

In [None]:
# Disabled alternative backbone (ResNet-18):
# model_res = models.resnet18(num_classes=2, pretrained=True)
# Active backbone: pretrained EfficientNet-b3, requested with args.num_classes output classes
model_eff3 = EfficientNet.from_pretrained('efficientnet-b3', num_classes=args.num_classes)
# model = model_res.to(DEVICE)
# Move the network to the selected device; this rebinds the global `model` passed to train()/evaluate() below
model = model_eff3.to(DEVICE)

## Image classification approach

* We build the classification model with an image classification task.
* Then we can adopt the model for video input to figure out which instrument is on the screen.

In [None]:
# Preprocessing / augmentation pipeline applied to every image loaded below
_norm_mean = [0.485, 0.456,0.406]
_norm_std = [0.229, 0.224, 0.225]

_transform_steps = [
    # (disabled) transforms.CenterCrop(1024)
    transforms.Resize(256),
    transforms.RandomResizedCrop(256),
    transforms.RandomHorizontalFlip(),
    # (disabled) transforms.RandomVerticalFlip()
    transforms.ColorJitter(contrast=(0.3, 1), saturation=(0.3, 1)),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]

data_transforms = transforms.Compose(_transform_steps)

In [None]:
# Load the labelled image dataset from disk; every image goes through `data_transforms`
# Disabled alternative (classical-instrument images):
# classic_data = datasets.ImageFolder(root = 'E:/RESEARCH/Datasets/VC/classic/train', transform = data_transforms)
# NOTE(review): the active dataset is the "sports" folder while args.num_classes is commented as the instrument count — confirm the class count matches this folder.
sports_data = datasets.ImageFolder(root = 'E:/RESEARCH/Datasets/VC/sports/data/train', transform = data_transforms)

In [None]:
# 80/20 split sizes; the test share takes whatever the integer truncation leaves over
n_samples = len(sports_data)
train_size = int(0.8 * n_samples)
test_size = n_samples - train_size
print(train_size, test_size, sep="\n")

In [None]:
# Randomly partition the dataset into train/test subsets of the sizes computed above.
# Both subsets wrap `sports_data`, so the test subset also receives the random augmentations in `data_transforms`.
train_dataset, test_dataset = torch.utils.data.random_split(sports_data, [train_size, test_size])

In [None]:
# Batch loaders: only the training loader shuffles; both use 4 worker processes
train_loader = DataLoader(
    train_dataset,
    batch_size=args.bs,
    shuffle=True,
    num_workers=4,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=args.bs,
    shuffle=False,
    num_workers=4,
)

In [None]:
# Pull a single batch from the training loader and print its labels as a sanity check.
# The builtin next() is used because current PyTorch DataLoader iterators no longer
# provide a `.next()` method, so `dataiter.next()` raises AttributeError there.
dataiter = iter(train_loader)
images, labels = next(dataiter)
print(labels)

In [None]:
# Setting Optimizer and Objective Function

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
# train() steps the scheduler once per epoch, so the one-cycle length is tied to
# args.epochs instead of a separate literal 20 that could drift out of sync and
# make OneCycleLR raise once it is stepped more often than total_steps.
# OneCycleLR sets the optimizer's learning rate itself, derived from max_lr.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=0.01, total_steps=args.epochs, anneal_strategy='cos')

# print(model)

In [None]:
# Run one training epoch and periodically log the batch loss

def train(model, train_loader, optimizer, log_interval):
    """Train ``model`` for one pass over ``train_loader``.

    Besides its parameters, this function reads the module-level names
    ``DEVICE``, ``criterion``, ``scheduler`` and ``epoch`` (the caller's loop
    variable, used only in the log line), so those must exist before calling.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Iterable yielding ``(image, label)`` batches.
        optimizer: Optimizer updating ``model``'s parameters.
        log_interval: Print the loss every ``log_interval`` batches.
    """
    model.train()
    current_lr = optimizer.param_groups[0]['lr']
    print(current_lr)

    for step, batch in enumerate(train_loader):
        inputs, targets = batch
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE)

        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()

        if step % log_interval != 0:
            continue

        seen = step * len(inputs)
        total = len(train_loader.dataset)
        percent = 100. * step / len(train_loader)
        print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}".format(
            epoch, seen, total, percent, loss.item()))

    # The learning-rate schedule advances once per epoch, not per batch
    scheduler.step()

In [None]:
# Function for checking model performance during the learning process

def evaluate(model, test_loader):
    """Compute the mean loss and accuracy of ``model`` over ``test_loader``.

    Reads the module-level ``DEVICE`` and ``criterion``. Gradients are
    disabled and the model is put in evaluation mode.

    Args:
        model: Network to evaluate.
        test_loader: Loader yielding ``(image, label)`` batches; must expose
            ``.dataset`` so its sample count can be read.

    Returns:
        Tuple ``(test_loss, validation_accuracy)``: the per-batch criterion
        values averaged over the number of batches, and the percentage of
        correctly classified samples.
    """
    model.eval()
    test_loss = 0.0
    correct = 0

    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(DEVICE)
            label = label.to(DEVICE)
            output = model(image)
            test_loss += criterion(output, label).item()
            # Predicted class = index of the largest score along the class axis
            prediction = output.argmax(dim=1)
            correct += (prediction == label).sum().item()

    # Loss is averaged per batch; accuracy is computed per sample
    test_loss /= len(test_loader)
    validation_accuracy = 100. * correct / len(test_loader.dataset)

    return test_loss, validation_accuracy

In [None]:
# Checking train, val loss and accuracy

total = []  # one (test_loss, validation_accuracy) tuple per epoch

# range(1, args.epochs) stopped one epoch short; the upper bound is now inclusive
# so the loop runs args.epochs epochs. The loop variable must stay named `epoch`
# because train() reads it as a global for its log line.
for epoch in range(1, args.epochs + 1):
    train(model, train_loader, optimizer, log_interval=200)
    test_loss, validation_accuracy = evaluate(model, test_loader)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tValidation Accuracy: {:.2f} % \n".format(
        epoch, test_loss, validation_accuracy))

    total.append((test_loss, validation_accuracy))

## Classification model test

* Model performance test process

In [None]:
# Setting Optimizer and Objective Function
# This rebinds the globals `criterion`, `optimizer` and `scheduler`; train() and
# evaluate() look `criterion`/`scheduler` up by name, so later calls use these new objects.

criterion = nn.CrossEntropyLoss()
# Disabled alternative optimizer (SGD with momentum):
# optimizer = torch.optim.SGD(model.parameters(), lr = args.lr, momentum = args.momentum)
# A fresh Adam instance over the same `model` parameters
optimizer = torch.optim.Adam(model.parameters(), lr = args.lr)
# Disabled alternative schedule (LambdaLR, factor 0.95 ** epoch):
# scheduler = optim.lr_scheduler.LambdaLR(optimizer = optimizer,
#                                        lr_lambda = lambda epoch:0.95 ** epoch,
#                                        last_epoch = -1,
#                                        verbose = False)

# Disabled alternative schedule (one-cycle, linear annealing):
# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, 
#                                                 steps_per_epoch=10, epochs=10,anneal_strategy='linear')

# Active schedule: one-cycle with cosine annealing over 35 scheduler steps (train() steps it once per call)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001, total_steps=35,anneal_strategy='cos')

# print(model)

In [None]:
# data_folder_path = 'E:/RESEARCH/Datasets/VC/classic/val'
data_folder_path = 'E:/RESEARCH/Datasets/VC/sports/data/val'

# Held-out images get deterministic preprocessing (resize + centre crop + the same
# normalisation as training). Reusing the training pipeline here would apply random
# crops, flips and colour jitter, making the reported metrics vary between runs.
eval_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

test_dataset = datasets.ImageFolder(root=data_folder_path, transform=eval_transforms)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
## Model testing
total_evaluation = []  # one (test_loss, test_accuracy) tuple per epoch

# Each iteration trains for one more epoch and then scores the model on the
# validation-folder loader. range(1, 35) ran only 34 epochs; the bound is now
# inclusive so the loop runs 35 epochs. The loop variable must stay named
# `epoch` because train() reads it as a global for its log line.
num_epochs = 35
for epoch in range(1, num_epochs + 1):
    train(model, train_loader, optimizer, log_interval=200)
    test_loss, test_accuracy = evaluate(model, test_dataloader)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
        epoch, test_loss, test_accuracy))

    total_evaluation.append((test_loss, test_accuracy))