Colab imports


In [None]:
# Colab only: mount the user's Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%cd drive/MyDrive/work_with_ML/cv_course/

/content/drive/MyDrive/work_with_ML/cv_course


# Exp 3
## Resnet(2+1)D

Проведем обучение модели в Google Colab. За основу возьмем модель ResNet (2+1)D без предобучения.

In [None]:
import cv2
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torchvision
from torch.utils.data import Dataset
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# One entry per class directory. Sorted so that the class-name -> integer-id mapping
# does not depend on the filesystem's listing order (it must stay stable between
# sessions for saved checkpoints to remain interpretable).
labels = sorted(glob.glob('DATA/train/*'))

In [None]:
# Keep only the class directory name. basename() does not depend on the length of
# the 'DATA/train/' prefix and is idempotent, so re-running this cell is harmless.
labels = [os.path.basename(el) for el in labels]

In [None]:
# Show the class names; an entry's position in this list is the integer class id
# that CustomVideoDataset assigns to clips of that class.
labels

['dancing charleston',
 'dancing gangnam style',
 'dancing ballet',
 'tango dancing']

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing at the first `.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class CustomVideoDataset(Dataset):
    """Video clips stored on disk as ``DATA/<split>/<class name>/*.mp4``.

    Each item is a fixed-length clip of evenly spaced frames, scaled to [0, 1],
    together with the integer id of its class (its position in ``labels``).

    Args:
        labels: sequence of class names; index in the sequence is the class id.
        transform: optional callable applied to the sampled frame array.
        target_transform: optional callable applied to the integer label.
        train_flag: read ``DATA/train`` when True, ``DATA/val`` otherwise.
        n_frames: number of frames sampled from every video.
        frame_size: ``(width, height)`` every frame is resized to.
    """

    def __init__(self, labels, transform=None, target_transform=None, train_flag=True,
                 n_frames=10, frame_size=(64, 64)):
        self.labels = labels
        split = 'train' if train_flag else 'val'
        # sorted(): deterministic sample order regardless of filesystem listing order.
        self.paths = sorted(glob.glob(f'DATA/{split}/*/*.mp4'))
        self.transform = transform
        self.target_transform = target_transform
        self.n_frames = n_frames
        self.frame_size = tuple(frame_size)

    def __len__(self):
        """Number of video files found for the selected split."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Return ``(clip, label)`` for the video at position ``idx``.

        The frame array from ``read_img`` has its last axis moved to position 1,
        i.e. (frames, H, W, channels) -> (frames, channels, H, W).

        Raises:
            ValueError: if the video has no readable frames or its class cannot
                be resolved from ``labels``.
        """
        path = self.paths[idx]
        image = self.read_img(path) / 255
        label = self._label_index(path)
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return np.moveaxis(image, -1, 1), label

    def _label_index(self, path):
        """Map a video path to its class id.

        The name of the directory containing the file is matched exactly first;
        plain substring matching would confuse classes whose names contain one
        another (e.g. 'dancing' vs 'tango dancing').
        """
        names = list(self.labels)
        class_dir = os.path.basename(os.path.dirname(path))
        if class_dir in names:
            return names.index(class_dir)
        # Legacy behaviour for label lists that are not directory names:
        # substring match, the last matching label wins.
        matches = [i for i, name in enumerate(names) if name in path]
        if not matches:
            raise ValueError(f'Cannot resolve a class label for {path!r}')
        return matches[-1]

    @staticmethod
    def _sample_indices(n_total, n_samples):
        """Indices of ``n_samples`` evenly spaced frames out of ``n_total``.

        int64 is required here: an 8-bit index overflows for any video longer
        than 128 frames and silently selects the wrong frames.
        """
        return np.linspace(0, n_total - 1, n_samples).astype(np.int64)

    def read_img(self, path):
        """Decode the video at ``path`` and return ``n_frames`` resized frames.

        Returns:
            Array of shape (n_frames, height, width, channels) holding the raw
            pixel values as decoded by OpenCV.

        Raises:
            ValueError: if no frame could be decoded from the file.
        """
        cap = cv2.VideoCapture(path)
        frames = []
        try:
            while cap.isOpened():
                ret, img = cap.read()
                if not ret:
                    break
                frames.append(cv2.resize(img, self.frame_size, interpolation=cv2.INTER_LINEAR))
        finally:
            # Always free the decoder handle, even if decoding/resizing fails.
            cap.release()
        if not frames:
            raise ValueError(f'No frames could be read from {path!r}')
        return np.asarray(frames)[self._sample_indices(len(frames), self.n_frames)]

In [None]:
# Training split (train_flag defaults to True); batches are reshuffled every epoch.
train_ds = CustomVideoDataset(labels)
train_loader = torch.utils.data.DataLoader(train_ds, shuffle=True, batch_size=32)

# Validation split; iterated in a fixed order.
test_ds = CustomVideoDataset(labels, train_flag=False)
test_loader = torch.utils.data.DataLoader(test_ds, shuffle=False, batch_size=32)

In [None]:
# R(2+1)D-18 video backbone created without pretrained weights (pretrained=False).
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=None` — confirm the installed version before switching.
resnet = torchvision.models.video.r2plus1d_18(pretrained=False, progress=True)



In [None]:
# Add a linear layer on top of the ResNet backbone to classify the 4 classes
class Resnet_model(torch.nn.Module):
    """Backbone followed by a linear classification head.

    ``forward`` returns raw class scores (logits). torch.nn.CrossEntropyLoss
    applies log-softmax itself, so feeding it softmax outputs squashes the loss
    into a narrow band around ln(num_classes) and weakens the gradients.
    Use ``predict_proba`` when normalised probabilities are needed.

    Args:
        resnet: backbone module producing ``backbone_features`` values per sample.
        num_classes: number of output classes.
        backbone_features: width of the backbone output; 400 is presumably the
            default output size of torchvision's r2plus1d_18 — verify if the
            backbone is swapped.
    """

    def __init__(self, resnet, num_classes=4, backbone_features=400):
        super(Resnet_model, self).__init__()
        self.resnet = resnet
        self.lin = torch.nn.Linear(backbone_features, num_classes)
        # Parameter-free; kept as an attribute for predict_proba().
        self.sm = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Return logits of shape (batch, num_classes)."""
        x = self.resnet(x)
        return self.lin(x)

    def predict_proba(self, x):
        """Return class probabilities (softmax over the logits)."""
        return self.sm(self.forward(x))


In [None]:
# Wrap the backbone with the classification head defined in Resnet_model.
resnet_model = Resnet_model(resnet)

In [None]:
# Multi-class cross-entropy. NOTE: torch.nn.CrossEntropyLoss applies log-softmax
# internally, so it expects raw, unnormalised scores (logits) as input.
criterion = torch.nn.CrossEntropyLoss()
# Adam over every parameter of the wrapped model (backbone and head), lr = 0.005.
optimizer = torch.optim.Adam(resnet_model.parameters(), lr=0.005)

In [None]:
# Train loop
N_EPOCHS = 8
MAX_PATIENCE = 4
BEST_MODEL_PATH = 'models/resnet_model_best.pth'


def _run_epoch(model, loader, criterion, device, optimizer=None, show_progress=False):
    """Run one pass over ``loader`` and return ``(loss, accuracy, weighted F1)``.

    With an ``optimizer`` the model is put in train mode and updated after every
    batch; without one it is put in eval mode and gradients are disabled.

    Loss is averaged per sample and the metrics are computed once over all
    predictions of the epoch. Averaging per-batch scores over-weights a smaller
    last batch, and a mean of per-batch F1 values is not the F1 of the epoch.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)
    total_loss = 0.0
    n_samples = 0
    y_true, y_pred = [], []
    batches = tqdm(loader) if show_progress else loader
    with torch.set_grad_enabled(training):
        for clips, targets in batches:
            # Swap axes 1 and 2 of the batch: the dataset yields
            # (batch, frames, channels, H, W), the video model is fed
            # (batch, channels, frames, H, W).
            clips = clips.float().moveaxis(2, 1).to(device)
            if training:
                optimizer.zero_grad()
            scores = model(clips)
            loss = criterion(scores, targets.to(device))
            if training:
                loss.backward()
                optimizer.step()
            batch_size = targets.size(0)
            # The criterion returns the batch mean; undo it to weight by sample.
            total_loss += loss.item() * batch_size
            n_samples += batch_size
            y_true.append(targets.cpu())
            y_pred.append(scores.detach().argmax(dim=1).cpu())
    if n_samples == 0:
        raise ValueError('The data loader produced no samples')
    y_true = torch.cat(y_true).numpy()
    y_pred = torch.cat(y_pred).numpy()
    return (total_loss / n_samples,
            accuracy_score(y_true, y_pred),
            f1_score(y_true, y_pred, average='weighted'))


print("Start training")
loss_hist = []
val_loss_hist = []
patience = 0
best_f1 = float('-inf')
# torch.save does not create missing directories.
os.makedirs(os.path.dirname(BEST_MODEL_PATH), exist_ok=True)
resnet_model.to(device)

for epoch in range(N_EPOCHS):
    print('_____________________')
    print('EPOCH: ', epoch+1)
    train_loss, train_acc, train_f1 = _run_epoch(
        resnet_model, train_loader, criterion, device,
        optimizer=optimizer, show_progress=True)
    loss_hist.append(train_loss)

    # show results per epoch
    print('train_loss = ', train_loss)
    print(f'Train: accuracy = {train_acc}, F1 = {train_f1}')

    # validating
    val_loss, test_acc, test_f1 = _run_epoch(resnet_model, test_loader, criterion, device)
    val_loss_hist.append(val_loss)

    # show val results per epoch
    print('val_loss = ', val_loss)
    print(f'Test: accuracy = {test_acc}, F1 = {test_f1}')
    print('\n')

    # save best model — done BEFORE the early-stop check so that the final epoch's
    # result is never lost, and also when the very first epoch is the best one.
    if test_f1 > best_f1:
        best_f1 = test_f1
        torch.save(resnet_model, BEST_MODEL_PATH)

    # early stop: after a warm-up of 4 epochs, count consecutive validation-loss increases
    # NOTE(review): with 8 epochs and a 4-epoch warm-up, a patience of 4 can only be
    # reached on the final epoch — lower MAX_PATIENCE or raise N_EPOCHS for it to matter.
    if epoch > 3:
        if val_loss_hist[-1] > val_loss_hist[-2]:
            patience += 1
        else:
            patience = 0
    if patience == MAX_PATIENCE:
        break
print('Finish! Num epochs: ', epoch+1)

Start training
_____________________
EPOCH:  1


100%|██████████| 118/118 [1:00:46<00:00, 30.91s/it]


train_loss =  1.4028598110554582
Train: accuracy = 0.26868782240235817, F1 = 0.17398236880684279
val_loss =  1.3992339372634888
Test: accuracy = 0.25, F1 = 0.30644146946795664


_____________________
EPOCH:  2


100%|██████████| 118/118 [30:02<00:00, 15.27s/it]


train_loss =  1.3951922675310555
Train: accuracy = 0.2779453758290346, F1 = 0.20951398577655458
val_loss =  1.3700848647526331
Test: accuracy = 0.3482142857142857, F1 = 0.43402607468555127


_____________________
EPOCH:  3


100%|██████████| 118/118 [30:19<00:00, 15.42s/it]


train_loss =  1.3794419745267448
Train: accuracy = 0.3143422991893884, F1 = 0.2513599617069062
val_loss =  1.3533661024911063
Test: accuracy = 0.2857142857142857, F1 = 0.3715336461057241


_____________________
EPOCH:  4


 25%|██▌       | 30/118 [07:30<21:37, 14.74s/it]

In [None]:
# Persist the model in its current (last-trained) state. The whole module object is
# pickled, not just a state_dict. The 'models/' directory must already exist.
torch.save(resnet_model, 'models/resnet_model_last.pth')