In [1]:
import copy
import os
import random
import re

import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics import accuracy_score
from torch import nn
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from tqdm import tqdm

from src.model import Resnt18Rnn, Identity
from src.prediction import predict_label
import wandb

def seed_everything(seed=1234):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
seed_everything(2023)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
wandb.init(
    project='video_classification',
    name=f'CnnResnet_exp1',
    config={
        "epochs": 10,
        "batch_size": 4,
        "timestamps": 4,
    },
)

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/dmitry/.netrc


## Dataset

In [3]:
class VideoDataset(Dataset):
    def __init__(self, path, timestamps, transform):   
        self.path = path
        self.transform = transform
        self.timestamps = timestamps

        self.frames_path = []
        for label in ['0','1']:
            label_path = os.path.join(os.path.join(self.path, label), 'images')
            folders = [i for i in os.listdir(label_path) if '.DS' not in i]
            for folder in folders:
                folder_path = os.path.join(label_path, folder)
                folder_frames = [os.path.join(folder_path, i) for i in os.listdir(folder_path)]
                folder_frames.sort(key=self.__natural_keys__)
                self.frames_path.extend(folder_frames)
        
    def __atoi__(self, text):
        return int(text) if text.isdigit() else text

    def __natural_keys__(self, text):
        return [self.__atoi__(c) for c in re.split(r'(\d+)', text)]

    def __len__(self):
        return int(len(self.frames_path) / self.timestamps)

    def __getitem__(self, idx):
        path2imgs = self.frames_path[idx*self.timestamps:(idx+1)*self.timestamps]
        label = path2imgs[1].split('/')[-4]

        frames = []
        for img_path in path2imgs:
            frame = Image.open(img_path)
            frames.append(frame)
        
        for i, frame in enumerate(frames):
            frame = self.transform(frame)
            frames[i] = frame
        if len(frames)>0:
            frames = torch.stack(frames)
        return frames, int(label)

In [4]:
# transform params
h, w = 224, 224
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

In [5]:
train_transforms = transforms.Compose([
            transforms.Resize((h,w)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomAffine(degrees=0, translate=(0.1,0.1)),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
            ])

In [6]:
train_path = '/Users/dmitry/Desktop/cv_itmo/video_classification/data/train/'
val_path = '/Users/dmitry/Desktop/cv_itmo/video_classification/data/val/'

train_ds = VideoDataset(path=train_path, timestamps=4, transform=train_transforms)
val_ds = VideoDataset(path=val_path, timestamps=4, transform=train_transforms)

In [7]:
val_ds.__getitem__(100)[0].shape

torch.Size([4, 3, 224, 224])

In [8]:
def collate_fn_rnn(batch):
    imgs_batch, label_batch = list(zip(*batch))
    imgs_batch = [imgs for imgs in imgs_batch if len(imgs)>0]
    label_batch = [torch.tensor(l) for l, imgs in zip(label_batch, imgs_batch) if len(imgs)>0]
    imgs_tensor = torch.stack(imgs_batch)
    labels_tensor = torch.stack(label_batch)
    return imgs_tensor, labels_tensor

In [9]:
train_dataloader = DataLoader(train_ds, batch_size=4, shuffle=True, collate_fn=collate_fn_rnn)
val_dataloader = DataLoader(val_ds, batch_size=4, shuffle=False, collate_fn=collate_fn_rnn)

## Model

In [11]:
num_classes = 2
params_model={
    "num_classes": num_classes,
    "dr_rate": 0.1,
    "weights" : 'ResNet18_Weights.IMAGENET1K_V1',
    "rnn_num_layers": 1,
    "rnn_hidden_size": 100,}
model = Resnt18Rnn(params_model)

In [12]:
x,y = next(iter(train_dataloader))
x.shape, y.shape

(torch.Size([4, 4, 3, 224, 224]), torch.Size([4]))

In [14]:
with torch.no_grad():
    y_pred = model(x)

In [15]:
print(y_pred)
torch.argmax(y_pred,dim=1), y

tensor([[ 1.5605e-01, -1.2749e-01],
        [ 1.6692e-01,  3.1535e-02],
        [ 9.9931e-02, -2.8290e-05],
        [ 2.5767e-01, -1.2400e-01]])


(tensor([0, 0, 0, 0]), tensor([0, 1, 1, 1]))

In [16]:
EPOCHS = 10
DEVICE = 'cpu'
EXP_PATH = 'experiments/exp1'
criterion = nn.CrossEntropyLoss(reduction="sum")
optimizer = torch.optim.Adam(model.parameters(), lr=3e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min',factor=0.5, patience=5,verbose=1)

In [17]:
def train_model():
    best_model = None
    best_acc = 0
    best_epoch = 0
    
    checkpoint_save_path = f'{EXP_PATH}/checkpoints'
    os.makedirs(checkpoint_save_path,exist_ok=True)
    
    for epoch in range(EPOCHS):

        model.train()
        train_loss = 0
        train_labels, train_preds = [], []

        for step, batch in tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc=f'Epoch: {epoch}'):
            x_train, y_train = batch
            x_train = x_train.to(DEVICE)
            y_train = y_train.to(DEVICE)

            optimizer.zero_grad()
            
            preds = model(x_train)

            loss = criterion(preds, y_train)

            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()

            preds = preds.argmax(dim=1)
            train_labels.extend(y_train.numpy())
            train_preds.extend(preds.numpy())

        train_loss = train_loss / len(train_dataloader)
        train_acc = accuracy_score(train_labels, train_preds)
        print("Train Loss: {0:.5f}".format(train_loss))
        print("Train Accuracy: {0:.5f}".format(train_acc))

        
        model.eval()
        val_labels, val_preds = [], []
        with torch.no_grad():
            for step, batch in tqdm(enumerate(val_dataloader), total=len(val_dataloader), desc=f'Epoch: {epoch}'):
                x_val, y_val = batch
                x_val = x_val.to(DEVICE)
                y_val = y_val.to(DEVICE)
                
                preds = model(x_val)

                preds = preds.argmax(dim=1)
                val_labels.extend(y_val.numpy())
                val_preds.extend(preds.numpy())

        val_acc = accuracy_score(val_labels, val_preds)
        print("Val Accuracy: {0:.5f}".format(val_acc))

        if val_acc > best_acc:
            best_model = copy.deepcopy(model)
            best_acc = val_acc
            best_epoch = epoch
            torch.save(best_model, f'{checkpoint_save_path}/checkpoint_{best_acc}_{best_epoch}ep.pth')
        
        scheduler.step(loss)

        wandb.log({"Train/Loss" : train_loss}, step=epoch)
        wandb.log({"Train/Accuracy" : train_acc}, step=epoch)
        wandb.log({"Val/Accuracy" : val_acc}, step=epoch)

In [19]:
train_model()

Epoch: 0: 100%|██████████| 1204/1204 [15:06<00:00,  1.33it/s]


Train Loss: 1.64340
Train Accuracy: 0.82360


Epoch: 0: 100%|██████████| 230/230 [01:08<00:00,  3.37it/s]


Val Accuracy: 0.67721


Epoch: 1: 100%|██████████| 1204/1204 [14:57<00:00,  1.34it/s]


Train Loss: 1.00405
Train Accuracy: 0.90318


Epoch: 1: 100%|██████████| 230/230 [01:08<00:00,  3.36it/s]


Val Accuracy: 0.71210


Epoch: 2: 100%|██████████| 1204/1204 [14:39<00:00,  1.37it/s]


Train Loss: 0.62762
Train Accuracy: 0.94764


Epoch: 2: 100%|██████████| 230/230 [01:05<00:00,  3.49it/s]


Val Accuracy: 0.70883


Epoch: 3: 100%|██████████| 1204/1204 [14:42<00:00,  1.36it/s]


Train Loss: 0.45662
Train Accuracy: 0.95969


Epoch: 3: 100%|██████████| 230/230 [01:09<00:00,  3.32it/s]


Val Accuracy: 0.66848


Epoch: 4: 100%|██████████| 1204/1204 [15:20<00:00,  1.31it/s]


Train Loss: 0.34628
Train Accuracy: 0.97299


Epoch: 4: 100%|██████████| 230/230 [01:10<00:00,  3.25it/s]


Val Accuracy: 0.71101


Epoch: 5: 100%|██████████| 1204/1204 [14:59<00:00,  1.34it/s]


Train Loss: 0.26740
Train Accuracy: 0.97860


Epoch: 5: 100%|██████████| 230/230 [01:09<00:00,  3.33it/s]


Val Accuracy: 0.68920


Epoch: 6: 100%|██████████| 1204/1204 [15:08<00:00,  1.33it/s]


Train Loss: 0.24352
Train Accuracy: 0.98005


Epoch: 6: 100%|██████████| 230/230 [01:12<00:00,  3.18it/s]


Val Accuracy: 0.65213


Epoch: 7: 100%|██████████| 1204/1204 [15:06<00:00,  1.33it/s]


Train Loss: 0.21841
Train Accuracy: 0.98130


Epoch: 7: 100%|██████████| 230/230 [01:08<00:00,  3.37it/s]


Val Accuracy: 0.67285


Epoch: 8: 100%|██████████| 1204/1204 [15:15<00:00,  1.31it/s]


Train Loss: 0.14660
Train Accuracy: 0.98899


Epoch: 8: 100%|██████████| 230/230 [01:09<00:00,  3.30it/s]


Val Accuracy: 0.64558


Epoch: 9: 100%|██████████| 1204/1204 [15:04<00:00,  1.33it/s]


Train Loss: 0.16921
Train Accuracy: 0.98649


Epoch: 9: 100%|██████████| 230/230 [01:11<00:00,  3.22it/s]

Val Accuracy: 0.69575





## Testing

In [None]:
model = torch.load('experiments/exp1/checkpoints/checkpoint_0.7121046892039259_1ep.pth', map_location=DEVICE)
model.eval()

In [20]:
# go throw every class and its every folder to get list of folder paths   
path = 'data/test/'
all_folders = []
for label in ['0','1']:
    label_path = os.path.join(os.path.join(path, label), 'cut_videos/images')
    folders = [os.path.join(label_path, i) for i in os.listdir(label_path) if '.DS' not in i]
    all_folders.extend(folders)
print(all_folders[0])

data/test/0/cut_videos/images/Decorating the Christmas tree 2015


In [21]:
test_transforms = transforms.Compose([
            transforms.Resize((h,w)),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
            ])

In [22]:
timestamps = 4 # take every 4 frames of folder to predict label 
labels, predictions = [], [] # lists to save true labels and predictions

for folder_path in tqdm(all_folders):
    true_label, pred_label = predict_label(model,folder_path, timestamps, test_transforms, inference_mode=False)
    
    labels.append(true_label)
    predictions.append(pred_label) 

100%|██████████| 34/34 [01:24<00:00,  2.48s/it]


In [23]:
accuracy_score(labels, predictions)

0.7941176470588235