In [2]:
import copy
import glob
import os
import random
import re


import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics import accuracy_score
from torch import nn
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from tqdm import tqdm

import wandb

def seed_everything(seed=1234):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
seed_everything(2023)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
wandb.init(
    project='video_classification',
    name=f'frames_resnet18_exp2',
    config={
        "epochs": 10,
        "batch_size": 4,
        #"timestamps": 4,
    },
)

[34m[1mwandb[0m: Currently logged in as: [33mdmitryai[0m ([33mcv-itmo[0m). Use [1m`wandb login --relogin`[0m to force relogin


## Dataset

In [4]:
class VideoDataset(Dataset):
    def __init__(self, path, transform):   
        self.path = path
        self.transform = transform

        if self.path[-1] == '/':
            self.path = self.path[:-1]
        self.frames_path = glob.glob(f'{path}/*/*/*/*.jpg')

    def __len__(self):
        return len(self.frames_path)

    def __getitem__(self, idx):
        img_path = self.frames_path[idx]
        label = img_path.split('/')[-4]

        image = Image.open(img_path)
        
        if self.transform:
            image = self.transform(image)

        return image, int(label)

In [5]:
h, w = 224, 224
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

In [6]:
train_transforms = transforms.Compose([
            transforms.Resize((h,w)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomAffine(degrees=0, translate=(0.1,0.1)),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
            ])

In [7]:
train_path = '/Users/dmitry/Desktop/cv_itmo/kinetics_video_classification/data/train/'
val_path = '/Users/dmitry/Desktop/cv_itmo/kinetics_video_classification/data/val/'

train_ds = VideoDataset(path=train_path, transform=train_transforms)
val_ds = VideoDataset(path=val_path, transform=train_transforms)

In [8]:
val_ds.__getitem__(100)[0].shape

torch.Size([3, 224, 224])

In [9]:
train_dataloader = DataLoader(train_ds, batch_size=4, shuffle=True)
val_dataloader = DataLoader(val_ds, batch_size=4, shuffle=False)

## Model

In [10]:
num_classes = 2

model = models.resnet18(weights = 'ResNet18_Weights.IMAGENET1K_V1')
in_features = model.fc.in_features 
model.fc = nn.Linear(in_features, num_classes)

In [11]:
x,y = next(iter(train_dataloader))
x.shape, y.shape

(torch.Size([4, 3, 224, 224]), torch.Size([4]))

In [12]:
with torch.no_grad():
    y_pred = model(x)

In [13]:
print(y_pred)
torch.argmax(y_pred,dim=1), y

tensor([[-0.5900,  0.2168],
        [ 0.3127,  0.1464],
        [-0.2468,  0.1933],
        [-0.1194, -0.8725]])


(tensor([1, 0, 1, 0]), tensor([0, 1, 1, 1]))

In [14]:
EPOCHS = 10
DEVICE = 'cpu'
EXP_PATH = 'experiments/exp2'
criterion = nn.CrossEntropyLoss(reduction="sum")
optimizer = torch.optim.Adam(model.parameters(), lr=3e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min',factor=0.5, patience=5,verbose=1)

In [15]:
def train_model():
    best_model = None
    best_acc = 0
    best_epoch = 0
    
    checkpoint_save_path = f'{EXP_PATH}/checkpoints'
    os.makedirs(checkpoint_save_path,exist_ok=True)
    
    for epoch in range(EPOCHS):

        model.train()
        train_loss = 0
        train_labels, train_preds = [], []

        for step, batch in tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc=f'Epoch: {epoch}'):
            x_train, y_train = batch
            x_train = x_train.to(DEVICE)
            y_train = y_train.to(DEVICE)

            optimizer.zero_grad()
            
            preds = model(x_train)

            loss = criterion(preds, y_train)

            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()

            preds = preds.argmax(dim=1)
            train_labels.extend(y_train.numpy())
            train_preds.extend(preds.numpy())

        train_loss = train_loss / len(train_dataloader)
        train_acc = accuracy_score(train_labels, train_preds)
        print("Train Loss: {0:.5f}".format(train_loss))
        print("Train Accuracy: {0:.5f}".format(train_acc))

        
        model.eval()
        val_labels, val_preds = [], []
        with torch.no_grad():
            for step, batch in tqdm(enumerate(val_dataloader), total=len(val_dataloader), desc=f'Epoch: {epoch}'):
                x_val, y_val = batch
                x_val = x_val.to(DEVICE)
                y_val = y_val.to(DEVICE)
                
                preds = model(x_val)

                preds = preds.argmax(dim=1)
                val_labels.extend(y_val.numpy())
                val_preds.extend(preds.numpy())

        val_acc = accuracy_score(val_labels, val_preds)
        print("Val Accuracy: {0:.5f}".format(val_acc))

        if val_acc > best_acc:
            best_model = copy.deepcopy(model)
            best_acc = val_acc
            best_epoch = epoch
            torch.save(best_model, f'{checkpoint_save_path}/checkpoint_{best_acc}_{best_epoch}ep.pth')
        
        scheduler.step(loss)

        wandb.log({"Train/Loss" : train_loss}, step=epoch)
        wandb.log({"Train/Accuracy" : train_acc}, step=epoch)
        wandb.log({"Val/Accuracy" : val_acc}, step=epoch)

In [15]:
train_model()

Epoch: 0: 100%|██████████| 4813/4813 [15:08<00:00,  5.30it/s]


Train Loss: 1.23348
Train Accuracy: 0.86822


Epoch: 0: 100%|██████████| 918/918 [01:02<00:00, 14.71it/s]


Val Accuracy: 0.67693


Epoch: 1: 100%|██████████| 4813/4813 [15:19<00:00,  5.23it/s]


Train Loss: 0.86243
Train Accuracy: 0.91196


Epoch: 1: 100%|██████████| 918/918 [00:59<00:00, 15.35it/s]


Val Accuracy: 0.66685


Epoch: 2: 100%|██████████| 4813/4813 [15:16<00:00,  5.25it/s]


Train Loss: 0.61193
Train Accuracy: 0.94208


Epoch: 2: 100%|██████████| 918/918 [01:00<00:00, 15.18it/s]


Val Accuracy: 0.69709


Epoch: 3: 100%|██████████| 4813/4813 [15:23<00:00,  5.21it/s]


Train Loss: 0.33966
Train Accuracy: 0.97081


Epoch: 3: 100%|██████████| 918/918 [00:59<00:00, 15.37it/s]


Val Accuracy: 0.62599


Epoch: 4: 100%|██████████| 4813/4813 [15:08<00:00,  5.30it/s]


Train Loss: 0.20606
Train Accuracy: 0.98343


Epoch: 4: 100%|██████████| 918/918 [01:00<00:00, 15.28it/s]


Val Accuracy: 0.69109


Epoch: 5: 100%|██████████| 4813/4813 [15:29<00:00,  5.18it/s]


Train Loss: 0.16574
Train Accuracy: 0.98624


Epoch: 5: 100%|██████████| 918/918 [01:01<00:00, 14.98it/s]


Val Accuracy: 0.65704


Epoch: 6: 100%|██████████| 4813/4813 [52:13<00:00,  1.54it/s]     


Train Loss: 0.12215
Train Accuracy: 0.98857


Epoch: 6: 100%|██████████| 918/918 [01:00<00:00, 15.14it/s]


Val Accuracy: 0.69327


Epoch: 7: 100%|██████████| 4813/4813 [16:08<00:00,  4.97it/s]


Train Loss: 0.11229
Train Accuracy: 0.99096


Epoch: 7: 100%|██████████| 918/918 [01:05<00:00, 14.06it/s]


Val Accuracy: 0.62926


Epoch: 8: 100%|██████████| 4813/4813 [16:55<00:00,  4.74it/s]


Train Loss: 0.09479
Train Accuracy: 0.99200


Epoch: 8: 100%|██████████| 918/918 [01:05<00:00, 14.11it/s]


Val Accuracy: 0.64696


Epoch: 9:  15%|█▍        | 715/4813 [02:31<14:18,  4.77it/s]wandb: Network error (ReadTimeout), entering retry loop.
Epoch: 9: 100%|██████████| 4813/4813 [17:11<00:00,  4.67it/s]


Train Loss: 0.08483
Train Accuracy: 0.99288


Epoch: 9: 100%|██████████| 918/918 [01:06<00:00, 13.71it/s]

Val Accuracy: 0.61536





## Testing

In [20]:
path = 'data/test/'
folders = glob.glob(f'{path}/*/*/*/*/')
len(folders)

34

In [21]:
h, w = 224, 224
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

test_transforms = transforms.Compose([
            transforms.Resize((h,w)),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
            ])

In [26]:
true_labels = []
predictions = []

for folder in tqdm(folders):
    images = [os.path.join(folder, i) for i in os.listdir(folder)]

    preds = []
    for img_path in images:
        img = Image.open(img_path)
        img_tensor = test_transforms(img)
        img_tensor = torch.unsqueeze(img_tensor,dim=0)
        
        with torch.no_grad():
            pred = model(img_tensor)
        pred_label = torch.argmax(pred, dim=1).item()

        preds.append(pred_label)
    true_labels.append(int(folder.split('/')[2]))
    predictions.append(np.mean(preds))

100%|██████████| 34/34 [01:34<00:00,  2.79s/it]


In [32]:
accuracy_score(true_labels, predictions)

0.8310669456066946