In [1]:
import gc
import torch.utils.data
from utils.data import get_data_split
from utils.models import create_video_ResNet
from utils.trainner import train_silence
from utils.model_performance import plot_losses, get_predictions, calculate_metrics

# Config data

In [2]:
# Training hyper-parameters used by the cells below.
EPOCHS = 100
BATCH_SIZE = 4
TRANSFER_LEARNING_RATE = 3e-3

# Pick the best available accelerator: CUDA first, then Apple MPS, then CPU.
# Availability has to be queried explicitly; the `torch.cuda` module object
# itself is always truthy and says nothing about whether a GPU is present.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
elif torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

# Load data

In [3]:
# Build the train / validation / test splits and the index -> class-name mapping.
# NOTE(review): the returned splits are iterated batch-wise further down, so they
# are presumably DataLoader-like objects — confirm in utils.data.
train_dataset, val_dataset, test_dataset, label_dict = get_data_split(
    base_path="../../../datasets/ucf-101-Sampled-10-clases",
    seq_lenght=64,
    batch_size=BATCH_SIZE,
    num_workers=0,
)

In [4]:
# Display the class-index -> class-name mapping returned by get_data_split.
label_dict

{0: 'BalanceBeam',
 1: 'BaseballPitch',
 2: 'Bowling',
 3: 'Basketball',
 4: 'BandMarching',
 5: 'BabyCrawling',
 6: 'ApplyEyeMakeup',
 7: 'BodyWeightSquats',
 8: 'Archery',
 9: 'ApplyLipstick'}

In [5]:
# Sanity check: pull one batch from the training split and show the shape of its
# first element. NOTE(review): the layout looks like
# (batch, frames, channels, height, width) — confirm against utils.data.
next(iter(train_dataset))[0].shape



torch.Size([4, 64, 3, 240, 320])

# Transfer Learning

In [None]:
# Build the video ResNet with one output per class. `preprocess` is passed on to
# the training / evaluation helpers (presumably the model's input transform —
# confirm in utils.models).
model, preprocess = create_video_ResNet(n_classes=len(label_dict), device=DEVICE)

# Transfer learning: freeze the backbone and train only the classification head.
# `requires_grad_()` flips the flag on every parameter of the module; assigning a
# plain `requires_grad` attribute on an nn.Module would not freeze anything.
model.requires_grad_(False)
model.fc.requires_grad_(True)

criterion = torch.nn.CrossEntropyLoss()
# Give the optimizer only the parameters that are still trainable (the head).
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=TRANSFER_LEARNING_RATE)

train_loss, val_loss = train_silence(
    model=model,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    preprocess=preprocess,
    device=DEVICE,
    criterion=criterion,
    optimizer=optimizer,
    epochs=EPOCHS,
    num_classes=len(label_dict),
)

# Evaluate on the held-out test split.
y_pred, y = get_predictions(model=model, dataloader=test_dataset, preprocess=preprocess, device=DEVICE)

plot_losses(train_loss, val_loss)
# Class names in index order, used as labels for the metrics report.
calculate_metrics(y, y_pred, labels=list(label_dict.values()))


  0%|          | 0/100 [00:00<?, ?it/s]

