In [37]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms.v2 as transforms
from torchvision.models.video import r2plus1d_18, R2Plus1D_18_Weights
from os import path
import torchinfo

from train_model import TrainModel
from video_tensor_dataset import VideoTensorDataset

BATCH_SIZE = 3
NUM_EPOCHS = 10
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device {DEVICE}")

Using device cuda


In [38]:
MY_NAME_IS = 'Nitzan'

DATA_FOLDER_DICT = {
    'Victor': ['E:\DeepFakeDetection\dfdc_train_all','E:\DeepFakeDetection\smalldata'],
    'Nitzan': ['D:\dfdc','D:\dfdc_small5'],
    'Netanel':['F:\input','F:\input']
  }

BIG_DATA_FOLDER, SMALL_DATA_FOLDER = DATA_FOLDER_DICT[MY_NAME_IS]

TRAIN_PARTS = [1, 2, 3, 4, 5, 6, 7, 8, 9]
VALIDATION_PARTS = [10, 11, 12]

In [39]:
model = r2plus1d_18(weights=R2Plus1D_18_Weights.DEFAULT)

In [40]:
# Replace head

model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 128, device=DEVICE),
    nn.ReLU(),
    nn.Linear(128, 32, device=DEVICE),
    nn.ReLU(),
    nn.Linear(32, 1, device=DEVICE),
)

# Freeze all layers except the new head
for key, params in model.named_parameters():
    if not (key.startswith('fc') or key.startswith('layer4')):
        params.requires_grad = False
    # print(key, params.requires_grad)

model.to(device=DEVICE)
torchinfo.summary(model, input_size=(BATCH_SIZE, 3, 16, 112, 112))



Layer (type:depth-idx)                        Output Shape              Param #
VideoResNet                                   [3, 1]                    --
├─R2Plus1dStem: 1-1                           [3, 64, 16, 56, 56]       --
│    └─Conv3d: 2-1                            [3, 45, 16, 56, 56]       (6,615)
│    └─BatchNorm3d: 2-2                       [3, 45, 16, 56, 56]       (90)
│    └─ReLU: 2-3                              [3, 45, 16, 56, 56]       --
│    └─Conv3d: 2-4                            [3, 64, 16, 56, 56]       (8,640)
│    └─BatchNorm3d: 2-5                       [3, 64, 16, 56, 56]       (128)
│    └─ReLU: 2-6                              [3, 64, 16, 56, 56]       --
├─Sequential: 1-2                             [3, 64, 16, 56, 56]       --
│    └─BasicBlock: 2-7                        [3, 64, 16, 56, 56]       --
│    │    └─Sequential: 3-1                   [3, 64, 16, 56, 56]       (111,008)
│    │    └─Sequential: 3-2                   [3, 64, 16, 56, 56]       (

In [41]:
# Print number of learnable parameters
num_learnable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Number of learnable parameters: {num_learnable_params}")

Number of learnable parameters: 23565349


In [42]:
# Taken from https://github.com/pytorch/vision/tree/main/references/video_classification
mean = torch.tensor([0.43216, 0.394666, 0.37645], device=DEVICE)[None, None, None, :]
std = torch.tensor([0.22803, 0.22145, 0.216989], device=DEVICE)[None, None, None, :]

# Initialize train and validation datasets
# TODO videodataset root paths and transforms - this depends on Victor's offline preprocessing 
train_roots = [path.join(SMALL_DATA_FOLDER, str(i)) for i in TRAIN_PARTS]
train_transform = transforms.Compose([
    transforms.ToImage(),
    transforms.ToDtype(torch.float32),
    transforms.Lambda(lambda video: torch.narrow(video, 0, 0, 95)),
    transforms.Lambda(lambda video: (video - mean) / std),
    # video is in shape (T, H, W, C), we need to permute it to (C, T, H, W)
    transforms.Lambda(lambda video: video.permute(3, 0, 1, 2)),
])

train_ds = VideoTensorDataset(
  original_data_path=BIG_DATA_FOLDER,
  device=DEVICE, 
  tensor_data_paths=train_roots, 
  transform=train_transform
)

validation_roots = [path.join(SMALL_DATA_FOLDER, str(i)) for i in VALIDATION_PARTS]
validation_transform = train_transform

validation_ds = VideoTensorDataset(
  original_data_path=BIG_DATA_FOLDER, 
  device=DEVICE,
  tensor_data_paths=validation_roots, 
  transform=validation_transform
)

# Initialize dataloaders

train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
validation_dl = DataLoader(validation_ds, batch_size=BATCH_SIZE, shuffle=False)

In [43]:
batch, labels = next(iter(train_dl))
print(batch.shape, labels.shape)

torch.Size([3, 3, 95, 112, 112]) torch.Size([3])


In [44]:
from torchvision.ops import sigmoid_focal_loss
from torchmetrics.classification import BinaryMatthewsCorrCoef

# optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

optimizer = torch.optim.SGD([
    { 'params': model.fc.parameters(), 'lr': 8e-4 },
    { 'params': model.layer4.parameters(), 'lr': 1e-4 },
], lr=3e-4)

epoch_size = len(train_dl) // BATCH_SIZE
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, mode='triangular2', base_lr=3e-4, max_lr=8e-3, step_size_up=2*epoch_size)

bce = nn.BCEWithLogitsLoss(reduction='mean', pos_weight=torch.tensor([0.2], device=DEVICE))
def loss_fn(y_pred, y_true):
    # return sigmoid_focal_loss(y_pred, torch.unsqueeze(y_true, 1), gamma=2, alpha=0.161, reduction='mean')
    return bce(y_pred, torch.unsqueeze(y_true, 1))


bmcc = BinaryMatthewsCorrCoef().to(device=DEVICE)
def score_fn(y_pred, y_true): 
    return bmcc(y_pred, torch.unsqueeze(y_true, 1))

In [45]:
# Train the model!

model, train_loss, train_score, validation_loss, validation_score, _ = TrainModel(
  model, 
  train_dl, 
  validation_dl, 
  optimizer, 
  NUM_EPOCHS, 
  loss_fn, 
  score_fn,
)

Epoch    1 / 10 | Train Loss:  0.152 | Val Loss:  0.373 | Train Score: -0.088 | Val Score:  0.464 | Epoch Time: 620.15 | <-- Checkpoint! |
Epoch    2 / 10 | Train Loss:  0.143 | Val Loss:  0.416 | Train Score:  0.886 | Val Score:  0.464 | Epoch Time: 843.25 |
Val - Iteration: 273 / 318, loss: 0.05728759