In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

from torchvision.models.video import s3d, S3D_Weights

import numpy as np

# custom libraries
from utils import *
from celebdf2 import *
from main import train_s3d

In [None]:
# issues:
# Resolved 1. model is not training
# Resolved 2. when batch size is more than 1, need to restrict the number of frames
# Resolved 3. crop a small amount then resize
# 4. train using adversarial data like black-box attacks (random noise)
# Resolved 5. use interval for frames after clipping
# Resolved 6. add logging & save model
# 7. the whole dataset is not used yet; currently only the testing samples are used
# Resolved 8. save logged results in pth file

# optical flow
# Resolved 1. resize to 256,256
# Resolved 2. limit frames to 10s
# Resolved 3. sample using interval 15 frames per 1s = 150 frames

In [6]:
# # Load the S3D model with pre-trained weights
# model = s3d(weights=S3D_Weights.DEFAULT)
# model.eval()

S3D(
  (features): Sequential(
    (0): TemporalSeparableConv(
      (0): Conv3dNormActivation(
        (0): Conv3d(3, 64, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), bias=False)
        (1): BatchNorm3d(64, eps=0.001, momentum=0.001, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
      (1): Conv3dNormActivation(
        (0): Conv3d(64, 64, kernel_size=(7, 1, 1), stride=(2, 1, 1), padding=(3, 0, 0), bias=False)
        (1): BatchNorm3d(64, eps=0.001, momentum=0.001, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
    (1): MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), dilation=1, ceil_mode=False)
    (2): Conv3dNormActivation(
      (0): Conv3d(64, 64, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
      (1): BatchNorm3d(64, eps=0.001, momentum=0.001, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (3): TemporalSeparableConv(
      (0): Conv3d

### Grid search sample size

In [2]:
# Run configuration for this sample-size trial.
dataset_path = "data"
batch_size = 3
epochs = 1

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# NOTE(review): the number of frames per clip is not set in this cell; the
# recorded output of this run reports an input of [3, 3, 150, 256, 256], so it
# is presumably configured inside train_s3d / the dataset code -- confirm.
# Author's note for this run: 5GB (presumably memory footprint -- confirm).
train_s3d(dataset_path, batch_size, device, epochs)

Input Shape: torch.Size([3, 3, 150, 256, 256])
label Shape: torch.Size([3])
Epoch 1/1
138/138 - Time Taken: 472.832745552063 - train_loss: 0.8223 - train_accuracy: 54.1063% - val_loss: 0.6396 - val_accuracy: 69.2308%


In [3]:
# Run configuration for this sample-size trial.
dataset_path = "data"
batch_size = 3
epochs = 1

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# NOTE(review): the number of frames per clip is not set in this cell; the
# recorded output of this run reports an input of [3, 3, 100, 256, 256], so it
# is presumably configured inside train_s3d / the dataset code -- confirm.
# Author's note for this run: 4GB (presumably memory footprint -- confirm).
train_s3d(dataset_path, batch_size, device, epochs)

Input Shape: torch.Size([3, 3, 100, 256, 256])
label Shape: torch.Size([3])
Epoch 1/1
138/138 - Time Taken: 334.89113879203796 - train_loss: 0.7019 - train_accuracy: 63.5266% - val_loss: 0.6262 - val_accuracy: 66.3462%


In [2]:
# Run configuration for this sample-size trial.
dataset_path = "data"
batch_size = 3
epochs = 1

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# NOTE(review): the number of frames per clip is not set in this cell; the
# recorded output of this run reports an input of [3, 3, 60, 256, 256], so it
# is presumably configured inside train_s3d / the dataset code -- confirm.
# Author's note for this run: 3GB (presumably memory footprint -- confirm).
train_s3d(dataset_path, batch_size, device, epochs)

Input Shape: torch.Size([3, 3, 60, 256, 256])
label Shape: torch.Size([3])
Epoch 1/1
138/138 - Time Taken: 244.2112157344818 - train_loss: 0.7697 - train_accuracy: 61.8357% - val_loss: 0.8672 - val_accuracy: 62.5000%


### Search batch size

In [3]:
# Run configuration for this batch-size trial.
dataset_path = "data"
batch_size = 4
epochs = 1

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# train_s3d's two return values are kept as the training and validation logs.
# Author's note for this run: 6.5GB (presumably memory footprint -- confirm).
train_log, val_log = train_s3d(dataset_path, batch_size, device, epochs)

Training size: 414
Validation size: 104
Input Shape: torch.Size([4, 3, 150, 256, 256])
label Shape: torch.Size([4])
Epoch 1/1
104/104 - Time Taken: 476.6902725696564 - train_loss: 0.6867 - train_accuracy: 60.3865% - val_loss: 0.6363 - val_accuracy: 70.1923%


In [2]:
# Run configuration for this batch-size trial.
dataset_path = "data"
batch_size = 5
epochs = 1

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# train_s3d's two return values are kept as the training and validation logs.
# Author's note for this run: 7.5GB (presumably memory footprint -- confirm).
train_log, val_log = train_s3d(dataset_path, batch_size, device, epochs)

Training size: 414
Validation size: 104
Input Shape: torch.Size([5, 3, 150, 256, 256])
label Shape: torch.Size([5])
Epoch 1/1
83/83 - Time Taken: 474.66077041625977 - train_loss: 0.7348 - train_accuracy: 53.8647% - val_loss: 0.6217 - val_accuracy: 72.1154%


## Final Parameters

### SGD with momentum

In [3]:
# Run configuration for the 10-epoch training run.
dataset_path = "data"
batch_size = 5
epochs = 10

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# NOTE(review): the optimizer is not selected in this cell; presumably it is
# chosen inside train_s3d -- confirm it matches the section heading.
# train_s3d's two return values are kept as the training and validation logs.
# Author's note for this run: 7.5GB (presumably memory footprint -- confirm).
train_log, val_log = train_s3d(dataset_path, batch_size, device, epochs)

Training size: 6011
Validation size: 518
Input Shape: torch.Size([5, 3, 150, 256, 256])
label Shape: torch.Size([5])
Epoch 1/10
1203/1203 - Time Taken: 90.05221639076869 - train_loss: 0.3938 - train_accuracy: 87.8722% - val_loss: 0.7109 - val_accuracy: 65.6371%
Epoch 2/10
1203/1203 - Time Taken: 92.00469410419464 - train_loss: 0.3750 - train_accuracy: 87.9055% - val_loss: 0.7437 - val_accuracy: 66.2162%
Epoch 3/10
1203/1203 - Time Taken: 93.73733503023783 - train_loss: 0.3690 - train_accuracy: 87.8556% - val_loss: 0.7848 - val_accuracy: 65.8301%
Epoch 4/10
1203/1203 - Time Taken: 94.91303302844365 - train_loss: 0.3646 - train_accuracy: 87.7391% - val_loss: 0.8214 - val_accuracy: 65.8301%
Epoch 5/10
1203/1203 - Time Taken: 96.48807940483093 - train_loss: 0.3622 - train_accuracy: 87.7059% - val_loss: 0.8608 - val_accuracy: 65.4440%
Epoch 6/10


KeyboardInterrupt: 

In [3]:
# Inspect the metrics stored in a saved training checkpoint.
path = "s3d_rgb.pth"

# To restore the model and optimizer as well, rebuild them first (the final
# layer is replaced with a new one sized for the appropriate number of
# classes), then uncomment the load_state_dict calls further down:
# model = s3d(weights=S3D_Weights.DEFAULT)
# model.classifier[1] = nn.Conv3d(1024, 2, kernel_size=1, stride=1)
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)

# map_location="cpu" lets a checkpoint saved from a CUDA run be read on a
# machine without a GPU, and avoids placing tensors on the GPU when only the
# logged metrics are needed. weights_only=True limits what the unpickler may
# construct while loading the file.
checkpoint = torch.load(path, map_location="cpu", weights_only=True)
# model.load_state_dict(checkpoint['model_state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Logged results stored under the 'train' and 'val' keys; presumably one
# (loss, accuracy) tuple per epoch -- confirm against train_s3d.
train_log = checkpoint['train']
val_log = checkpoint['val']

print(train_log)
print(val_log)

[(0.6998144680836115, 0.6280193236714976)]
[(0.7193568689482552, 0.4326923076923077)]


### Adam

In [4]:
# Run configuration for this single-epoch trial.
dataset_path = "data"
batch_size = 5
epochs = 1

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# NOTE(review): the optimizer is not selected in this cell; presumably it is
# chosen inside train_s3d -- confirm it matches the section heading.
# train_s3d's two return values are kept as the training and validation logs.
# Author's note for this run: 7.5GB (presumably memory footprint -- confirm).
train_log, val_log = train_s3d(dataset_path, batch_size, device, epochs)

Training size: 414
Validation size: 104
Input Shape: torch.Size([5, 3, 150, 256, 256])
label Shape: torch.Size([5])
Epoch 1/1
83/83 - Time Taken: 7.870128365357717 - train_loss: 0.7115 - train_accuracy: 62.5604% - val_loss: 0.6736 - val_accuracy: 63.4615%
