In [1]:
import torch

from utils.video_dataset import VideoFrameDataset
from config.torch_config import multiple_transform, device
from config.dataset import get_dataset_path


In [2]:
# Run configuration: everything a reader might want to tune lives in this cell.

# Seed torch's global RNG so runs that draw from it are repeatable.
# NOTE(review): helpers that draw from `random` or numpy instead would need
# their own seeding — confirm what split_dataset and the dataset sampler use.
SEED = 42
torch.manual_seed(SEED)

# Passed to VideoFrameDataset as num_segments / frames_per_segment.
NUM_SEGMENTS = 1
FRAMES_PER_SEGMENT = 5

# Passed to split_dataset as batch_size and to the model constructor.
BATCH_SIZE = 64

In [3]:
# get_dataset_path() is unpacked into exactly two values. data_path becomes the
# dataset root (root_path=...) in the next cell; model_path is not used in the
# cells visible here.
data_path, model_path = get_dataset_path()


In [4]:
# Video-frame dataset rooted at data_path, configured from the constants in the
# config cell and the shared transform imported from config.torch_config.
dataset = VideoFrameDataset(
    frames_per_segment=FRAMES_PER_SEGMENT,
    num_segments=NUM_SEGMENTS,
    transform=multiple_transform,
    root_path=data_path,
)

# Class list exposed by the dataset; len(classes) is later used as the model's
# num_classes.
classes = dataset.classes


In [5]:
from utils.loader import split_dataset


In [6]:
# Build the two loaders from the single dataset.
# train_split=0.7 is presumably the fraction assigned to training — confirm in
# utils.loader.
train_loader, test_loader = split_dataset(
    dataset,
    batch_size=BATCH_SIZE,
    train_split=0.7,
)


In [7]:
from utils.balance import check_balance_status

In [8]:
# Optional diagnostic, currently disabled: uncomment to run
# check_balance_status on each loader.
# check_balance_status(test_loader, classes)
# check_balance_status(train_loader, classes)


In [9]:
from lib.simple_model import CNNModel, SimpleNet, CNN3D, HPNet
from lib.model import R3DClassifier


In [10]:
# Alternative model constructors kept for reference; all are disabled.
# The model actually trained is the HPNet built in the next cell.
# model = R3DClassifier(len(classes), (BATCH_SIZE, FRAMES_PER_SEGMENT, 3, 224, 224), pretrained=True)
# model = CNNModel(len(classes))
# model = SimpleNet(len(classes))
# model = R3DClassifier(
#     len(classes), (BATCH_SIZE, FRAMES_PER_SEGMENT, 3, 32, 32), pretrained=True
# )

# model = CNN3D(t_dim=FRAMES_PER_SEGMENT, img_x=224, img_y=224, num_classes=len(classes))


In [11]:
# Build the model that gets trained below. The bare `model` expression at the
# end of the cell displays its module layout.
# NOTE(review): the saved layout lists Softmax(dim=1) together with NLLLoss.
# NLLLoss expects log-probabilities as input, so check in lib.simple_model how
# forward() and the training loss combine these two — TODO confirm.
model = HPNet(
    image_size=224,
    num_frames=FRAMES_PER_SEGMENT,
    batch_size=BATCH_SIZE,
    num_classes=len(classes),
)
model

HPNet(
  (conv3d_1): Sequential(
    (0): Conv3d(5, 32, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
    (1): LeakyReLU(negative_slope=0.01)
  )
  (conv3d_2): Sequential(
    (0): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1))
    (1): LeakyReLU(negative_slope=0.01)
  )
  (batch): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (flat): Flatten(start_dim=2, end_dim=-1)
  (lin_1): Sequential(
    (0): Linear(in_features=3136, out_features=1568, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (lin_2): Sequential(
    (0): Linear(in_features=1568, out_features=784, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (lin_3): Linear(in_features=784, out_features=10, bias=True)
  (soft): Softmax(dim=1)
  (loss): NLLLoss()
  (drop): Dropout(p=0.15, inplace=False)
)

In [12]:
from lib.train import train_model

In [13]:
# Train `model` on train_loader with num_epochs=10 and learning_rate=0.001.
# NOTE(review): in the saved output the reported cost stops changing at
# 5.48796 from epoch 6 onward and the run ends in KeyboardInterrupt, so the
# stored results are from an incomplete, stalled run — investigate before
# relying on this model.
train_model(
    model,
    train_loader,
    device,
    num_epochs=10,
    learning_rate=0.001,
)


Training on device: cuda
Cost at epoch 1 is 6.44839
Cost at epoch 2 is 6.09808
Cost at epoch 3 is 5.75405
Cost at epoch 4 is 5.50406
Cost at epoch 5 is 5.48797
Cost at epoch 6 is 5.48796
Cost at epoch 7 is 5.48796
Cost at epoch 8 is 5.48796
Cost at epoch 9 is 5.48796


KeyboardInterrupt: 