In [None]:
# import all the modules

import torch
from torch import nn
from torch.utils import data

import torchvision
import torchvision.transforms as transforms
import cv2
import json

In [None]:
# preparing the datasets for use

# given a video id, return the greyscale version of the video
def processID(Id):
  """Decode the video with the given id and return its frames in greyscale.

  Args:
    Id: identifier of the video; the file read is 'subset/data/{Id}.webm'.

  Returns:
    A list with one entry per decoded frame, each the result of
    cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY). The list is empty when the
    very first read yields no frame (presumably a missing or unreadable
    file -- verify against OpenCV's behaviour for your build).
  """
  video = cv2.VideoCapture(f'subset/data/{Id}.webm')

  greyScaleVideo = []

  try:
    # read each frame, convert it to greyscale & append it to the list to be returned.
    # (the frame variable is deliberately not called `data`, which is the
    # module-level alias for torch.utils.data in this notebook)
    newFrame, frame = video.read()
    while newFrame:
      greyScaleVideo.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
      newFrame, frame = video.read()
  finally:
    # always free the decoder/file handle: this function runs once per
    # sample, so an unreleased capture leaks on every item loaded.
    video.release()

  return greyScaleVideo

# training/validation data set json files (describes which videos belong to which category)
# The encoding is given explicitly so the files are decoded the same way on
# every platform instead of depending on the locale's default encoding.
with open('subset/subset-train.json', encoding='utf-8') as file:
  trainingSetInfo = json.load(file)

with open('subset/subset-validation.json', encoding='utf-8') as file:
  validationSetInfo = json.load(file)


# custom dataset class. Allows us to load only the data to be used into memory, since the videos are otherwise way too large
class somethingDataset(data.Dataset):
  """Dataset that decodes a single video only when that item is requested.

  Only the set description is kept in memory; the frames of a video are
  produced by processID() at access time.
  """

  def __init__(self, setInfo):
    """Store the set description.

    Args:
      setInfo: indexable collection describing either the training or the
        validation set. Each entry must provide the keys 'id' and
        'template' (those are the only ones read by __getitem__).
    """
    self.setInfo = setInfo

  def __len__(self):
    """Return how many videos the set description contains."""
    return len(self.setInfo)

  def __getitem__(self, index):
    """Load the video at the given index.

    Args:
      index: position of the entry inside setInfo.

    Returns:
      A (video, label) pair: `video` is whatever processID() returns for the
      entry's 'id', and `label` is the entry's 'template' value.
    """
    vid = self.setInfo[index]
    video = processID(vid['id'])
    label = vid['template']
    # No per-item printing here: this method runs for every sample (and
    # inside every DataLoader worker), so debug output would flood the cell.
    return video, label

# prepare the dataloaders that will work with the dataset class to get the specific data we want when we request it.
# the first argument is the dataset we want to use (training or validation). Shuffle is if we want to randomize the order
# of the dataset, and num_workers allows loading in several worker processes (should speed things up, but if we notice
# problems we can set it to 0 so it will be slower but all in the main process).
# Both loaders must wrap the set description in somethingDataset: handing the raw JSON object to DataLoader would
# make it yield the metadata entries themselves instead of (video, label) pairs.
trainDataLoader = data.DataLoader(somethingDataset(trainingSetInfo), batch_size=64, shuffle=True, num_workers=3)
validationDataLoader = data.DataLoader(somethingDataset(validationSetInfo), batch_size=64, shuffle=True, num_workers=3)

In [None]:
# Very simple convolutional neural network class.
class CNN(nn.Module):
  """Small 3D-convolutional classifier for single-channel (greyscale) clips.

  Pipeline: Conv3d -> ReLU -> MaxPool3d -> AdaptiveAvgPool3d -> flatten ->
  Linear -> ReLU -> Linear -> LogSoftmax.

  The adaptive pooling step resizes the feature volume to a fixed shape, so
  the number of features entering fc1 no longer depends on the clip's
  frame count or frame size.
  """

  def __init__(self, num_classes=3):
    """Build the layers.

    Args:
      num_classes: number of output classes (defaults to 3, the value that
        was previously hard-coded).
    """
    super().__init__() # instantiate the pytorch module

    convChannels = 5            # num filters of the convolution
    pooledShape = (4, 5, 8)     # fixed (depth, height, width) fed to fc1

    # Layer 1: a 3d convolution
    #                      greyscale (3 for rgb)   num filters
    self.conv1 = nn.Conv3d(in_channels=1,          out_channels=convChannels, kernel_size=(5,5,5))
    self.relu1 = nn.ReLU()
    # a 3d max-pool needs an int or a 3-tuple (depth, height, width); the
    # earlier 2-tuple is rejected as soon as forward() runs.
    # NOTE(review): (2,2,2) also halves the time axis -- use (1,2,2) if only
    # spatial pooling was intended.
    self.maxpool1 = nn.MaxPool3d(kernel_size=(2,2,2), stride=(2,2,2))
    # squeeze whatever volume is left into pooledShape so that the flattened
    # size is always convChannels * 4 * 5 * 8 = 800
    self.adaptivePool = nn.AdaptiveAvgPool3d(pooledShape)

    # Layer 2: linear (hidden)
    # in_features is derived from the layers above instead of being guessed
    flatFeatures = convChannels * pooledShape[0] * pooledShape[1] * pooledShape[2]
    self.fc1 = nn.Linear(in_features=flatFeatures, out_features=500)
    self.relu3 = nn.ReLU()

    # Layer 3: linear (output)
    self.fc2 = nn.Linear(in_features=500, out_features=num_classes)
    self.logSoftmax = nn.LogSoftmax(dim=1)

  # forward function for the model. Training using backward propagation, with the backward function being handled by
  # pytorch so we only need to create the forward.
  def forward(self, x):
    """Compute per-class log-probabilities.

    Args:
      x: float tensor laid out as (batch, 1, depth, height, width); every
        one of depth/height/width must be large enough for the 5x5x5
        convolution followed by the 2x2x2 max-pool.

    Returns:
      Tensor of shape (batch, num_classes) holding log-probabilities
      (LogSoftmax over dim 1).
    """
    # Layer 1: 3d convolution + maxpool, then resize to the fixed volume
    x = self.conv1(x)
    x = self.relu1(x)
    x = self.maxpool1(x)
    x = self.adaptivePool(x)

    # Layer 2: flatten everything except the batch dimension, then use linear
    x = torch.flatten(x,1)
    x = self.fc1(x)
    x = self.relu3(x)

    # Layer 3: linear with log-softmax for our output
    x = self.fc2(x)
    output = self.logSoftmax(x)

    return output


In [None]:
# Running the model


device = 'cpu' # or 'cuda' in future
model = CNN().to(device) # instantiate the CNN

EPOCHS = 1
LEARNING_RATE = 0.001

# loss function. The model's forward() already ends in LogSoftmax, so the
# matching criterion is the negative log-likelihood loss; CrossEntropyLoss
# would apply log-softmax a second time on top of the log-probabilities.
loss = nn.NLLLoss()

# optimizer function (what we are using to minimize the loss) (Adam is essentially a better version of gradient descent)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

train_loss_list = [] # we will want to keep a history of the loss values so we can plot it
for epoch in range(EPOCHS):
  train_loss = 0

  model.train()
  # TODO: the training step is not written yet. Known blockers:
  #  - batching currently breaks here due to different size frames, so the
  #    loader needs fixed-size clips or a custom collate_fn;
  #  - y holds the 'template' strings, which still have to be mapped to
  #    integer class indices before they can be passed to the loss.
  for (x,y) in trainDataLoader:
    pass


