In [None]:
'''For testing our network'''
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
from psutil import cpu_count
from torchvision import transforms
from torch.utils.data import DataLoader
from dataloader import PicklebotDataset, custom_collate
from mobilenet import MobileNetLarge2D, MobileNetSmall2D, MobileNetSmall3D,MobileNetLarge3D
from movinet import MoViNetA2
from helpers import calculate_accuracy, calculate_accuracy_bce

# Seed torch's global RNG with a fixed value so that torch-driven randomness in this cell
# repeats between runs (presumably this includes the order of the shuffled loaders below — TODO confirm).
torch.manual_seed(1234)

def forward_pass(loader,model):
    """Run `model` in eval mode over every batch of `loader` and gather statistics.

    Relies on the notebook-level names `device`, `criterion` and
    `calculate_accuracy`.

    Args:
        loader: iterable yielding `(features, labels)` batches.
        model: network that is called on each feature batch.

    Returns:
        A tuple `(losses, correct, samples)`:
        losses  -- list with one `criterion` value (Python float) per batch,
        correct -- running sum of `calculate_accuracy(outputs, labels)`
                   (assumed to be a count of correct predictions — TODO confirm),
        samples -- total number of labels seen.
    """
    model.eval()
    batch_losses = []
    n_correct = 0
    n_seen = 0

    for features, labels in tqdm(loader):
        # Inputs are cast to bfloat16 before the transfer; labels become int64.
        features = features.to(torch.bfloat16).to(device)
        labels = labels.long().to(device)

        logits = model(features)
        batch_losses.append(criterion(logits, labels).item())

        n_correct += calculate_accuracy(logits, labels)
        n_seen += len(labels)

    return batch_losses, n_correct, n_seen


@torch.no_grad()
def estimate_loss():
    """Evaluate the notebook-level `model` on `ball_loader` and `strike_loader`.

    Gradient tracking is disabled for the whole call by the decorator.

    Returns:
        A tuple `(avg_test_loss, balls_accuracy, strikes_accuracy)` where the
        loss is the mean over the per-batch losses of both loaders pooled
        together, and each accuracy is `correct / samples` for its loader.
    """
    ball_stats = forward_pass(ball_loader,model)
    strike_stats = forward_pass(strike_loader,model)
    ball_losses, ball_hits, ball_total = ball_stats
    strike_losses, strike_hits, strike_total = strike_stats

    # Pool the per-batch losses of both passes (list concatenation) before averaging.
    pooled_loss = np.mean(ball_losses + strike_losses)

    # Raw "correct" tallies are echoed before being turned into ratios.
    print(ball_hits)
    print(strike_hits)

    ball_ratio = ball_hits / ball_total
    strike_ratio = strike_hits / strike_total
    return pooled_loss, ball_ratio, strike_ratio

def state_dict_converter(state_dict):
    """Rename, in place, every key that starts with "_orig_mod." and return the mapping.

    For each such key, all occurrences of "_orig_mod." are removed from the
    name and the value is re-inserted under the shortened name. Keys that do
    not start with the marker are left untouched. The same mapping object
    that was passed in is returned.
    """
    marker = "_orig_mod."
    # Iterate over a snapshot because the mapping is modified inside the loop.
    for old_name in tuple(state_dict):
        if not old_name.startswith(marker):
            continue
        state_dict[old_name.replace(marker, "")] = state_dict.pop(old_name)
    return state_dict


# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Per-channel statistics handed to transforms.Normalize below.
std = (0.2104, 0.1986, 0.1829)
mean = (0.3939, 0.3817, 0.3314)
batch_size = 4 
#annotations paths
strike_annotations_file = '/home/henry/Documents/PythonProjects/picklebotdataset/strike_test_labels.csv'
ball_annotations_file = '/home/henry/Documents/PythonProjects/picklebotdataset/ball_test_labels.csv'

#video paths
test_video_paths = '/home/henry/Documents/PythonProjects/picklebotdataset/test_all_together'

#establish our normalization using transforms, 
#note that we are doing this in our dataloader as opposed to in the training loop like with dali
transform = transforms.Normalize(mean,std)


# Balls and strikes get separate datasets/loaders so accuracy can be reported per class.
ball_dataset = PicklebotDataset(ball_annotations_file,test_video_paths,transform=transform)
strike_dataset = PicklebotDataset(strike_annotations_file,test_video_paths,transform=transform)
ball_loader = DataLoader(ball_dataset, batch_size=batch_size,shuffle=True,collate_fn=custom_collate,num_workers=cpu_count())
strike_loader = DataLoader(strike_dataset, batch_size=batch_size,shuffle=True,collate_fn=custom_collate,num_workers=cpu_count())


model = MobileNetSmall3D(num_classes=2)
criterion = nn.CrossEntropyLoss()
model.to(device)
# map_location remaps the checkpoint's tensors onto `device`, so a checkpoint saved on a GPU
# still loads when this cell falls back to the CPU.
state_dict = torch.load('/home/henry/Documents/PythonProjects/Picklebot/checkpoints/MobileNetSmall3D31.pth', map_location=device)


# Drop any "_orig_mod." key prefixes first (a no-op when the checkpoint has none), so the
# keys match the plain, un-wrapped model built above.
model.load_state_dict(state_dict_converter(state_dict))
# forward_pass feeds bfloat16 inputs, so the weights are cast to the same dtype.
model.to(torch.bfloat16)
avg_test_loss,balls_accuracy,strikes_accuracy = estimate_loss()
print(f'Mobilenet Small test loss: {avg_test_loss:.4f}, ball test accuracy: {balls_accuracy * 100:.2f}% strike test accuracy: {strikes_accuracy * 100:.2f}%')

In [None]:
from mobilenet import MobileNetLarge3D
import torch
import os
from torch.cuda.amp import autocast
import torch.nn.functional as F
import torchvision

def state_dict_converter(state_dict):
    """Strip the "_orig_mod." marker from matching keys of `state_dict`, in place.

    Only keys that begin with "_orig_mod." are renamed; for those, every
    occurrence of the marker is removed from the name. The mapping passed in
    is modified and also returned.
    """
    marker = "_orig_mod."
    # A snapshot of the keys is taken because entries are moved during the loop.
    prefixed = [name for name in list(state_dict.keys()) if name.startswith(marker)]
    for name in prefixed:
        state_dict[name.replace(marker, "")] = state_dict.pop(name)
    return state_dict

def transform(video, mean, std):
    """Normalize `video`: subtract `mean`, then divide the result by `std`."""
    centered = video - mean
    return centered / std

std = (0.2104, 0.1986, 0.1829)
mean = (0.3939, 0.3817, 0.3314)
# Convert the mean and std to tensors, shaped (1, 3, 1, 1, 1) so they broadcast
# over the 5-D clip tensor built in the loop below.
std = torch.tensor(std).view(1, 3, 1, 1, 1)
mean = torch.tensor(mean).view(1, 3, 1, 1, 1)

test_video_paths = '/home/henry/Documents/PythonProjects/picklebotdataset/test/balls'
# The model below is never moved off the CPU, so map the checkpoint's tensors to the CPU;
# this also lets a checkpoint saved on a GPU load on a machine without one.
state_dict = torch.load('/home/henry/Documents/PythonProjects/Picklebot/weights/MobileNetLarge.pth', map_location='cpu')
model = MobileNetLarge3D()
model.load_state_dict(state_dict_converter(state_dict))

model.eval()

video_paths = os.listdir(test_video_paths)

# Pure inference: disable autograd so no graph is recorded for each clip.
with torch.no_grad():
    for video_path in video_paths:
        video = os.path.join(test_video_paths, video_path)
        # read_video(...)[0] is the frame tensor; the last axis is moved to the front, a leading
        # batch axis is added, and the values are divided by 255.
        video = torchvision.io.read_video(video,pts_unit='sec')[0].permute(-1,0,1,2).unsqueeze(0)/255
        video = transform(video,mean,std)
        # NOTE(review): this is torch.cuda.amp.autocast, but neither the model nor the input is
        # moved to a GPU in this cell — confirm whether mixed precision is actually in effect.
        with autocast():
            out = model(video)
        # Softmax over dim 1 turns the model output into per-class probabilities.
        out_prob = F.softmax(out,dim=1)
        print(out_prob,video_path)


In [42]:
'''Calculate the number of parameters in each model, for comparison purposes. 
   Note that movinet is about 2.8x larger than mobilenet small, and mobilenet large is about 2.5x larger than mobilenet small.'''

from movinet import MoViNetA2
# MobileNetSmall3D is imported here as well so this cell does not depend on an earlier
# cell having been run first.
from mobilenet import MobileNetLarge3D, MobileNetSmall3D


def count_parameters(module):
    """Return the total number of elements across all parameters of `module`."""
    return sum(p.numel() for p in module.parameters())


# Each model is built with its default constructor arguments.
movinet = MoViNetA2()
mobilenet_large = MobileNetLarge3D()
mobilenet_small = MobileNetSmall3D()

movinet_params = count_parameters(movinet)
mobilenet_large_params = count_parameters(mobilenet_large)
mobilenet_small_params = count_parameters(mobilenet_small)
print(f"number of parameters in movinet: {movinet_params}")
print(f"number of parameters in mobilenet large: {mobilenet_large_params}")
print(f"number of parameters in mobilenet small: {mobilenet_small_params}")

number of parameters in movinet: 4660762
number of parameters in mobilenet large: 4191584
number of parameters in mobilenet small: 1672816


In [1]:
from torch.utils.data import DataLoader
from dataloader import custom_collate
from torch.utils.data import Dataset
from torchvision.io import read_video
import pandas as pd
import os
import torch
from ffmpegio.video import read
from torchvision import transforms

class PicklebotDataset(Dataset):
    """Dataset of labelled videos decoded with torchvision's `read_video`.

    The annotations CSV is read without a header row. Column 0 of each row is
    joined onto `video_dir` to locate the video file, and column 1 is used as
    the label.
    """

    def __init__(self, annotations_file, video_dir, transform=None,target_transform=None,dtype=torch.float32):
        """Load the annotations table and remember the decoding options.

        Args:
            annotations_file: path of the header-less CSV of (file name, label) rows.
            video_dir: directory that the file names are relative to.
            transform: optional callable applied to the decoded video tensor.
            target_transform: optional callable applied to the label.
            dtype: dtype the decoded frames are converted to before scaling.
        """
        self.video_labels = pd.read_csv(annotations_file,engine='pyarrow',header=None)
        self.dtype = dtype
        self.video_dir = video_dir
        self.target_transform = target_transform
        self.transform = transform

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.video_labels)

    def __getitem__(self,idx):
        """Decode the video for row `idx` and return `(video, label)`."""
        annotations = self.video_labels
        file_name = annotations.iloc[idx,0]
        # Element 0 of read_video's result is the frame tensor (requested in TCHW layout).
        frames = read_video(os.path.join(self.video_dir, file_name),output_format="TCHW",pts_unit='sec')[0]
        # Convert to the configured dtype first, then divide by 255.
        video = frames.to(self.dtype) / 255
        label = annotations.iloc[idx,1]
        video = self.transform(video) if self.transform else video
        label = self.target_transform(label) if self.target_transform else label
        return video, label

class PicklebotDatasetFFMPEGIO(Dataset):
    """Dataset of labelled videos decoded with `ffmpegio.video.read`.

    The annotations CSV is read without a header row. Column 0 of each row is
    joined onto `video_dir` to locate the video file, and column 1 is used as
    the label.
    """

    def __init__(self, annotations_file, video_dir, transform=None,target_transform=None,dtype=torch.float32):
        """Load the annotations table and remember the decoding options.

        Args:
            annotations_file: path of the header-less CSV of (file name, label) rows.
            video_dir: directory that the file names are relative to.
            transform: optional callable applied to the decoded video tensor.
            target_transform: optional callable applied to the label.
            dtype: dtype the decoded frames are converted to before scaling.
        """
        self.video_labels = pd.read_csv(annotations_file,engine='pyarrow',header=None)
        self.dtype = dtype
        self.video_dir = video_dir
        self.target_transform = target_transform
        self.transform = transform

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.video_labels)

    def __getitem__(self,idx):
        """Decode the video for row `idx` and return `(video, label)`."""
        annotations = self.video_labels
        file_name = annotations.iloc[idx,0]
        # Element 1 of ffmpegio's read() result holds the frame data.
        raw_frames = read(os.path.join(self.video_dir, file_name))[1]
        video = torch.tensor(raw_frames).to(self.dtype) / 255
        # Move the last axis to position 1 (presumably channels-last -> TCHW — TODO confirm).
        video = video.permute(0,3,1,2)
        label = annotations.iloc[idx,1]
        video = self.transform(video) if self.transform else video
        label = self.target_transform(label) if self.target_transform else label
        return video, label
    
# Per-channel statistics handed to transforms.Normalize below.
std = (0.2104, 0.1986, 0.1829)
mean = (0.3939, 0.3817, 0.3314)
batch_size=8

# dtype the datasets convert decoded frames to.
dtype = torch.bfloat16
#annotations paths
train_annotations_file = '/home/henry/Documents/PythonProjects/picklebotdataset/train_labels.csv'
val_annotations_file = '/home/henry/Documents/PythonProjects/picklebotdataset/val_labels.csv'

#video paths
train_video_paths = '/home/henry/Documents/PythonProjects/picklebotdataset/train_all_together'
val_video_paths = '/home/henry/Documents/PythonProjects/picklebotdataset/val_all_together'

#establish our normalization using transforms, 
#note that we are doing this in our dataloader as opposed to in the training loop like with dali
transform = transforms.Normalize(mean,std)

#dataset     
# Both loaders read the same annotations and videos with identical settings; only the
# decoding backend inside the dataset class differs.
train_dataset = PicklebotDataset(train_annotations_file,train_video_paths,transform=transform,dtype=dtype)
train_loader = DataLoader(train_dataset, batch_size=batch_size,shuffle=True,collate_fn=custom_collate,num_workers=8)

ffmpegio_dataset = PicklebotDatasetFFMPEGIO(train_annotations_file,train_video_paths,transform=transform,dtype=dtype)
ffmpegio_loader = DataLoader(ffmpegio_dataset, batch_size=batch_size,shuffle=True,collate_fn=custom_collate,num_workers=8)

import time


def time_full_pass(loader):
    """Iterate once over every batch of `loader`, discarding them, and return the elapsed seconds."""
    # perf_counter is a monotonic clock, so the measurement cannot be skewed by
    # system clock adjustments during a long run (unlike time.time()).
    start = time.perf_counter()
    for _ in loader:
        pass
    return time.perf_counter() - start


# NOTE(review): the two passes read the same files back to back, so the second one may
# benefit from data cached by the first — treat the comparison as approximate.
print(f"Time to load videos with torchvision.io: {time_full_pass(train_loader)}") #5625.347644805908 seconds

print(f"Time to load videos with ffmpegio: {time_full_pass(ffmpegio_loader)}")


Time to load videos with torchvision.io: 5600.379940986633
Time to load videos with ffmpegio: 6023.769095897675
