In [3]:
import torch

def get_dataset(path_train, path_val, batch_size, cuda=False, num_workers=2):
    """
    Load the serialized training and validation datasets and wrap them in DataLoaders.

    Args:
        path_train: path of the file holding the training dataset (written with torch.save).
        path_val: path of the file holding the validation dataset (written with torch.save).
        batch_size: number of samples per batch for both loaders.
        cuda: either a boolean ("batches will be copied to a CUDA GPU") or a device
            string / torch.device. Pinned memory is only requested for CUDA targets,
            so values such as 'cpu' or 'mps' do not turn it on.
        num_workers: number of DataLoader worker processes (default 2, as before).

    Returns:
        (train_loader, val_loader). The training loader shuffles, the validation
        loader keeps the dataset order.
    """
    # A device string such as 'cpu' is truthy, so it must not be used as the flag itself.
    if isinstance(cuda, (str, torch.device)):
        pin_memory = torch.device(cuda).type == 'cuda'
    else:
        pin_memory = bool(cuda)

    # SECURITY: the files contain whole pickled Dataset objects, so loading them executes
    # arbitrary pickle code -- only load files you produced yourself. weights_only=False
    # is spelled out because recent PyTorch releases default to weights_only=True, which
    # refuses to unpickle Dataset objects.
    train_dataset = torch.load(path_train, weights_only=False)
    val_dataset = torch.load(path_val, weights_only=False)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        pin_memory=pin_memory, num_workers=num_workers)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        pin_memory=pin_memory, num_workers=num_workers)

    return train_loader, val_loader

In [2]:
import time

from utils import *

class Metric:
    """
    Plain container for the loss and accuracy figures of the train and test phases.

    Every attribute starts out as its own empty list; callers are free to
    overwrite the attributes with whatever values they want to record.
    """

    def __init__(self):
        # Losses first, then accuracies -- each attribute gets a distinct list object.
        self.loss_train, self.loss_test = [], []
        self.acc_train, self.acc_test = [], []
        
def epoch(data, model, criterion, optimizer=None, cuda=False, device=None):
    """
    Make one pass (an "epoch") over the batches in `data` with `model`,
    evaluating `criterion` as the loss.

    If `optimizer` is given, a training epoch is performed (forward, backward,
    optimizer step). Otherwise the model is put in eval mode and only the
    forward pass is run, with gradient tracking disabled.

    Args:
        data: iterable yielding (input, target) batches.
        model: the network to run.
        criterion: loss callable, invoked as criterion(output, target).
        optimizer: optimizer used for training, or None for evaluation.
        cuda: legacy switch. True moves every batch to the default CUDA device.
            A device string / torch.device is also accepted here (callers in this
            notebook pass one positionally) and is treated like `device`.
        device: optional device (string or torch.device) to move every batch to.
            Takes precedence over `cuda`. When neither is set, batches are left
            where the loader produced them.

    Returns:
        (avg_top1_acc, avg_top5_acc, avg_loss): three AverageMeter objects.
    """
    training = optimizer is not None

    # Some layers (batch norm here) behave differently in train and eval mode.
    if training:
        model.train()
    else:
        model.eval()

    # Work out where the batches have to live. A device *string* is truthy even for
    # 'cpu', so it must never be interpreted as "call .cuda()".
    if device is not None:
        target_device = torch.device(device)
    elif isinstance(cuda, (str, torch.device)):
        target_device = torch.device(cuda)
    elif cuda:
        target_device = torch.device('cuda')
    else:
        target_device = None

    # The running-loss plot is a module-level object created by the training driver;
    # it is only fed during training and skipped when nobody created it.
    plot = globals().get('loss_plot') if training else None

    # Objects storing the running averages of the metrics.
    avg_loss = AverageMeter()
    avg_top1_acc = AverageMeter()
    avg_top5_acc = AverageMeter()

    # No autograd graph is needed (or wanted, memory-wise) during evaluation.
    with torch.set_grad_enabled(training):
        for inputs, target in data:
            if target_device is not None:
                inputs = inputs.to(target_device)
                target = target.to(target_device)

            # forward
            output = model(inputs)
            loss = criterion(output, target)

            # backward, only when training
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # metrics for this batch
            prec1, prec5 = accuracy(output, target, topk=(1, 5))

            avg_loss.update(loss.item())
            avg_top1_acc.update(prec1.item())
            avg_top5_acc.update(prec5.item())
            if plot is not None:
                plot.update(avg_loss.val)

    return avg_top1_acc, avg_top5_acc, avg_loss

In [3]:
import torch.nn as nn

def _batches_on(loader, device):
    """Yield each (input, target) batch of `loader` with both tensors moved to `device`."""
    for inputs, targets in loader:
        yield inputs.to(device), targets.to(device)


def main(train_path, val_path, batch_size=128, lr=0.2, epochs=35, device='cpu', patience=5):
    """
    Train a CustomNet with early stopping, then plot the loss and accuracy curves.

    Args:
        train_path: file holding the serialized training dataset.
        val_path: file holding the serialized validation dataset.
        batch_size: samples per batch for both loaders.
        lr: initial Adam learning rate.
            NOTE(review): the 0.2 default is far above Adam's usual range -- confirm.
        epochs: maximum number of epochs.
        device: device string (or torch.device) for the model and batches,
            e.g. 'cpu', 'cuda' or 'mps'.
        patience: number of consecutive epochs without a better validation loss
            tolerated before stopping. It also sets the scheduler's minimum
            learning rate to 10 ** -patience.

    Returns:
        List with one Metric per completed epoch (scalar acc_train / acc_test /
        loss_train / loss_test). Earlier versions returned None.

    Side effects:
        Rebinds the module-level `loss_plot`, writes 'best_model.pth' whenever the
        validation loss improves, prints one line per epoch and shows two figures.
    """
    target = torch.device(device)
    use_cuda = target.type == 'cuda'

    criterion = nn.CrossEntropyLoss()

    # epoch() feeds the running training loss to this module-level plot object.
    global loss_plot
    loss_plot = TrainLossPlot()

    # Model and optimizer.
    model = CustomNet()
    optimizer = torch.optim.Adam(model.parameters(), lr)

    # Cosine annealing over a quarter of the run. T_max must be at least 1, otherwise
    # the schedule divides by zero for epochs < 4.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=max(1, epochs // 4), eta_min=1 / (10 ** patience))

    if target.type != 'cpu':
        if use_cuda:
            # Let cuDNN auto-tune its convolution algorithms for this hardware.
            torch.backends.cudnn.benchmark = True
        model = model.to(target)
        criterion = criterion.to(target)

    # Pinned host memory only helps host-to-CUDA copies, so pass a real boolean
    # instead of the (always truthy) device string.
    train, test = get_dataset(train_path, val_path, batch_size, use_cuda)

    history = []
    best_val_loss = float('inf')
    epochs_without_improvement = 0

    for i in range(epochs):
        m = Metric()

        # Train phase. Batches are moved to the target device here, so epoch() works
        # for any device without relying on its CUDA-only switch.
        top1_acc, _, loss = epoch(_batches_on(train, target), model, criterion, optimizer)
        # Update learning rate
        scheduler.step()

        # Test phase: forward passes only, so no autograd graph is needed.
        with torch.no_grad():
            top1_acc_test, _, loss_test = epoch(_batches_on(test, target), model, criterion)

        m.acc_train = top1_acc.avg
        m.acc_test = top1_acc_test.avg
        m.loss_train = loss.avg
        m.loss_test = loss_test.avg
        history.append(m)
        print(f"Epoch {i+1} - acc train={m.acc_train:.2f}%, acc test={m.acc_test:.2f}%, loss train={m.loss_train:.3f}, loss test={m.loss_test:.3f}")

        # Early stopping on the validation loss.
        if m.loss_test < best_val_loss:
            best_val_loss = m.loss_test
            epochs_without_improvement = 0
            # Save the best model
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print("Early stopping triggered")
                break

    loss_train = [entry.loss_train for entry in history]
    loss_test = [entry.loss_test for entry in history]
    acc_train = [entry.acc_train for entry in history]
    acc_test = [entry.acc_test for entry in history]

    # `plt` is expected to be in the notebook namespace already (it is not imported in
    # the cells shown here -- presumably it comes from the `utils` star import).
    x = range(1, len(history) + 1)
    plt.figure(figsize=(5, 5))
    plt.plot(x, loss_train, label='loss train')
    plt.plot(x, loss_test, label='loss test')
    plt.xlabel('epoch')
    plt.legend(loc="upper right")
    plt.show()
    plt.figure(figsize=(10, 10))
    plt.plot(x, acc_train, label='acc train')
    plt.plot(x, acc_test, label='acc test')
    plt.xlabel('epoch')
    plt.legend(loc="upper left")
    plt.show()

    return history

In [4]:
# Define the model
# A 3D ResNet-18-style convolutional network: one stem convolution, four residual
# stages of two BasicBlocks each (two 3x3x3 convolutions per block), and a single
# fully connected classification layer
# The network is defined in the __init__ method
# The forward method defines the forward pass of the network
# The _make_layer method builds one residual stage out of BasicBlocks
# The _initialize_weights method is used to initialize the weights of the network

class CustomNet(nn.Module):
    """
    3D ResNet-18-style classifier.

    Layout: a stem (3x3x3 convolution, batch norm, ReLU, 2x max pooling), four
    residual stages of two BasicBlocks each (64, 128, 256 and 512 channels; stages
    2-4 halve every non-channel dimension), global average pooling and one linear
    layer.

    The input must be a 5-D tensor with 3 channels in dimension 1, as required by
    nn.Conv3d (presumably (batch, 3, frames, height, width) -- confirm against the
    dataset). The output has shape (batch, num_classes).

    Args:
        num_classes: size of the output layer.
    """

    def __init__(self, num_classes=101):
        super().__init__()
        # Stem: 3 input channels -> 64 channels, 3x3x3 kernel, size preserved by padding.
        self.conv1 = nn.Conv3d(3, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm3d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(kernel_size=2, stride=2)
        # Residual stages, two blocks each.
        self.layer1 = self._make_layer(64, 64, 2)
        self.layer2 = self._make_layer(64, 128, 2, stride=2)
        self.layer3 = self._make_layer(128, 256, 2, stride=2)
        self.layer4 = self._make_layer(256, 512, 2, stride=2)
        # Global average pooling. The previous fixed (4, 7, 7) window only worked when
        # the last feature map had exactly that size; the adaptive variant gives the
        # same result in that case and accepts any other input size as well. It has no
        # parameters, so saved state_dicts stay compatible.
        self.avgpool = nn.AdaptiveAvgPool3d(1)
        self.fc = nn.Linear(512, num_classes)
        self._initialize_weights()

    def _make_layer(self, inplanes, planes, blocks, stride=1):
        """
        Build one residual stage of `blocks` BasicBlocks.

        Only the first block changes the channel count / applies `stride`; it gets a
        1x1x1 convolution + batch norm on its shortcut whenever the shortcut would
        otherwise not match the block output.
        """
        downsample = None
        if stride != 1 or inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv3d(inplanes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm3d(planes),
            )

        layers = [BasicBlock(inplanes, planes, stride, downsample)]
        layers.extend(BasicBlock(planes, planes) for _ in range(1, blocks))

        return nn.Sequential(*layers)

    def _initialize_weights(self):
        """Re-initialize every convolution, batch-norm and linear layer of the network."""
        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                # He initialization suited to ReLU activations; biases start at 0.
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm3d):
                # Identity affine transform: scale 1, shift 0.
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # Small Gaussian weights (mean 0, std 0.01), zero bias.
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Run the network on a 5-D batch and return the (batch, num_classes) scores."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.flatten(1)  # (batch, 512, 1, 1, 1) -> (batch, 512)
        x = self.fc(x)

        return x
    

# Define the basic block
# The basic block is used to create the convolutional layers
# The forward method defines the forward pass of the block

class BasicBlock(nn.Module):
    """
    Residual block made of two 3x3x3 convolution + batch-norm stages.

    The block output is relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut), where the
    shortcut is the input itself, or `downsample(x)` when a `downsample` module
    is supplied.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution (the second one always uses 1).
        downsample: optional module applied to the input to build the shortcut.
    """

    expansion = 1  # class-level constant; nothing inside this block reads it

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Both convolutions share the same kernel geometry and carry no bias.
        shared = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv3d(inplanes, planes, stride=stride, **shared)
        self.bn1 = nn.BatchNorm3d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv3d(planes, planes, stride=1, **shared)
        self.bn2 = nn.BatchNorm3d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Main branch: conv -> bn -> relu -> conv -> bn.
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))

        # Shortcut branch: identity unless a downsample module was provided.
        shortcut = x if self.downsample is None else self.downsample(x)

        y += shortcut
        return self.relu(y)

In [5]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# The prepared dataset files are expected under the "./data/preparedMac" directory
# Running this cell (by clicking run or pressing Shift+Enter) lists every file found under that directory

import os

# Walk the prepared-data directory and print the path of every file found in it.
prepared_files = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('./data/preparedMac')
    for name in names
)
for file_path in prepared_files:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

./data/preparedMac/dataset_frames16_fold1_resize120_coljit0.1/trainset.pt
./data/preparedMac/dataset_frames16_fold1_resize120_coljit0.1/valset.pt
./data/preparedMac/dataset_frames16_fold2_resize120_coljit0.1/trainset.pt
./data/preparedMac/dataset_frames16_fold2_resize120_coljit0.1/valset.pt
./data/preparedMac/dataset_frames16_fold3_resize120_coljit0.1/trainset.pt
./data/preparedMac/dataset_frames16_fold3_resize120_coljit0.1/valset.pt


In [6]:
# Fold-1 prepared datasets: one file for the training set, one for the validation set.
train_path = "./data/preparedMac/dataset_frames16_fold1_resize120_coljit0.1/trainset.pt"
val_path = "./data/preparedMac/dataset_frames16_fold1_resize120_coljit0.1/valset.pt"

# Launch training on the 'mps' device with a batch size of 3 for at most 50 epochs.
# NOTE(review): the output recorded for this cell is a TypeError raised inside a
# DataLoader worker -- the stored dataset's Normalize transform received uint8 tensors.
# Presumably a float-conversion step has to precede Normalize where the dataset is
# prepared -- TODO confirm.
metrics = main(train_path, val_path, batch_size=3, epochs=50, device='mps')



TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
           ^^^^^^^^^^^^^^^^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torchvision/datasets/ucf101.py", line 128, in __getitem__
    video = self.transform(video)
            ^^^^^^^^^^^^^^^^^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torchvision/transforms/v2/_container.py", line 51, in forward
    sample = transform(sample)
             ^^^^^^^^^^^^^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torchvision/transforms/v2/_transform.py", line 44, in forward
    flat_outputs = [
                   ^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torchvision/transforms/v2/_transform.py", line 45, in <listcomp>
    self._transform(inpt, params) if needs_transform else inpt
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torchvision/transforms/v2/_misc.py", line 175, in _transform
    return F.normalize(inpt, mean=self.mean, std=self.std, inplace=self.inplace)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torchvision/transforms/v2/functional/_misc.py", line 66, in normalize
    return inpt.normalize(mean=mean, std=std, inplace=inplace)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torchvision/datapoints/_image.py", line 253, in normalize
    output = self._F.normalize_image_tensor(self.as_subclass(torch.Tensor), mean=mean, std=std, inplace=inplace)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/theodoredwernicki/anaconda3/envs/AI-computervision/lib/python3.11/site-packages/torchvision/transforms/v2/functional/_misc.py", line 20, in normalize_image_tensor
    raise TypeError(f"Input tensor should be a float tensor. Got {image.dtype}.")
TypeError: Input tensor should be a float tensor. Got torch.uint8.


<Figure size 640x480 with 0 Axes>