06/10/2023

Uso questo script per ricreare i dataset, cercando di strutturarli meglio:
- dataset che prende movies e labels come input,
- dataset che prende dataset_path e movie ids come input,
- dataset che gestisce l'inference con o senza ground truth.

In [18]:
# Load IPython's autoreload extension and use mode 2, so that imported modules
# are reloaded automatically before each cell is executed
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
import numpy as np
import torch
import logging
import math
import ntpath
import os

from typing import List, Dict, Union, Tuple, Any

import imageio
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from PIL.ExifTags import TAGS
from scipy.interpolate import interp1d
from scipy.ndimage import convolve
from torch import nn
from torch.utils.data import Dataset
from torchvision.transforms import GaussianBlur
from torch.utils.data import DataLoader

from config import config, TrainingConfig
from data.data_processing_tools import detect_spark_peaks
from utils.in_out_tools import load_annotations_ids, load_movies_ids
from utils.training_script_utils import init_model, init_dataset

In [20]:
# create a TrainingConfig object and override two inference settings on it:
# the inference dataset size ("minimal") and the inference batch size (2)
params = TrainingConfig()
params.inference_dataset_size = "minimal"
params.inference_batch_size = 2

# create a sparkdataset from the config, passing the sample IDs "05" and "34"
dataset = init_dataset(params=params, sample_ids=["05", "34"])

[19:06:35] [  INFO  ] [utils.training_script_utils] <132 > -- Samples in training dataset: 31


In [21]:
# Create a dataloader over the dataset. Shuffling is disabled so batches follow
# the dataset order; batch size, worker count and memory pinning come from the
# config object.
dataset_loader = DataLoader(
    dataset,
    batch_size=params.inference_batch_size,
    shuffle=False,
    num_workers=params.num_workers,
    pin_memory=params.pin_memory,
)

In [22]:
# get the first batch produced by the dataloader (a fresh iterator is created
# just for this call)
item = next(iter(dataset_loader))

In [23]:
# list the fields available in a batch
item.keys()

dict_keys(['movie_id', 'original_duration', 'data', 'labels', 'sample_id'])

In [24]:
# inspect the movie ids of the batch and the shapes of its data and labels
item["movie_id"], item["data"].shape, item["labels"].shape

(tensor([0, 0]), torch.Size([2, 256, 64, 512]), torch.Size([2, 256, 64, 512]))

In [25]:
# Create the network (a U-Net, according to the author's note) from the config
network = init_model(params=params)

In [26]:
# show the device and the inference setting stored in the config
params.device, params.inference

(device(type='cuda'), 'overlap')

TODO: riorganizzare le funzioni do_inference e get_preds

In [35]:
def run_batch_in_unet(
    network: torch.nn.Module, batch_data: torch.Tensor, unet_steps: int
) -> torch.Tensor:
    """
    Feed a batch through a U-Net after padding it to a compatible size.

    The two trailing dimensions (height and width) of the input are padded so
    that each becomes a multiple of ``2**unet_steps``; the padding is split
    between both sides, with the extra element going to the end when the amount
    is odd. A singleton dimension is inserted at position 1 before calling the
    network, and the same amounts are cropped from the trailing two dimensions
    of the network output.

    Args:
        network (torch.nn.Module): The U-Net network to run the data through.
        batch_data (torch.Tensor): The batch to process; it must be
            4-dimensional, with height and width as the last two dimensions.
        unet_steps (int): Exponent defining the size constraint: height and
            width are padded up to a multiple of ``2**unet_steps`` (presumably
            the number of down-sampling steps of the U-Net; confirm against
            the model definition).

    Returns:
        torch.Tensor: The network output with the padding cropped away from its
        last two dimensions.
    """
    multiple = 2**unet_steps

    def missing_to_multiple(size):
        # Amount needed to reach the next multiple (0 if already aligned).
        remainder = size % multiple
        return multiple - remainder if remainder else 0

    _, _, height, width = batch_data.shape
    pad_h = missing_to_multiple(height)
    pad_w = missing_to_multiple(width)

    # Split each padding amount in two; the end gets the extra one when odd.
    pad_top, pad_left = pad_h // 2, pad_w // 2
    pad_bottom = pad_top + pad_h % 2
    pad_right = pad_left + pad_w % 2

    # F.pad expects the pads of the last dimension first:
    # (left, right, top, bottom).
    padded = F.pad(batch_data, (pad_left, pad_right, pad_top, pad_bottom))

    # Insert a singleton dimension at position 1 before calling the network.
    # Author's note on output shapes:
    #   b x 4 x d x 64 x 512 with 3D-UNet
    #   b x 4 x 64 x 512 with LSTM-UNet -> not implemented yet
    preds = network(padded[:, None])

    # Undo the padding; a zero end-pad must map to an open-ended slice, since
    # a stop index of -0 would produce an empty result.
    rows = slice(pad_top, -pad_bottom if pad_h > 0 else None)
    cols = slice(pad_left, -pad_right if pad_w > 0 else None)

    return preds[..., rows, cols]

In [36]:
# Run inference on every batch of the dataloader and collect, for each movie,
# the number of chunks, the predictions and the duration
chunks_per_movie, predictions_per_movie, durations_per_movie = do_inference(
    network=network, params=params, test_dataloader=dataset_loader, device=params.device
)

batch data size before padding:  torch.Size([2, 256, 64, 512])
batch data dtype torch.float32


TypeError: conv3d() received an invalid combination of arguments - got (Tensor, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (Tensor, !Parameter!, !Parameter!, !tuple of (int, int, int)!, !tuple of (bool, bool, bool)!, !tuple of (bool, bool, bool)!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (Tensor, !Parameter!, !Parameter!, !tuple of (int, int, int)!, !tuple of (bool, bool, bool)!, !tuple of (bool, bool, bool)!, int)


In [31]:
@torch.no_grad()
def do_inference(
    network: nn.Module,
    params: TrainingConfig,
    test_dataloader: torch.utils.data.DataLoader,
    device: torch.device,
    compute_loss: bool = False,
    inference_types: Union[List[str], None] = None,
    return_dict: bool = False,
) -> Tuple[Dict[int, int], Dict[int, List[torch.Tensor]], Dict[int, Any]]:
    """
    Given a trained network and a dataloader, run the data through the network
    and collect the raw predictions of every chunk, grouped by movie.

    Gradients are disabled and the network is moved to ``device`` and put in
    evaluation mode before the loop starts.

    Args:
        network (nn.Module): The trained network.
        params (TrainingConfig): Configuration; ``params.unet_steps`` and
            ``params.inference`` are read here.
        test_dataloader (torch.utils.data.DataLoader): Loader yielding dict
            batches with at least the keys ``"movie_id"``, ``"data"`` and
            ``"original_duration"``.
        device (torch.device): Device on which the network is run.
        compute_loss (bool): Currently unused (work in progress).
        inference_types (List[str] | None): Inference strategies to apply; when
            empty or None it falls back to ``[params.inference]``. The value is
            not consumed yet (work in progress).
        return_dict (bool): Currently unused (work in progress).

    Returns:
        A tuple ``(chunks_per_movie, predictions_per_movie,
        durations_per_movie)`` of dicts keyed by the integer movie id:
        the number of chunks seen, the list of per-chunk predictions (on CPU),
        and the original duration of the movie.
    """
    # Avoid a shared mutable default; an empty list is treated like None.
    if not inference_types:
        inference_types = [params.inference]

    network.to(device)
    network.eval()

    # Loop over batches and store results and counts of chunks for each movie
    predictions_per_movie = {}
    chunks_per_movie = {}
    durations_per_movie = {}

    for batch in test_dataloader:
        batch_data = batch["data"].to(device, non_blocking=True)

        # Run the network on the batch
        batch_preds = run_batch_in_unet(
            network=network, batch_data=batch_data, unet_steps=params.unet_steps
        ).cpu()

        for idx, raw_movie_id in enumerate(batch["movie_id"]):
            # Elements of a collated tensor are 0-dim tensors, which hash by
            # identity; convert to a plain int so that chunks belonging to the
            # same movie end up under the same dictionary key.
            movie_id = int(raw_movie_id)

            # add each movie_id in the batch to the dicts if not already present
            if movie_id not in predictions_per_movie:
                predictions_per_movie[movie_id] = []
                chunks_per_movie[movie_id] = 0
                # Index by position in the batch, not by movie id.
                # Assumes "original_duration" is collated per sample, like
                # "movie_id" -- TODO confirm against the dataset.
                durations_per_movie[movie_id] = batch["original_duration"][idx]

            chunks_per_movie[movie_id] += 1
            # Store the network output for this chunk.
            predictions_per_movie[movie_id].append(batch_preds[idx])

    return chunks_per_movie, predictions_per_movie, durations_per_movie

Open question: could the padding of the input be done inside the network's forward method, instead of in a separate function?