In [9]:
# Enable IPython's autoreload extension. Mode 2 asks it to reload imported
# modules before each cell executes, so edits to the project's .py files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
from typing import Optional
import yaml
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from torchvision.transforms import v2 as T # type: ignore
from torchvision.models import resnet18, ResNet18_Weights
from geovision.config.basemodels import ExperimentConfig # noqa
from geovision.data.module import ImageDatasetDataModule
from geovision.training.module import ClassificationModule

# Transform pipelines keyed by the slot name they are handed over under. Only
# the image slot is populated (convert to an image tensor, then cast the dtype
# to float32); the target and common slots are explicitly left unset.
transforms: dict[str, T.Transform | None] = dict(
    image_transform=T.Compose([T.ToImage(), T.ToDtype(torch.float32)]),
    target_transform=None,
    common_transform=None,
)

# Read the experiment configuration from config.yaml, passing the transforms
# mapping along with it.
config = ExperimentConfig.from_config_file("config.yaml", transforms)

# Both the data module and the classification module are built from the same
# config object.
datamodule = ImageDatasetDataModule(config)

# The ResNet-18 is created with only `num_classes` given (taken from the
# dataset section of the config); no `weights` argument is passed.
classifier = ClassificationModule(
    config=config,
    model=resnet18(num_classes=config.dataset.num_classes),
)

In [4]:
# Run the datamodule's setup for the "fit" stage.
# NOTE(review): presumably this is what populates `train_dataset` - confirm.
datamodule.setup("fit")
# Index the training dataset at position 0 and unpack the resulting 3-item
# sample into its image, target and idx parts for inspection in later cells.
image, target, idx = datamodule.train_dataset[0]

In [40]:
# Sanity check: the sample image loaded from the training dataset should be
# float32, since the image transform ends with T.ToDtype(torch.float32).
# The dtype is referenced through the `torch` module imported in the notebook's
# import cell (the same spelling used when the transform was built) instead of
# re-importing `float32` in the middle of the notebook.
image.dtype == torch.float32

True

In [33]:
# Testing the Dataset, DataLoader and DataModule: what can go wrong?
# Dataset must expose a specific interface -> DatasetABC
# Dataset must have a specific default_behaviour -> PyTest
# Dataset must verify root, split, df and transforms -> Validator
# Dataset must have a specific df schema -> Pandera.DataFrameSchema
#   1. Make sure all required columns are present and have the right dtype
#   2. Make sure the sampling algorithms work as expected (e.g. no overlaps in tabular sampling)
# Dataset must have a specific output shape and type -> PyTest
#   1. Make sure the dataset can load and output every single image in the df
#   2. Make sure all dataset samples have the same shape
# Dataset: plot_one_sample() and plot_batch()
# DataLoader: TODO - checks for the DataLoader are not listed yet

ClassificationModule(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True

In [14]:
import torch
# Scratch convolution used to inspect how a Conv2d bias is initialised.
# Every constructor argument is spelled out by keyword so the 3x3 kernel,
# stride of 3 and zero padding are readable without counting positions.
conf1 = torch.nn.Conv2d(
    in_channels=3,
    out_channels=1,
    kernel_size=3,
    stride=3,
    padding=0,
    dilation=1,
    groups=1,
    bias=True,
    padding_mode="zeros",
)
# Last expression of the cell: display the bias parameter of the layer.
conf1.bias


Parameter containing:
tensor([0.1223], requires_grad=True)