In [1]:
## Environment Variables
from dotenv import load_dotenv
load_dotenv(".env");

## System Modules
from pathlib import Path

## General Purpose Libraries 
import torch
import matplotlib.pyplot as plt

## Paths and Directory Management
from etl.pathfactory import PathFactory
from etl.etl import reset_dir

## Datasets and Datamodules
from data.datamodules import ImageDatasetDataModule 
from datasets.inria import (
    InriaBase, InriaImageFolder, InriaStreaming
)

## Transforms
import torchvision.transforms.v2 as t

## Tasks
from training.tasks import SegmentationTask 

## Loggers
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import WandbLogger, CSVLogger
from lightning import seed_everything

##Trainers
from lightning import Trainer

#Types
from typing import Literal

In [2]:
# Train Val Test Splits
# 1. Random Split: Each location will be split based on the test_split and val_split parameters
# 2. Continental Split: Train on Europe, Test on NA or Vice Versa 
# 3. Cultural Split: Train on Developed Locations Like Paris, Chicago and Zurich and Test on Rawanda, Kenya and Rio  
# 4. Unsupervised Split: Unsupervised Training on Inria-Test and Finetune on Inria-Train (with varying fractions of training data)

In [3]:
LOGS_DIR = Path.cwd() / "logs"
CHECKPOINTS_DIR = LOGS_DIR / "checkpoints"
reset_dir(LOGS_DIR)
reset_dir(CHECKPOINTS_DIR)

def setup_checkpoint(
        checkpoint_dir: Path,
        monitor_metric: str,
        mode: Literal["min", "max"],
    ) -> ModelCheckpoint:
    return ModelCheckpoint(
        dirpath=checkpoint_dir,
        filename="{epoch}-{" + f"{monitor_metric}" + ":2d}",
        monitor=monitor_metric,
        mode=mode,
        save_top_k=1,
        save_last=True,
        save_on_train_epoch_end=True,
    )

def setup_logger(
        logs_dir: Path,
        name: str,
        version: int
    ):
    return CSVLogger(
        save_dir=logs_dir,
        name=name,
        version=version)

In [4]:
experiment = {
    "dataset_name": "inria",
    "task": "segmentation",
    "random_seed": 69,
    "tile_size": (512, 512),
    "tile_stride": (512, 512),

    "val_split": 0.2,
    "test_split": 0.2,
    "batch_size": 4,
    "grad_accum": 1,
    "num_workers": 4,

    "num_classes": 1,
    "loss": "cross_entropy",
    "optimizer": "adam",
    "learning_rate": 1e-5,

    "checkpoint_metric": "val_macro_accuracy",
    "checkpoint_mode": "max"
}
seed_everything(experiment["random_seed"]);

model_ckpt = setup_checkpoint(
    CHECKPOINTS_DIR, 
    experiment["checkpoint_metric"],
    experiment["checkpoint_mode"]
)

logger = setup_logger(
    LOGS_DIR,
    experiment["dataset_name"] + '-' + experiment["task"],
    version = 1 
)

paths = PathFactory(experiment["dataset_name"], experiment["task"])
dataset = InriaStreaming

image_transform = t.Compose([
    t.ToImage(),
    t.ToDtype(torch.float32, scale=True),
])

mask_transform = t.Compose([
    t.ToImage(),
    t.ToDtype(torch.float32, scale=True),
    #t.ToDtype(torch.int32, scale=False),
])

augmentations = t.Compose([
    t.Identity()
])

InriaBase.write_to_hdf(paths.path, **experiment)
#datamodule = ImageDatasetDataModule(
    #root = Path.home() / "dataset" / "urban-footprint",
    #is_remote = False,
    #is_streaming = False,
    #dataset_constructor = dataset, 
    #image_transform = image_transform,
    #target_transform = mask_transform,
    #common_transform = augmentations,
    #**experiment
#)

Seed set to 69


Local Dataset (.path): /home/sambhav/datasets/inria
Local Shards (.shards_path): /home/sambhav/datasets/inria
Remote Dataset (.url): /home/sambhav/datasets/inria
Local Shards (.shards_url): /home/sambhav/datasets/inria


TypeError: InriaBase.write_to_hdf() missing 5 required positional arguments: 'root', 'hdf5_file_name_with_suffix', 'val_split', 'test_split', and 'random_seed'

In [None]:
#datamodule.setup("fit")
#ds = datamodule.train_dataset
#dl = datamodule.train_dataloader()

In [None]:
## Models
#from torchgeo.models import FarSeg
#model = FarSeg(
    #backbone="resnet18",
    #classes=1,
    #backbone_pretrained=True
#)
from torchgeo.models import FCN
model = FCN(
    in_channels = 3,
    classes = 1,
    num_filters = 32 
)

In [None]:
last_ckpt_path = (CHECKPOINTS_DIR / "last.ckpt").as_posix() if (CHECKPOINTS_DIR / "last.ckpt").is_file() else None
trainer = Trainer(
    logger = logger,
    callbacks = model_ckpt,
    #fast_dev_run=True
    max_epochs=1
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(
    model = SegmentationTask(model, **experiment),
    datamodule = datamodule,
    ckpt_path = last_ckpt_path 
)

Because `predownload` was not specified, it will default to 8*batch_size if batch_size is not None, otherwise 64. Prior to Streaming v0.7.0, `predownload` defaulted to max(batch_size, 256 * batch_size // num_canonical_nodes).
Because `predownload` was not specified, it will default to 8*batch_size if batch_size is not None, otherwise 64. Prior to Streaming v0.7.0, `predownload` defaulted to max(batch_size, 256 * batch_size // num_canonical_nodes).
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name                  | Type                  | Params
----------------------------------------------------------------
0 | model                 | FCN                   | 37.9 K
1 | criterion             | CrossEntropyLoss      | 0     
2 | val_metrics           | MetricCollection      | 0     
3 | val_cohen_kappa       | BinaryCohenKappa      | 0     
4 | val_confusion_matrix  | BinaryConfusionMatrix | 0     
5 | test_metrics          | MetricCollection      | 0     
6 | test_cohen_kappa      |

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/sambhav/miniconda3/envs/dev/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.
/home/sambhav/miniconda3/envs/dev/lib/python3.10/site-packages/lightning/pytorch/utilities/data.py:121: Your `IterableDataset` has `__len__` defined. In combination with multi-process data loading (when num_workers > 1), `__len__` could be inaccurate if each worker is not configured independently to avoid having duplicate data.
Because `num_canonical_nodes` was not specified, and `shuffle_algo` is py1e, it will default to be equal to physical nodes. Prior to Streaming v0.7.0, `num_canonical_nodes` defaulted to 64 * physical nodes.
Because `shuffle_block_size` was not specified, it will default to max(4_000_000 // num_canonical_nodes, 1 << 18) if num_canonical_nodes is n

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

ValueError: Unknown format code 'd' for object of type 'float'