<a href="https://colab.research.google.com/github/citruscaesar/building-footprint-extraction/blob/main/experiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Download Dependencies

In [None]:
%%capture
!pip install wandb
!pip install lightning
!pip install torchmetrics
!pip install segmentation-models-pytorch
!pip install rasterio

In [None]:
# Fetch the project repository into the current directory.
!git clone https://github.com/citruscaesar/building-footprint-extraction.git
# Make sure the destination for the package copy exists.
!mkdir -p /content/src
# Copy the repository's src/ directory to /content/src -- presumably so the
# `from src...` imports further down resolve when running from /content.
!cp -r /content/building-footprint-extraction/src/ /content/

In [None]:
# Colab-only: expose Google Drive under /content/drive so the dataset
# archive and checkpoints stored there can be read and written.
from google.colab import drive

drive.mount(mountpoint = '/content/drive')

In [None]:
# Create the local dataset directory tree (no error if it already exists).
!mkdir -p ./inria/train

In [None]:
# Sanity check: show the contents of the working directory.
!ls

In [None]:
%%capture
'''
#!cp -r /content/drive/MyDrive/datasets/inria/AerialImageDataset/train /content/inria/train
#!cp -r /content/drive/MyDrive/datasets/inria/AerialImageDataset/tiled/* /content/inria/tiled
#!mkdir -p inria/tiled
'''

!mkdir -p ./inria/train
!cp /content/drive/MyDrive/datasets/inria/archive/tiled.zip ./inria  
!unzip inria/tiled.zip -d ./inria/

In [None]:
# Sanity check: show what ended up under ./inria/ after extraction.
!ls ./inria/

In [None]:
from pathlib import Path

# Locations of the 512x512 tiled dataset: the copy on Google Drive and the
# copy on the Colab runtime's local disk.
tiled = dict(
    drive = Path("/content/drive/MyDrive/datasets/inria/AerialImageDataset/tiled/512x512"),
    runtime = Path("/content/inria/tiled/512x512"),
)

# Directory holding the training scene tiles on the runtime disk.
train_scenes = tiled["runtime"].joinpath("train", "scenes")

In [None]:
# Verify the extraction produced the expected number of entries in the
# training scenes directory; report the actual count when it does not.
EXPECTED_TRAIN_TILES = 18600
num_train_tiles = sum(1 for _ in train_scenes.iterdir())
assert num_train_tiles == EXPECTED_TRAIN_TILES, (
    f"Expected {EXPECTED_TRAIN_TILES} entries in {train_scenes}, found {num_train_tiles}"
)

### Imports and Config

In [1]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import rasterio as rio

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

import pytorch_lightning as pl
import torchmetrics 
from torchmetrics import MetricCollection
from torchmetrics.classification import BinaryJaccardIndex, BinaryConfusionMatrix, BinaryF1Score, BinaryCohenKappa

import wandb
from pytorch_lightning.loggers import WandbLogger, CSVLogger

import segmentation_models_pytorch as smp
from segmentation_models_pytorch.encoders import get_preprocessing_fn

from src.datamodule import InariaDataModule
from src.lightningmodule import SegmentationModule

In [3]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU,
# and report the library versions in use.
has_cuda = torch.cuda.is_available()
DEVICE = torch.device("cuda") if has_cuda else torch.device("cpu")

print("Pytorch Version:", torch.__version__)
print("Lightning Version:", pl.__version__)
print("Pytorch Running on :", DEVICE)
#print(torch.__config__.show())

Pytorch Version: 1.13.1
Lightning Version: 2.0.0
Pytorch Running on : cuda


In [4]:
# Root of the untiled training split: <parent of cwd>/datasets/inria/train.
# Alternative locations used on other hosts:
#DATA = Path("/content/drive/MyDrive/datasets/inria/AerialImageDataset/train")
#DATA = Path("/media/sambhav/30AC4696AC46568E/datasets/inaria/train")
DATA = Path.cwd().parent.joinpath("datasets", "inria", "train")

In [6]:
# Show the entries of the training directory and of its sibling `tiled`
# directory, one list per line.
for directory in (DATA, DATA.parent / "tiled"):
    print(list(directory.iterdir()))

[WindowsPath('c:/Users/Sambhav Chandra/dev/datasets/inria/train/gt'), WindowsPath('c:/Users/Sambhav Chandra/dev/datasets/inria/train/images')]
[WindowsPath('c:/Users/Sambhav Chandra/dev/datasets/inria/tiled/512x512')]


### Experiments

In [23]:
#TODO: Create Config Object, Setup to Return Config Dict
# Per-host hyperparameters. `host` selects which entry becomes `config`,
# the flat dict the remaining cells read from.
host = "local"

HOST_CONFIGS = {
    "colab": {
        "tile_shape": (512, 512),
        "batch_size": 32,
        # 1 means "no gradient accumulation"; 0 is not a valid number of
        # batches to accumulate over.
        "accumulate": 1,
        "num_workers": 2,
        "learning_rate": 3e-4,
        "num_epochs": 5,
        "seed": 69
    },

    "local": {
        "tile_shape": (512, 512),
        "batch_size": 4,
        "accumulate": 4,
        "num_workers": 4,
        "learning_rate": 3e-4,
        "num_epochs": 1,
        "seed": 69
    }
}

# Validate against the table itself so the check cannot drift from its keys.
assert host in HOST_CONFIGS, f"Unknown host {host!r}; expected one of {sorted(HOST_CONFIGS)}"

# Kept under a separate name from the table so re-running this selection
# never indexes an already-selected dict.
config = HOST_CONFIGS[host]

In [27]:
# Seed the random number generators through Lightning (including dataloader
# workers) and keep the value that seed_everything returns.
GLOBAL_SEED = pl.seed_everything(config["seed"], workers = True)

Global seed set to 69


In [28]:
# Encoder settings are named once so the network and its input preprocessing
# function are always built for the same backbone and weights.
ENCODER_NAME = "resnet18"
ENCODER_WEIGHTS = "imagenet"

# U-Net taking 3-channel input and producing a single output channel.
unet = smp.Unet(
    encoder_name = ENCODER_NAME,
    encoder_weights = ENCODER_WEIGHTS,
    in_channels = 3,
    classes = 1,
)
preprocess = get_preprocessing_fn(ENCODER_NAME, pretrained = ENCODER_WEIGHTS)

In [40]:
# Wrap the network and its preprocessing function in the project's
# LightningModule, with the dice loss and the hyperparameters from `config`.
module_options = dict(
    loss = "dice",
    learning_rate = config["learning_rate"],
    batch_size = config["batch_size"],
)
model = SegmentationModule(unet, preprocess, **module_options)

In [36]:
# Build the project's data module over the DATA directory using the tile
# shape, batch size and worker count from `config`.
datamodule_options = dict(
    tile_shape = config["tile_shape"],
    batch_size = config["batch_size"],
    num_workers = config["num_workers"],
)
dm = InariaDataModule(DATA, **datamodule_options)

In [31]:
# Weights & Biases logging is opt-in. Previously this setup was parked inside
# a string literal, which did nothing except echo its own repr as cell output.
# Set USE_WANDB = True to create `wandblogger`.
USE_WANDB = False

if USE_WANDB:
    #os.environ["WANDB_NOTEBOOK_NAME"] = "experiments"
    # Close any run left open by a previous execution of this cell.
    wandb.finish()
    logs_dir = Path.cwd() / "logs"
    logs_dir.mkdir(exist_ok = True)
    wandblogger = WandbLogger(
        project = "building-segmentation-tests",
        save_dir = logs_dir
    )

'\n#os.environ["WANDB_NOTEBOOK_NAME"] = "experiments"\nwandb.finish()\nlogs_dir = Path.cwd() / "logs"\nlogs_dir.mkdir(exist_ok = True)\nwandblogger = WandbLogger(\n    project = "building-segmentation-tests",\n    save_dir = logs_dir \n) \n'

In [34]:
# Smoke-test trainer: fast_dev_run = 1 runs a single batch through the loops.
dev_trainer = pl.Trainer(
    accelerator = "auto",
    fast_dev_run = 1,
)

# `accelerator = "auto"` may select a non-CUDA device, so request 16-bit
# mixed precision only when CUDA is available and use 32-bit otherwise.
TRAIN_PRECISION = 16 if torch.cuda.is_available() else 32

# Full training run for the number of epochs given in `config`.
trainer = pl.Trainer(
    accelerator = "auto",
    #accumulate_grad_batches = config["accumulate"],
    #limit_train_batches = .25,
    max_epochs = config["num_epochs"],
    precision = TRAIN_PRECISION,
    #logger = [wandblogger],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
  rank_zero_warn(
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [35]:
# Single-batch dry run to catch wiring errors before the real training run.
dev_trainer.fit(model = model, datamodule = dm)

  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type             | Params
--------------------------------------------------
0 | model        | Unet             | 14.3 M
1 | loss         | DiceLoss         | 0     
2 | val_metrics  | MetricCollection | 0     
3 | test_metrics | MetricCollection | 0     
--------------------------------------------------
14.3 M    Trainable params
0         Non-trainable params
14.3 M    Total params
57.313    Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_steps=1` reached.


In [None]:
# Full training run on the data module.
trainer.fit(model = model, datamodule = dm)

In [41]:
# Restore trained weights from a saved checkpoint file.
# map_location remaps the stored tensors onto DEVICE, so a checkpoint written
# on a GPU machine also loads on a CPU-only host.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
# NOTE(review): this reads ./checkpoints/..., while the save cell further down
# writes ./segmodel-checkpoint.ckpt -- confirm which location is intended.
checkpoint = torch.load("./checkpoints/segmodel-checkpoint.ckpt", map_location = DEVICE)
model.load_state_dict(checkpoint["state_dict"])

<All keys matched successfully>

In [42]:
# Evaluate the model on the data module's test data and show the metrics.
trainer.test(model = model, datamodule = dm)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

[{'test/loss': 0.1669280230998993,
  'Test BinaryF1Score': 0.8604145050048828,
  'Test BinaryJaccardIndex': 0.7550241351127625,
  'Test BinaryAccuracy': 0.9610534906387329}]

In [None]:
# Write the trainer's current state to a checkpoint file in the working directory.
trainer.save_checkpoint(filepath = "./segmodel-checkpoint.ckpt")

In [None]:
# Colab-only: copy the saved checkpoint to the dataset folder on Google Drive.
!cp ./segmodel-checkpoint.ckpt /content/drive/MyDrive/datasets/inria/

In [None]:
# Close the Weights & Biases run, if any.
wandb.finish()

In [None]:
# Colab-only: flush pending writes to Google Drive and unmount it.
drive.flush_and_unmount()