In [12]:
%load_ext autoreload
%autoreload 2

import torchvision.models as models
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T
import numpy as np
import pandas
import matplotlib.pyplot as plt

import lightning as L
from lightning.pytorch.loggers import TensorBoardLogger

import torchmetrics
import webdataset as wds

import label_mapping

TORCH_ACCELERATOR = "cpu"

torch.cuda.get_device_name(0)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'NVIDIA GeForce GTX 1070'

In [28]:
%run datasets.ipynb

DATASET_ROOT = Path.home() / "datasets" / "im2gps" / "outputs"

# Load the s2cell-annotated dataset
annotated_df = pandas.read_pickle(DATASET_ROOT / "s2cell_2007" / "annotated.pkl")
mapping = label_mapping.LabelMapping.read_csv(DATASET_ROOT / "s2cell_2007" / "cells.csv")

mapping

<label_mapping.LabelMapping at 0x7f0b48888d50>

In [46]:

image_id_to_s2cell = {row.id: row.s2cell for row in annotated_df.itertuples()}

# total set has ~630k images
BATCH_SIZE = auto_batch_size()
print("Batch size:", BATCH_SIZE)

# Transform s2cells to labels, skipping examples without s2cell
def to_img_label(sample):
    img, meta = sample
    s2cell = image_id_to_s2cell.get(meta["id"])
    if s2cell is None:
        raise NotImplementedError("Skipping example without s2cell")
    label = mapping.get_label(s2cell)
    # TODO: this is where we transform the image
    return img, label

def urls_to_dataset(urls):
    return wds.WebDataset(urls, shardshuffle=True)\
        .shuffle(100)\
        .decode("torchrgb").to_tuple("jpg", "json")\
        .map(to_img_label, handler=wds.ignore_and_continue)\
        .batched(BATCH_SIZE)

train_dataset = urls_to_dataset(str(DATASET_ROOT / "wds" / "im2gps_2007_train_{000..031}.tar"))
val_dataset = urls_to_dataset(str(DATASET_ROOT / "wds" / "im2gps_2007_val_{000..007}.tar"))

# Visualize a few loaded samples
train_dataloader = wds.WebLoader(train_dataset, batch_size=None, num_workers=0)
for inputs, targets in train_dataloader:
    print(inputs.shape, targets.shape, targets)
    break

val_dataloader = wds.WebLoader(val_dataset, batch_size=None, num_workers=0)
for inputs, targets in val_dataloader:
    print(inputs.shape, targets.shape, targets)
    break

Unknown device: NVIDIA GeForce GTX 1070
Batch size: 1
torch.Size([1, 3, 768, 1024]) torch.Size([1]) tensor([1677])
torch.Size([1, 3, 451, 1024]) torch.Size([1]) tensor([1078])


In [39]:
# Define a LightningModule for the classifier
class NcalScalClassifierMnet3(L.LightningModule):
    def __init__(self, num_classes):
        super().__init__()

        mnet3 = models.mobilenet_v3_large(weights="IMAGENET1K_V2")

        self.features = mnet3.features
        self.avgpool = mnet3.avgpool
        hidden_size = 2048
        self.classifier = nn.Sequential(
            nn.Linear(mnet3.classifier[0].in_features, hidden_size),
            nn.Hardswish(inplace=True),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(hidden_size, num_classes), # out is 1776
        )

        torch.nn.init.xavier_uniform_(self.classifier[0].weight)
        torch.nn.init.xavier_uniform_(self.classifier[3].weight)

        self.accuracy = torchmetrics.classification.Accuracy(task='multiclass', num_classes=2)

    def forward(self, x):
        with torch.no_grad():
            x = self.features(x)
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        z = self.forward(x)
        loss = nn.CrossEntropyLoss()(z, y)
        self.log("train_loss", loss, prog_bar=True)

        preds = torch.argmax(z, dim=1)
        self.accuracy(preds, y)
        self.log('train_acc_step', self.accuracy, prog_bar=True)

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        z = self.forward(x)
        val_loss = nn.CrossEntropyLoss()(z, y)
        self.log("val_loss", val_loss, prog_bar=True)

        preds = torch.argmax(z, dim=1)
        self.accuracy(preds, y)
        self.log('val_acc', self.accuracy, on_step=False, on_epoch=True)
        return val_loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        z = self.forward(x)
        test_loss = nn.CrossEntropyLoss()(z, y)
        self.log("test_loss", test_loss, prog_bar=True)

        preds = torch.argmax(z, dim=1)
        self.accuracy(preds, y)
        self.log('test_acc', self.accuracy, on_step=False, on_epoch=True)
        return test_loss

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=0.001)

In [48]:
mnet3_model = NcalScalClassifierMnet3(num_classes=len(mapping))

# Quick test run
L.Trainer(
    accelerator=TORCH_ACCELERATOR,
    fast_dev_run=True,
).fit(model=mnet3_model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name       | Type               | Params
--------------------------------------------------
0 | features   | Sequential         | 3.0 M 
1 | avgpool    | AdaptiveAvgPool2d  | 0     
2 | classifier | Sequential         | 5.6 M 
3 | accuracy   | MulticlassAccuracy | 0     
--------------------------------------------------
8.6 M     Trainable params
0         Non-trainable params
8.6 M     Total params
34.316    Total estimated model params size (MB)


Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  1.41it/s, train_loss=7.580, train_acc_step=0.000, val_loss=7.550]

`Trainer.fit` stopped: `max_steps=1` reached.


Epoch 0: 100%|██████████| 1/1 [00:00<00:00,  1.40it/s, train_loss=7.580, train_acc_step=0.000, val_loss=7.550]


In [49]:
# Full training
trainer = L.Trainer(
    accelerator=TORCH_ACCELERATOR,
    callbacks=[
        L.pytorch.callbacks.ModelCheckpoint(monitor="val_loss", mode="min"),
        L.pytorch.callbacks.EarlyStopping(monitor="val_loss", mode="min"),
    ],
    max_epochs=1,
)
trainer.fit(model=mnet3_model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | features   | Sequential         | 3.0 M 
1 | avgpool    | AdaptiveAvgPool2d  | 0     
2 | classifier | Sequential         | 5.6 M 
3 | accuracy   | MulticlassAccuracy | 0     
--------------------------------------------------
8.6 M     Trainable params
0         Non-trainable params
8.6 M     Total params
34.316    Total estimated model params size (MB)


Epoch 0: : 1095it [04:32,  4.01it/s, v_num=2, train_loss=5.690, train_acc_step=0.000]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
