<a href="https://colab.research.google.com/github/cbun1123/map_fit/blob/main/Stage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
!rm -r /content/map_fit
!git clone https://github.com/cbun1123/map_fit

!unzip /content/map_fit/nrevol/X_train_1.zip -d /content/map_fit/X_train
!unzip /content/map_fit/nrevol/X_train_2.zip -d /content/map_fit/X_train
!unzip /content/map_fit/nrevol/X_train_3.zip -d /content/map_fit/X_train
!unzip /content/map_fit/nrevol/X_train_4.zip -d /content/map_fit/X_train
!unzip /content/map_fit/nrevol/X_train_5.zip -d /content/map_fit/X_train
!unzip /content/map_fit/nrevol/X_train_6.zip -d /content/map_fit/X_train
!unzip /content/map_fit/nrevol/X_train_7.zip -d /content/map_fit/X_train
!unzip /content/map_fit/nrevol/X_train_8.zip -d /content/map_fit/X_train
!unzip /content/map_fit/nrevol/X_train_9.zip -d /content/map_fit/X_train
!unzip /content/map_fit/nrevol/X_train_10.zip -d /content/map_fit/X_train

!unzip /content/map_fit/nrevol/X_test_1.zip -d /content/map_fit/X_test
!unzip /content/map_fit/nrevol/X_test_2.zip -d /content/map_fit/X_test

!rm /content/map_fit/*.zip

In [None]:
%%capture
!pip install pytorch-lightning
!pip install tensorboardcolab
!pip install torchmetrics
%env PYTHONPATH=.:$PYTHONPATH

In [None]:
# Print the GPU attached to this runtime (driver, memory, utilisation).
!nvidia-smi
# NOTE(review): `n` is not referenced anywhere else in the visible notebook - confirm it is still needed.
n = 1

In [None]:
import numpy as np
import cv2
import glob
import math
import time
import os
import gc
import natsort

from google.colab import files
from google.colab.patches import cv2_imshow
from google.colab import drive

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.onnx
from torch.utils.data import Dataset,DataLoader
from torch.utils.tensorboard import SummaryWriter

import pytorch_lightning as pl

import torchmetrics
from torchmetrics.functional import *

import torchvision
from torchvision import transforms

In [None]:
# Load the TensorBoard notebook extension and open it on the log folder.
# The logger created in the training cell writes to <cwd>/lightning_logs;
# presumably the working directory is /content on Colab - verify if logs do not show up.
%load_ext tensorboard
%tensorboard --logdir='/content/lightning_logs'

In [None]:
def load_image_stack(pattern):
    """Read every image matching ``pattern`` in natural filename order.

    Args:
        pattern: glob pattern selecting the image files.

    Returns:
        A ``(paths, images)`` tuple: the naturally sorted file paths and the
        arrays decoded by ``cv2.imread`` with the "unchanged" flag (same value
        as the ``-1`` used before), in the same order.

    Raises:
        FileNotFoundError: if no file matches ``pattern``.
        IOError: if OpenCV cannot decode one of the files (``cv2.imread``
            signals this by returning ``None`` instead of raising).
    """
    paths = natsort.natsorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError(f'No image matches {pattern!r}')

    images = []
    for path in paths:
        image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if image is None:
            raise IOError(f'cv2.imread could not read {path!r}')
        images.append(image)
    return paths, images


## Training data

# `filenames` stays a module-level name because a later cell deletes it.
filenames, X_train = load_image_stack("/content/map_fit/X_train/*.tiff")

y_train = np.load('/content/map_fit/nrevol/Y_train.npy')
print(f'Training set : {np.shape(X_train)}')

## Testing data

filenames, X_test = load_image_stack("/content/map_fit/X_test/*.tiff")

y_test = np.load('/content/map_fit/nrevol/Y_test.npy')
print(f'Testing set : {np.shape(X_test)}')

In [None]:
SIZE = 224

# Circular mask: True strictly inside the unit circle of a SIZE x SIZE grid
# spanning [-1, 1] on both axes, replicated on three channels.
lin = np.linspace(-1, 1, SIZE)
Xm, Ym = np.meshgrid(lin, lin)
idx = np.less(Xm * Xm + Ym * Ym, 1)
idx = np.stack((idx,) * 3, axis=0)
idx_t = torch.from_numpy(idx)

# Define datasets
class imageDataset(Dataset):
    """Dataset pairing images with their target vectors.

    Both inputs are converted to float32 arrays once, at construction time.
    Each item is the transformed image multiplied by the module-level mask
    ``idx_t``, together with the matching target as a tensor.
    """

    def __init__(self,X,y):
        """Store the data as float32 arrays and build the image transform.

        Args:
            X: sequence of images; the first axis of the resulting array is
                used as the sample count.
            y: targets, indexed the same way as ``X``.
        """
        self.X = np.array(X).astype(np.float32) # /(2**16)
        self.y = np.array(y).astype(np.float32) # /(2**16)
        self.n_samples = self.X.shape[0]

        # PIL round trip, 3-channel grayscale, tensor conversion, then
        # per-channel normalisation.
        # NOTE(review): the mean/std values look like the usual ImageNet
        # statistics - confirm they suit these images.
        # NOTE(review): ToPILImage receives float32 arrays here; check how the
        # value range survives the PIL conversion.
        steps = [
            transforms.ToPILImage(),
            transforms.Grayscale(num_output_channels=3),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
        self.transform = transforms.Compose(steps)

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

    def __getitem__(self, index):
        """Return the ``(masked image, target)`` pair stored at ``index``."""
        image = self.transform(self.X[index])
        target = torch.from_numpy(self.y[index])
        # Element-wise product with the mask zeroes everything the mask excludes.
        return torch.mul(image, idx_t), target

# Init datasets
trainset = imageDataset(X_train,y_train)
testset = imageDataset(X_test,y_test)

# Garbage collection
del X_train
del y_train
del X_test
del y_test
del filenames
del Xm
del Ym
del lin
del idx
gc.collect()

88

Freezing weights:
```
for param in backbone.parameters():
    param.requires_grad = False
self.net = backbone
```

Replacing last layer:
```
backbone.fc = nn.Linear(in_features=backbone.fc.in_features, out_features=2, bias=True)
self.net = backbone
```

Adding new last layer:
```
added_layer = nn.Linear(in_features=backbone.fc.out_features, out_features=2, bias=True)
self.net = nn.Sequential(backbone,nn.ReLU(inplace=True),added_layer)
```

Dropout testing
~~~
def append_dropout(model, rate):
    """Apply dropout to the output of every ``nn.ReLU`` found in ``model``.

    The module tree is walked recursively. Each ReLU gets a forward hook whose
    return value replaces the ReLU output with ``F.dropout`` of it. Because
    the hook reads ``m.training``, dropout is only active in training mode.

    Args:
        model: module whose children are searched; it is modified in place.
        rate: dropout probability passed to ``F.dropout``.
    """
    for module in model.children():
        # Containers: descend into their own children first.
        if len(list(module.children())) > 0:
            append_dropout(module, rate)
        if isinstance(module, nn.ReLU):
            # The hook is attached to the existing module, so nothing has to
            # be re-assigned on the parent.
            module.register_forward_hook(lambda m, inp, out: F.dropout(out, p=rate, training=m.training))
~~~

In [None]:
# Backbone: name of the torchvision model fetched through torch.hub
model_name = "shufflenet_v2_x1_0"

# Data loading / schedule
BATCH_SIZE = 512
num_epochs = 10
val_check_interval = 0.5

# Optimizer settings
learning_rate = 0.001
betas = [0.9, 0.999]   # gradient decay factor, squared gradient decay factor
eps = 1e-8
weight_decay = 1e-3    # L2 regularization

# Learning-rate schedule
step_size = 3          # learn rate drop period
gamma = 0.8            # learn rate drop factor

# Dropout rate, 0 for none
rate = 0.0

# Defining model
class LitModel(pl.LightningModule):
    """Lightning module regressing two values per image with a pretrained backbone.

    The backbone named by ``model_name`` is loaded through ``torch.hub`` and
    its final fully connected layer is replaced by a 2-output linear layer.
    The dataloaders, the losses and the optimizer live on the module, so
    ``trainer.fit(model)`` needs no further arguments.
    """

    def __init__(self):
        super().__init__()

        # Pretrained torchvision backbone with its `fc` layer replaced by a
        # fresh 2-output linear layer.
        backbone = torch.hub.load('pytorch/vision:v0.10.0', model_name, pretrained=True)
        backbone.fc = nn.Linear(in_features=backbone.fc.in_features, out_features=2, bias=True)
        self.net = backbone

        # Kept as attributes so they can be read back in the dataloaders and
        # in configure_optimizers.
        self.batch_size = BATCH_SIZE
        self.lr = learning_rate

    def train_dataloader(self):
        """Return the training loader; samples are reshuffled every epoch."""
        return DataLoader(dataset=trainset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Return the validation loader (built on the test set, fixed order)."""
        return DataLoader(dataset=testset, batch_size=self.batch_size)

    def test_dataloader(self):
        """Return the test loader (fixed order)."""
        return DataLoader(dataset=testset, batch_size=self.batch_size)

    def forward(self,x):
        """Run the backbone on ``x``."""
        return self.net(x)

    def _msle_loss(self, batch):
        """Return the mean squared log error of the network on ``batch``."""
        inputs, labels = batch
        output = self.net(inputs)
        # NOTE(review): the head is a plain linear layer, so predictions are
        # not constrained to the domain of the log used by this metric -
        # confirm this cannot yield non-finite losses on this data.
        return mean_squared_log_error(output,labels) # alternative: F.mse_loss(output,labels)

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss for one batch."""
        loss = self._msle_loss(batch)

        # Logging to TensorBoard by default
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss for one batch."""
        loss = self._msle_loss(batch)

        # Logging to TensorBoard by default
        self.log("val_loss", loss, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        """Compute and log the mean absolute relative error for one batch."""
        inputs, labels = batch
        output = self.net(inputs)
        # NOTE(review): the error is normalised by the prediction rather than
        # by the label - confirm this is the intended definition.
        loss = torch.abs(torch.div(output-labels,output)).mean()
        self.log("test_loss", loss)
        return loss

    def configure_optimizers(self):
        """Return AdamW together with a StepLR schedule."""
        # optimizer = torch.optim.RMSprop(self.parameters(), lr=self.lr, alpha=0.86, eps=eps, weight_decay=weight_decay, momentum=0.9)
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr, betas=betas, eps=eps, weight_decay=weight_decay)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=step_size,gamma=gamma)
        return [optimizer], [lr_scheduler]

In [None]:
# Release cached GPU memory and unreachable Python objects before training.
torch.cuda.empty_cache()
gc.collect()

# model init
model = LitModel() #.load_from_checkpoint("/content/checkpoints/last.ckpt")

# logger
logger = pl.loggers.TensorBoardLogger(save_dir=os.getcwd(), version=1, name="lightning_logs")

# callbacks
# Keeps the checkpoint with the lowest val_loss plus the most recent one.
checkpoint_callback = pl.callbacks.ModelCheckpoint(dirpath="/content/checkpoints",
                                      save_last=True,
                                      save_top_k=1,
                                      monitor="val_loss",
                                      mode="min",
                                      filename=model_name+"-{epoch:02d}-{val_loss:.5f}"
                                      )

# Patience is the number of validation checks contained in `num_epochs`
# epochs. It is converted to a plain int: np.ceil returns a NumPy float,
# while EarlyStopping documents an integer count.
stagnate_callback = pl.callbacks.EarlyStopping(monitor="val_loss",
                                               mode="min",
                                               check_finite=True,
                                               patience=int(np.ceil(num_epochs/val_check_interval))
                                               )
swa_callback = pl.callbacks.StochasticWeightAveraging(swa_lrs=1e-4)

# Defining trainer
# NOTE(review): max_epochs is 1000 while num_epochs only feeds the early
# stopping patience - presumably training is meant to run until val_loss
# stagnates; confirm.
trainer = pl.Trainer(accelerator="gpu",             # CPU, GPU or TPU
                     val_check_interval=val_check_interval,
                     auto_lr_find=False,            # True or False
                     auto_scale_batch_size=None,    # None or "binsearch"
                     deterministic=False,           # True or False
                     fast_dev_run=False,            # True or False or Epoch count
                     logger=logger,                 # logger or False
                     max_epochs=1000,               # -1 for Infinite
                     precision=32,                  # Default 32
                     profiler=None,                 # None, "simple" or "advanced"
                     enable_checkpointing=True,     # True or False
                     callbacks=[checkpoint_callback, stagnate_callback, swa_callback],
                     gradient_clip_val=0.25,
                     log_every_n_steps=1
                     #, overfit_batches=1, detect_anomaly=True
                     )

# Autotune hyperparameters (driven by auto_lr_find / auto_scale_batch_size above)
trainer.tune(model=model)

# Training
trainer.fit(model=model)

In [None]:
# Run the test loop and read back the logged `test_loss`.
# NOTE(review): trainer.test() is called without arguments; presumably
# Lightning evaluates the best checkpoint here - confirm for the installed version.
test_results = trainer.test()
t_loss = test_results[0].get('test_loss')

# save for inference
filename = f'/content/{model_name}_{t_loss:.3f}_inf.onnx'

# Random example input (batch of one 3-channel SIZE x SIZE image) for the export
input_shape = (1, 3, SIZE, SIZE)
x = torch.randn(*input_shape, requires_grad=True)

# Export the model, weights included
model.to_onnx(filename, x, export_params=True)