## Load libraries

In [None]:
# (Re)load IPython's autoreload extension in mode 2, which reloads all modules
# before each cell executes, so edits to project code are picked up without
# restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import os,sys
import re
import math
from datetime import datetime
import time
# Stop Python from writing .pyc bytecode files for modules imported from here on.
sys.dont_write_bytecode = True

In [None]:
import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
from skimage.transform import resize

from pathlib import Path
from typing import List, Set, Dict, Tuple, Optional, Iterable, Mapping, Union, Callable

from pprint import pprint
from ipdb import set_trace as brpt

In [None]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from  torch.linalg import norm as tnorm
from torch.utils.data import Dataset, DataLoader, random_split

from torchvision import datasets, transforms

import pytorch_lightning as pl
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning import loggers as pl_loggers
# Select the visible GPU.
# CUDA_DEVICE_ORDER=PCI_BUS_ID asks CUDA to enumerate devices in PCI bus order
# (see issue #152); CUDA_VISIBLE_DEVICES then exposes only device "2" to this
# process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "2",
})

## Set Path 
1. Add the notebook folder and the project root to `sys.path` (so that `src.*` imports resolve)
2. Set DATA_ROOT to the `maptiles_v2` folder

In [None]:
# Resolve the project layout relative to the notebook's working directory:
# the notebook folder, its parent (project root), and the root's `src` folder.
this_nb_path = Path.cwd()
ROOT = this_nb_path.parent
SRC = ROOT / 'src'
DATA_ROOT = Path("/data/hayley-old/maptiles_v2/")
paths2add = [this_nb_path, ROOT]

print("Project root: ", str(ROOT))
print('Src folder: ', str(SRC))
print("This nb path: ", str(this_nb_path))


# Prepend each missing entry to sys.path; entries already present are skipped
# so re-running the cell does not add duplicates.
for p in paths2add:
    p_str = str(p)
    if p_str in sys.path:
        continue
    sys.path.insert(0, p_str)
    print(f"\n{p_str} added to the path.")
        
# print(sys.path)



In [None]:
# from src.data.datasets.maptiles import Maptiles, MapStyles
# from src.data.datamodules.mnist_datamodule import MNISTDataModule
# from src.data.datamodules.maptiles_datamodule import MaptilesDataModule
from src.data.datamodules.multisource_maptiles_datamodule import MultiMaptilesDataModule


# from src.models.plmodules.three_fcs import ThreeFCs
# from src.models.plmodules.vanilla_vae import VanillaVAE
# from src.models.plmodules.beta_vae import BetaVAE
from src.models.plmodules.bilatent_vae import BiVAE

from src.visualize.utils import show_timgs
from src.utils.misc import info, get_next_version_path, n_iter_per_epoch


## Start experiment 
Given a maptile, predict its style as one of OSM, CartoVoyager

In [None]:
# # Instantiate MNIST Datamodule
# in_shape = (1,32,32)
# batch_size = 32
# dm = MNISTDataModule(data_root=ROOT/'data', 
#                        in_shape=in_shape,
#                       batch_size=batch_size)
# dm.setup('fit')
# print("DM: ", dm.name)

In [None]:
# Instantiate the multi-source maptiles DataModule on the OSMnx road images.
data_root = Path("/data/hayley-old/osmnx_data/images")

# All cities available for collection ('london' is currently left out).
all_cities = sorted([
    'la', 'charlotte', 'vegas', 'boston', 'paris',
    'amsterdam', 'shanghai', 'seoul', 'chicago', 'manhattan',
    'berlin', 'montreal', 'rome',
])

# Style parameters: each style name is built from a background color, the
# edge color and the line-width factor.
edge_color = 'cyan'
lw_factor = 0.5
bgcolors = ['r', 'g', 'b']  # alternative: ['k', 'r', 'g', 'b', 'y']

# Alternatives for `cities`: all_cities, ['berlin', 'rome', 'la', 'amsterdam', 'seoul']
cities = ['paris']
# Alternatives for `styles`: ['StamenTonerBackground', 'OSMDefault',
# 'CartoVoyagerNoLabels'], 'StamenWatercolor', 'StamenTonerLines'
styles = [f'OSMnxR-{bgcolor}-{edge_color}-{lw_factor}' for bgcolor in bgcolors]
zooms = ['14']
in_shape = (3, 64, 64)  # presumably (channels, height, width) -- TODO confirm
batch_size = 32
print('cities: ', cities)
print('styles: ', styles)

dm = MultiMaptilesDataModule(
    data_root=data_root,
    cities=cities,
    styles=styles,
    zooms=zooms,
    in_shape=in_shape,
    batch_size=batch_size,
)
dm.setup('fit')

In [None]:
# # Pickle this datamodule
# import joblib
# nb_name = '16-a'
# joblib.dump(dm, ROOT/'cache'/f'dm_{nb_name}.pkl')

In [None]:
# Sanity check on the training split: report its size, then pull one batch
# from the train dataloader and display it.
print('train size: ', len(dm.train_ds))

dl = dm.train_dataloader()
batch = next(iter(dl))
x, label_c, label_s = dm.unpack(batch)
info(x)

# Single-channel inputs are drawn with a gray colormap; otherwise no colormap
# is passed.
cmap = None
if in_shape[0] == 1:
    cmap = 'gray'
show_timgs(x, titles=label_s.tolist(), cmap=cmap)

for labels in (label_c, label_s):
    print(labels)

In [36]:
# Build the BiVAE LightningModule.
from src.models.plmodules.bilatent_vae import BiVAE

# --- Architecture ---------------------------------------------------------
n_styles = len(styles)
latent_dim = 20
adversary_dims = [32, 32, 32]
act_fn = nn.LeakyReLU()

enc_type = 'conv'    # alternative: 'resnet'
dec_type = 'resnet'  # alternative: 'conv'

# The resnet encoder gets a narrower channel schedule than the conv encoder.
hidden_dims = (
    [32, 32, 64, 128, 256] if enc_type == 'resnet' else [32, 64, 128, 256, 512]
)

# --- Optimization and loss weighting ---------------------------------------
# Earlier sweep idea: betas = [0.1 * 3**i for i in range(10)]
learning_rate = 1e-3
is_contrasive = True
# Presumably vae_loss = recon_loss + kld_weight * kld_loss -- confirm in BiVAE.
# Previous value: 1.0
kld_weight = 1024.0
# Per the original note: loss = vae_loss + adv_loss_weight * adv_loss.
# Previous value: 15.
adv_loss_weight = 45.

model = BiVAE(
    in_shape=in_shape,
    n_styles=n_styles,
    latent_dim=latent_dim,
    hidden_dims=hidden_dims,
    adversary_dims=adversary_dims,
    learning_rate=learning_rate,
    act_fn=act_fn,
    is_contrasive=is_contrasive,
    kld_weight=kld_weight,
    adv_loss_weight=adv_loss_weight,
    enc_type=enc_type,
    dec_type=dec_type,
)


In [37]:
# Display the model's name; it becomes part of the experiment name below.
model.name

'BiVAE-C-conv-resnet-1024.0-45.0'

In [38]:
# Set up TensorBoard logging and the keyword arguments for the PL `Trainer`.
max_epochs = 500
exp_name = f'{model.name}_{dm.name}'
tb_logger = pl_loggers.TensorBoardLogger(
    save_dir=f'{ROOT}/temp-logs',
    name=exp_name,
    log_graph=False,
    default_hp_metric=False,
)
print("Log dir: ", tb_logger.log_dir)

# Create the versioned log directory up front when it is not there yet.
log_dir = Path(tb_logger.log_dir)
if not log_dir.exists():
    log_dir.mkdir(parents=True)
    print("\nCreated: ", log_dir)

# Computational-graph logging is currently disabled; the earlier attempts were:
#   model_wrapper = ModelWrapper(model)
#   tb_logger.experiment.add_graph(model_wrapper, model.example_input_array.to(model.device))
#   tb_logger.log_graph(model)

# No `callbacks` entry is passed for now.
trainer_config = dict(
    gpus=1,
    max_epochs=max_epochs,
    progress_bar_refresh_rate=0,
    terminate_on_nan=True,
    check_val_every_n_epoch=10,
    logger=tb_logger,
)


Missing logger folder: /data/hayley-old/Tenanbaum2000/temp-logs/BiVAE-C-conv-resnet-1024.0-45.0_Maptiles_paris_OSMnxR-b-cyan-0.5-OSMnxR-g-cyan-0.5-OSMnxR-r-cyan-0.5_14


Log dir:  /data/hayley-old/Tenanbaum2000/temp-logs/BiVAE-C-conv-resnet-1024.0-45.0_Maptiles_paris_OSMnxR-b-cyan-0.5-OSMnxR-g-cyan-0.5-OSMnxR-r-cyan-0.5_14/version_0

Created:  /data/hayley-old/Tenanbaum2000/temp-logs/BiVAE-C-conv-resnet-1024.0-45.0_Maptiles_paris_OSMnxR-b-cyan-0.5-OSMnxR-g-cyan-0.5-OSMnxR-r-cyan-0.5_14/version_0


### Training is logged to (on May 18, 2021)

- /data/hayley-old/Tenanbaum2000/temp-logs/BiVAE-C-conv-resnet-1.0-15.0_Maptiles_paris_OSMnxR-b-cyan-0.5-OSMnxR-g-cyan-0.5-OSMnxR-r-cyan-0.5_14/version_0


- Log dir:  /data/hayley-old/Tenanbaum2000/temp-logs/BiVAE-C-conv-resnet-1.0-15.0_Maptiles_paris_OSMnxR-b-cyan-0.5-OSMnxR-g-cyan-0.5-OSMnxR-r-cyan-0.5_14/version_1

In [39]:
# Build the trainer from `trainer_config` and run the training loop.
# Alternatives kept for reference:
#   trainer = pl.Trainer(fast_dev_run=3)
#   trainer.tune(model=model, datamodule=dm)
trainer = pl.Trainer(**trainer_config)
metric_names = trainer.callback_metrics.keys()
print("\nMetrics: ", metric_names)  # todo: delete

# Fit the model on the datamodule, then report where training stopped.
trainer.fit(model, dm)
stop_position = (trainer.current_epoch, trainer.batch_idx)
print(f"Finished at ep {stop_position}")

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [2]

   | Name               | Type          | Params
------------------------------------------------------
0  | act_fn             | LeakyReLU     | 0     
1  | out_fn             | Tanh          | 0     
2  | encoder            | Sequential    | 1.6 M 
3  | fc_flatten2qparams | Linear        | 82.0 K
4  | fc_latent2flatten  | Linear        | 43.0 K
5  | decoder            | ResNetDecoder | 6.3 M 
6  | out_layer          | Sequential    | 84    
7  | adversary          | Sequential    | 2.6 K 
8  | train_style_acc    | Accuracy      | 0     
9  | val_style_acc      | Accuracy      | 0     
10 | test_style_acc     | Accuracy      | 0     
------------------------------------------------------
8.0 M     Trainable params
0         Non-trainable params
8.0 M     Total params



Metrics:  dict_keys([])




Ep: 0, batch: 0
Ep: 0, batch: 0
Ep: 10, batch: 0
Ep: 20, batch: 0
Ep: 30, batch: 0
Ep: 40, batch: 0
Epoch    41: reducing learning rate of group 0 to 1.0000e-04.
Ep: 50, batch: 0
Ep: 60, batch: 0
Ep: 70, batch: 0
Ep: 80, batch: 0
Ep: 90, batch: 0
Epoch    91: reducing learning rate of group 0 to 1.0000e-05.
Ep: 100, batch: 0
Ep: 110, batch: 0
Ep: 120, batch: 0
Ep: 130, batch: 0
Epoch   131: reducing learning rate of group 0 to 1.0000e-06.
Ep: 140, batch: 0
Epoch   142: reducing learning rate of group 0 to 1.0000e-07.
Ep: 150, batch: 0
Epoch   153: reducing learning rate of group 0 to 1.0000e-08.
Ep: 160, batch: 0
Ep: 170, batch: 0
Ep: 180, batch: 0
Ep: 190, batch: 0
Ep: 200, batch: 0
Ep: 210, batch: 0
Ep: 220, batch: 0
Ep: 230, batch: 0
Ep: 240, batch: 0
Ep: 250, batch: 0
Ep: 260, batch: 0
Ep: 270, batch: 0
Ep: 280, batch: 0
Ep: 290, batch: 0
Ep: 300, batch: 0
Ep: 310, batch: 0
Ep: 320, batch: 0
Ep: 330, batch: 0
Ep: 340, batch: 0
Ep: 350, batch: 0
Ep: 360, batch: 0
Ep: 370, batch: 0
E

In [40]:
# Display the model's current epoch counter and the directory this run logs to.
model.current_epoch, model.logger.log_dir

(499,
 '/data/hayley-old/Tenanbaum2000/temp-logs/BiVAE-C-conv-resnet-1024.0-45.0_Maptiles_paris_OSMnxR-b-cyan-0.5-OSMnxR-g-cyan-0.5-OSMnxR-r-cyan-0.5_14/version_0')

## Log hyperparameters and `best_score` to TensorBoard

In [41]:
# Merge the model's and the datamodule's hyperparameters into one dict;
# on a key clash the datamodule's value overwrites the model's.
hparams = dict(model.hparams)
hparams.update(dm.hparams)

# Best score tracked by the checkpoint callback, converted with `.item()`.
checkpoint_cb = trainer.checkpoint_callback
best_score = checkpoint_cb.best_model_score.item()
metrics = {'hparam/best_score': best_score}  # todo: define a metric and use it here

for summary in (hparams, metrics):
    pprint(summary)

{'act_fn': LeakyReLU(negative_slope=0.01),
 'adv_loss_weight': 45.0,
 'adversary_dims': [32, 32, 32],
 'batch_size': 32,
 'cities': ['paris'],
 'dec_type': 'resnet',
 'enc_type': 'conv',
 'hidden_dims': [32, 64, 128, 256, 512],
 'in_shape': (3, 64, 64),
 'is_contrasive': True,
 'kld_weight': 1024.0,
 'latent_dim': 20,
 'learning_rate': 0.001,
 'n_contents': 1,
 'n_styles': 3,
 'out_fn': Tanh(),
 'size_average': False,
 'source_names': ['OSMnxR-b-cyan-0.5',
                  'OSMnxR-g-cyan-0.5',
                  'OSMnxR-r-cyan-0.5'],
 'styles': ['OSMnxR-b-cyan-0.5', 'OSMnxR-g-cyan-0.5', 'OSMnxR-r-cyan-0.5'],
 'zooms': ['14']}
{'hparam/best_score': 339217.46875}


In [42]:
# Log through the PL logger's `log_hyperparams`, which converts the hparams'
# values into formats suitable for TensorBoard logging.
# See:
# https://sourcegraph.com/github.com/PyTorchLightning/pytorch-lightning@be3e8701cebfc59bec97d0c7717bb5e52afc665e/-/blob/pytorch_lightning/loggers/tensorboard.py#explorer:~:text=def%20log_hyperparams
# The score and metrics dict are rebuilt here so this cell does not depend on
# the variables of the previous one (apart from `hparams`).
checkpoint_cb = trainer.checkpoint_callback
best_score = checkpoint_cb.best_model_score.item()
metrics = {'hparam/best_score': best_score}  # todo: define a metric and use it here
run_logger = trainer.logger
run_logger.log_hyperparams(hparams, metrics)

# Evaluations

In [None]:
from src.models.plmodules.utils import get_best_ckpt, load_model, load_best_model
from pytorch_lightning.utilities.cloud_io import load as pl_load


Load the best model recorded during training.


In [None]:
# Locate the best checkpoint recorded for this model and read it into memory.
ckpt_path = get_best_ckpt(model, verbose=True)


def _keep_storage(storage, loc):
    """Return each storage unchanged, i.e. apply no device remapping on load."""
    return storage


ckpt = pl_load(ckpt_path, map_location=_keep_storage)  # dict object
print(ckpt['epoch'])

In [None]:
# Load the best model: copy the checkpoint's `state_dict` into the in-memory
# model.
model.load_state_dict(ckpt['state_dict'])


## Reconstruction
    
    

In [None]:
from torch.utils.tensorboard import SummaryWriter
from src.visualize.utils import unnormalize
def show_recon(model: BiVAE,
               tb_writer: Optional[SummaryWriter] = None,
               global_step: int = 0,
               unnorm: bool = True,
               to_show: bool = True,
               verbose: bool = False):
    """Show, and optionally log, reconstructions of one train and one val batch.

    For each of the 'train' and 'val' dataloaders obtained from `model`, the
    first batch's `'img'` entry is passed through `model.generate`, and the
    inputs and reconstructions are moved to the CPU for display and logging.

    Args:
        model: Model to evaluate. It must be attached to a trainer, because
            the datamodule is read from `model.trainer.datamodule`.
        tb_writer: If given, an "inputs | rescaled recons" image grid is
            written to it under the tag `"{mode}/recons"`.
        global_step: Step value recorded with the logged images.
        unnorm: If True, inputs and reconstructions are un-normalized with the
            datamodule's `train_mean` / `train_std` before being shown or
            logged. If False, the raw tensors are used instead.
        to_show: If True, display the inputs and the linearly rescaled
            reconstructions with `show_timgs`.
        verbose: If True, call `info` on the tensors involved.

    Notes:
        - The model is switched to eval mode and is left in eval mode.
        - NOTE(review): `LinearRescaler` is not imported in the cells visible
          here; it must already be in scope when this function is called.
    """
    # Imported locally: the notebook only does
    # `from torchvision import datasets, transforms`, which does not bind the
    # bare name `torchvision`.
    from torchvision.utils import make_grid

    model.eval()
    dm = model.trainer.datamodule
    cmap = 'gray' if dm.size()[0] == 1 else None
    train_mean, train_std = dm.train_mean, dm.train_std
    with torch.no_grad():
        for mode in ('train', 'val'):
            dl = getattr(model, f"{mode}_dataloader")()
            batch = next(iter(dl))

            # Only the image tensor is needed; labels in the batch are unused.
            x = batch['img'].to(model.device)
            x_recon = model.generate(x)

            # Move to cpu for visualization
            x = x.cpu()
            x_recon = x_recon.cpu()

            if verbose:
                info(x, f"{mode}_x")
                info(x_recon, f"{mode}_x_recon")

            # Pick the tensors used for display and logging. Falling back to
            # the raw tensors keeps the TensorBoard branch working when
            # `unnorm=False` (it previously referenced undefined names).
            if unnorm:
                x_vis = unnormalize(x, train_mean, train_std)
                x_recon_vis = unnormalize(x_recon, train_mean, train_std)
                if verbose:
                    print("===After unnormalize===")
                    info(x_vis, f"{mode}_x_unnormed")
                    info(x_recon_vis, f"{mode}_x_recon_unnormed")
            else:
                x_vis, x_recon_vis = x, x_recon

            if to_show:
                show_timgs(x_vis, title=f"Input: {mode}", cmap=cmap)
                show_timgs(LinearRescaler()(x_recon_vis),
                           title=f"Recon(linearized): {mode}", cmap=cmap)

            # Log input-recon grid to TB
            if tb_writer is not None:
                input_grid = make_grid(x_vis)  # (C, gridh, gridw)
                normed_recon_grid = make_grid(LinearRescaler()(x_recon_vis))

                # Concatenate along the last axis: inputs | recons
                grid = torch.cat([input_grid, normed_recon_grid], dim=-1)
                tb_writer.add_image(f"{mode}/recons", grid, global_step=global_step)


In [None]:
# Show reconstructions for one train and one val batch, and log the
# input | recon grids to this run's TensorBoard writer at step 1.
# verbose=True additionally calls `info` on the tensors.
show_recon(model, tb_logger.experiment, global_step=1, verbose=True)