## Load libraries

In [1]:
# Notebook-wide IPython setup:
#   - (re)load the autoreload extension and use mode 2, so that edited project
#     modules are re-imported automatically before code is executed;
#   - render matplotlib figures inline in the notebook output.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os,sys
import re
import math
from datetime import datetime
import time
# Stop Python from writing .pyc bytecode files for modules imported from here on.
sys.dont_write_bytecode = True

In [3]:
import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
from skimage.transform import resize

from pathlib import Path
from typing import List, Set, Dict, Tuple, Optional, Iterable, Mapping, Union, Callable

from pprint import pprint
from ipdb import set_trace as brpt

In [4]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from  torch.linalg import norm as tnorm
from torch.utils.data import Dataset, DataLoader, random_split

from torchvision import datasets, transforms

import pytorch_lightning as pl
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning import loggers as pl_loggers
# Restrict this process to one GPU.
# CUDA_DEVICE_ORDER=PCI_BUS_ID makes device indices follow PCI bus order (see issue #152),
# and CUDA_VISIBLE_DEVICES exposes only the device with index "2".
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    CUDA_VISIBLE_DEVICES="2",
)

## Set Path 
1. Add this notebook's folder and the project root to `sys.path` (so that `src.*` imports resolve)
2. Set `DATA_ROOT` to the `maptiles_v2` folder

In [5]:
# Resolve project locations relative to the kernel's working directory.
# NOTE(review): ROOT is taken as the parent of the cwd, so this assumes the
# notebook is run from a folder directly under the project root (e.g. `nbs/`).
this_nb_path = Path.cwd()
ROOT = this_nb_path.parent
SRC = ROOT/'src'

# Location of the maptile dataset. The default is the original machine-specific
# path; set the MAPTILES_DATA_ROOT environment variable to run elsewhere.
DATA_ROOT = Path(os.environ.get("MAPTILES_DATA_ROOT", "/data/hayley-old/maptiles_v2/"))

# Folders that must be importable: the notebook folder and the project root
# (the latter is what makes `from src...` imports work).
paths2add = [this_nb_path, ROOT]

print("Project root: ", str(ROOT))
print('Src folder: ', str(SRC))
print("This nb path: ", str(this_nb_path))


# Prepend each folder once; re-running the cell does not add duplicates.
for p in paths2add:
    if str(p) not in sys.path:
        sys.path.insert(0, str(p))
        print(f"\n{str(p)} added to the path.")

# print(sys.path)



Project root:  /data/hayley-old/Tenanbaum2000
Src folder:  /data/hayley-old/Tenanbaum2000/src
This nb path:  /data/hayley-old/Tenanbaum2000/nbs

/data/hayley-old/Tenanbaum2000 added to the path.


In [9]:
# from src.data.datasets.maptiles import Maptiles, MapStyles
from src.data.datamodules.mnist_datamodule import MNISTDataModule
from src.data.datamodules.maptiles_datamodule import MaptilesDataModule

from src.models.plmodules.three_fcs import ThreeFCs
from src.models.plmodules.vanilla_vae import VanillaVAE
from src.models.plmodules.beta_vae import BetaVAE

from src.visualize.utils import show_timgs

## Start experiment 
Given a maptile, predict its style as one of OSM, CartoVoyager

In [14]:
# MNIST data module: inputs are shaped `in_shape` and served in batches of `batch_size`.
# `in_shape` and `batch_size` are kept as notebook-level names because the
# model cell below reads `in_shape`.
in_shape = (1, 32, 32)
batch_size = 32

mnist_dm_kwargs = {
    'data_root': ROOT / 'data',
    'in_shape': in_shape,
    'batch_size': batch_size,
}
dm = MNISTDataModule(**mnist_dm_kwargs)

# Run the data module's setup for the 'fit' stage explicitly, then report its name.
dm.setup('fit')
print("DM: ", dm.name)

DM:  MNIST


In [13]:
# NOTE: this whole cell is disabled (every line is commented out).
# It holds an alternative setup -- MaptilesDataModule over all cities with the
# 'StamenTonerBackground' style, plus a VanillaVAE -- kept here for reference.
# Any output shown under this cell comes from an earlier run, when the code
# was still active.

# # Instantiate data module
# all_cities = ['la', 'charlotte', 'vegas', 'boston', 'paris', \
#               'amsterdam', 'shanghai', 'seoul', 'chicago', 'manhattan', \
#              'berlin', 'montreal', 'rome']
# cities = all_cities #['berlin']#['paris']
# styles = ['StamenTonerBackground']#['OSMDefault', 'CartoVoyagerNoLabels']
# zooms = ['14']
# in_shape = (1, 64, 64)
# batch_size = 32
# dm = MaptilesDataModule(data_root=DATA_ROOT,
#                         cities=cities,
#                         styles=styles,
#                         zooms=zooms,
#                        in_shape=in_shape,
#                        batch_size=batch_size
#                        )
# dm.setup('fit')
# print("DM: ", dm.name)

# # Instantiate the pl Module
# latent_dim = 10
# hidden_dims = [32,64,128,256,512]
# act_fn = nn.LeakyReLU()
# learning_rate = 3e-4
# model = VanillaVAE(
#     in_shape=in_shape,
#     latent_dim=latent_dim,
#     hidden_dims=hidden_dims,
#     learning_rate=learning_rate,
#     act_fn=act_fn
# )
# print(model.hparams)

Unique styles:  ['StamenTonerBackground']


KeyboardInterrupt: 

In [17]:
# Hyperparameters for the beta-VAE run.
# `betas` is a geometric sweep of candidate KL weights (0.1, then x3 per step,
# 10 values); only the first one is used in this cell.
betas = [0.1 * 3**i for i in range(10)]
kld_weight = betas[0]

latent_dim = 10
hidden_dims = [32, 64, 128, 256]  # a fifth entry (512) is left disabled
learning_rate = 3e-4
act_fn = nn.LeakyReLU()

# Build the Lightning module; `in_shape` comes from the data-module cell.
beta_vae_kwargs = dict(
    in_shape=in_shape,
    latent_dim=latent_dim,
    hidden_dims=hidden_dims,
    learning_rate=learning_rate,
    act_fn=act_fn,
    kld_weight=kld_weight,
)
model = BetaVAE(**beta_vae_kwargs)


In [21]:
# Display the model's name; it is combined with the data module's name to
# form the experiment name in the logger cell.
model.name

'betaVAE-0.100'

In [22]:
# TensorBoard logger + keyword arguments for the PL `Trainer`.
# The experiment is named "<model name>_<data module name>" and logged under
# <ROOT>/temp-logs.
max_epochs = 200
exp_name = f'{model.name}_{dm.name}'
tb_logger = pl_loggers.TensorBoardLogger(
    save_dir=f'{ROOT}/temp-logs',
    name=exp_name,
    log_graph=False,
    default_hp_metric=False,
)
print("Log dir: ", tb_logger.log_dir)

# Create the versioned log directory up front if it is not there yet.
log_dir = Path(tb_logger.log_dir)
if not log_dir.exists():
    log_dir.mkdir(parents=True)
    print("Created: ", log_dir)

# (Disabled) logging of the computational graph:
# model_wrapper = ModelWrapper(model)
# tb_logger.experiment.add_graph(model_wrapper, model.example_input_array.to(model.device))
# tb_logger.log_graph(model)

# Keyword arguments later unpacked into `pl.Trainer(...)`.
trainer_config = dict(
    gpus=1,
    max_epochs=max_epochs,
    progress_bar_refresh_rate=0,
    terminate_on_nan=True,
    check_val_every_n_epoch=10,
    logger=tb_logger,
    # callbacks=callbacks,
)

Missing logger folder: /data/hayley-old/Tenanbaum2000/temp-logs/betaVAE-0.100_MNIST


Log dir:  /data/hayley-old/Tenanbaum2000/temp-logs/betaVAE-0.100_MNIST/version_0
Created:  /data/hayley-old/Tenanbaum2000/temp-logs/betaVAE-0.100_MNIST/version_0


In [None]:
# trainer = pl.Trainer(fast_dev_run=3)
# Build the trainer from the `trainer_config` dict defined in the logger cell.
trainer = pl.Trainer(**trainer_config)
# trainer.tune(model=model, datamodule=dm)

# Start exp
# Fit model
# Train `model` on the data served by the data module `dm`.
trainer.fit(model, dm)
# Report where training stopped. The braces hold a tuple expression, so the
# message shows "(current_epoch, batch_idx)" in tuple form.
print(f"Finished at ep {trainer.current_epoch, trainer.batch_idx}")

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [2]


BetaVAE is called



  | Name              | Type       | Params | In sizes        | Out sizes      
-------------------------------------------------------------------------------------
0 | act_fn            | LeakyReLU  | 0      | [1, 32, 16, 16] | [1, 32, 16, 16]
1 | encoder           | Sequential | 388 K  | [1, 1, 32, 32]  | [1, 256, 2, 2] 
2 | fc_mu             | Linear     | 10.2 K | [1, 1024]       | [1, 10]        
3 | fc_var            | Linear     | 10.2 K | [1, 1024]       | [1, 10]        
4 | fc_latent2decoder | Linear     | 11.3 K | [1, 10]         | [1, 1024]      
5 | decoder           | Sequential | 387 K  | [1, 256, 2, 2]  | [1, 32, 16, 16]
6 | final_layer       | Sequential | 301    | [1, 32, 16, 16] | [1, 1, 32, 32] 
-------------------------------------------------------------------------------------
808 K     Trainable params
0         Non-trainable params
808 K     Total params


Ep: 0, batch: 0
{'kld': tensor(0.0119, device='cuda:0'),
 'loss': tensor(33562.8828, device='cuda:0'),
 'recon_loss': tensor(33562.8828, device='cuda:0')}
Ep: 0, batch: 0
{'kld': tensor(0.0105, device='cuda:0'),
 'loss': tensor(26996.2949, device='cuda:0'),
 'recon_loss': tensor(26996.2930, device='cuda:0')}
Ep: 0, batch: 0
{'kld': tensor(5.4438, device='cuda:0', grad_fn=<MeanBackward1>),
 'loss': tensor(23900.6543, device='cuda:0', grad_fn=<AddBackward0>),
 'recon_loss': tensor(23900.1094, device='cuda:0', grad_fn=<MseLossBackward>)}
Ep: 0, batch: 300
{'kld': tensor(48.0854, device='cuda:0', grad_fn=<MeanBackward1>),
 'loss': tensor(12250.5068, device='cuda:0', grad_fn=<AddBackward0>),
 'recon_loss': tensor(12245.6982, device='cuda:0', grad_fn=<MseLossBackward>)}
Ep: 0, batch: 600
{'kld': tensor(53.0465, device='cuda:0', grad_fn=<MeanBackward1>),
 'loss': tensor(11050.9785, device='cuda:0', grad_fn=<AddBackward0>),
 'recon_loss': tensor(11045.6738, device='cuda:0', grad_fn=<MseLossBac

In [None]:
#2
# Vanilla VAE on Paris maptiles (styles: OSMDefault, CartoVoyagerNoLabels; zoom 14)
# with latent_dim = 10.
in_shape = (3,64,64)

# Instantiate data module
# Keyword names follow the other data-module calls in this notebook
# (`in_shape`, `batch_size`); the previous `bs=1` did not match them.
# NOTE(review): confirm against MaptilesDataModule's current signature.
cities = ['paris']
styles = ['OSMDefault', 'CartoVoyagerNoLabels']
zooms = ['14']
dm = MaptilesDataModule(data_root=DATA_ROOT,
                        cities=cities,
                        styles=styles,
                        zooms=zooms,
                        in_shape=in_shape,
                        batch_size=1)

# Instantiate the pl Module
# `VanillaVAE64` is never imported or defined in this notebook (NameError on a
# fresh kernel); use the imported `VanillaVAE` with the keyword arguments used
# for the other model instantiations here.
# NOTE(review): learning_rate=3e-4 mirrors the other cells -- adjust if needed.
latent_dim = 10
hidden_dims = [32,64,128,256,512]
act_fn = nn.LeakyReLU()
learning_rate = 3e-4
model = VanillaVAE(in_shape=in_shape,
                   latent_dim=latent_dim,
                   hidden_dims=hidden_dims,
                   learning_rate=learning_rate,
                   act_fn=act_fn)
print(model.hparams)

# Instantiate a PL `Trainer` object
# -- most basic trainer: uses good defaults, eg: auto-tensorboard logging, checkpoints, logs, etc.
# -- Pass the data module along with a pl module
# ref: https://www.learnopencv.com/tensorboard-with-pytorch-lightning/
tb_logger = pl_loggers.TensorBoardLogger(save_dir='lightning_logs', name='vanilla_vae')
trainer_config = {
#     'gpus':1,
    'max_epochs': 200,
    'progress_bar_refresh_rate':20,
    # NOTE(review): the LR finder presumably only runs via `trainer.tune(...)`,
    # which this cell does not call -- confirm for the installed PL version.
    'auto_lr_find': True,
    'terminate_on_nan':True,
    # A float is a fraction of a training epoch (0.25 -> validate 4x per epoch);
    # an int would be a number of training batches.
    'val_check_interval': 0.25,
    'logger':tb_logger
}
trainer = pl.Trainer(**trainer_config)
# trainer = pl.Trainer(fast_dev_run=True)

trainer.fit(model, dm)

# Finally,
# Log this model's hyperparameters to tensorboard
# hparams = dict(model.hparams)
# metrics = {'hparam/acc': model.hparams["loss"]}
# model.logger.experiment.add_hparams(hparam_dict=hparams,
#                                     metric_dict=metrics) #how to store the 'best' value of the metric?
# Alternatively, use pl.Logger's method "log_hyperparameters"
#         logger.log_hyperparams(hparams, metrics)

In [None]:
#3
# Vanilla VAE on Paris maptiles (styles: OSMDefault, CartoVoyagerNoLabels; zoom 14)
# with latent_dim = 20.
in_shape = (3,64,64)

# Instantiate data module
# Keyword names follow the other data-module calls in this notebook
# (`in_shape`, `batch_size`); the previous `bs=1` did not match them.
# NOTE(review): confirm against MaptilesDataModule's current signature.
cities = ['paris']
styles = ['OSMDefault', 'CartoVoyagerNoLabels']
zooms = ['14']
dm = MaptilesDataModule(data_root=DATA_ROOT,
                        cities=cities,
                        styles=styles,
                        zooms=zooms,
                        in_shape=in_shape,
                        batch_size=1)

# Instantiate the pl Module
# `VanillaVAE64` is never imported or defined in this notebook (NameError on a
# fresh kernel); use the imported `VanillaVAE` with the keyword arguments used
# for the other model instantiations here.
# NOTE(review): learning_rate=3e-4 mirrors the other cells -- adjust if needed.
latent_dim = 20
hidden_dims = [32,64,128,256,512]
act_fn = nn.LeakyReLU()
learning_rate = 3e-4
model = VanillaVAE(in_shape=in_shape,
                   latent_dim=latent_dim,
                   hidden_dims=hidden_dims,
                   learning_rate=learning_rate,
                   act_fn=act_fn)
print(model.hparams)

# Instantiate a PL `Trainer` object
# -- most basic trainer: uses good defaults, eg: auto-tensorboard logging, checkpoints, logs, etc.
# -- Pass the data module along with a pl module
# ref: https://www.learnopencv.com/tensorboard-with-pytorch-lightning/
tb_logger = pl_loggers.TensorBoardLogger(save_dir='lightning_logs', name='vanilla_vae')
trainer_config = {
#     'gpus':1,
    'max_epochs': 200,
    'progress_bar_refresh_rate':20,
    # NOTE(review): the LR finder presumably only runs via `trainer.tune(...)`,
    # which this cell does not call -- confirm for the installed PL version.
    'auto_lr_find': True,
    'terminate_on_nan':True,
    # A float is a fraction of a training epoch (0.25 -> validate 4x per epoch);
    # an int would be a number of training batches.
    'val_check_interval': 0.25,
    'logger':tb_logger
}
trainer = pl.Trainer(**trainer_config)
# trainer = pl.Trainer(fast_dev_run=True)

trainer.fit(model, dm)

# Finally,
# Log this model's hyperparameters to tensorboard
# hparams = dict(model.hparams)
# metrics = {'hparam/acc': model.hparams["loss"]}
# model.logger.experiment.add_hparams(hparam_dict=hparams,
#                                     metric_dict=metrics) #how to store the 'best' value of the metric?
# Alternatively, use pl.Logger's method "log_hyperparameters"
#         logger.log_hyperparams(hparams, metrics)


In [None]:
# 4
# Vanilla VAE on Paris maptiles (styles: OSMDefault, CartoVoyagerNoLabels; zoom 14)
# with latent_dim = 30.
in_shape = (3,64,64)

# Instantiate data module
# Keyword names follow the other data-module calls in this notebook
# (`in_shape`, `batch_size`); the previous `bs=1` did not match them.
# NOTE(review): confirm against MaptilesDataModule's current signature.
cities = ['paris']
styles = ['OSMDefault', 'CartoVoyagerNoLabels']
zooms = ['14']
dm = MaptilesDataModule(data_root=DATA_ROOT,
                        cities=cities,
                        styles=styles,
                        zooms=zooms,
                        in_shape=in_shape,
                        batch_size=1)

# Instantiate the pl Module
# `VanillaVAE64` is never imported or defined in this notebook (NameError on a
# fresh kernel); use the imported `VanillaVAE` with the keyword arguments used
# for the other model instantiations here.
# NOTE(review): learning_rate=3e-4 mirrors the other cells -- adjust if needed.
latent_dim = 30
hidden_dims = [32,64,128,256,512]
act_fn = nn.LeakyReLU()
learning_rate = 3e-4
model = VanillaVAE(in_shape=in_shape,
                   latent_dim=latent_dim,
                   hidden_dims=hidden_dims,
                   learning_rate=learning_rate,
                   act_fn=act_fn)
print(model.hparams)

# Instantiate a PL `Trainer` object
# -- most basic trainer: uses good defaults, eg: auto-tensorboard logging, checkpoints, logs, etc.
# -- Pass the data module along with a pl module
# ref: https://www.learnopencv.com/tensorboard-with-pytorch-lightning/
tb_logger = pl_loggers.TensorBoardLogger(save_dir='lightning_logs', name='vanilla_vae')
trainer_config = {
#     'gpus':1,
    'max_epochs': 200,
    'progress_bar_refresh_rate':20,
    # NOTE(review): the LR finder presumably only runs via `trainer.tune(...)`,
    # which this cell does not call -- confirm for the installed PL version.
    'auto_lr_find': True,
    'terminate_on_nan':True,
    # A float is a fraction of a training epoch (0.25 -> validate 4x per epoch);
    # an int would be a number of training batches.
    'val_check_interval': 0.25,
    'logger':tb_logger
}
trainer = pl.Trainer(**trainer_config)
# trainer = pl.Trainer(fast_dev_run=True)

trainer.fit(model, dm)

# Finally,
# Log this model's hyperparameters to tensorboard
# hparams = dict(model.hparams)
# metrics = {'hparam/acc': model.hparams["loss"]}
# model.logger.experiment.add_hparams(hparam_dict=hparams,
#                                     metric_dict=metrics) #how to store the 'best' value of the metric?
# Alternatively, use pl.Logger's method "log_hyperparameters"
#         logger.log_hyperparams(hparams, metrics)

## pl.Metrics Module
PyTorch Lightning provides a `Metric` base class for metrics, which inherits from `nn.Module`.
The `Metric` base class's `forward(x)` method performs the following two actions:
- Calls `update()` on its input `x`
- Simultaneously, returns the value of the metric over the input

Other key methods:
- `Metric.update()`
- `Metric.compute()`
- `Metric.reset()`


    

In [None]:
# List the public names available under `pl.metrics`.
# (This cell previously held an unfinished `pl.metrics.` attribute access,
# which is a SyntaxError when the notebook is run top to bottom.)
[name for name in dir(pl.metrics) if not name.startswith('_')]

In [None]:
from src.visualize.utils import show_timgs, show_timg

In [None]:
# Ask the model for `n_samples` generated images and display them.
# `n_samples` is also read by the reconstruction cell that follows.
n_samples = 36
with torch.no_grad():
    # Fixed typo: the method was spelled `samaple`, which would raise
    # AttributeError. NOTE(review): confirm the model exposes
    # `sample(num_samples, device)`.
    sampled_recons = model.sample(n_samples, model.device)
    show_timgs(sampled_recons.detach())

In [None]:
# recons
# Show the model's reconstruction for up to `n_samples` training batches.
with torch.no_grad():
    # Iterate over a single dataloader instead of rebuilding one per sample.
    # `range` comes first in the zip so that no extra batch is fetched once
    # `n_samples` batches have been shown.
    for n, (x, y) in zip(range(n_samples), dm.train_dataloader()):
        # One forward pass per batch: previously the model was called three
        # times, so `mu`, `log_var` and `recon` could come from different
        # (stochastic) passes. The input is moved to the model's device first.
        out = model(x.to(model.device))
        mu, log_var, recon = out["mu"], out["log_var"], out["recon"]
        # Bring the result back to the CPU before plotting (no-op if already there).
        show_timg(recon.detach().cpu().squeeze())
        plt.show()
