<a href="https://colab.research.google.com/github/GianmarcoLattaruolo/Vision_Project/blob/main/test_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import sys
from math import ceil
from pathlib import Path

# Root of the GeoEstimation checkout. The working directory is moved there so
# that the relative paths used later in the notebook (models/..., resources/...)
# resolve, and it is put on sys.path so the `classification` package imports.
# Set the GEOESTIMATION_ROOT environment variable to run on another machine;
# the fallback is the original author's local checkout.
PROJECT_ROOT = os.environ.get(
    "GEOESTIMATION_ROOT",
    r'C:\Users\latta\GitHub\Vision_Project\GeoEstimation',
)
os.chdir(PROJECT_ROOT)
# Guarded so that re-running this cell does not keep appending duplicates.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

import pandas as pd
import torch
import pytorch_lightning as pl

from classification.train_base import MultiPartitioningClassifier # class defining our model
from classification.dataset import FiveCropImageDataset # class for preparing the images before giving them to the NN

## Load the model

In [2]:
# Saved artifacts of the pretrained model (paths are relative to the
# GeoEstimation working directory selected in the first cell):
#   hparams    -> the hyperparameters
#   checkpoint -> the model's parameters
hparams = "models/base_M/hparams.yaml"
checkpoint = "models/base_M/epoch=014-val_loss=18.4833.ckpt"

In [27]:
# Exploration only: count the callable attributes at each level of the model's
# class hierarchy, to see how much of the API we have to work with.
def _callable_names(cls):
    """Return the names of every attribute of `cls` (inherited ones included) that is callable."""
    return [name for name in dir(cls) if callable(getattr(cls, name))]


# MultiPartitioningClassifier is a child of pl.LightningModule
methods_MultiPar = _callable_names(MultiPartitioningClassifier)
display(len(methods_MultiPar))
print(MultiPartitioningClassifier.__bases__)

# pl.LightningModule is a child of torch.nn.modules.module.Module and of
# several other PyTorch Lightning classes
methods_pl_Ligh = _callable_names(pl.LightningModule)
display(len(methods_pl_Ligh))
print(pl.LightningModule.__bases__)

# torch.nn.modules.module.Module has no parent other than `object`
methods_pytorch_nn = _callable_names(torch.nn.modules.module.Module)
display(len(methods_pytorch_nn))
print(torch.nn.modules.module.Module.__bases__)

# Names that exist on one class but not on the other. Only a handful are new in
# MultiPartitioningClassifier w.r.t. pl.LightningModule; inherited methods may
# additionally be overridden, which a name-based difference cannot show.
display(set(methods_MultiPar) - set(methods_pl_Ligh))
display(set(methods_pl_Ligh) - set(methods_MultiPar))

148

(<class 'pytorch_lightning.core.lightning.LightningModule'>,)


144

(<class 'abc.ABC'>, <class 'pytorch_lightning.utilities.device_dtype_mixin.DeviceDtypeModuleMixin'>, <class 'pytorch_lightning.core.grads.GradInformation'>, <class 'pytorch_lightning.core.saving.ModelIO'>, <class 'pytorch_lightning.core.hooks.ModelHooks'>, <class 'pytorch_lightning.core.hooks.DataHooks'>, <class 'pytorch_lightning.core.hooks.CheckpointHooks'>, <class 'torch.nn.modules.module.Module'>)


68

(<class 'object'>,)


{'_MultiPartitioningClassifier__build_model',
 '_MultiPartitioningClassifier__init_partitionings',
 '_multi_crop_inference',
 'inference'}

set()

In [24]:
# load_from_checkpoint is a class method provided by PyTorch Lightning and
# inherited by MultiPartitioningClassifier (the class defining our model).
# It rebuilds a previously saved model from a checkpoint file together with a
# file holding the hyperparameters.
model = MultiPartitioningClassifier.load_from_checkpoint(
    checkpoint_path=checkpoint,
    hparams_file=hparams,
    map_location=None,
    # Fixed typo: this keyword used to be spelled `stric`, so it never reached
    # the `strict` parameter and strict loading stayed at its default.
    # `strict` controls whether the keys in checkpoint_path must exactly match
    # the keys returned by this module's state dict.
    strict=False,
)
# Trainer configuration. A selection of the Trainer's arguments is spelled out
# explicitly, with short notes taken from the Lightning documentation, so that
# they are easy to find and tweak.
wanted_precision = 32  # 64 = double, 32 = full, 16 = half, bf16 = bfloat16
trainer_options = dict(
    # A callback or a list of callbacks.
    callbacks=None,
    # Value at which to clip gradients; None disables gradient clipping.
    gradient_clip_val=None,
    # -1 = no tracking, otherwise tracks that p-norm ('inf' for the infinity
    # norm). With AMP the gradients are unscaled before being logged.
    track_grad_norm=-1,
    # Run a validation loop after every N training epochs.
    check_val_every_n_epoch=1,
    # Stop once this many epochs are reached. None = not specified; if max_steps
    # is not specified either, the documentation gives 1000 as the default.
    # -1 means train indefinitely.
    max_epochs=None,
    # Stop training after this number of steps.
    max_steps=-1,
    # How often to log, in steps (documented default: 50).
    log_every_n_steps=50,
    # Accelerator type: "cpu", "gpu", "tpu", "ipu", "hpu", "mps" or "auto".
    accelerator=None,
    precision=wanted_precision,
    # Deprecated since v1.5 in favour of Trainer.fit(..., ckpt_path=...).
    resume_from_checkpoint=None,
    # True makes trainer.tune() run a learning-rate finder.
    auto_lr_find=False,
    # True first runs a batch-size finder that looks for the largest batch size
    # fitting into memory.
    auto_scale_batch_size=False,
)
trainer = pl.Trainer(**trainer_options)

GPU available: False, used: False
INFO:lightning:GPU available: False, used: False
TPU available: False, using: 0 TPU cores
INFO:lightning:TPU available: False, using: 0 TPU cores


In [25]:
# I want to train on the second 3k-images test set.
# Forward slashes are used so that the paths resolve on Windows as well as on
# Linux/macOS (e.g. when the notebook is opened in Colab), consistent with the
# model paths defined earlier in the notebook.
image_dir = "resources/images/im2gps3ktest"
meta_csv = "resources/images/im2gps3k_places365.csv"

# FiveCropImageDataset prepares the images before they are given to the NN;
# in particular, it creates five different crops for every image.
dataset = FiveCropImageDataset(meta_csv, image_dir)
batch_size = 64
dataloader = torch.utils.data.DataLoader(
    dataset,
    # Divide by 5 because each image yields 5 different crops.
    batch_size=ceil(batch_size / 5),
    shuffle=False,
    # Number of worker subprocesses used for data loading
    # (0 would load the data in the main process).
    num_workers=4,
)

Read resources\images\im2gps3k_places365.csv


In [26]:
# NOTE: as written, this cell stops with
#   FileNotFoundError: 'resources/yfcc_25600_places365_mapping_h3.json'
# because we do not have that file. No validation loader is passed below
# (val_dataloaders=None), so presumably the model's own predefined validation
# loader is used and tries to read it.
# TODO: supply the validation data explicitly.
new_training = trainer.fit(
    # Model to fit.
    model=model,
    # PyTorch DataLoader with the training samples (per the docs, skipped if the
    # model has a predefined train_dataloader method).
    train_dataloader=dataloader,
    # A single PyTorch DataLoader or a list of them with the validation samples
    # (per the docs, skipped if the model has a predefined val_dataloaders method).
    val_dataloaders=None,
    # An instance of LightningDataModule, optional.
    datamodule=None,
)


  | Name       | Type       | Params
------------------------------------------
0 | model      | Sequential | 23 M  
1 | classifier | ModuleList | 47 M  
INFO:lightning:
  | Name       | Type       | Params
------------------------------------------
0 | model      | Sequential | 23 M  
1 | classifier | ModuleList | 47 M  


FileNotFoundError: [Errno 2] No such file or directory: 'resources/yfcc_25600_places365_mapping_h3.json'

# Some useful links

[load_from_checkpoint](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.core.saving.ModelIO.html)

[pytorch_lightning.Trainer](https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html)

[transfer learning](https://pytorch-lightning.readthedocs.io/en/stable/advanced/finetuning.html)

[pytorch lightning 1.0.1 full documentation](https://pytorch-lightning.readthedocs.io/_/downloads/en/1.0.1/pdf/)
Unfortunately, we need to consult this version of the documentation, since several function arguments have changed.