# Basic tutorial: image data
#### Author: Matteo Caorsi

This short tutorial introduces the basic functionality of the *giotto-deep* API.

The main steps of the tutorial are the following:
 1. creation of a dataset
 2. creation of a model
 3. definition of metrics and losses
 4. run benchmarks
 5. visualise results interactively

In [40]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np

import torch
from torch import nn

from gdeep.models import FFNet

from gdeep.visualisation import  persistence_diagrams_of_activations

from torch.utils.tensorboard import SummaryWriter
from gdeep.data import TorchDataLoader


from gtda.diagrams import BettiCurve

from gtda.plotting import plot_betti_surfaces

import optuna

# Initialize the tensorboard writer

In order to analyse the results of your models, you need to start tensorboard.
On the terminal, move inside the `/example` folder. There, run the following command:

```
tensorboard --logdir=runs
```

Then go [here](http://localhost:6006/) after the training to see all the visualisation results.

In [41]:
# TensorBoard writer shared by every pipeline/benchmark below.
# No log directory is passed, so the library default is used; the
# instructions above point tensorboard at `runs` (--logdir=runs).
writer = SummaryWriter()

# Create your dataset

In [42]:
from torch.utils.data.sampler import SubsetRandomSampler

# Single CIFAR10 loader object; every subset below is built from it.
dl = TorchDataLoader(name="CIFAR10")

# Training subset: restrict sampling to the first 10240 indices.
train_indices = list(range(10240))
print(len(train_indices))

dl_tr, dl_temp = dl.build_dataloader(
    batch_size=512,
    sampler=SubsetRandomSampler(train_indices),
)
print(len(dl_tr))

# "Test" subset: restrict sampling to the first 3072 indices.
# NOTE(review): as for training, the FIRST value returned by build_dataloader
# is kept and the second is discarded. If the first value is the training
# split, these samples overlap the training subset -- confirm against the
# TorchDataLoader API.
test_indices = list(range(3072))
dl_ts, dl_temp = dl.build_dataloader(
    batch_size=512,
    sampler=SubsetRandomSampler(test_indices),
)

# The validation loader is the very same object as the test loader.
dl_val = dl_ts

print(len(dl_ts))

10240
Files already downloaded and verified
Files already downloaded and verified
20
Files already downloaded and verified
Files already downloaded and verified
6


## Define and train your model

In [43]:
import torchvision.models as models
from gdeep.pipeline import Pipeline

# Pretrained ResNet-18 followed by a linear layer that maps its 1000 outputs
# down to 10 values (one per CIFAR10 class).
model = nn.Sequential(models.resnet18(pretrained=True), nn.Linear(1000,10))

In [44]:
from torch.optim import SGD, Adam, RMSprop
from gdeep.search import gridsearch

# print(model)
# Loss used for both training and evaluation.
loss_fn = nn.CrossEntropyLoss()

# Alternative ways of building the pipeline (kept for reference):
# pipe = Pipeline(model, (dl_tr, dl_ts), loss_fn, writer)
# pipe = Pipeline(model, (dl_tr, dl_ts), loss_fn, writer, True, "loss", 5)
# Pipeline over the [train, validation, test] dataloaders.
# Note: `dl_val` was assigned from `dl_ts` above, so validation and test
# use the same loader object here.
pipe = Pipeline(model, [dl_tr, dl_val, dl_ts], loss_fn, writer)
# pipe = Pipeline(model, [dl_tr, dl_ts], loss_fn, writer)

# search = gridsearch.Gridsearch(pipe, "loss", 5)
# search.search([SGD, Adam], 1, lr=[0.001, 0.01])

# train the model with SGD; the second positional argument is presumably the
# number of epochs (the recorded run below reports "TOTAL EPOCHS  1") -- confirm.
pipe.train(SGD, 1, cross_validation = True, batch_size = 512, lr=0.01)
# pipe.train([SGD, Adam], 1, lr=[0.001, 0.01])
# pipe.train([SGD, Adam], 1, lr=0.01)



Dataset CIFAR10
    Number of datapoints: 50000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()
TOTAL EPOCHS  1
Epoch 1
-------------------------------
Training loss: 1.574904  [20/20]
Time taken for this epoch: 74s
Validation results: 
 Accuracy: 3.3%,                 Avg loss: 0.000161 

Test results: 
 Accuracy: 3.3%,                 Avg loss: 0.000162 

Done!


# Gridsearch

In [45]:
from gdeep.search.gridsearch import Gridsearch
from torch.optim import SGD, Adam, RMSprop

# Loss used by the pipeline under search.
loss_fn = nn.CrossEntropyLoss()

# NOTE(review): `model` is the same object that was passed to the pipeline
# trained in the previous section, so the search presumably starts from
# already-updated weights -- confirm this is intended.
pipe = Pipeline(model, [dl_tr, dl_val, dl_ts], loss_fn, writer)
# pipe = Pipeline([model1, model2, ...], [[dl_tr, dl_val, dl_ts], [dl_tr2, dl_val2, dl_ts2], ...], loss_fn, writer)

# Search over the pipeline using "loss" as the objective; the last argument
# is presumably the number of trials (the recorded run below finished
# exactly one trial) -- confirm.
search = Gridsearch(pipe, "loss", 1)
# Candidate optimizers and learning rates. In the recorded run the chosen
# lr (~0.0036) lies between the two values, so they appear to be treated
# as range bounds rather than a discrete grid -- confirm.
search.start([SGD, Adam], 1, lr=[0.001, 0.01])

[32m[I 2021-07-29 16:07:12,363][0m A new study created in memory with name: no-name-5acfb4a2-753f-4551-9f7a-d23151cdf12c[0m


Epoch 1
-------------------------------
Training loss: 2.167293  [20/20]]
Time taken for this epoch: 74s


[32m[I 2021-07-29 16:08:29,158][0m Trial 0 finished with value: 2.167293071746826 and parameters: {'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.003606973846401288}. Best is trial 0 with value: 2.167293071746826.[0m


Validation results: 
 Accuracy: 2.0%,                 Avg loss: 0.000234 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.167293071746826
  Params: 
    optimizer: <class 'torch.optim.adam.Adam'>
    lr: 0.003606973846401288


# Gridsearch with multiple pipelines (models/datasets)

In [46]:
# Sketch (not executed): one Gridsearch per pipeline.
# NOTE(review): the executed cells in this notebook call `search.start(...)`;
# the `.search(...)` calls below may be outdated -- confirm before enabling.
# from gdeep.search.gridsearch import Gridsearch

# pipe1 = Pipeline(model1, [dl_tr, dl_ts, dl_ts], loss_fn, writer)
# pipe2 = Pipeline(model2, [dl_tr, dl_ts, dl_ts], loss_fn, writer)

# search1 = Gridsearch(pipe1, "loss", 2)
# search1.search([SGD, Adam], 1, lr=[0.001, 0.01])

# search2 = Gridsearch(pipe2, "loss", 2)
# search2.search([SGD, Adam], 1, lr=[0.001, 0.01])

# Benchmarking a single model on multiple datasets

## Preparing multiple datasets

In [47]:
# Build a list of {"name": ..., "dataloaders": (train, test)} entries, one per
# CIFAR10 subset size, for use by the benchmark further below.
dataloaders_dicts = []
dl = TorchDataLoader(name="CIFAR10")

# (entry name, number of training indices, number of test indices).
# The two subsets were previously built by two copy-pasted stanzas that
# differed only in these values.
for subset_name, n_train, n_test in (
    ("CIFAR10_5000", 5120, 1024),
    ("CIFAR10_10000", 10240, 2048),
):
    # Training loader restricted to the first `n_train` indices.
    train_indices = list(range(n_train))
    dl_tr, dl_temp = dl.build_dataloader(
        batch_size=1024, sampler=SubsetRandomSampler(train_indices)
    )

    # Test loader restricted to the first `n_test` indices.
    # NOTE(review): the first return value of build_dataloader is kept here,
    # exactly as for the training loader -- confirm that this is the intended
    # split.
    test_indices = list(range(n_test))
    dl_ts, dl_temp = dl.build_dataloader(
        batch_size=1024, sampler=SubsetRandomSampler(test_indices)
    )

    temp_dict = {"name": subset_name, "dataloaders": (dl_tr, dl_ts)}
    dataloaders_dicts.append(temp_dict)

# After the loop, dl_tr / dl_ts / temp_dict refer to the last (10240 / 2048)
# subset, as they did when the stanzas were written out by hand.
print(dataloaders_dicts[1]["name"])


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
CIFAR10_10000


## Benchmarking model

In [48]:
# from gdeep.pipeline import benchmark

# bench = benchmark.Benchmark(writer)

# bench.benchmark_model(model, dataloaders_dicts, loss_fn, SGD, 1, 0.01)  

# Benchmarking a single dataset on multiple models

## Preparing multiple models

In [49]:
# Build a list of {"name": ..., "model": ...} entries, one per architecture.
# Each model is a pretrained torchvision backbone followed by a linear layer
# mapping its 1000 outputs to 10 values.
models_dicts = []

for arch_name, build_backbone in (
    ("resnet18", models.resnet18),
    ("vgg16", models.vgg16),
):
    model = nn.Sequential(build_backbone(pretrained=True), nn.Linear(1000, 10))
    temp_dict = {"name": arch_name, "model": model}
    models_dicts.append(temp_dict)

## Benchmarking data

In [50]:
# Sketch (not executed): benchmark one dataset against the list of models.
# NOTE(review): the executed cell further below constructs
# Benchmark(models_dicts, dataloaders_dicts, loss_fn, writer); this older
# call style may no longer match the API -- confirm before enabling.
# bench = benchmark.Benchmark(writer)

# bench.benchmark_data(models_dicts, (dl_tr, dl_ts), loss_fn, SGD, 1, 0.001)

## Benchmarking both

In [51]:
# from gdeep.search.benchmark import Benchmark
# from torch.optim import SGD, Adam, RMSprop

# loss_fn = nn.CrossEntropyLoss()

# bench = Benchmark(writer)

# bench.benchmark(models_dicts, dataloaders_dicts, loss_fn, optimizer = SGD, epochs = 1, learning_rate = 0.01, batch_size = 1024)

## Benchmarking + Gridsearch

In [59]:
from gdeep.search.benchmark import Benchmark
from gdeep.search.gridsearch import Gridsearch
from torch.optim import SGD, Adam, RMSprop

# Loss shared by every model/dataset combination.
loss_fn = nn.CrossEntropyLoss()

# Benchmark over the model and dataset lists; the recorded output below shows
# one run per (dataset, model) pair.
bench = Benchmark(models_dicts, dataloaders_dicts, loss_fn, writer)

# Search on top of the benchmark, using "loss" as the objective; the last
# argument is presumably the number of trials (each recorded study below
# finished one trial) -- confirm.
search = Gridsearch(bench, "loss", 1)
# NOTE(review): the third positional argument (512) is presumably the batch
# size -- confirm against Gridsearch.start.
search.start([SGD, Adam], 1, 512, lr=[0.001, 0.01])

# bench.benchmark(optimizer = [SGD,Adam], epochs = 1, learning_rate = [0.001,0.01], batch_size = 1024)

[autoreload of gdeep.search.gridsearch failed: Traceback (most recent call last):
  File "d:\anaconda3\envs\giotto-deep2\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "d:\anaconda3\envs\giotto-deep2\lib\site-packages\IPython\extensions\autoreload.py", line 410, in superreload
    update_generic(old_obj, new_obj)
  File "d:\anaconda3\envs\giotto-deep2\lib\site-packages\IPython\extensions\autoreload.py", line 347, in update_generic
    update(a, b)
  File "d:\anaconda3\envs\giotto-deep2\lib\site-packages\IPython\extensions\autoreload.py", line 302, in update_class
    if update_generic(old_obj, new_obj): continue
  File "d:\anaconda3\envs\giotto-deep2\lib\site-packages\IPython\extensions\autoreload.py", line 347, in update_generic
    update(a, b)
  File "d:\anaconda3\envs\giotto-deep2\lib\site-packages\IPython\extensions\autoreload.py", line 266, in update_function
    setattr(old, name, getattr(new, name))
Va

****************************************
Performing Gridsearch on Dataset: CIFAR10_5000, Model: resnet18
Epoch 1
-------------------------------
Training loss: 3.083695  [ 4/ 4]
Time taken for this epoch: 14s


[32m[I 2021-07-29 16:23:15,828][0m Trial 0 finished with value: 3.0836946964263916 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.00194738155636073}. Best is trial 0 with value: 3.0836946964263916.[0m
[32m[I 2021-07-29 16:23:15,828][0m A new study created in memory with name: no-name-7a99e89c-1658-4e5a-af50-e6e26e507269[0m


Validation results: 
 Accuracy: 0.3%,                 Avg loss: 0.000083 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  3.0836946964263916
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.00194738155636073
****************************************
Performing Gridsearch on Dataset: CIFAR10_5000, Model: vgg16
Epoch 1
-------------------------------
Training loss: 2.345125  [ 4/ 4]
Time taken for this epoch: 32s


[32m[I 2021-07-29 16:23:49,503][0m Trial 0 finished with value: 2.3451247215270996 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.0015950385251406213}. Best is trial 0 with value: 2.3451247215270996.[0m
[32m[I 2021-07-29 16:23:49,503][0m A new study created in memory with name: no-name-3bb3a01b-7581-44d5-b3c7-6068caa10450[0m


Validation results: 
 Accuracy: 0.1%,                 Avg loss: 0.000046 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.3451247215270996
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.0015950385251406213
****************************************
Performing Gridsearch on Dataset: CIFAR10_10000, Model: resnet18
Epoch 1
-------------------------------
Training loss: 3.018966  [ 8/ 8]
Time taken for this epoch: 28s


[32m[I 2021-07-29 16:24:17,879][0m Trial 0 finished with value: 3.018965721130371 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.0025285098276113817}. Best is trial 0 with value: 3.018965721130371.[0m
[32m[I 2021-07-29 16:24:17,879][0m A new study created in memory with name: no-name-44a92847-d0af-43d9-b51a-c4ee2795323b[0m


Validation results: 
 Accuracy: 0.7%,                 Avg loss: 0.000109 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  3.018965721130371
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.0025285098276113817
****************************************
Performing Gridsearch on Dataset: CIFAR10_10000, Model: vgg16
Epoch 1
-------------------------------
Training loss: 2.305908  [ 8/ 8]
Time taken for this epoch: 71s


[32m[I 2021-07-29 16:25:33,486][0m Trial 0 finished with value: 2.305908203125 and parameters: {'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.0013368589659000776}. Best is trial 0 with value: 2.305908203125.[0m


Validation results: 
 Accuracy: 0.2%,                 Avg loss: 0.000092 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.305908203125
  Params: 
    optimizer: <class 'torch.optim.adam.Adam'>
    lr: 0.0013368589659000776
