# Basic tutorial: image data
#### Author: Matteo Caorsi

This short tutorial walks you through the basic functionality of the *giotto-deep* API.

The main steps of the tutorial are the following:
 1. creation of a dataset
 2. creation of a model
 3. definition of metrics and losses
 4. running of benchmarks
 5. interactive visualisation of the results

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np

import torch
from torch import nn

from gdeep.models import FFNet

from gdeep.visualisation import  persistence_diagrams_of_activations

from torch.utils.tensorboard import SummaryWriter
from gdeep.data import TorchDataLoader


from gtda.diagrams import BettiCurve

from gtda.plotting import plot_betti_surfaces

import optuna

# Initialize the tensorboard writer

In order to analyse the results of your models, you need to start tensorboard.
In a terminal, move into the `/example` folder and run the following command:

```
tensorboard --logdir=runs
```

Then go [here](http://localhost:6006/) after the training to see all the visualisation results.

In [2]:
# Create the tensorboard writer that the pipelines below receive.
# No log_dir is passed, so torch's default location is used (a sub-folder of
# ./runs), which is what the `tensorboard --logdir=runs` command expects.
writer = SummaryWriter()

# Create your dataset

In [3]:
from torch.utils.data.sampler import SubsetRandomSampler

# Wrap the CIFAR10 dataset in giotto-deep's dataloader builder.
dl = TorchDataLoader(name="CIFAR10")

# Restrict training to the first 10240 samples to keep the tutorial fast.
train_indices = list(range(10240))
print(len(train_indices))

# build_dataloader returns a pair of loaders; only the first one is kept here.
dl_tr, dl_temp = dl.build_dataloader(
    batch_size=512,
    sampler=SubsetRandomSampler(train_indices),
)
print(len(dl_tr))

# NOTE(review): dl_ts is taken from the same (first) return value as dl_tr,
# with indices 0..3071. If that first loader is the training split, the
# "test" samples overlap the training subset -- confirm against the
# TorchDataLoader.build_dataloader documentation.
test_indices = list(range(3072))
dl_ts, dl_temp = dl.build_dataloader(
    batch_size=512,
    sampler=SubsetRandomSampler(test_indices),
)

# The validation loader is the very same object as the test loader.
dl_val = dl_ts

print(len(dl_ts))

10240
Files already downloaded and verified
Files already downloaded and verified
20
Files already downloaded and verified
Files already downloaded and verified
6


## Define and train your model

In [4]:
import torchvision.models as models
from gdeep.pipeline import Pipeline

# Pretrained ResNet-18 backbone followed by a linear head that maps its
# 1000 outputs onto the 10 CIFAR10 classes.
backbone = models.resnet18(pretrained=True)
head = nn.Linear(1000, 10)
model = nn.Sequential(backbone, head)

In [5]:
from torch.optim import SGD, Adam, RMSprop
from gdeep.search import gridsearch

# Uncomment to inspect the architecture:
# print(model)

# Loss used for training and evaluation.
loss_fn = nn.CrossEntropyLoss()

# The pipeline bundles the model, the [train, validation, test] dataloaders,
# the loss function and the tensorboard writer.
pipe = Pipeline(
    model,
    [dl_tr, dl_val, dl_ts],
    loss_fn,
    writer,
)

# Train the model with SGD. The positional 1 is presumably the number of
# epochs (the recorded log prints "TOTAL EPOCHS  1") -- confirm in the
# Pipeline.train documentation.
pipe.train(SGD, 1, cross_validation=True, batch_size=512, lr=0.01)

# Alternative invocations kept from the original notebook (not executed here,
# unverified against the current API):
# pipe = Pipeline(model, (dl_tr, dl_ts), loss_fn, writer)
# pipe = Pipeline(model, (dl_tr, dl_ts), loss_fn, writer, True, "loss", 5)
# pipe = Pipeline(model, [dl_tr, dl_ts], loss_fn, writer)
# search = gridsearch.Gridsearch(pipe, "loss", 5)
# search.search([SGD, Adam], 1, lr=[0.001, 0.01])
# pipe.train([SGD, Adam], 1, lr=[0.001, 0.01])
# pipe.train([SGD, Adam], 1, lr=0.01)



Dataset CIFAR10
    Number of datapoints: 50000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()
TOTAL EPOCHS  1
Epoch 1
-------------------------------


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Training loss: 1.575110  [20/20]
Time taken for this epoch: 71s
Validation results: 
 Accuracy: 3.2%,                 Avg loss: 0.000159 

Test results: 
 Accuracy: 3.3%,                 Avg loss: 0.000159 

Done!


# Gridsearch

In [6]:
from gdeep.search.gridsearch import Gridsearch
from torch.optim import SGD, Adam, RMSprop

# Same loss as in the plain training run.
loss_fn = nn.CrossEntropyLoss()

# A fresh pipeline over the same model and [train, validation, test] loaders.
pipe = Pipeline(
    model,
    [dl_tr, dl_val, dl_ts],
    loss_fn,
    writer,
)
# pipe = Pipeline([model1, model2, ...], [[dl_tr, dl_val, dl_ts], [dl_tr2, dl_val2, dl_ts2], ...], loss_fn, writer)

# Search on the "loss" objective. The trailing 1 appears to be the number of
# trials (the recorded study reports a single finished trial).
search = Gridsearch(pipe, "loss", 1)

# Candidate optimizers are SGD and Adam. The two lr values look like the
# bounds of a sampling range rather than a discrete grid: the logged trial
# used lr ~= 0.0016 -- confirm in the Gridsearch documentation.
search.start(
    [SGD, Adam],
    1,
    lr=[0.001, 0.01],
)

[32m[I 2021-07-29 15:09:39,249][0m A new study created in memory with name: no-name-45626f93-f233-4ba7-a40e-356dfcc853cb[0m


Epoch 1
-------------------------------




Training loss: 1.098912  [20/20]
Time taken for this epoch: 72s


[32m[I 2021-07-29 15:10:53,831][0m Trial 0 finished with value: 1.0989115238189697 and parameters: {'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.0015958022260856906}. Best is trial 0 with value: 1.0989115238189697.[0m


Validation results: 
 Accuracy: 4.1%,                 Avg loss: 0.000125 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  1.0989115238189697
  Params: 
    optimizer: <class 'torch.optim.adam.Adam'>
    lr: 0.0015958022260856906


# Gridsearch with multiple pipelines (models/datasets)

In [7]:
# from gdeep.search.gridsearch import Gridsearch

# pipe1 = Pipeline(model1, [dl_tr, dl_ts, dl_ts], loss_fn, writer)
# pipe2 = Pipeline(model2, [dl_tr, dl_ts, dl_ts], loss_fn, writer)

# search1 = Gridsearch(pipe1, "loss", 2)
# search1.start([SGD, Adam], 1, lr=[0.001, 0.01])

# search2 = Gridsearch(pipe2, "loss", 2)
# search2.start([SGD, Adam], 1, lr=[0.001, 0.01])

# Benchmarking a single model on multiple datasets

## Preparing multiple datasets

In [8]:
# Collect one {"name": ..., "dataloaders": (train, test)} entry per dataset
# variant; the benchmark cell further down consumes this list.
dataloaders_dicts = []
dl = TorchDataLoader(name="CIFAR10")

# --- Variant 1: first 5120 indices for training, first 1024 for testing ---
train_indices = list(range(5120))
dl_tr, dl_temp = dl.build_dataloader(
    batch_size=1024,
    sampler=SubsetRandomSampler(train_indices),
)

test_indices = list(range(1024))
dl_ts, dl_temp = dl.build_dataloader(
    batch_size=1024,
    sampler=SubsetRandomSampler(test_indices),
)

temp_dict = {"name": "CIFAR10_5000", "dataloaders": (dl_tr, dl_ts)}
dataloaders_dicts.append(temp_dict)

# --- Variant 2: first 10240 indices for training, first 2048 for testing ---
train_indices = list(range(10240))
dl_tr, dl_temp = dl.build_dataloader(
    batch_size=1024,
    sampler=SubsetRandomSampler(train_indices),
)

test_indices = list(range(2048))
dl_ts, dl_temp = dl.build_dataloader(
    batch_size=1024,
    sampler=SubsetRandomSampler(test_indices),
)

temp_dict = {"name": "CIFAR10_10000", "dataloaders": (dl_tr, dl_ts)}
dataloaders_dicts.append(temp_dict)

# Sanity check: name of the second entry.
print(dataloaders_dicts[1]["name"])


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
CIFAR10_10000


## Benchmarking model

In [9]:
# from gdeep.pipeline import benchmark

# bench = benchmark.Benchmark(writer)

# bench.benchmark_model(model, dataloaders_dicts, loss_fn, SGD, 1, 0.01)  

# Benchmarking a single dataset on multiple models

## Preparing multiple models

In [10]:
# Collect one {"name": ..., "model": ...} entry per architecture to benchmark.
models_dicts = []

# ResNet-18: pretrained backbone plus a linear head onto 10 classes.
resnet_backbone = models.resnet18(pretrained=True)
model = nn.Sequential(resnet_backbone, nn.Linear(1000, 10))
temp_dict = {"name": "resnet18", "model": model}
models_dicts.append(temp_dict)

# VGG-16: same construction with a different pretrained backbone.
# After this cell, `model` refers to the VGG-16 based network.
vgg_backbone = models.vgg16(pretrained=True)
model = nn.Sequential(vgg_backbone, nn.Linear(1000, 10))
temp_dict = {"name": "vgg16", "model": model}
models_dicts.append(temp_dict)

## Benchmarking data

In [11]:
# bench = benchmark.Benchmark(writer)

# bench.benchmark_data(models_dicts, (dl_tr, dl_ts), loss_fn, SGD, 1, 0.001)

## Benchmarking both

In [12]:
# from gdeep.search.benchmark import Benchmark
# from torch.optim import SGD, Adam, RMSprop

# loss_fn = nn.CrossEntropyLoss()

# bench = Benchmark(writer)

# bench.benchmark(models_dicts, dataloaders_dicts, loss_fn, optimizer = SGD, epochs = 1, learning_rate = 0.01, batch_size = 1024)

## Benchmarking + Gridsearch

In [13]:
from gdeep.search.benchmark import Benchmark
from gdeep.search.gridsearch import Gridsearch
from torch.optim import SGD, Adam, RMSprop

# Loss shared by every model/dataset combination.
loss_fn = nn.CrossEntropyLoss()

# The benchmark takes the list of model dicts and the list of dataloader
# dicts prepared in the cells above, plus the loss and the tensorboard writer.
bench = Benchmark(
    models_dicts,
    dataloaders_dicts,
    loss_fn,
    writer,
)

# Wrapping the benchmark in a Gridsearch runs a separate study for every
# (dataset, model) pair, as the recorded output shows.
search = Gridsearch(bench, "loss", 1)

# Positional 1 and 512 are presumably the number of epochs and the batch
# size -- confirm in the Gridsearch.start documentation.
search.start(
    [SGD, Adam],
    1,
    512,
    lr=[0.001, 0.01],
)

# Alternative call kept from the original notebook (not executed here):
# bench.benchmark(optimizer = [SGD,Adam], epochs = 1, learning_rate = [0.001,0.01], batch_size = 1024)

[32m[I 2021-07-29 15:11:02,119][0m A new study created in memory with name: no-name-beb19209-347e-4912-b7e0-7c0854c3ab69[0m


****************************************
Performing Gridsearch on Dataset: CIFAR10_5000, Model: resnet18
Epoch 1
-------------------------------
Training loss: 2.562519  [ 4/ 4]
Time taken for this epoch: 14s


[32m[I 2021-07-29 15:11:16,997][0m Trial 0 finished with value: 2.5625193119049072 and parameters: {'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.0012954971263344564}. Best is trial 0 with value: 2.5625193119049072.[0m
[32m[I 2021-07-29 15:11:16,998][0m A new study created in memory with name: no-name-d5ad4273-92fa-421b-99c6-b164fe15e183[0m


Validation results: 
 Accuracy: 0.4%,                 Avg loss: 0.000041 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.5625193119049072
  Params: 
    optimizer: <class 'torch.optim.adam.Adam'>
    lr: 0.0012954971263344564
****************************************
Performing Gridsearch on Dataset: CIFAR10_5000, Model: vgg16
Epoch 1
-------------------------------
Training loss: 2.516128  [ 4/ 4]
Time taken for this epoch: 33s


[32m[I 2021-07-29 15:11:52,099][0m Trial 0 finished with value: 2.5161280632019043 and parameters: {'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.0012724480460049447}. Best is trial 0 with value: 2.5161280632019043.[0m
[32m[I 2021-07-29 15:11:52,100][0m A new study created in memory with name: no-name-d398577d-a2a3-4484-bc46-c20d1201e266[0m


Validation results: 
 Accuracy: 0.1%,                 Avg loss: 0.000048 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.5161280632019043
  Params: 
    optimizer: <class 'torch.optim.adam.Adam'>
    lr: 0.0012724480460049447
****************************************
Performing Gridsearch on Dataset: CIFAR10_10000, Model: resnet18
Epoch 1
-------------------------------
Training loss: 2.091243  [ 8/ 8]
Time taken for this epoch: 28s


[32m[I 2021-07-29 15:12:21,248][0m Trial 0 finished with value: 2.091243028640747 and parameters: {'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.002391287633739927}. Best is trial 0 with value: 2.091243028640747.[0m
[32m[I 2021-07-29 15:12:21,249][0m A new study created in memory with name: no-name-392a60aa-dc21-4f88-9fd5-5ee073391f56[0m


Validation results: 
 Accuracy: 0.7%,                 Avg loss: 0.000078 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.091243028640747
  Params: 
    optimizer: <class 'torch.optim.adam.Adam'>
    lr: 0.002391287633739927
****************************************
Performing Gridsearch on Dataset: CIFAR10_10000, Model: vgg16
Epoch 1
-------------------------------
Training loss: 2.323467  [ 8/ 8]
Time taken for this epoch: 60s


[32m[I 2021-07-29 15:13:24,642][0m Trial 0 finished with value: 2.3234665393829346 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.003507416933614802}. Best is trial 0 with value: 2.3234665393829346.[0m


Validation results: 
 Accuracy: 0.2%,                 Avg loss: 0.000093 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.3234665393829346
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.003507416933614802
