# Basic tutorial: image data
#### Author: Matteo Caorsi

This short tutorial walks you through the basic functionality of the *giotto-deep* API.

The main steps of the tutorial are the following:
 1. creation of a dataset
 2. creation of a model
 3. definition of metrics and losses
 4. running of benchmarks
 5. interactive visualisation of the results

In [27]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np

import torch
from torch import nn

from gdeep.models import FFNet

from gdeep.visualisation import  persistence_diagrams_of_activations

from torch.utils.tensorboard import SummaryWriter
from gdeep.data import TorchDataLoader


from gtda.diagrams import BettiCurve

from gtda.plotting import plot_betti_surfaces

import optuna

# Initialize the tensorboard writer

In order to analyse the results of your models, you need to start TensorBoard.
In a terminal, move into the `/example` folder and run the following command there:

```
tensorboard --logdir=runs
```

Once training has finished, open [http://localhost:6006/](http://localhost:6006/) to see all the visualisation results.

In [28]:
# TensorBoard writer shared by every Pipeline/Benchmark created below.
# With no arguments, SummaryWriter logs into a new sub-directory of ./runs,
# which is the directory the `tensorboard --logdir=runs` command reads.
writer = SummaryWriter()

# Create your dataset

In [29]:
from torch.utils.data.sampler import SubsetRandomSampler

# Builder for the CIFAR10 dataloaders.
dl = TorchDataLoader(name="CIFAR10")

# Training subset: the first 10240 sample indices, visited in random order
# by SubsetRandomSampler.
train_indices = list(range(10240))
print(len(train_indices))

# build_dataloader returns a pair of loaders; only the first one is kept.
dl_tr, dl_temp = dl.build_dataloader(
    batch_size=512,
    sampler=SubsetRandomSampler(train_indices),
)
print(len(dl_tr))  # number of batches: 10240 / 512 = 20

# Evaluation subset: the first 3072 sample indices.
# NOTE(review): as for training, this keeps the *first* loader returned by
# build_dataloader. If that element is the training split, these samples
# overlap with the training subset above -- TODO confirm against the
# TorchDataLoader API.
test_indices = list(range(3072))
dl_ts, dl_temp = dl.build_dataloader(
    batch_size=512,
    sampler=SubsetRandomSampler(test_indices),
)

# The same loader serves as both validation and test set in this tutorial.
dl_val = dl_ts

print(len(dl_ts))  # number of batches: 3072 / 512 = 6

10240
Files already downloaded and verified
Files already downloaded and verified
20
Files already downloaded and verified
Files already downloaded and verified
6


## Define and train your model

In [30]:
import torchvision.models as models
from gdeep.pipeline import Pipeline

# Pretrained ResNet-18 followed by a linear head that maps its 1000 outputs
# to the 10 CIFAR10 classes.
backbone = models.resnet18(pretrained=True)
head = nn.Linear(1000, 10)
model = nn.Sequential(backbone, head)

In [31]:
from torch.optim import SGD, Adam, RMSprop
from gdeep.search import gridsearch

# Uncomment to inspect the architecture:
# print(model)

# Loss minimised during training.
loss_fn = nn.CrossEntropyLoss()

# The pipeline bundles the model, the dataloaders, the loss and the
# tensorboard writer.
pipe = Pipeline(
    model,
    [dl_tr, dl_val, dl_ts],  # train / validation / test loaders
    loss_fn,
    writer,
)

# Train the model: SGD optimiser, 2 epochs, learning rate 0.01.
pipe.train(SGD, 2, cross_validation=True, batch_size=512, lr=0.01)

# Alternative call patterns left by the author. They are kept verbatim and
# are not exercised in this notebook, so they may not match the current API:
# pipe = Pipeline(model, (dl_tr, dl_ts), loss_fn, writer)
# pipe = Pipeline(model, (dl_tr, dl_ts), loss_fn, writer, True, "loss", 5)
# pipe = Pipeline(model, [dl_tr, dl_ts], loss_fn, writer)
# search = gridsearch.Gridsearch(pipe, "loss", 5)
# search.search([SGD, Adam], 1, lr=[0.001, 0.01])
# pipe.train([SGD, Adam], 1, lr=[0.001, 0.01])
# pipe.train([SGD, Adam], 1, lr=0.01)



TOTAL EPOCHS  2
Epoch 1
-------------------------------
Training loss: 1.587334  [20/20]
Time taken for this epoch: 5s
Validation results: 
 Accuracy: 3.3%,                 Avg loss: 0.000161 

Epoch 2
-------------------------------
Training loss: 1.308019  [20/20]
Time taken for this epoch: 5s
Validation results: 
 Accuracy: 4.0%,                 Avg loss: 0.000122 

Test results: 
 Accuracy: 4.0%,                 Avg loss: 0.000122 

Done!


# Gridsearch

In [32]:
from gdeep.search.gridsearch import Gridsearch
from torch.optim import SGD, Adam, RMSprop

# Fresh loss and pipeline for the hyperparameter search.
loss_fn = nn.CrossEntropyLoss()
pipe = Pipeline(
    model,
    [dl_tr, dl_val, dl_ts],  # train / validation / test loaders
    loss_fn,
    writer,
)
# Several models/datasets can presumably be passed as lists (author's sketch,
# not exercised here):
# pipe = Pipeline([model1, model2, ...], [[dl_tr, dl_val, dl_ts], [dl_tr2, dl_val2, dl_ts2], ...], loss_fn, writer)

# Search over optimiser and learning rate, scored on "loss".
# NOTE(review): the last argument looks like the number of trials (the run
# below reports one finished trial) -- confirm against the Gridsearch API.
search = Gridsearch(pipe, "loss", 1)

# Candidate optimisers, 1 epoch per trial, and the learning-rate bounds
# (the logged trial sampled a value between the two).
search.start(
    [SGD, Adam],
    1,
    lr=[0.001, 0.01],
)

[32m[I 2021-07-28 14:37:43,443][0m A new study created in memory with name: no-name-b146e5c5-68eb-48a0-b3dc-dd2ea21c0098[0m


Epoch 1
-------------------------------
Training loss: 4.345394  [20/20]]
Time taken for this epoch: 5s


[32m[I 2021-07-28 14:37:49,383][0m Trial 0 finished with value: 4.345393657684326 and parameters: {'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.003496633883137287}. Best is trial 0 with value: 4.345393657684326.[0m


Validation results: 
 Accuracy: 2.2%,                 Avg loss: 0.000315 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  4.345393657684326
  Params: 
    optimizer: <class 'torch.optim.adam.Adam'>
    lr: 0.003496633883137287


# Gridsearch with multiple pipelines (models/datasets)

In [33]:
# from gdeep.search.gridsearch import Gridsearch

# pipe1 = Pipeline(model1, [dl_tr, dl_ts, dl_ts], loss_fn, writer)
# pipe2 = Pipeline(model2, [dl_tr, dl_ts, dl_ts], loss_fn, writer)

# search1 = Gridsearch(pipe1, "loss", 2)
# search1.start([SGD, Adam], 1, lr=[0.001, 0.01])

# search2 = Gridsearch(pipe2, "loss", 2)
# search2.start([SGD, Adam], 1, lr=[0.001, 0.01])

# Benchmarking a single model on multiple datasets

## Preparing multiple datasets

In [34]:
# Build one {"name", "dataloaders"} entry per dataset size to benchmark.
dataloaders_dicts = []
dl = TorchDataLoader(name="CIFAR10")

# (entry name, number of training samples, number of test samples).
# The names are labels only: the actual subset sizes are multiples of the
# batch size (5120 and 10240 training samples respectively).
subset_specs = (
    ("CIFAR10_5000", 5120, 1024),
    ("CIFAR10_10000", 10240, 2048),
)

for subset_name, n_train, n_test in subset_specs:
    # Training loader over the first n_train sample indices.
    train_indices = list(range(n_train))
    dl_tr, dl_temp = dl.build_dataloader(
        batch_size=1024,
        sampler=SubsetRandomSampler(train_indices),
    )

    # Evaluation loader over the first n_test sample indices.
    # NOTE(review): like the training loader, this is the *first* element
    # returned by build_dataloader -- confirm which split that is.
    test_indices = list(range(n_test))
    dl_ts, dl_temp = dl.build_dataloader(
        batch_size=1024,
        sampler=SubsetRandomSampler(test_indices),
    )

    temp_dict = {"name": subset_name, "dataloaders": (dl_tr, dl_ts)}
    dataloaders_dicts.append(temp_dict)

# Sanity check: name of the second entry.
print(dataloaders_dicts[1]["name"])


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
CIFAR10_10000


## Benchmarking model

In [35]:
# from gdeep.pipeline import benchmark

# bench = benchmark.Benchmark(writer)

# bench.benchmark_model(model, dataloaders_dicts, loss_fn, SGD, 1, 0.01)  

# Benchmarking a single dataset on multiple models

## Preparing multiple models

In [36]:
# Build one {"name", "model"} entry per architecture to benchmark.
models_dicts = []

# Each pretrained backbone is followed by a linear head that maps its 1000
# outputs to the 10 CIFAR10 classes.
for arch_name, make_backbone in (("resnet18", models.resnet18), ("vgg16", models.vgg16)):
    model = nn.Sequential(make_backbone(pretrained=True), nn.Linear(1000, 10))
    temp_dict = {"name": arch_name, "model": model}
    models_dicts.append(temp_dict)

## Benchmarking data

In [37]:
# bench = benchmark.Benchmark(writer)

# bench.benchmark_data(models_dicts, (dl_tr, dl_ts), loss_fn, SGD, 1, 0.001)

## Benchmarking both

In [38]:
# from gdeep.search.benchmark import Benchmark
# from torch.optim import SGD, Adam, RMSprop

# loss_fn = nn.CrossEntropyLoss()

# bench = Benchmark(writer)

# bench.benchmark(models_dicts, dataloaders_dicts, loss_fn, optimizer = SGD, epochs = 1, learning_rate = 0.01, batch_size = 1024)

## Benchmarking + Gridsearch

In [39]:
from gdeep.search.benchmark import Benchmark
from gdeep.search.gridsearch import Gridsearch
from torch.optim import SGD, Adam, RMSprop

# Loss shared by every model/dataset combination.
loss_fn = nn.CrossEntropyLoss()

# The benchmark runs over every (dataset, model) pair from the two lists of
# dicts built above.
bench = Benchmark(
    models_dicts,
    dataloaders_dicts,
    loss_fn,
    writer,
)

# Wrapping the benchmark in a Gridsearch runs one search per pair.
search = Gridsearch(bench, "loss", 1)

# Candidate optimisers, 1 epoch per trial, then 512, then the learning-rate
# bounds.
# NOTE(review): 512 is presumably the batch size -- confirm against the
# Gridsearch.start signature.
search.start(
    [SGD, Adam],
    1,
    512,
    lr=[0.001, 0.01],
)

# Plain benchmark without a search (author's sketch, not exercised here):
# bench.benchmark(optimizer = [SGD,Adam], epochs = 1, learning_rate = [0.001,0.01], batch_size = 1024)

[32m[I 2021-07-28 14:37:58,742][0m A new study created in memory with name: no-name-688fd14d-2d65-4bc4-a123-0c5b91c2c083[0m


****************************************
Performing Gridsearch on Dataset: CIFAR10_5000, Model: resnet18
Epoch 1
-------------------------------
Training loss: 2.818259  [ 3/ 4]

[32m[I 2021-07-28 14:37:59,958][0m Trial 0 finished with value: 2.6184284687042236 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.004082089833978034}. Best is trial 0 with value: 2.6184284687042236.[0m
[32m[I 2021-07-28 14:37:59,959][0m A new study created in memory with name: no-name-3cbc17c2-7a0f-471e-a617-ad1791da6dc1[0m


Training loss: 2.618428  [ 4/ 4]
Time taken for this epoch: 1s
Validation results: 
 Accuracy: 0.2%,                 Avg loss: 0.000051 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.6184284687042236
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.004082089833978034
****************************************
Performing Gridsearch on Dataset: CIFAR10_5000, Model: vgg16
Epoch 1
-------------------------------
Training loss: 2.614710  [ 4/ 4]
Time taken for this epoch: 2s


[32m[I 2021-07-28 14:38:02,127][0m Trial 0 finished with value: 2.6147100925445557 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.0011836768807636912}. Best is trial 0 with value: 2.6147100925445557.[0m
[32m[I 2021-07-28 14:38:02,128][0m A new study created in memory with name: no-name-4eed753a-e77f-4cc6-97bf-ee75b4653249[0m


Validation results: 
 Accuracy: 0.2%,                 Avg loss: 0.000050 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.6147100925445557
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.0011836768807636912
****************************************
Performing Gridsearch on Dataset: CIFAR10_10000, Model: resnet18
Epoch 1
-------------------------------
Training loss: 1.977874  [ 8/ 8]
Time taken for this epoch: 2s


[32m[I 2021-07-28 14:38:04,504][0m Trial 0 finished with value: 1.9778743982315063 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.006186748398487935}. Best is trial 0 with value: 1.9778743982315063.[0m
[32m[I 2021-07-28 14:38:04,506][0m A new study created in memory with name: no-name-9b827b3a-1e71-4dd9-a0a2-723be132e835[0m


Validation results: 
 Accuracy: 0.7%,                 Avg loss: 0.000078 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  1.9778743982315063
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.006186748398487935
****************************************
Performing Gridsearch on Dataset: CIFAR10_10000, Model: vgg16
Epoch 1
-------------------------------
Training loss: 2.298507  [ 8/ 8]
Time taken for this epoch: 4s


[32m[I 2021-07-28 14:38:08,733][0m Trial 0 finished with value: 2.298506736755371 and parameters: {'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.0013277014946581002}. Best is trial 0 with value: 2.298506736755371.[0m


Validation results: 
 Accuracy: 0.2%,                 Avg loss: 0.000097 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.298506736755371
  Params: 
    optimizer: <class 'torch.optim.adam.Adam'>
    lr: 0.0013277014946581002
