# Basic tutorial: image data
#### Author: Matteo Caorsi

This short tutorial introduces the basic functionality of the *giotto-deep* API.

The main steps of the tutorial are the following:
 1. create a dataset
 2. create a model
 3. define metrics and losses
 4. run benchmarks
 5. visualise the results interactively

In [27]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np

import torch
from torch import nn

from gdeep.models import FFNet

from gdeep.visualisation import  persistence_diagrams_of_activations

from torch.utils.tensorboard import SummaryWriter
from gdeep.data import TorchDataLoader


from gtda.diagrams import BettiCurve

from gtda.plotting import plot_betti_surfaces

import optuna

# Initialize the tensorboard writer

In order to analyse the results of your models, you need to start tensorboard.
In a terminal, move into the `/example` folder and run the following command:

```
tensorboard --logdir=runs
```

Then go [here](http://localhost:6006/) after the training to see all the visualisation results.

In [28]:
# Tensorboard writer, created with default arguments; it is handed to every
# Pipeline / Benchmark built further down in this notebook.
writer = SummaryWriter()

# Create your dataset

In [29]:
from torch.utils.data.sampler import SubsetRandomSampler

# Dataloader builder for the CIFAR10 dataset
dl = TorchDataLoader(name="CIFAR10")

# Train only on the samples with index 0..10239
train_indices = list(range(10240))
print(len(train_indices))

# NOTE(review): only the first loader returned by build_dataloader is kept, both
# here and for the "test" loader below, and the two index ranges overlap --
# confirm this is the intended train/test split.
dl_tr, dl_temp = dl.build_dataloader(
    batch_size=512, sampler=SubsetRandomSampler(train_indices)
)
print(len(dl_tr))

# Evaluate only on the samples with index 0..3071
test_indices = list(range(3072))
dl_ts, dl_temp = dl.build_dataloader(
    batch_size=512, sampler=SubsetRandomSampler(test_indices)
)

# The very same loader is reused for validation and for testing
dl_val = dl_ts

print(len(dl_ts))

10240
Files already downloaded and verified
Files already downloaded and verified
20
Files already downloaded and verified
Files already downloaded and verified
6


## Define and train your model

In [30]:
import torchvision.models as models
from gdeep.pipeline import Pipeline

# Pretrained ResNet-18 followed by a linear layer that maps its 1000 outputs
# to 10 outputs (one per CIFAR10 class).
backbone = models.resnet18(pretrained=True)
head = nn.Linear(1000, 10)
model = nn.Sequential(backbone, head)

In [31]:
from torch.optim import SGD, Adam, RMSprop
from gdeep.search import gridsearch

# Classification loss shared by training, validation and test
loss_fn = nn.CrossEntropyLoss()

# The pipeline is given the model, the dataloaders (train, validation, test),
# the loss and the tensorboard writer.
# Alternative seen in earlier drafts (not run here):
# pipe = Pipeline(model, [dl_tr, dl_ts], loss_fn, writer)
dataloaders = [dl_tr, dl_val, dl_ts]
pipe = Pipeline(model, dataloaders, loss_fn, writer)

# Train the model with SGD; the second positional argument is 1 and the log
# below reports "TOTAL EPOCHS  1".
pipe.train(SGD, 1, cross_validation=True, batch_size=512, lr=0.01)



Dataset CIFAR10
    Number of datapoints: 50000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()
TOTAL EPOCHS  1
Epoch 1
-------------------------------
Training loss: 1.630912  [20/20]
Time taken for this epoch: 75s
Validation results: 
 Accuracy: 3.2%,                 Avg loss: 0.000162 

Test results: 
 Accuracy: 3.3%,                 Avg loss: 0.000162 

Done!


# Gridsearch

In [32]:
from gdeep.search.gridsearch import Gridsearch
from torch.optim import SGD, Adam, RMSprop

# Same classification loss as in the training section
loss_fn = nn.CrossEntropyLoss()

# NOTE(review): `model` is the object that was passed to pipe.train(...) in the
# previous section, so the search presumably starts from already-updated
# weights -- confirm this is intended.
pipe = Pipeline(model, [dl_tr, dl_val, dl_ts], loss_fn, writer)
# Sketch for several models/datasets (not run here):
# pipe = Pipeline([model1, model2, ...], [[dl_tr, dl_val, dl_ts], [dl_tr2, dl_val2, dl_ts2], ...], loss_fn, writer)

# Candidate optimizers and learning-rate values handed to the search; the trial
# log below shows a sampled lr lying between the two values.
optimizers = [SGD, Adam]
lr_values = [0.001, 0.01]

# Search on "loss"; the third argument is 1 and the log reports one finished
# trial -- TODO confirm it is the number of trials.
search = Gridsearch(pipe, "loss", 1)
search.start(optimizers, 1, lr=lr_values)

[32m[I 2021-07-29 15:40:01,501][0m A new study created in memory with name: no-name-c8a5d63d-1d08-434a-a878-de1e02486c19[0m


Epoch 1
-------------------------------
Training loss: 1.330187  [20/20]
Time taken for this epoch: 75s


[32m[I 2021-07-29 15:41:19,026][0m Trial 0 finished with value: 1.3301867246627808 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.003933444767262241}. Best is trial 0 with value: 1.3301867246627808.[0m


Validation results: 
 Accuracy: 3.6%,                 Avg loss: 0.000141 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  1.3301867246627808
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.003933444767262241


# Gridsearch with multiple pipelines (models/datasets)

In [33]:
# Illustrative, commented-out example: one grid search per pipeline.
# `model1` and `model2` are placeholders that are not defined in the cells above.

# from gdeep.search.gridsearch import Gridsearch

# pipe1 = Pipeline(model1, [dl_tr, dl_ts, dl_ts], loss_fn, writer)
# pipe2 = Pipeline(model2, [dl_tr, dl_ts, dl_ts], loss_fn, writer)

# search1 = Gridsearch(pipe1, "loss", 2)
# search1.start([SGD, Adam], 1, lr=[0.001, 0.01])

# search2 = Gridsearch(pipe2, "loss", 2)
# search2.start([SGD, Adam], 1, lr=[0.001, 0.01])

# Benchmarking a single model on multiple datasets

## Preparing multiple datasets

In [34]:
# Build two subsets of CIFAR10 of different sizes; each entry of
# `dataloaders_dicts` holds a display name and a (train, test) pair of loaders.
dl = TorchDataLoader(name="CIFAR10")
dataloaders_dicts = []

# (name, number of training indices, number of test indices)
subset_specs = (
    ("CIFAR10_5000", 5120, 1024),
    ("CIFAR10_10000", 10240, 2048),
)

for subset_name, n_train, n_test in subset_specs:
    train_indices = list(range(n_train))
    dl_tr, dl_temp = dl.build_dataloader(
        batch_size=1024, sampler=SubsetRandomSampler(train_indices)
    )

    test_indices = list(range(n_test))
    # NOTE(review): as for the training loader, the first element returned by
    # build_dataloader is kept -- confirm this is the intended split.
    dl_ts, dl_temp = dl.build_dataloader(
        batch_size=1024, sampler=SubsetRandomSampler(test_indices)
    )

    temp_dict = {"name": subset_name, "dataloaders": (dl_tr, dl_ts)}
    dataloaders_dicts.append(temp_dict)

print(dataloaders_dicts[1]["name"])


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
CIFAR10_10000


## Benchmarking model

In [35]:
# NOTE(review): this commented-out snippet uses a different import path
# (gdeep.pipeline) and a different constructor (Benchmark(writer)) than the live
# Benchmark usage in the "Benchmarking + Gridsearch" cell -- presumably an older
# API; confirm before re-enabling.

# from gdeep.pipeline import benchmark

# bench = benchmark.Benchmark(writer)

# bench.benchmark_model(model, dataloaders_dicts, loss_fn, SGD, 1, 0.01)

# Benchmarking a single dataset on multiple models

## Preparing multiple models

In [36]:
# Each entry of `models_dicts` holds a display name and a model made of a
# pretrained torchvision backbone followed by a 1000 -> 10 linear layer.
models_dicts = []

backbones = (
    ("resnet18", models.resnet18),
    ("vgg16", models.vgg16),
)

for backbone_name, build_backbone in backbones:
    model = nn.Sequential(build_backbone(pretrained=True), nn.Linear(1000, 10))
    temp_dict = {"name": backbone_name, "model": model}
    models_dicts.append(temp_dict)

## Benchmarking data

In [37]:
# NOTE(review): commented-out snippet; Benchmark(writer) does not match the
# constructor used by the live "Benchmarking + Gridsearch" cell -- presumably an
# older API; confirm before re-enabling.

# bench = benchmark.Benchmark(writer)

# bench.benchmark_data(models_dicts, (dl_tr, dl_ts), loss_fn, SGD, 1, 0.001)

## Benchmarking both

In [38]:
# NOTE(review): commented-out snippet; Benchmark(writer) does not match the
# constructor used by the live "Benchmarking + Gridsearch" cell, which passes the
# models, dataloaders and loss as well -- presumably an older API; confirm
# before re-enabling.

# from gdeep.search.benchmark import Benchmark
# from torch.optim import SGD, Adam, RMSprop

# loss_fn = nn.CrossEntropyLoss()

# bench = Benchmark(writer)

# bench.benchmark(models_dicts, dataloaders_dicts, loss_fn, optimizer = SGD, epochs = 1, learning_rate = 0.01, batch_size = 1024)

## Benchmarking + Gridsearch

In [39]:
from gdeep.search.benchmark import Benchmark
from gdeep.search.gridsearch import Gridsearch
from torch.optim import SGD, Adam, RMSprop

# Classification loss used for every (dataset, model) combination
loss_fn = nn.CrossEntropyLoss()

# The benchmark receives the model and dataloader dictionaries prepared above;
# the log below shows one search per (dataset, model) pair.
bench = Benchmark(models_dicts, dataloaders_dicts, loss_fn, writer)

# Candidate optimizers and learning-rate values handed to the search
optimizers = [SGD, Adam]
lr_values = [0.001, 0.01]

# Search on "loss"; the third positional argument of start() is 512 --
# presumably the batch size, TODO confirm.
search = Gridsearch(bench, "loss", 1)
search.start(optimizers, 1, 512, lr=lr_values)

# Earlier, non-search variant (not run here):
# bench.benchmark(optimizer = [SGD,Adam], epochs = 1, learning_rate = [0.001,0.01], batch_size = 1024)

[32m[I 2021-07-29 15:41:27,884][0m A new study created in memory with name: no-name-15fbe155-4807-466e-a666-818da488f4cf[0m


****************************************
Performing Gridsearch on Dataset: CIFAR10_5000, Model: resnet18
Epoch 1
-------------------------------
Training loss: 2.231724  [ 4/ 4]
Time taken for this epoch: 15s


[32m[I 2021-07-29 15:41:43,178][0m Trial 0 finished with value: 2.2317237854003906 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.009474864160284832}. Best is trial 0 with value: 2.2317237854003906.[0m
[32m[I 2021-07-29 15:41:43,179][0m A new study created in memory with name: no-name-4f60c4e7-45ce-48db-be56-d15b449c7fc7[0m


Validation results: 
 Accuracy: 0.3%,                 Avg loss: 0.000043 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.2317237854003906
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.009474864160284832
****************************************
Performing Gridsearch on Dataset: CIFAR10_5000, Model: vgg16
Epoch 1
-------------------------------
Training loss: 3.674537  [ 4/ 4]]
Time taken for this epoch: 37s


[32m[I 2021-07-29 15:42:22,849][0m Trial 0 finished with value: 3.674537181854248 and parameters: {'optimizer': <class 'torch.optim.adam.Adam'>, 'lr': 0.0021876585430790373}. Best is trial 0 with value: 3.674537181854248.[0m
[32m[I 2021-07-29 15:42:22,850][0m A new study created in memory with name: no-name-3c274748-e9f9-442e-be6b-4459b71cd17d[0m


Validation results: 
 Accuracy: 0.1%,                 Avg loss: 0.000080 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  3.674537181854248
  Params: 
    optimizer: <class 'torch.optim.adam.Adam'>
    lr: 0.0021876585430790373
****************************************
Performing Gridsearch on Dataset: CIFAR10_10000, Model: resnet18
Epoch 1
-------------------------------
Training loss: 2.012295  [ 8/ 8]
Time taken for this epoch: 30s


[32m[I 2021-07-29 15:42:53,359][0m Trial 0 finished with value: 2.0122947692871094 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.0010679146909753814}. Best is trial 0 with value: 2.0122947692871094.[0m
[32m[I 2021-07-29 15:42:53,360][0m A new study created in memory with name: no-name-a51eaa28-b8d9-4867-91f0-b6e4921bec5e[0m


Validation results: 
 Accuracy: 0.8%,                 Avg loss: 0.000075 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.0122947692871094
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.0010679146909753814
****************************************
Performing Gridsearch on Dataset: CIFAR10_10000, Model: vgg16
Epoch 1
-------------------------------
Training loss: 2.323925  [ 8/ 8]
Time taken for this epoch: 68s


[32m[I 2021-07-29 15:44:05,617][0m Trial 0 finished with value: 2.3239247798919678 and parameters: {'optimizer': <class 'torch.optim.sgd.SGD'>, 'lr': 0.006599893457521791}. Best is trial 0 with value: 2.3239247798919678.[0m


Validation results: 
 Accuracy: 0.2%,                 Avg loss: 0.000094 

Done!
Study statistics: 
  Number of finished trials:  1
  Number of pruned trials:  0
  Number of complete trials:  1
Best trial:
  Value:  2.3239247798919678
  Params: 
    optimizer: <class 'torch.optim.sgd.SGD'>
    lr: 0.006599893457521791
