# Basic tutorial: tabular data
#### Author: Matteo Caorsi

This short tutorial introduces the basic functionality of the *giotto-deep* API.

The main steps of the tutorial are the following:
 1. creation of a dataset
 2. creation of a model
 3. definition of metrics and losses
 4. running of benchmarks
 5. interactive visualisation of the results

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np
import plotly.express as px
import torch
from torch import nn
import pandas as pd

from sklearn import datasets

from gdeep.models import FFNet

from gdeep.visualisation import  persistence_diagrams_of_activations

from torch.utils.tensorboard import SummaryWriter
from gdeep.data import TorchDataLoader

from gtda.diagrams import BettiCurve

from gtda.plotting import plot_betti_surfaces

# Initialize the tensorboard writer

In order to analyse the results of your models, you need to start tensorboard.
In a terminal, move into the `/example` folder and run the following command:

```
tensorboard --logdir=runs
```

After the training, open [http://localhost:6006/](http://localhost:6006/) to see all the visualisation results.

In [2]:
# Tensorboard writer shared by the rest of the notebook (it is handed to the Pipeline below).
# Called without arguments, SummaryWriter logs under ./runs, which matches the
# `tensorboard --logdir=runs` command used to start the dashboard.
writer = SummaryWriter()

# Create your dataset

In [3]:
# Wrap the built-in "DoubleTori" dataset
dl = TorchDataLoader(name="DoubleTori")

# Build the loaders with batch size 1, then split the pair into
# the training loader and the test loader
loaders = dl.build_dataloader(batch_size=1)
dl_tr, dl_ts = loaders

# Define and train your model

In [4]:
from gdeep.pipeline import Pipeline

# Stages of the network, in execution order: flatten every sample,
# then apply the feed-forward net whose layer widths are given by `arch`
# (the printed model shows Linear(3 -> 50) followed by Linear(50 -> 3)).
layers = [
    nn.Flatten(),
    FFNet(0, arch=[3, 50, 3]),
]
model = nn.Sequential(*layers)


In [5]:
from torch.optim import SGD

# Show the architecture that is about to be trained
print(model)

# Criterion handed to the pipeline
loss_fn = nn.CrossEntropyLoss()

# Bundle the model, the (train, test) dataloaders, the loss and the tensorboard writer
pipe = Pipeline(model, (dl_tr, dl_ts), loss_fn, writer)

# Keyword arguments forwarded to `pipe.train`
train_kwargs = {"batch_size": 1, "lr": 0.01}

# Train with SGD; the recorded output of this call reports "TOTAL EPOCHS  5"
pipe.train(SGD, 5, **train_kwargs)



Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): FFNet(
    (layer0): Linear(in_features=3, out_features=50, bias=True)
    (layer1): Linear(in_features=50, out_features=3, bias=True)
  )
)
TOTAL EPOCHS  5
Epoch 1
-------------------------------
Training loss: 0.563694  [160/160]
Time taken for this epoch: 0s
Validation results: 
 Accuracy: 5.0%,                 Avg loss: 0.243561 

Epoch 2
-------------------------------
Training loss: 0.551450  [160/160]
Time taken for this epoch: 0s
Validation results: 
 Accuracy: 5.0%,                 Avg loss: 0.246287 

Epoch 3
-------------------------------
Training loss: 1.503457  [160/160]
Time taken for this epoch: 0s
Validation results: 
 Accuracy: 5.0%,                 Avg loss: 0.247075 

Epoch 4
-------------------------------
Training loss: 0.552362  [160/160]
Time taken for this epoch: 0s
Validation results: 
 Accuracy: 5.0%,                 Avg loss: 0.247561 

Epoch 5
-------------------------------
Training loss: 0.843754 

# Simply use interpretability tools

In [6]:
from gdeep.analysis.interpretability import Interpreter

# Draw ONE batch and unpack it, so that the features and the labels given to
# the interpreter belong to the same samples. Calling next(iter(dl_tr)) twice
# creates two independent iterators; if the loader shuffles, the inputs would
# be paired with the labels of a different batch.
x_interp, y_interp = next(iter(dl_tr))

# Interpreter built around the trained model
inter = Interpreter(model)

# Compute the attributions for the batch; the trailing ";" hides the cell's return value
inter.interpret_tabular(x_interp, y_interp);


               activations. The hooks and attributes will be removed
            after the attribution is finished


# Extract inner data from your models

In [7]:
from gdeep.models import ModelExtractor

# The extractor is built from the trained model and the loss used to train it
me = ModelExtractor(model, loss_fn)

# Mapping: parameter name -> parameter tensor
lista = me.get_layers_param()

# One line per parameter: its name followed by the shape of its tensor
for param_name, param_tensor in lista.items():
    print(param_name, param_tensor.shape)


1.layer0.weight torch.Size([50, 3])
1.layer0.bias torch.Size([50])
1.layer1.weight torch.Size([3, 50])
1.layer1.bias torch.Size([3])


In [8]:
# First sample of the first batch yielded by the training loader
x = next(iter(dl_tr))[0][0]

# The decision-boundary computation is skipped for int64 inputs
# (presumably because it needs floating-point data -- TODO confirm).
if x.dtype != torch.int64:
    res = me.get_decision_boundary(x, n_epochs=100)
    # A bare `res.shape` nested inside the `if` is never echoed by Jupyter
    # (only a top-level trailing expression is), so print it explicitly.
    print(res.shape)

Executing the decison boundary computations:
Step: 99/100

In [9]:
# Take the inputs (element 0) of the first batch from the training loader
first_batch = next(iter(dl_tr))
x = first_batch[0]

# Activations recorded by the extractor for this input batch
list_activations = me.get_activations(x)

# Number of entries returned
len(list_activations)


5

In [10]:
# One (inputs, labels) batch from the training loader
x, target = next(iter(dl_tr))

# Gradients are only requested for float inputs
if x.dtype == torch.float:
    gradient_output = me.get_gradients(x, target=target)
    # Element 1 of the result is iterated and the shape of each gradient printed
    # (the recorded shapes match the model's weight and bias tensors)
    for grad in gradient_output[1]:
        print(grad.shape)

torch.Size([50, 3])
torch.Size([50])
torch.Size([3, 50])
torch.Size([3])


# Visualise activations and other topological aspects of your model

In [11]:
from gdeep.visualisation import Visualiser

# The visualiser is built from the trained pipeline
vs = Visualiser(pipe)

# NOTE(review): the recorded run of this cell ended with
#   AttributeError: module 'tensorflow._api.v2.io.gfile' has no attribute 'get_filesystem'
# Which of the calls below raised it is not visible here. This error is commonly
# reported when tensorboard embedding logging runs in an environment that also has
# TensorFlow installed -- TODO confirm and fix the environment before publishing.
vs.plot_data_model()
# `x` is the input batch drawn in the gradients cell above
vs.plot_activations(x)
vs.plot_persistence_diagrams(x)


AttributeError: module 'tensorflow._api.v2.io.gfile' has no attribute 'get_filesystem'

In [None]:

# Plot the decision boundary through the visualiser.
# NOTE(review): this cell has no execution count, i.e. it was not run in the saved notebook.
vs.plot_decision_boundary()

In [None]:
# Betti plot across the layers for the input batch `x`.
# NOTE(review): (0, 1) is presumably the tuple of homology dimensions -- TODO confirm.
vs.betti_plot_layers((0, 1), x)

In [None]:
# Plot the results stored in the tabular interpreter `inter`.
# NOTE(review): the name `plt` is the conventional alias of matplotlib.pyplot;
# consider a different name (e.g. `fig`) if pyplot is ever imported in this notebook.
plt = vs.plot_interpreter_tabular(inter)