#SIGRADI 2023#

#Aprendizagem de máquina aplicada na avaliação de projetos aeroportuários no Brasil baseados em modelos BIM#


Autores:
*   Ítalo Guedes dos Santos¹ - italo.guedes@ufpe.br
*   Max Andrade¹ - max.andrade@ufpe.br
*   Cleber Zanchettin² - cz@cin.ufpe.br
*   Adriana Rolim³ - adriana.rolim@mpor.gov.br  


¹ Universidade Federal de Pernambuco (UFPE), ² Centro de Informática Universidade Federal de Pernambuco (CIn / UFPE), ³ Secretaria Nacional de Aviação Civil (SAC)

Data: agosto / 2023

#Setup

In [None]:
import pandas as pd

#Dataset import (.csv)

In [None]:
# Load the two splits; both files are parsed as ';'-separated text decoded
# with ISO-8859-1.
train_df = pd.read_csv(
    '/content/sample_data/DATASET-TPS-SIGRADI-R00-TREINO.csv',
    sep=';',
    encoding="ISO-8859-1",
)
valid_df = pd.read_csv(
    '/content/sample_data/DATASET-TPS-SIGRADI-R00-TESTE.csv',
    sep=';',
    encoding="ISO-8859-1",
)

#Data visualization

In [None]:
# Preview the first five rows of the raw training frame (label column included).
train_df.head(n=5)

Unnamed: 0,L1,C1,L2,C2,L3,C3,L4,C4,L5,C5,...,C11,L12,C12,L13,C13,L14,C14,L15,C15,status
0,2,5,6,28,8,1,4,2,5,5,...,4,8,19,23,6,3,6,2,27,0
1,6,5,3,16,4,4,1,3,8,5,...,5,14,18,12,5,3,2,4,49,1
2,6,5,4,15,4,2,3,2,3,2,...,8,11,19,23,3,3,4,1,18,1
3,3,2,5,15,7,5,3,2,6,7,...,8,7,14,9,8,3,4,3,25,0
4,2,4,7,25,5,1,1,5,3,7,...,4,14,9,25,4,2,5,3,8,0


#Separating the response variable

In [None]:
# Detach the 'status' label from each frame: pop() returns the column and
# removes it in place, leaving only feature columns behind.
y_train = train_df.pop('status')
y_valid = valid_df.pop('status')

In [None]:
# Show the first ten training labels.
y_train.iloc[:10]

0    0
1    1
2    1
3    0
4    0
5    0
6    0
7    1
8    0
9    0
Name: status, dtype: int64

In [None]:
# Preview the first five rows again, now that 'status' has been removed.
train_df.head(n=5)

Unnamed: 0,L1,C1,L2,C2,L3,C3,L4,C4,L5,C5,...,L11,C11,L12,C12,L13,C13,L14,C14,L15,C15
0,2,5,6,28,8,1,4,2,5,5,...,5,4,8,19,23,6,3,6,2,27
1,6,5,3,16,4,4,1,3,8,5,...,6,5,14,18,12,5,3,2,4,49
2,6,5,4,15,4,2,3,2,3,2,...,6,8,11,19,23,3,3,4,1,18
3,3,2,5,15,7,5,3,2,6,7,...,4,8,7,14,9,8,3,4,3,25
4,2,4,7,25,5,1,1,5,3,7,...,2,4,14,9,25,4,2,5,3,8


#Creating training / test sets

In [None]:
# Convert the remaining feature columns of each frame to a NumPy array.
x_train = train_df.to_numpy()
x_valid = valid_df.to_numpy()

In [None]:
# Display the first training sample (row 0 of the feature array).
x_train[0]

array([ 2,  5,  6, 28,  8,  1,  4,  2,  5,  5,  9,  5,  1,  6, 20,  5,  3,
       12, 12, 12,  5,  4,  8, 19, 23,  6,  3,  6,  2, 27])

# Summarizing the Training and Validation Data

In [None]:
# Dimensions of the training feature array.
x_train.shape

(800, 30)

In [None]:
# Dimensions of the training labels; its length should match the row count of x_train.
y_train.shape

(800,)

In [None]:
# Dimensions of the validation feature array.
x_valid.shape

(200, 30)

In [None]:
# Dimensions of the validation labels; its length should match the row count of x_valid.
y_valid.shape

(200,)

In [None]:
# Global minimum over every entry of x_train (no axis is given).
x_train.min()

1

In [None]:
# Global maximum over every entry of x_train (no axis is given).
x_train.max()

54

#Normalize x_train and x_valid

In [None]:
# Scale both splits by the largest value found in the *training* features
# rather than a hard-coded 54. The divisor then follows the data if the dataset
# is replaced, and re-running this cell is harmless: once x_train is scaled its
# maximum is 1.0, so a second pass divides by 1. The validation split reuses
# the training divisor so both splits share the same scale.
train_max = x_train.max()
x_train = x_train / train_max
x_valid = x_valid / train_max

In [None]:
# Global minimum of x_train after the normalization step.
x_train.min()

0.018518518518518517

In [None]:
# Global maximum of x_train after the normalization step.
x_train.max()

1.0

#Install TabNet (Deep Neural Network)

In [None]:
!pip install pytorch_tabnet



## Importing the Model

In [None]:
from pytorch_tabnet.tab_model import TabNetClassifier
import torch

## Defining the Model

In [None]:
# TabNet classifier optimized with Adam. The learning-rate scheduler is
# StepLR configured with step_size=10 and gamma=0.9; every other
# hyperparameter is left at the library default.
clf = TabNetClassifier(
    optimizer_fn=torch.optim.Adam,
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    scheduler_params=dict(step_size=10, gamma=0.9),
)



##Train the Model

In [None]:
# Splits monitored during training, keyed by the name used in the log output.
# Insertion order matters: the names and the (features, labels) pairs are
# passed to fit() in this order.
monitored_splits = {
    'train': (x_train, y_train),
    'valid': (x_valid, y_valid),
}

# Train on the training split while reporting AUC and balanced accuracy for
# each monitored split.
# NOTE(review): per the pytorch-tabnet docs, early stopping follows the last
# eval_set entry and the last metric, and weights=1 enables automatic
# class-balanced sampling - confirm against the installed version.
clf.fit(
    x_train,
    y_train,
    eval_set=list(monitored_splits.values()),
    eval_name=list(monitored_splits),
    eval_metric=['auc', 'balanced_accuracy'],
    max_epochs=200,
    patience=60,
    batch_size=512,
    virtual_batch_size=512,
    num_workers=0,
    weights=1,
    drop_last=False,
)

epoch 0  | loss: 0.79994 | train_auc: 0.49309 | train_balanced_accuracy: 0.50797 | valid_auc: 0.50705 | valid_balanced_accuracy: 0.4895  |  0:00:00s
epoch 1  | loss: 0.73207 | train_auc: 0.51385 | train_balanced_accuracy: 0.5     | valid_auc: 0.54505 | valid_balanced_accuracy: 0.5     |  0:00:00s
epoch 2  | loss: 0.64193 | train_auc: 0.53823 | train_balanced_accuracy: 0.5     | valid_auc: 0.56616 | valid_balanced_accuracy: 0.5     |  0:00:00s
epoch 3  | loss: 0.6062  | train_auc: 0.53728 | train_balanced_accuracy: 0.5     | valid_auc: 0.59036 | valid_balanced_accuracy: 0.5     |  0:00:00s
epoch 4  | loss: 0.6256  | train_auc: 0.55509 | train_balanced_accuracy: 0.50041 | valid_auc: 0.59098 | valid_balanced_accuracy: 0.49825 |  0:00:00s
epoch 5  | loss: 0.57788 | train_auc: 0.57539 | train_balanced_accuracy: 0.55091 | valid_auc: 0.60416 | valid_balanced_accuracy: 0.53378 |  0:00:00s
epoch 6  | loss: 0.53997 | train_auc: 0.60368 | train_balanced_accuracy: 0.53503 | valid_auc: 0.67727 | va

