# Modelo de ML: Rede Neural LSTM

## Configurações iniciais

In [65]:
# Imports of every library used in the notebook (standard library first, then third-party)
import datetime
import pickle as pl

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Bidirectional, LSTM, Embedding, Dense, Dropout
from tensorflow.keras.models import Sequential

In [66]:
# Load the preprocessed train/test splits from their pickle files.
# NOTE(security): pickle.load can execute arbitrary code; only load files you trust.
def _carregar_pickle(caminho):
    """Return the object stored in the pickle file at `caminho`, closing the file afterwards."""
    with open(caminho, 'rb') as arquivo:
        return pl.load(arquivo)


X_train = _carregar_pickle('../data/processed/X_train.pkl')
X_test = _carregar_pickle('../data/processed/X_test.pkl')
Y_train = _carregar_pickle('../data/processed/Y_train.pkl')
Y_test = _carregar_pickle('../data/processed/Y_test.pkl')

## Construção do Modelo

In [67]:
# Start from an empty Sequential model; the layers are appended to it in the cells below
modelo = Sequential()

### Camadas de Entrada

In [68]:
# Embedding layer: turns the integer-encoded sequences into dense vectors

dimensao_embedding = 32      # size of each embedding vector
tamanho_frases = 250         # length of every input sequence
vocabulario_tamanho = 13000  # number of distinct token indices the layer accepts

modelo.add(
    Embedding(
        input_dim=vocabulario_tamanho,
        output_dim=dimensao_embedding,
        input_length=tamanho_frases,
    )
)

### Camadas Escondidas

In [69]:
# Bidirectional LSTM layer: reads each sequence both past --> future and future --> past

neuronios = 16  # units per direction
modelo.add(Bidirectional(LSTM(units=neuronios)))

# Dropout layer applied to the LSTM output

modelo.add(Dropout(rate=0.1))

### Camadas de Saída

In [70]:
# Dense layers that produce the output: a 16-unit ReLU layer followed by
# a single sigmoid unit
modelo.add(Dense(units=16, activation='relu'))
modelo.add(Dense(units=1, activation='sigmoid'))

## Sumário do Modelo

In [71]:
# Print the layer-by-layer summary of the model (output shapes and parameter counts)
modelo.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_8 (Embedding)     (None, 250, 32)           416000    
                                                                 
 bidirectional_7 (Bidirectio  (None, 32)               6272      
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_10 (Dense)            (None, 16)                528       
                                                                 
 dense_11 (Dense)            (None, 1)                 17        
                                                                 
Total params: 422,817
Trainable params: 422,817
Non-trainable params: 0
________________________________________________

## Compilação e Treinamento do Modelo 

In [72]:
# Configure training: binary cross-entropy loss, Adam optimizer, accuracy as the reported metric
modelo.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [60]:
# Take a timestamp before training so the elapsed time can be reported afterwards
now = datetime.datetime.now()

# Fit for 2 epochs with batches of 5 samples, passing the test split as validation data.
# NOTE(review): Keras documents verbose values 0, 1 and 2; with 3 the recorded output
# shows only the "Epoch i/n" lines -- confirm this is intentional.
# NOTE(review): the test split doubles as validation data here -- confirm it is not
# used for model selection.
modelo_ = modelo.fit(
    X_train,
    Y_train,
    batch_size=5,
    epochs=2,
    verbose=3,
    validation_data=(X_test, Y_test),
)

now2 = datetime.datetime.now()
time = now2 - now

print(f"O tempo que levou para treinar é de {time}")

Epoch 1/2
Epoch 2/2
O tempo que levou para treinar é de 0:39:29.593908


In [61]:
# Tabulate the metrics recorded by fit(): one row per epoch, one column per metric
history = pd.DataFrame(data=modelo_.history)
history

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,0.43019,0.808282,0.382392,0.826267
1,0.283262,0.889718,0.283666,0.890667


In [63]:
# Predicted probabilities from the sigmoid output; kept under their own name so the raw
# scores stay available after thresholding.
y_prob = modelo.predict(X_test)

# Vectorized thresholding: scores strictly above 0.5 become class 1, everything else 0.
# ravel() flattens the result into a 1-D label array.
y_pred = (np.asarray(y_prob) > 0.5).astype(int).ravel()
y_pred



array([0, 0, 1, ..., 1, 1, 0])

In [64]:
# Per-class precision, recall and F1 of the thresholded predictions against the test labels.
# classification_report is imported with the other dependencies in the notebook's import cell.
print(classification_report(Y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      0.90      0.89      3755
           1       0.90      0.88      0.89      3745

    accuracy                           0.89      7500
   macro avg       0.89      0.89      0.89      7500
weighted avg       0.89      0.89      0.89      7500

