## Packages and Assets

In [None]:
import keras.models
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

import seaborn as sns
import json
import matplotlib.pyplot as plt
import numpy as np

from keras.preprocessing.text import tokenizer_from_json

from keras.utils import pad_sequences
import pandas as pd
import re
import string
import nltk


## Dependencies

In [2]:
test_pp = pd.read_csv('../../assets/data/splits/test/preprocessed.csv')
test_pp

Unnamed: 0,title,tags,url,time,label
0,magazine luiza acao indicada investir semana v...,"['Ações', 'Ativa Investimentos', 'BTG Pactual'...",https://www.moneytimes.com.br/magazine-luiza-e...,12:49:00,0
1,ibovespa engata <NUM> alta fecha acima <NUM> m...,"['Azul', 'B2W', 'Banco do Brasil', 'Bradesco',...",https://www.moneytimes.com.br/ibovespa-engata-...,18:10:00,0
2,dividendos petrobras valem risco investir estatal,"['Ações', 'Comprar ou vender?', 'Empresas', 'G...",https://www.moneytimes.com.br/dividendos-da-pe...,13:27:00,1
3,petroleiros norte fluminense aprovam acordo pe...,"['Combustíveis', 'Empresas', 'Petrobras', 'Pet...",https://www.moneytimes.com.br/petroleiros-do-n...,17:02:00,0
4,diretor governanca conformidade petrobras plan...,"['Carreira', 'Empresas', 'Petrobras', 'Petróle...",https://www.moneytimes.com.br/diretor-de-gover...,12:56:00,0
...,...,...,...,...,...
199,ibovespa fecha alta nova york reduz impeto,"['Azul', 'B3', 'Banco Central', 'BR Malls', 'B...",https://www.moneytimes.com.br/ibovespa-fecha-e...,17:10:00,1
200,petrobras reduz diesel gasolina refinarias <NU...,"['Combustíveis', 'Diesel', 'Dólar', 'Gasolina'...",https://www.moneytimes.com.br/petrobras-reduz-...,10:40:00,1
201,ibovespa fecha alta recuperacao wall street al...,"['Bradesco', 'BRF', 'Coronavírus', 'Cyrela', '...",https://www.moneytimes.com.br/ibovespa-fecha-e...,17:07:00,1
202,petrobras utilizara tecnologia reduzir custos ...,"['Combustíveis', 'Empresas', 'Gás', 'Petrobras...",https://www.moneytimes.com.br/petrobras-utiliz...,17:06:00,1


In [None]:
model = keras.models.load_model('../../assets/deep_assets/lstm_model')
model.summary()

In [None]:
# Load the tokenizer from the file
with open('../../assets/deep_assets/tokenizer.json', 'r', encoding='utf-8') as f:
    tokenizer_json = f.read()
    tokenizer = tokenizer_from_json(tokenizer_json)
    word_index = tokenizer.word_index

## Predictions considering best deep model

In [None]:
X_test = test_pp.title
y_test = test_pp.to_numpy()[:, -1]

test_sequences = tokenizer.texts_to_sequences(X_test)
test_padded = pad_sequences(test_sequences, maxlen=8, padding='post', truncating='post')
test_padded

### Functions

In [None]:
def index2word(word_index):
    index_word = {}
    for key in word_index:
        index_word[word_index[key]] = key
    return index_word


def seq2text(seq, index_word):
    text = []
    for index in seq:
        text.append(index_word[index])
    return text

def show_confusion_matrix(cm):
        print("Confusion Matrix")
        plt.figure(figsize=(10, 7))

        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Negative', 'Positive'],
                    yticklabels=['Negative', 'Positive'])
        plt.xlabel('Predicted')
        plt.ylabel('Truth')
        plt.title('Confusion Matrix')
        plt.show()

In [None]:
index_word = index2word(word_index)

In [None]:
pred_prob = model.predict(test_padded)

In [None]:
# y_pred = [1 if p > 0.5 else 0 for p in pred_prob]
y_pred = np.argmax(pred_prob, axis=1)

In [None]:
df_results = pd.DataFrame()
X_test = list(X_test)
y_test = list(y_test)
df_results['sequence'] = test_sequences
df_results['X_test'] = X_test
df_results['seq2text'] = df_results['sequence'].apply(lambda x: seq2text(x, index_word))
df_results['y_pred'] = y_pred
df_results['y_true'] = y_test
df_results

### Metrics

In [None]:
# accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# precision = precision_score(average='macro', y_true=y_test, y_pred=y_pred)
# recall = recall_score(average='macro', y_true=y_test, y_pred=y_pred)
# f1 = f1_score(average='macro', y_true=y_test, y_pred=y_pred)
# cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
print(classification_report(y_test,y_pred))
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
show_confusion_matrix(cm)