<a href="https://colab.research.google.com/github/ValentinCord/HandsOnAI_2/blob/main/LSTM_Testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <span> NLP : Évaluation du modèle LSTM/GRU </span>
<hr style="border-bottom: solid;background-color:light;color:black;">

* [Installation des packages](#section-1)
* [Imports](#section-2)
* [Choix des paramètres](#section-3)
* [Lecture des données](#section-4)
* [Preprocessing](#section-5)
* [Chargement du modèle](#section-6)
* [Évaluation du modèle](#section-7)

<a name="section-1"></a>
# <span>1. Installation des packages</span>
<hr style="border-bottom: solid;background-color:light;color:black;">

In [None]:
!/opt/bin/nvidia-smi
!rm -rf sample_data

/bin/bash: /opt/bin/nvidia-smi: No such file or directory


<a name="section-2"></a>
# <span>2. Imports </span>
<hr style="border-bottom: solid;background-color:light;color:black;">

In [None]:
# basics 
import os
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle

# tensorflow
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, GRU
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam 
from tensorflow.keras.layers import Conv1D, MaxPooling1D

# plot 
import matplotlib.pyplot as plt 
import seaborn as sns 

# nltk 
import re
import nltk
nltk.download("stopwords")
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Colab-specific: make Google Drive available under /content/drive so the
# files referenced by the path constants below can be read.
from google.colab import drive

drive.mount(
    '/content/drive',
    force_remount=True,  # ask for a fresh mount even if one already exists
)

Mounted at /content/drive


<a name="section-3"></a>
# <span>3. Choix des paramètres</span>
<hr style="border-bottom: solid;background-color:light;color:black;">

In [None]:
# Length that every tokenised text is padded/truncated to before evaluation.
# NOTE(review): presumably has to equal the value used at training time - confirm.
MAX_SEQ_LEN = 5000

# The model, the tokenizer and the test set all live in one Google Drive folder.
DRIVE_DIR = '/content/drive/MyDrive/HandOnAI_2_NLP'
model_path = f'{DRIVE_DIR}/LSTM_model.h5'
tokenizer_path = f'{DRIVE_DIR}/tokenizer.pickle'
test_path = f'{DRIVE_DIR}/fake_test.csv'

<a name="section-4"></a>
# <span>4. Lecture des données</span>
<hr style="border-bottom: solid;background-color:light;color:black;">

In [None]:
# Read the test CSV and discard the two columns the evaluation does not use
# (presumably the saved row index and the textual class name - confirm).
df_test = pd.read_csv(test_path).drop(columns=['Unnamed: 0', 'target_name'])

<a name="section-5"></a>
# <span>5. Preprocessing</span>
<hr style="border-bottom: solid;background-color:light;color:black;">

## <span>5.1 Nettoyage de données</span>
<hr style="border-bottom: solid;background-color:light;color:black;">

In [None]:
# Characters (slash, brackets, pipe, @, comma, semicolon) that are replaced by
# a space when cleaning text.
# Raw string on purpose: `\[`, `\]` and `\|` are regex escapes, not Python
# string escapes; in a plain literal they raise an "invalid escape sequence"
# warning. The compiled pattern is unchanged.
REPLACE_BY_SPACE_RE = re.compile(r'[/(){}\[\]\|@,;]')
# French stop words from NLTK, held in a set for fast membership tests.
STOPWORDS = set(stopwords.words('french'))

def clean_text(text):
    """Normalise one raw document before tokenisation.

    The text is lower-cased, every character matched by
    ``REPLACE_BY_SPACE_RE`` is turned into a space, and the
    whitespace-separated words that appear in ``STOPWORDS`` are dropped.

    Args:
        text: The string to clean.

    Returns:
        The remaining words joined by single spaces.
    """
    normalised = REPLACE_BY_SPACE_RE.sub(' ', text.lower())
    kept_words = [token for token in normalised.split() if token not in STOPWORDS]
    return ' '.join(kept_words)

In [None]:
# Replace every raw document in the `data` column by its cleaned version.
df_test['data'] = df_test['data'].map(clean_text)

## <span>5.2 Tokenisation des données</span>
<hr style="border-bottom: solid;background-color:light;color:black;">

In [None]:
# Cleaned documents as a plain list of strings. The labels are prepared in the
# label-encoding cell, so no intermediate `y_test` list is built here.
X_test = df_test.data.tolist()

# Restore the pickled tokenizer from `tokenizer_path`
# (presumably the one fitted at training time - confirm).
# SECURITY: pickle.load can execute arbitrary code contained in the file;
# only load a tokenizer pickle that comes from a trusted source.
with open(tokenizer_path, 'rb') as handle:
    tokenizer = pickle.load(handle)

# Convert each document to a sequence of integer token ids ...
test_text_vec = tokenizer.texts_to_sequences(X_test)

# ... then pad/truncate every sequence to MAX_SEQ_LEN.
test_text_vec = pad_sequences(test_text_vec, maxlen=MAX_SEQ_LEN)


In [None]:
# One-hot encode the labels: map each label to an integer id, then expand the
# ids into one-hot rows.
# NOTE(review): the encoder is fitted on the test labels themselves; this
# assumes the test set contains the same classes as the training set - confirm.
encoder = LabelEncoder()
y_test = to_categorical(encoder.fit_transform(df_test['label'].values))

<a name="section-6"></a>
# <span>6. Chargement du modèle</span>
<hr style="border-bottom: solid;background-color:light;color:black;">

In [None]:
# Restore the saved Keras model from the file configured in `model_path`.
model = load_model(filepath=model_path)

<a name="section-7"></a>
# <span>7. Évaluation du modèle</span>
<hr style="border-bottom: solid;background-color:light;color:black;">

In [None]:
# Score the model on the padded test sequences against the one-hot labels.
# NOTE(review): the result is presumably [loss, accuracy]; confirm against the
# metrics the model was compiled with.
test_scores = model.evaluate(
    x=test_text_vec,
    y=y_test,
    verbose=1,
)

print("test scores:", test_scores)

test scores: [0.2265447974205017, 0.9320987462997437]
