<a href="https://colab.research.google.com/github/franklinroosevelth/deep_learning/blob/main/classification_texte_exercice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importation des packages

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import models, layers
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Chargement des données

In [None]:
# Number of words to treat as features (forwarded to `load_data` as `num_words`).
max_features = 1000
# Reviews are cut after this number of words.
max_len = 500
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=max_features,
)

# Afficher les codes en texte

In [None]:
# Word -> integer index mapping provided by the IMDB dataset helper.
word_index = imdb.get_word_index()
# Flip the mapping so that an index can be looked up to recover its word.
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))
def decode_review(text):
  """Turn a review encoded as word indices back into a readable string.

  Each index is shifted down by 3 before being looked up in the module-level
  `reverse_word_index`; indices with no entry are rendered as '?'.
  The decoded words are joined with single spaces.
  """
  words = (reverse_word_index.get(token - 3, '?') for token in text)
  return ' '.join(words)

# Keep the decoded text under its own name. Rebinding `decode_review` to the
# returned string would shadow the function, so calling it again (or simply
# re-running this cell) would fail with "'str' object is not callable".
decoded_review = decode_review(X_train[0])
print("Critique de film décodée : ")
print(decoded_review)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
Critique de film décodée : 
? this film was just brilliant casting ? ? story direction ? really ? the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same ? ? as myself so i loved the fact there was a real ? with this film the ? ? throughout the film were great it was just brilliant so much that i ? the film as soon as it was released for ? and would recommend it to everyone to watch and the ? ? was amazing really ? at the end it was so sad and you know what they say if you ? at a film it must have been good and this definitely was also ? to the two little ? that played the ? of ? and paul they were just brilliant children are often left out of the ? ? i think because the stars that play them all ? up are such a big ? for the whole film but these children are amazing and should be ? for what th

In [None]:
# Text padding
# Bring every review to the fixed length `max_len`, for both the training and
# the test split. Shorter sequences receive zeros at the beginning until they
# reach `max_len`.
X_train, X_test = (
    pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)

In [None]:
# Example of sequence padding on a tiny hand-written input.
sequences = [
    [1, 2, 3],
    [4, 5, 6, 7, 78],
    [8, 9],
]
# Use a demo-specific length here. Assigning to `max_len` would overwrite the
# global value (500) that the model definition reads afterwards, so a fresh
# top-to-bottom run would build the network for inputs of length 5 while the
# reviews were padded to 500.
demo_max_len = 5
sequences = pad_sequences(sequences, maxlen=demo_max_len)
print(sequences)

[[ 0  0  1  2  3]
 [ 4  5  6  7 78]
 [ 0  0  0  8  9]]


## Construction du modèle de deep learning

In [None]:
# Simple feed-forward sentiment classifier:
#   Input     -> integer sequences of length `max_len`
#   Embedding -> one 128-dimensional vector per word index (< max_features)
#   Flatten   -> concatenates the embedded sequence into a single vector
#   Dense     -> 128 ReLU units
#   Dense     -> 1 sigmoid unit (binary output)
# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument, which is deprecated in Keras 3; declaring
# the shape up front also lets `model.summary()` report output shapes and
# parameter counts before training.
model = models.Sequential([
    layers.Input(shape=(max_len,)),
    layers.Embedding(max_features, 128),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (None, 500, 128)          128000    
                                                                 
 flatten_4 (Flatten)         (None, 64000)             0         
                                                                 
 dense_8 (Dense)             (None, 128)               8192128   
                                                                 
 dense_9 (Dense)             (None, 1)                 129       
                                                                 
Total params: 8320257 (31.74 MB)
Trainable params: 8320257 (31.74 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Configure training: RMSprop optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 5 epochs with mini-batches of 32, holding out 20% of the training
# data for validation. The returned object is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
