In [1]:
from tensorflow.keras.models import load_model
from gensim.models import KeyedVectors  # Or another embedding library you used
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources this notebook relies on: the tokenizer models,
# the WordNet data for lemmatizing, and the stopword lists.
for resource_name in ('punkt', 'wordnet', 'stopwords'):
    nltk.download(resource_name)

# Restore the previously saved Keras model from disk.
lstm_model = load_model('lstm_model.keras')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
  saveable.load_own_variables(weights_store.get(inner_path))


In [2]:
# Load the GloVe embeddings
def load_glove_embeddings(file_path, vector_size=300):
    """Read a GloVe-style text file into a ``{token: vector}`` dictionary.

    Each useful line holds a token followed by its whitespace-separated
    vector components.

    Args:
        file_path: Path to the UTF-8 encoded embedding text file.
        vector_size: Number of components per vector. Each line is split from
            the right at most this many times, so a token that itself contains
            whitespace stays intact instead of being parsed as numbers. Lines
            with fewer components are parsed as "first field + everything
            after it".

    Returns:
        dict mapping each token (str) to a 1-D ``np.float32`` array.

    Raises:
        ValueError: If a vector component cannot be converted to a float.
    """
    embeddings = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Splitting from the right keeps the numeric tail separate from
            # the token, whatever the token contains.
            fields = line.strip().rsplit(maxsplit=vector_size)
            if len(fields) < 2:
                # Blank line, or a token with no vector: nothing to store.
                continue
            embeddings[fields[0]] = np.asarray(fields[1:], dtype=np.float32)
    return embeddings

glove_file = "glove.6B/glove.6B.300d.txt"  # Path to the GloVe file
# Parse the whole embedding file into an in-memory {word: vector} dictionary.
glove_embeddings = load_glove_embeddings(glove_file)

In [3]:
def preprocess_input(text):
    """Normalize a raw phrase into a list of lemmatized, non-stopword tokens.

    Steps, in order: lowercase the text, delete every character that is not
    an ASCII letter or whitespace, tokenize with NLTK, discard English
    stopwords, and lemmatize what remains.

    Args:
        text: The raw input phrase (str).

    Returns:
        list of str: the processed tokens, in their original order.
    """
    wnl = WordNetLemmatizer()
    english_stopwords = set(stopwords.words('english'))

    # Lowercase, then strip digits, punctuation and other special characters.
    cleaned = re.sub(r'[^a-zA-Z\s]', '', text.lower())

    kept_tokens = []
    for token in nltk.word_tokenize(cleaned):
        if token in english_stopwords:
            continue
        kept_tokens.append(wnl.lemmatize(token))
    return kept_tokens


In [4]:
# Set parameters
# NOTE(review): presumably the sequence length the saved model was trained
# with -- confirm against the training notebook.
max_sequence_length = 66


# Turn the words into a padded sequence of embedding vectors
def words_to_embedding_sequence(words, embeddings, max_sequence_length):
    """Map words to embedding vectors, truncated/padded to a fixed length.

    Words missing from ``embeddings`` and padding slots are both represented
    by zero vectors. Those zero vectors use the same length and dtype as the
    stored embeddings, so stacking the result does not silently upcast it.

    Args:
        words: Iterable of tokens (str).
        embeddings: Non-empty mapping of token -> 1-D vector; all vectors are
            assumed to share the length of the first one.
        max_sequence_length: Number of vectors to return.

    Returns:
        list of ``max_sequence_length`` vectors.

    Raises:
        ValueError: If ``embeddings`` is empty, since the vector size cannot
            be determined.
    """
    reference = next(iter(embeddings.values()), None)
    if reference is None:
        raise ValueError("embeddings must contain at least one vector")
    reference = np.asarray(reference)
    # Look the size/dtype up once instead of once per missing word or pad slot.
    dim, dtype = len(reference), reference.dtype

    # Truncate first so no vectors are built for words that get dropped.
    vectors = [
        embeddings[word] if word in embeddings else np.zeros(dim, dtype=dtype)
        for word in list(words)[:max_sequence_length]
    ]
    # Right-pad with fresh zero vectors (no shared array between slots).
    vectors.extend(
        np.zeros(dim, dtype=dtype)
        for _ in range(max_sequence_length - len(vectors))
    )
    return vectors

In [5]:
def predict_feeling(phrase, model, embeddings, max_sequence_length):
    """Predict the feeling expressed by a phrase.

    The phrase is preprocessed into tokens, converted to a fixed-length
    sequence of embedding vectors, given a leading batch axis and passed to
    ``model.predict``.

    Args:
        phrase: Raw input text.
        model: Object exposing ``predict``; called with a single-sample batch.
        embeddings: Mapping of token -> embedding vector.
        max_sequence_length: Length the embedded sequence is padded/cut to.

    Returns:
        tuple: ``(predicted_class, prediction)`` where ``predicted_class`` is
        one of the five labels below and ``prediction`` is the raw value
        returned by ``model.predict``.
    """
    # NOTE(review): label order presumably mirrors the training encoding -- confirm.
    class_labels = ['sadness', 'joy', 'love', 'anger', 'fear']

    tokens = preprocess_input(phrase)

    # Shape after adding the batch axis: (1, max_sequence_length, embedding_dim)
    batch = np.expand_dims(
        words_to_embedding_sequence(tokens, embeddings, max_sequence_length),
        axis=0,
    )

    prediction = model.predict(batch)

    # argmax runs over the flattened output; with a one-sample batch this is
    # the position of the highest score.
    best_index = np.argmax(prediction)
    return class_labels[best_index], prediction


In [None]:
if __name__ == "__main__":
    # Interactive prompt: keep classifying phrases until the user types
    # 'exit' (compared case-insensitively).
    print("Enter a phrase to predict its feeling (or 'exit' to quit):")
    user_input = input("Phrase: ")
    while user_input.lower() != 'exit':
        predicted_class, prediction_scores = predict_feeling(
            user_input, lstm_model, glove_embeddings, max_sequence_length
        )
        print(f"Predicted feeling: {predicted_class}")
        print(f"Confidence scores: {prediction_scores}")
        # Read the next phrase before re-checking the loop condition.
        user_input = input("Phrase: ")
