## **Import Libraries**

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import json
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout



lemmatizer = WordNetLemmatizer()

## **Téléchargement des Ressources Nécessaires**

In [15]:

nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /usr/share/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

## **Chargement et Préparation des Données**

In [16]:
# Load dataset
with open('/kaggle/input/dataset/KB.json', 'r') as f:
    data = json.load(f)

# Create DataFrame
df = pd.DataFrame(data['intents'])
df = df.explode('patterns').reset_index(drop=True)
df = df.rename(columns={'patterns': 'Questions'})
df = df.explode('responses').reset_index(drop=True)
df = df.rename(columns={'responses': 'Responses'})
df

Unnamed: 0,tag,Questions,Responses,response
0,greeting,Hi,Hello there. Tell me how are you feeling today?,
1,greeting,Hi,Hi there. What brings you here today?,
2,greeting,Hi,Hi there. How are you feeling today?,
3,greeting,Hi,Great to see you. How do you feel currently?,
4,greeting,Hi,Hello there. Glad to see you're back. What's g...,
...,...,...,...,...
864996,fact-29,How do I know if I'm unwell?,"If your beliefs , thoughts , feelings or behav...",
864997,fact-30,How can I maintain social connections? What if...,"A lot of people are alone right now, but we do...",
864998,fact-31,What's the difference between anxiety and stress?,Stress and anxiety are often used interchangea...,
864999,fact-32,What's the difference between sadness and depr...,"Sadness is a normal reaction to a loss, disapp...",


In [18]:
df.head()

Unnamed: 0,tag,Questions,Responses
0,greeting,Hi,Hello there. Tell me how are you feeling today?
1,greeting,Hi,Hi there. What brings you here today?
2,greeting,Hi,Hi there. How are you feeling today?
3,greeting,Hi,Great to see you. How do you feel currently?
4,greeting,Hi,Hello there. Glad to see you're back. What's g...


## **Nettoyage et Prétraitement des Données**

In [19]:
# Text cleaning function
def clean_text(text):
    text = re.sub(r'[^A-Za-z\s]', '', text)  # Remove special characters and digits
    text = text.lower()  # Convert to lowercase
    return text


In [20]:
# Clean 'Questions' and 'Responses' columns
df['Questions'] = df['Questions'].apply(clean_text)
df['Responses'] = df['Responses'].apply(clean_text)

## **Encodage et Vectorisation**

In [21]:
# Encode tags
label_encoder = LabelEncoder()
df['tag'] = label_encoder.fit_transform(df['tag'])

In [22]:
# Vectorize questions
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(df['Questions'])

In [23]:
# Labels
y = df['tag']


## **Division des Données en Ensembles d'Entraînement et de Test**

In [24]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert sparse matrices to dense arrays
X_train_dense = X_train.toarray()
X_test_dense = X_test.toarray()

## **Création , Compilation et Entraînement du Modèle**

In [25]:
# Build DNN model
model = Sequential()
model.add(Dense(128, input_dim=X_train_dense.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train_dense, y_train, epochs=5, batch_size=32)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m21609/21609[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 5ms/step - accuracy: 0.9104 - loss: 0.3733
Epoch 2/5
[1m21609/21609[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 5ms/step - accuracy: 0.9919 - loss: 0.0269
Epoch 3/5
[1m21609/21609[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 5ms/step - accuracy: 0.9944 - loss: 0.0186
Epoch 4/5
[1m21609/21609[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 5ms/step - accuracy: 0.9949 - loss: 0.0167
Epoch 5/5
[1m21609/21609[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 5ms/step - accuracy: 0.9953 - loss: 0.0154


<keras.src.callbacks.history.History at 0x7f488ac46a10>

## **Évaluation et Prédiction**

In [26]:
from sklearn.metrics import  accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

# Prédictions sur les données de test
y_pred = model.predict(X_test_dense)
y_pred_classes = np.argmax(y_pred, axis=1)

# Précision du modèle
accuracy = accuracy_score(y_test, y_pred_classes)
print(f'Précision du modèle : {accuracy:.2f}')

# Évaluation du modèle
loss, accuracy = model.evaluate(X_test_dense, y_test)
print(f'Loss: {loss}, Accuracy: {accuracy}') 



[1m5403/5403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step
Précision du modèle : 1.00
[1m5403/5403[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9990 - loss: 0.0058
Loss: 0.004179623909294605, Accuracy: 0.9988893270492554


## **Tester le Modèle**

In [27]:
# Fonction de test du modèle avec sélection aléatoire de la réponse
def test_model(model, question, df):
    # Nettoyer et vectoriser la question
    cleaned_question = clean_text(question)
    vectorized_question = tfidf.transform([cleaned_question])

    # Prédire le tag
    predicted_probabilities = model.predict(vectorized_question)
    predicted_tag_index = np.argmax(predicted_probabilities, axis=1)[0]
    predicted_tag = label_encoder.inverse_transform([predicted_tag_index])[0]

  
    return predicted_tag

In [28]:
import random
def get_random_response(tag):
    for intent in data["intents"]:
        if intent["tag"] == tag:
            responses = intent["responses"]
            return random.choice(responses)
    return None


In [29]:

# Exemple d'utilisation
tag = "greeting"
response = get_random_response(tag)
print(response)

Hello! What can I help you with?


In [34]:
question="who are you ?"
tag = test_model(model, question, df)
response = get_random_response(tag)
print(response)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
I'm A Freind, here to provide support and guidance on your emotional journey.


In [30]:
question="HI"
tag = test_model(model, question, df)
response = get_random_response(tag)
print(response)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
Hello! What can I help you with?


In [31]:
question="I am very sad"
tag = test_model(model, question, df)
response = get_random_response(tag)
print(response)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
I'm here to listen. What's been troubling you?


In [35]:
question="help me ?"
tag = test_model(model, question, df)
response = get_random_response(tag)
print(response)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
No problem, I'm here to help. What's the matter?


In [36]:
question="help me ?"
tag = test_model(model, question, df)
response = get_random_response(tag)
print(response)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
Of course, just tell me what you need assistance with.
