<a href="https://colab.research.google.com/github/alibelhrak/python_chatbot_scratch/blob/main/Cha_bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install nltk




In [2]:
import nltk
# Fetch the WordNet corpus data; the WordNetLemmatizer instantiated later in this notebook relies on it.
nltk.download('wordnet')


[nltk_data] Downloading package wordnet to /root/nltk_data...


True

# Importing Libraries

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout   , Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import categorical_crossentropy
import nltk
from nltk.stem import WordNetLemmatizer


# Preparing intents

In [4]:
# Intent definitions that drive the whole notebook. Each entry carries:
#   - "tag":       the class label the classifier is trained to predict,
#   - "patterns":  example user utterances that get tokenized into training documents,
#   - "responses": candidate replies, one of which is picked at random for the predicted tag.
intents_data={
  "intents": [
    {
      "tag": "greeting",
      "patterns": ["Hello", "Hi", "Hey", "What's up", "Good morning"],
      "responses": ["Hello!", "Good day!", "Hey there!", "Hi! How can I assist you?"]
    },
    {
      "tag": "goodbye",
      "patterns": ["Bye", "Goodbye", "See you later", "Take care"],
      "responses": ["Goodbye!", "See you soon!", "Take care!", "Have a great day!"]
    },
    {
      "tag": "thanks",
      "patterns": ["Thank you", "Thanks", "Appreciate it"],
      "responses": ["You're welcome!", "Glad I could help!", "Anytime!"]
    },
    {
      "tag": "help",
      "patterns": ["Can you help me?", "I need assistance", "Help me"],
      "responses": ["Sure! What do you need help with?", "I'm here to assist!", "Tell me what you need help with."]
    },
    {
      "tag": "weather",
      "patterns": ["What's the weather like?", "Is it raining today?", "Tell me the forecast"],
      "responses": ["I can check the weather for you!", "Do you want today's or this week's forecast?", "Let me get that info for you."]
    },
    {
      "tag": "joke",
      "patterns": ["Tell me a joke", "Make me laugh", "Do you know any jokes?"],
      "responses": ["Why don’t skeletons fight each other? They don’t have the guts!", "Why did the scarecrow win an award? Because he was outstanding in his field!"]
    }
  ]
}


In [5]:
# Fetch the Punkt tokenizer tables (presumably the data word_tokenize loads at runtime) before the first tokenization cell.
nltk.download('punkt_tab')


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [6]:
# Shared preprocessing state, reused by the vocabulary, training and chatbot cells.
lemmatizer = WordNetLemmatizer()
words = []        # one token list per pattern (still nested at this point)
classes = []      # intent tags, in order of first appearance
documents = []    # (token list, tag) pairs
ignore_symbols = ['.', '?', ';', ':', '!', '%']
from nltk.tokenize import word_tokenize

for intent in intents_data["intents"]:
    tag = intent["tag"]
    for pattern in intent["patterns"]:
        tokens = word_tokenize(pattern)
        words.append(tokens)
        documents.append((tokens, tag))

        # Register each tag once; an intent without patterns never reaches this line.
        if tag not in classes:
            classes.append(tag)

print("Documents:", documents)

Documents: [(['Hello'], 'greeting'), (['Hi'], 'greeting'), (['Hey'], 'greeting'), (['What', "'s", 'up'], 'greeting'), (['Good', 'morning'], 'greeting'), (['Bye'], 'goodbye'), (['Goodbye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Take', 'care'], 'goodbye'), (['Thank', 'you'], 'thanks'), (['Thanks'], 'thanks'), (['Appreciate', 'it'], 'thanks'), (['Can', 'you', 'help', 'me', '?'], 'help'), (['I', 'need', 'assistance'], 'help'), (['Help', 'me'], 'help'), (['What', "'s", 'the', 'weather', 'like', '?'], 'weather'), (['Is', 'it', 'raining', 'today', '?'], 'weather'), (['Tell', 'me', 'the', 'forecast'], 'weather'), (['Tell', 'me', 'a', 'joke'], 'joke'), (['Make', 'me', 'laugh'], 'joke'), (['Do', 'you', 'know', 'any', 'jokes', '?'], 'joke')]


In [7]:
# Vocabulary: unique, sorted lemmas of every pattern token that is not an ignored symbol.
# Tokens are read from `documents` (not from `words`) so the cell is idempotent:
# `words` is overwritten with a flat list of strings here, and reading it back on a
# second run would iterate over the characters of each word.
# Token casing is preserved as-is.
words = sorted({
    lemmatizer.lemmatize(token)
    for tokens, _tag in documents
    for token in tokens
    if token not in ignore_symbols
})
# Sorted, de-duplicated intent tags; a tag's index here is its position in the one-hot label.
classes = sorted(set(classes))


# Saving files

In [8]:
import pickle

# Serialize the vocabulary and the class labels to words.pkl / classes.pkl in the working directory.
for pickle_path, payload in (('words.pkl', words), ('classes.pkl', classes)):
  with open(pickle_path, 'wb') as f:
    pickle.dump(payload, f)

# Training Data

In [9]:
import random
# Turn every (tokens, tag) document into a [bag-of-words, one-hot label] pair.
training = []
output_empty = [0] * len(classes)

for tokens, tag in documents:
  lemmas = [lemmatizer.lemmatize(token) for token in tokens]
  # 1 where the vocabulary entry occurs among the document's lemmas, 0 elsewhere.
  bag = [1 if vocab_word in lemmas else 0 for vocab_word in words]
  # Copy the zero template so every row gets its own label list.
  outputrow = output_empty.copy()
  outputrow[classes.index(tag)] = 1
  training.append([bag, outputrow])

# Shuffle the rows in place, then split the two columns into features and labels.
random.shuffle(training)
training = np.array(training, dtype=object)
train_x = list(training[:, 0])
train_y = list(training[:, 1])

# Building the model

In [14]:
# Feed-forward classifier: bag-of-words vector in, one softmax probability per intent out.
# The input size is declared with an explicit Input object rather than `input_shape=` on the
# first Dense layer, which current Keras releases warn about for Sequential models.
model = Sequential([
    keras.Input(shape=(len(train_x[0]),)),
    Dense(128, activation='relu'),
    Dropout(0.25),
    Dense(64, activation='relu'),
    Dropout(0.25),
    Dense(len(train_y[0]), activation='softmax'),
])

# SGD with Nesterov momentum; categorical cross-entropy matches the one-hot labels.
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [16]:
# Fit on every pattern; verbose=0 keeps the per-epoch log out of the cell output.
x_train = np.array(train_x)
y_train = np.array(train_y)
model.fit(x_train, y_train, epochs=200, batch_size=5, verbose=0)

<keras.src.callbacks.history.History at 0x7a6b06024090>

# Building ChatBot

In [None]:
def clean_up(sentence):
  """Tokenize `sentence` and lemmatize every token that is not an ignored symbol.

  Tokens listed in `ignore_symbols` are dropped. Casing is left untouched, so
  the result only lines up with vocabulary entries that use the same casing.

  Returns the lemmas as a list, in sentence order.
  """
  lemmas = []
  for token in word_tokenize(sentence):
    if token in ignore_symbols:
      continue
    lemmas.append(lemmatizer.lemmatize(token))
  return lemmas

def bag_of_words(sentence):
  """Encode `sentence` as a 0/1 NumPy vector over the `words` vocabulary.

  Position i is 1 when vocabulary entry i equals one of the cleaned-up tokens
  of the sentence (exact, case-sensitive comparison) and 0 otherwise.
  """
  present = set(clean_up(sentence))
  return np.array([1 if vocab_word in present else 0 for vocab_word in words])

def predict_class(sentence, threshold=0.8):
    """Rank the intents the model assigns to `sentence`.

    Args:
        sentence: raw user text; it is encoded with `bag_of_words` first.
        threshold: minimum probability (exclusive) an intent needs to be
            reported. Defaults to 0.8.

    Returns:
        A list of ``{'intent': tag, 'probability': str}`` dicts sorted by
        descending probability. The list is empty when no intent exceeds
        `threshold`, so callers must handle that case.
    """
    bow = bag_of_words(sentence)
    # verbose=0: without it Keras prints a progress bar for every single prediction.
    probabilities = model.predict(np.array([bow]), verbose=0)[0]

    ranked = sorted(
        ((index, prob) for index, prob in enumerate(probabilities) if prob > threshold),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [{'intent': classes[index], 'probability': str(prob)} for index, prob in ranked]


def get_responses(intent_list , intent_json):
  """Pick a random reply for the top-ranked intent.

  Args:
    intent_list: ranked predictions, each a dict with an 'intent' key; only
      the first entry is used. May be empty.
    intent_json: intents definition, a dict with an 'intents' list whose
      entries carry 'tag' and 'responses'.

  Returns:
    One response of the intent whose tag matches the top prediction, or a
    fallback sentence when there is no prediction, no matching tag, or the
    matching intent has no responses.
  """
  result = "Sorry, I didn't understand that."
  if not intent_list:
    return result
  tag = intent_list[0]['intent']
  # Read the intents from the argument rather than the notebook-level global.
  for intent in intent_json['intents']:
    if intent['tag'] == tag and intent['responses']:
      result = random.choice(intent['responses'])
  return result

# Interactive chat loop. Type "quit" or "exit" (or interrupt the cell / send EOF) to stop.
while True:
    try:
        message = input("Vous: ")
    except (EOFError, KeyboardInterrupt):
        break
    if message.strip().lower() in ("quit", "exit"):
        break

    ints = predict_class(message)
    if not ints:
        # No intent cleared the confidence threshold, so there is no tag to look up.
        print("Chatbot:", "Sorry, I didn't understand that.")
        continue

    res = get_responses(ints, intents_data)
    print("Chatbot:", res)