# Problem Statement
### chatbot to assist in simulated police investigations by processing natural language queries and generating context-aware responses using provided and AI-generated situational data.  

### **About Me**  
I am Ruthravarshan S, a final-year student of Artificial Intelligence and Data Science at K S Rangasamy College of Technology (KSRCT). My interests lie in Data Science, Machine Learning, and the Internet of Things (IoT). I actively work on innovative projects that integrate AI with real-world applications, such as developing intelligent systems for various domains, including simulations and research-focused initiatives.

In [1]:
import json
import string
import random
import nltk
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Downloading necessary NLTK data
nltk.download("punkt")
nltk.download("wordnet")
nltk.download("stopwords")

# Initializing the lemmatizer and stopwords
lm = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
# Loading the dataset
with open("Data.json") as file:
    data = json.load(file)

# Preprocessing variables
newWords = []
documentX = []
documentY = []
ourClasses = []

# Tokenization and lemmatization
for intent in data["ourIntents"]:
    for pattern in intent["patterns"]:
        words = nltk.word_tokenize(pattern.lower())
        words = [lm.lemmatize(w) for w in words if w not in string.punctuation and w not in stop_words]
        newWords.extend(words)
        documentX.append(pattern)
        documentY.append(intent["tag"])
    if intent["tag"] not in ourClasses:
        ourClasses.append(intent["tag"])

newWords = sorted(set(newWords))
ourClasses = sorted(set(ourClasses))


In [7]:
# Bag-of-Words and output encoding
trainingData = []
output_empty = [0] * len(ourClasses)

for idx, doc in enumerate(documentX):
    bag = [1 if word in lm.lemmatize(doc.lower()) else 0 for word in newWords]
    output_row = list(output_empty)
    output_row[ourClasses.index(documentY[idx])] = 1
    trainingData.append([bag, output_row])

# Shuffling and converting to NumPy arrays
random.shuffle(trainingData)
trainingData = np.array(trainingData, dtype=object)

x = np.array(list(trainingData[:, 0]))
y = np.array(list(trainingData[:, 1]))


In [9]:
# Defining the neural network
model = Sequential()
model.add(Dense(128, input_shape=(len(x[0]),), activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(len(y[0]), activation="softmax"))

# Compiling the model
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
print(model.summary())

# Training the model
model.fit(x, y, epochs=200, batch_size=5, verbose=1)

# Saving the model
model.save("chatbot_model.h5")

# Saving the vocabulary and classes
import pickle
pickle.dump(newWords, open("words.pkl", "wb"))
pickle.dump(ourClasses, open("classes.pkl", "wb"))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None
Epoch 1/200
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.0025 - loss: 6.2541
Epoch 2/200
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0127 - loss: 6.0913
Epoch 3/200
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0194 - loss: 5.6496
Epoch 4/200
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0524 - loss: 5.2261
Epoch 5/200
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1165 - loss: 4.6766
Epoch 6/200
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1769 - loss: 4.1808
Epoch 7/200
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2414 - loss: 3.6505
Epoch 8/200
[1m408/408[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.2657 - loss: 3.3945
Epoch 9/200
[1m408/408[0m



In [11]:
# Function to clean user input
def clean_up_sentence(sentence):
    words = nltk.word_tokenize(sentence.lower())
    return [lm.lemmatize(w) for w in words if w not in string.punctuation and w not in stop_words]

# Function to convert user input to bag-of-words
def bow(sentence, words, show_details=False):
    sentence_words = clean_up_sentence(sentence)
    bag = [1 if w in sentence_words else 0 for w in words]
    if show_details:
        print(f"Bag-of-Words: {bag}")
    return np.array(bag)


In [13]:
# Classify user input
def classify_local(sentence):
    bag_data = bow(sentence, newWords, show_details=False)
    predictions = model.predict(np.array([bag_data]))[0]
    threshold = 0.25
    results = [[i, prob] for i, prob in enumerate(predictions) if prob > threshold]
    results.sort(key=lambda x: x[1], reverse=True)
    return [{"intent": ourClasses[i], "probability": str(prob)} for i, prob in results]

# Generate chatbot response
def response(sentence):
    intents = classify_local(sentence)
    if intents:
        tag = intents[0]["intent"]
        for i in data["ourIntents"]:
            if i["tag"] == tag:
                return random.choice(i["responses"])
    return "I don't understand!"


In [None]:
# Running the chatbot
print("Chatbot is running! Type 'quit' to exit.")
while True:
    user_input = input("You: ")
    if user_input.lower() == "quit":
        print("Goodbye!")
        break
    print(f"Bot: {response(user_input)}")


Chatbot is running! Type 'quit' to exit.


You:  hi


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Bot: Hey


You:  Why are you sweating?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Bot: I’ve never been in a situation like this.


You:  What do you know about witness?


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Bot: Please elaborate on witness.


You:  Do you have a name


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Bot: You can call me Chatbot.


You:  Tell me about coding


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Bot: Coding is the process of writing instructions for computers to execute tasks. It powers software, websites, and apps.


You:  exit


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Bot: I am a chatbot


# NOTE
### The data used in this project is provided and supplemented with situational data entirely generated by AI for the purpose of police investigation simulations. It does not represent real-world scenarios or actual cases, and all responses are designed for experimental and educational purposes only.
## Given dataset is also added , I am currently working on the dataset to make it much effective for my problem statement.
