# Indian Culture Chatbot

This notebook creates and runs a chatbot focused on Indian culture using a neural network model.

In [1]:
# Install the third-party packages imported by the cells below (restart the kernel afterwards if prompted).
%pip install tensorflow keras nltk flask

Note: you may need to restart the kernel to use updated packages.


## 1. Training the Model

First, let's prepare the data and train a model to classify user questions.

In [4]:
import nltk
# Fetch the NLTK 'punkt' data package. Presumably this backs nltk.word_tokenize,
# which later cells call -- confirm for the installed NLTK version.
nltk.download('punkt')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\nisha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [6]:
from keras.models import load_model
from flask import Flask, render_template, request
import threading
import pickle
import json
import nltk
# Fetch the NLTK resource bundles and build the one lemmatizer instance shared
# by the rest of this cell. Each step is done exactly once: repeating the
# download, the imports, or the constructor only adds noise to the cell output.
nltk.download('popular')
nltk.download('punkt_tab')
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import json
import pickle
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
import random

# Initialize lists
words = []       # every token seen across all patterns (lemmatized and deduplicated below)
classes = []     # distinct intent tags
documents = []   # (token list, intent tag) pairs, one per pattern
ignore_words = ['?', '!', '.', ',', ':', ';']  # punctuation tokens kept out of the vocabulary

# Load the dataset
# JSON text is UTF-8; without an explicit encoding open() falls back to the
# platform locale (e.g. cp1252 on Windows), which can fail on or garble
# non-ASCII characters in the intents file.
with open('festivals_data.json', 'r', encoding='utf-8') as f:
    dataset = json.load(f)

# Topic categories, each mapped to the keywords associated with it.
# NOTE(review): nothing else in the visible notebook reads `categories`; the
# training labels below come from each intent's "tag" instead.
categories = dict(
    cultural_heritage=['heritage', 'culture', 'unique', 'tradition'],
    unesco_sites=['unesco', 'heritage site', 'taj mahal', 'qutub', 'ajanta', 'ellora'],
    classical_dance=[
        'dance', 'bharatanatyam', 'kathak', 'odissi',
        'kathakali', 'kuchipudi', 'mohiniyattam', 'manipuri',
    ],
    festivals=['festival', 'diwali', 'holi', 'eid', 'navratri', 'onam'],
    architecture=['architecture', 'temple', 'taj mahal', 'khajuraho', 'hampi'],
    textiles=['textile', 'silk', 'cotton', 'saree', 'weaving'],
    cuisine=['cuisine', 'food', 'dish', 'spice', 'recipe'],
    ayurveda=['ayurveda', 'medicine', 'healing', 'wellness'],
    music=['music', 'classical', 'hindustani', 'carnatic', 'raga'],
    literature=['literature', 'mahabharata', 'ramayana', 'epic'],
    regional_food=[
        'punjab', 'bengal', 'gujarat', 'kerala', 'rajasthan',
        'tamil nadu', 'assam', 'maharashtra', 'kashmir', 'andhra',
    ],
    folk_dance=['folk dance', 'bihu', 'ghoomar', 'garba'],
)

# Walk every example pattern of every intent: tokenize it, add its tokens to
# the running vocabulary, and remember which tag the pattern belongs to.
for intent in dataset["intents"]:
    for pattern in intent["patterns"]:
        w = nltk.word_tokenize(pattern)
        tag = intent["tag"]
        words += w
        documents.append((w, tag))
        # Register each tag the first time one of its patterns is seen
        if tag not in classes:
            classes.append(tag)

# Lemmatize and lowercase each word, drop punctuation tokens, and remove duplicates
words = sorted({lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words})

# Sort classes
classes = sorted(set(classes))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique lemmatized words", words)

# Save vocabulary and classes
# Context managers guarantee the files are flushed and closed before the
# serving cell reads them back; a bare open() leaves that to the garbage collector.
with open('indian_culture_words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('indian_culture_classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

# Debug: dump the loaded dataset and the preprocessing results for inspection
for label, value in (
    ("Dataset keys:", dataset.keys()),
    ("First 2 items in intents:", dataset["intents"][:2]),
    ("Type of dataset:", type(dataset)),
    ("Number of documents:", len(documents)),
    ("First 2 documents:", documents[:2]),
    ("Classes:", classes),
    ("Words:", words),
):
    print(label, value)

# Create training data
training = []
output_empty = [0] * len(classes)

# Create bag of words for each document: one 0/1 flag per vocabulary word,
# paired with a one-hot row marking the document's intent tag.
for pattern_tokens, tag in documents:
    # A set gives O(1) membership tests while scanning the vocabulary
    pattern_words = {lemmatizer.lemmatize(token.lower()) for token in pattern_tokens}
    bag = [1 if vocab_word in pattern_words else 0 for vocab_word in words]

    output_row = list(output_empty)  # fresh copy so rows do not share state
    output_row[classes.index(tag)] = 1
    training.append([bag, output_row])



# Shuffle and convert to numpy array
# A dedicated, seeded generator makes the row order reproducible across
# "Restart Kernel -> Run All" without touching the global `random` state.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(training)
training = np.array(training, dtype=object)

# Split into features and labels
train_x = [item[0] for item in training]  # bag-of-words vectors
train_y = [item[1] for item in training]  # one-hot intent rows

print("Training data created")
print("Length of training:", len(training))
print("First few items in training:", training[:3])

[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     C:\Users\nisha\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     C:\Users\nisha\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     C:\Users\nisha\AppData\Roaming\nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     C:\Users\nisha\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     C:\Users\nisha\AppData\Roaming\nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]   

57 documents
12 classes ['christmas', 'diwali', 'eid', 'farewell', 'greeting', 'holi', 'indian_clothing', 'indian_culture_general', 'indian_dance', 'indian_food', 'indian_music', 'indian_religion']
76 unique lemmatized words ['a', 'about', 'are', 'bharatanatyam', 'bye', 'carnatic', 'celebrate', 'celebrated', 'christmas', 'classical', 'clothing', 'country', 'cuisine', 'culture', 'dance', 'describe', 'different', 'dish', 'diverse', 'diwali', 'do', 'dress', 'eat', 'eid', 'evening', 'food', 'form', 'good', 'goodbye', 'happens', 'hello', 'hey', 'hi', 'hindu', 'hindustani', 'holi', 'how', 'in', 'india', 'indian', 'instrument', 'is', 'kathak', 'kurta', 'later', 'major', 'make', 'me', 'morning', 'music', 'name', 'of', 'on', 'people', 'popular', 'practiced', 'religion', 'religiously', 'sari', 'see', 'significance', 'some', 'special', 'spicy', 'talk', 'tell', 'the', 'to', 'tradition', 'traditional', 'unique', 'we', 'wear', 'what', 'why', 'you']
Dataset keys: dict_keys(['intents'])
First 2 items 

In [7]:
# Create and train the model: two dropout-regularised hidden layers feeding a
# softmax over the intent classes.
n_features = len(train_x[0])
n_classes = len(train_y[0])
model = Sequential([
    Dense(128, input_shape=(n_features,), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
])

# Compile the model (SGD with Nesterov momentum)
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
features = np.array(train_x)
labels = np.array(train_y)
hist = model.fit(features, labels, epochs=200, batch_size=5, verbose=1)

# Save the model
model.save('indian_culture_model.h5')
print("Model created and saved")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.0781 - loss: 2.5434 
Epoch 2/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.2355 - loss: 2.3869
Epoch 3/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1652 - loss: 2.3861 
Epoch 4/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.3712 - loss: 2.2712 
Epoch 5/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2078 - loss: 2.2770
Epoch 6/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2978 - loss: 2.2130 
Epoch 7/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.3506 - loss: 2.1513 
Epoch 8/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.4955 - loss: 1.9430
Epoch 9/200
[1m12/12[0m [32m━━━━━━━━━



Model created and saved


## 2. Running the Chatbot

Now let's create the Flask application to serve the chatbot.

In [12]:

import json
import pickle
import random
import threading

import nltk
import numpy as np
from flask import Flask, render_template, request
from nltk.stem import WordNetLemmatizer
# Both load_model imports are kept in their original order; the keras one is
# bound last and is therefore the one actually used below.
from tensorflow.keras.models import load_model
from keras.models import load_model



# Load the trained model and data
model = load_model('indian_culture_model.h5')

# SECURITY NOTE: pickle.load can execute arbitrary code. Only load the .pkl
# files written by this notebook's training cell, never untrusted ones.
# Context managers close the handles instead of leaking them.
with open('indian_culture_words.pkl', 'rb') as words_file:
    words = pickle.load(words_file)
with open('indian_culture_classes.pkl', 'rb') as classes_file:
    classes = pickle.load(classes_file)

# JSON text is UTF-8; be explicit so the platform locale (e.g. cp1252 on
# Windows) is never used to decode it.
with open('festivals_data.json', 'r', encoding='utf-8') as f:
    dataset = json.load(f)

lemmatizer = WordNetLemmatizer()

# Map each intent tag to its list of candidate responses
tag_to_responses = {intent['tag']: intent['responses'] for intent in dataset['intents']}

def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lowercased and lemmatized."""
    return [
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]
def bow(sentence, words):
    """Encode `sentence` as a bag-of-words vector over the vocabulary `words`.

    Args:
        sentence: raw text to encode.
        words: vocabulary; position i of the result corresponds to words[i].

    Returns:
        A numpy array with 1 where the vocabulary word occurs among the
        cleaned-up tokens of `sentence`, and 0 elsewhere.
    """
    # Set membership replaces a nested sentence-by-vocabulary scan
    present = set(clean_up_sentence(sentence))
    return np.array([1 if vocab_word in present else 0 for vocab_word in words])
def predict_class(sentence):
    """Return the intents the model predicts for `sentence`, most probable first.

    Only classes whose predicted probability exceeds ERROR_THRESHOLD are kept.
    Each entry is a dict with the class name under "intent" and the
    probability, converted to a string, under "probability".
    """
    ERROR_THRESHOLD = 0.25
    features = bow(sentence, words)
    scores = model.predict(np.array([features]), verbose=0)[0]
    candidates = [[idx, score] for idx, score in enumerate(scores) if score > ERROR_THRESHOLD]
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    return [
        {"intent": classes[idx], "probability": str(score)}
        for idx, score in ranked
    ]
def get_response(ints, intents_json):
    """Pick a reply for the top predicted intent.

    Args:
        ints: predictions ordered best-first; each item is a dict with an
            'intent' key. May be empty.
        intents_json: dict with an 'intents' list whose items carry a 'tag'
            and a 'responses' list.

    Returns:
        A randomly chosen response of the intent whose tag matches the top
        prediction, or the fallback sentence when there is no prediction, no
        matching tag, or the matching intent has no responses.
    """
    fallback = "Sorry, I do not understand."
    tag = ints[0]['intent'] if ints else None
    if not tag:
        return fallback
    for intent in intents_json['intents']:
        if intent['tag'] == tag:
            responses = intent['responses']
            # random.choice raises IndexError on an empty sequence, so only
            # draw from a non-empty response list.
            if responses:
                return random.choice(responses)
    return fallback
def chatbot_response(msg):
    """Classify `msg` and return the reply chosen for its predicted intent."""
    return get_response(predict_class(msg), dataset)

# Create Flask app
app = Flask(__name__)

@app.route("/")
def home():
    """Render and return index.html."""
    return render_template("index.html")  # NOT "templates/index.html"

@app.route("/get")
def get_bot_response():
    """Return the chatbot's reply to the `msg` query parameter.

    A missing or blank `msg` gets the fallback sentence directly, rather than
    passing None (or an empty string) into the tokenizer and model.
    """
    user_text = request.args.get('msg', '')
    if not user_text.strip():
        return "Sorry, I do not understand."
    return chatbot_response(user_text)

# To run Flask in a notebook:
def run_flask():
    """Run the Flask server with debug mode and the reloader both disabled."""
    app.run(debug=False, use_reloader=False)

# Start the server in a background thread (run this cell ONCE)
threading.Thread(target=run_flask).start()



 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [10/May/2025 19:09:15] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [10/May/2025 19:09:15] "[36mGET /static/styles/style.css HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [10/May/2025 19:09:30] "GET /get?msg=Hello HTTP/1.1" 200 -
