In [2]:
import tensorflow
from tensorflow import keras

In [9]:
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer





In [None]:
from collections import deque

class MyChatbot:
    def __init__(self, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.corpus = []
        self.history = deque(maxlen=2)  # Store only last 2 queries
        self.history_embeddings = deque(maxlen=2)

    def ingest_text(self, text_list):
        """Builds the search index from provided text strings."""
        self.corpus = text_list
        embeddings = self.model.encode(self.corpus,normalize_embeddings=True)
        self.index = faiss.IndexFlatIP(embeddings.shape[1])
        self.index.add(embeddings.astype('float32'))

    def retrieve(self, user_query, top_k=2):
        # 1. Get embedding for the current query
        current_vec = self.model.encode([user_query],normalize_embeddings=True)[0]
        
        if not self.history:
            final_vec = current_vec
        else:
            # 2. Get average embedding of the history (last 2 queries)
            avg_history_vec = np.mean(self.history_embeddings, axis=0)
            #   high context for shorter queries : why, how , explain etc
            if len(user_query.split()) <= 3:
                final_vec = 0.6 * current_vec + 0.4 * avg_history_vec
            else:
                final_vec = 0.8 * current_vec + 0.2 * avg_history_vec

            final_vec /= np.linalg.norm(final_vec)
            
        # 4. Search with the weighted final vector "final_vec"
        final_vec = np.array([final_vec]).astype('float32')
        distances, indices = self.index.search(final_vec, top_k)
        
        self.history.append(user_query)
        self.history_embeddings.append(current_vec)
        return [self.corpus[idx] for idx in indices[0] if idx != -1]



In [13]:
# --- Testing the Logic ---
bot1 = MyChatbot()
bot1.ingest_text([
    "The office is located in London.",
    "The London office manager is Alice.",
    "Alice's contact email is alice@company.com",
    "The Paris office is managed by Bob."
])

#Query 1
print("A:", bot1.retrieve("Where is the office?")[0])
# Query 2
print("A:", bot1.retrieve("Who is the manager there?")[0])

A: The office is located in London.
A: The London office manager is Alice.


In [14]:
bot1.retrieve("who is manager there?")[0]

'The London office manager is Alice.'

In [34]:
bot1.retrieve("what is his email?")[0]

"Alice's contact email is alice@company.com"

BUILDING A CHATBOT FOR LARGE TEXT  -> CHUNKING

In [36]:
with open("text.txt", "r", encoding="utf-8") as f:
    text = f.read()      # whole file as one string


In [37]:
#NO OF WORD COUNT 
len(text.strip().split())

586

In [22]:
bot = MyChatbot()
bot.ingest_text(text)
#will show error -> feed chunked_text
#token limit < 512 in "all-MiniLM-L6-v2"

IndexError: tuple index out of range

In [23]:
def chunk_text(text, chunk_size=30, overlap=6):
    words = text.split()
    chunks = []
    start = 0

    while start < len(words):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        start = end - overlap

    return chunks

chunked_text = chunk_text(text)

In [40]:
for ss in chunked_text:
    print(ss)

A long time ago, there was a landlord. He had a big garden at the border of a forest. That garden was very special to him. But it was very
to him. But it was very big, so he could not take care of it. So, he hired a gardener to take care of his garden. The gardener was very
his garden. The gardener was very loyal to his employer. He was also very good at looking after the garden. When summer came, the gardener had to water the plants
gardener had to water the plants every day without missing them. So, he did not get any holiday. But, he knew that if he took even one day off, the
took even one day off, the plant would not get any water and would dry up. Whenever he approached his employer to give him a day off, he said, “I
a day off, he said, “I do not see anyone else doing your work. So tell me, who will water the plants if you take a day off? I cannot
take a day off? I cannot give you a holiday knowing there is no one else to water my plants.” Upon hearing his employer’s reply, the ga

In [38]:
bot.ingest_text(chunked_text)

In [39]:
bot.retrieve("what did the gardener do at his employer's house?")[0]

'to him. But it was very big, so he could not take care of it. So, he hired a gardener to take care of his garden. The gardener was very'

In [41]:
question = "whom did the gardener ask to water plants so that he can go to celebrate festival ?"
bot.retrieve(question)[0]

'his garden. The gardener was very loyal to his employer. He was also very good at looking after the garden. When summer came, the gardener had to water the plants'

In [44]:
question = "what did monkey do?"
bot.retrieve(question)[0]

'day with his family members. But the monkeys were confused about how much water to pour into each plant. So, they enquired about it to their king. The monkey king'

In [None]:
bot.retrieve(question)[1]

"to their king. The monkey king said, “water each plant according to its roots.” When the monkeys asked how they could find the plants' roots, the king replied, “pull them"