In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import Ollama
import requests
import ollama
import os


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Settings for the local Ollama server used throughout the notebook.
MODEL = "Mistral"  # model name passed to ollama.chat(...)
OLLAMA_API = "http://localhost:11434/api/chat"  # chat endpoint URL; not referenced by the visible cells

In [3]:
# Pull the model with the Ollama CLI so that it is available locally
# (shell command run through IPython's `!` escape).
!ollama pull Mistral

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠇ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠏ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling f5074b1221da: 100% ▕███████████████

In [None]:
# Smoke test: send a single system + user exchange to the model and print the reply.
# The messages live in a real list under their own name. The previous version
# built a tuple (missing brackets) and bound it to `messages`, the same global
# the chat helpers append to, so re-running this cell out of order broke them.
smoke_test_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello, how are you and who are you? what kinda model are you"},
]

response = ollama.chat(model=MODEL, messages=smoke_test_messages)
print(response['message']['content'])

 Hello! I'm an AI model designed to assist with various tasks and provide information. How can I help you today? I don't have personal feelings or experiences, but I'm here to make your life easier. As for the model, I'm a text-based AI trained by Mistral AI.

If you're looking for something specific or need help with a particular problem, feel free to ask! I'm here to help.


In [5]:
# System instruction that opens every conversation; the pieces below are
# concatenated into one string.
system_prompt = (
    "You are a helpful assistant that provides answer to the question about Dr. B.R. Ambedkar's speeches. "
    "And you'll have a context from ambedded documents of ambedkar's speech to help you answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Do not try to make up an answer."
)


In [6]:
# Conversation history handed to the model; it starts with only the system prompt.
messages = [dict(role="system", content=system_prompt)]

In [7]:
def update_history(role, content):
    """Append one chat turn to the module-level ``messages`` history.

    Args:
        role: Value stored under the "role" key (the notebook uses "system",
            "user" and "assistant").
        content: Text stored under the "content" key.
    """
    turn = dict(role=role, content=content)
    messages.append(turn)

In [8]:
def get_user_input():
    """Ask the user for a question via ``input()`` and return the text as typed."""
    prompt_text = "\nEnter your question about Dr. B.R. Ambedkar's speeches (type 'exit' to quit): "
    return input(prompt_text)

In [28]:
def ask_ollama():
    """Send the current ``messages`` history to the model, record the reply, and print it.

    Reads the module-level ``MODEL`` and ``messages``. The reply text is
    appended to the history as an "assistant" turn through ``update_history``.
    Nothing is returned.
    """
    reply = ollama.chat(model=MODEL, messages=messages)["message"]["content"]
    update_history("assistant", reply)
    print("Answer :", reply)



In [None]:
## For testing purpose 


# def chat_loop():
#     while True:
#         user_prompt = get_user_input()

#         if user_prompt.lower() in ["exit", "quit"]:
#             print("Ending chat. Goodbye!")
#             break

#         # Store user's message
#         update_history("user", user_prompt)

#         # Query model
#         ask_ollama()

In [None]:
# chat_loop()

Ending chat. Goodbye!


## Next, we build the vector database

In [None]:
# Create Vector DB from text file

def create_vector_db(text_path="speech.txt", db_dir="db"):
    """Build a Chroma vector store from a text file and store it in ``db_dir``.

    The file is loaded, split on newlines into overlapping chunks, embedded
    with the ``all-MiniLM-L6-v2`` sentence-transformer, and written to Chroma.

    Args:
        text_path: Path of the text file to index.
        db_dir: Directory passed to Chroma as its persist directory.

    Returns:
        The Chroma vector store holding the embedded chunks.
    """
    # Load text
    documents = TextLoader(text_path).load()

    # Split text into chunks
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=300,
        chunk_overlap=100
    )
    chunks = splitter.split_documents(documents)
    print("Chunks created:", len(chunks))
    print(chunks[0:6])

    # Embedding model
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Give every chunk a stable id derived from its source path and position.
    # Without explicit ids each run gets fresh random ids, so re-running this
    # function against an existing db_dir stored the same chunks again.
    # With stable ids a re-run is expected to overwrite the existing entries.
    # NOTE(review): entries written by earlier runs (random ids) are not
    # removed; delete db_dir once if it already contains duplicates.
    chunk_ids = [f"{text_path}:{index}" for index in range(len(chunks))]

    # Create vector DB
    vectordb = Chroma.from_documents(
        chunks,
        embedding=embeddings,
        ids=chunk_ids,
        persist_directory=db_dir
    )

    # NOTE(review): the recorded output shows a warning raised on this call,
    # presumably because manual persistence is deprecated in newer Chroma
    # integrations; confirm. The call is kept when available and skipped when
    # the store no longer offers it.
    persist = getattr(vectordb, "persist", None)
    if callable(persist):
        persist()
    print(f"Vector DB created & stored at: {db_dir}")

    return vectordb

In [None]:
## Do not run this cell again if the `db` folder (the vector DB) already exists: re-running it adds duplicate copies of the same chunks to the vector DB.
## If you do run it again by mistake, delete the `db` folder and run this cell once more.

# vectordb = create_vector_db()


Chunks created: 4
[Document(metadata={'source': 'speech.txt'}, page_content='Topic: Annihilation of Caste\nThe real remedy is to destroy the belief in the sanctity of the shastras. \nHow do you expect to succeed if you allow the shastras to continue to be held as sacred and infallible?'), Document(metadata={'source': 'speech.txt'}, page_content='You must take a stand against the scriptures. Either you must stop the practice of caste or you must stop believing in the shastras. \nYou cannot have both. \nThe problem of caste is not a problem of social reform. It is a problem of overthrowing the authority of the shastras.'), Document(metadata={'source': 'speech.txt'}, page_content='So long as people believe in the sanctity of the shastras, they will never be able to get rid of caste. \nThe work of social reform is like the work of a gardener who is constantly pruning the leaves and branches of a tree without ever attacking the roots.'), Document(metadata={'source': 'speech.txt'}, page_cont

  embeddings = HuggingFaceEmbeddings(


Vector DB created & stored at: db


  vectordb.persist()


### Load vector DB

In [13]:
def load_vector_db(db_dir="db"):
    """Open the Chroma store persisted in ``db_dir`` and return a retriever over it.

    Args:
        db_dir: Directory the vector store was persisted to.

    Returns:
        The retriever produced by ``Chroma.as_retriever()`` with default settings.
    """
    # Same embedding model name that create_vector_db() uses to build the store.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vectordb = Chroma(
        persist_directory=db_dir,
        embedding_function=embeddings
    )
    return vectordb.as_retriever()

retriever = load_vector_db()

# context_retriever() reads a module-level `vectordb`. The only other cell
# that assigns it is the commented-out create_vector_db() call, so on a fresh
# kernel the name was undefined. Expose the store behind the retriever here so
# that Restart & Run All works.
vectordb = retriever.vectorstore

  vectordb = Chroma(


In [19]:
def context_retriever(question, k=2):
    """Fetch the ``k`` stored chunks most similar to ``question``.

    Args:
        question: Query text handed to the retriever.
        k: Number of chunks requested from the similarity search.

    Returns:
        The documents returned by the retriever's ``invoke`` call.
    """
    search_options = {"k": k}
    # Uses the module-level `vectordb` store.
    top_k_retriever = vectordb.as_retriever(
        search_type="similarity",
        search_kwargs=search_options,
    )
    return top_k_retriever.invoke(question)


In [None]:
# Testing context retriever
# context_retriever("What does the speech say is the real enemy?")




Retrieved Chunks:

--- Chunk 1 ---
The real enemy is the belief in the shastras.

--- Chunk 2 ---
Topic: Annihilation of Caste
The real remedy is to destroy the belief in the sanctity of the shastras. 
How do you expect to succeed if you allow the shastras to continue to be held as sacred and infallible?


[Document(metadata={'source': 'speech.txt'}, page_content='The real enemy is the belief in the shastras.'),
 Document(metadata={'source': 'speech.txt'}, page_content='Topic: Annihilation of Caste\nThe real remedy is to destroy the belief in the sanctity of the shastras. \nHow do you expect to succeed if you allow the shastras to continue to be held as sacred and infallible?')]

In [None]:
def chat_loop():
    """Run an interactive question-answering loop grounded in retrieved chunks.

    The shared ``messages`` history is reset to just the system prompt, then
    each question is answered in turn until the user types "exit" or "quit".
    For every question the most similar chunks are retrieved and placed into
    the prompt. That context-augmented prompt, not the raw question, is what
    gets stored in the history before the model is queried.
    """
    # Reset the history every time a new chat starts
    messages.clear()
    messages.append({"role": "system", "content": system_prompt})
    while True:
        # Strip surrounding whitespace so inputs such as " exit " still end the chat
        user_question = get_user_input().strip()

        if not user_question:
            # Nothing was typed: ask again instead of sending an empty query
            # to the retriever and the model
            continue

        if user_question.lower() in ["exit", "quit"]:
            print("\nEnding chat. Goodbye!")
            break

        print("\nQuestion:", user_question)

        # 1. Retrieve context
        docs = context_retriever(user_question)
        context = "\n".join(doc.page_content for doc in docs)

        # 2. Build final prompt for model
        final_prompt = f"""
            Use ONLY the context to answer the question.

            Context:
            {context}

            Question:
            {user_question}
            
            If answer is not in the context, say "I don't know".
            """

        # 3. Store the context-augmented user message in the history
        update_history("user", final_prompt)

        # 4. Ask Ollama with the context-enhanced prompt
        ask_ollama()


1. Direct extraction questions (should be answered correctly)

In [24]:
chat_loop()

Question: What does the speech say is the real enemy?
\Answer :   The speech "Annihilation of Caste" identifies the belief in the shastras as the real enemy.
Question: According to Ambedkar, what is the real remedy for caste problems?
\Answer :   According to Ambedkar, the real remedy for caste problems is to destroy the belief in the sanctity of the shastras.
Question: Why does Ambedkar say people cannot get rid of caste?
\Answer :    People cannot get rid of caste because as long as they believe in the sanctity of the shastras. According to Ambedkar, it is a problem of overthrowing the authority of the shastras rather than social reform.
Question: What is the relationship between the shastras and caste?
\Answer :    The relationship between the shastras and caste, as per Ambedkar, is that as long as people believe in the sanctity of the shastras, they will never be able to get rid of caste. He considers it a problem of overthrowing the authority of the shastras rather than social ref

2. Inference-level questions (answerable only from the context)

In [27]:
chat_loop()



Question: Why does Ambedkar say you must stop believing in the shastras?

Answer :    According to the context, Ambedkar says that you must stop believing in the shastras because as long as people believe in their sanctity, they will never be able to get rid of caste. The problem of caste is not a problem of social reform but rather a problem of overthrowing the authority of the shastras. Therefore, he suggests that one must take a stand against the scriptures and either stop the practice of caste or stop believing in the shastras.


Question: What problem does Ambedkar consider deeper than social reform?

Answer :    The problem that Ambedkar considers deeper than social reform is overthrowing the authority of the shastras.


Question: Why can’t someone practice caste and believe in the shastras at the same time?

Answer :    According to the given context, it does not explicitly mention why one cannot practice caste and believe in the shastras at the same time. The text emphasizes 

3. Questions that should return “I don’t know”

In [30]:
chat_loop()


Question: When was Ambedkar born?
Answer :  I don't know. The provided context does not contain information about Dr. B.R. Ambedkar's birth date.

Question: Where did Ambedkar deliver this speech?
Answer :  I don't know. The provided context does not contain information about where Dr. B.R. Ambedkar delivered his speech on Annihilation of Caste.

Question: How long is the entire book ‘Annihilation of Caste’? What reforms did Ambedkar propose in 1940?
Answer :  The provided context does not contain information about the length of the book 'Annihilation of Caste' or the specific reforms proposed by Dr. B.R. Ambedkar in 1940. However, it discusses his views on the need to destroy the belief in the sanctity of the shastras as a way to get rid of caste and the idea that social reform is like pruning the leaves and branches of a tree without attacking the roots. To find out more detailed information about Ambedkar's specific proposals for reform, one would need to read his speech or other r

4. Paraphrased or tricky formulations

In [31]:
chat_loop()


Question: What must be destroyed to remove the foundation of caste, according to the speech?
Answer :  To remove the foundation of caste, according to the speech, one must destroy the belief in the sanctity of the shastras.

Question: Why does Ambedkar think social reform is ineffective?
Answer :  According to the speech, Ambedkar thinks that social reform is ineffective because it only addresses the symptoms of the problem (pruning leaves and branches) without addressing the root cause, which is the belief in the sanctity of the shastras. He argues that the problem of caste is a problem of overthrowing the authority of the shastras, not just a problem of social reform.

Question: What belief prevents society from eliminating caste?
Answer :   The belief that prevents society from eliminating caste is the belief in the sanctity of the shastras.

Question: What contradiction does Ambedkar point out regarding caste practice and belief in the shastras? and Summarize Ambedkar's argument a