## Install Libraries

In [None]:
!pip install -qU langchain_community
!pip install -qU langchain_huggingface
!pip install -qU faiss-gpu #Use for GPU
# !pip install -qU faiss-cpu #Use for CPU
!pip install -qU evaluate
!pip install -qU rouge_score

## Import Libraries

In [13]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from langchain.docstore.document import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

## Bhagavad Gita

In [14]:
# Load the Gita verse/question dataset and expose the English translation
# under the column name 'answer'.
df_gita = (
    pd.read_csv("https://raw.githubusercontent.com/atmabodha/Vedanta_Datasets/refs/heads/main/Bhagwad_Gita/Bhagwad_Gita_Verses_English_Questions.csv")
    .rename(columns={'translation': 'answer'})
)
print(df_gita.shape)
df_gita.head()

(700, 6)


Unnamed: 0,chapter,verse,speaker,sanskrit,answer,question
0,1,1,धृतराष्ट्र,धर्मक्षेत्रे कुरुक्षेत्रे समवेता युयुत्सवः| मा...,"Dhritarashtra said, ""What did my people and th...",How does the Gita start?
1,1,2,सञ्जय,दृष्ट्वा तु पाण्डवानीकं व्यूढं दुर्योधनस्तदा| ...,Sanjaya said: Having seen the army of the Pand...,"In the Mahabharata war, whom did Duryodhana fi..."
2,1,3,सञ्जय,पश्यैतां पाण्डुपुत्राणामाचार्य महतीं चमूम्| व्...,"Behold, O Teacher! This mighty army of the son...","What did Duryodhana say to his teacher, Drona?"
3,1,4,सञ्जय,अत्र शूरा महेष्वासा भीमार्जुनसमा युधि| युयुधान...,"Here are heroes, mighty archers, equal in batt...",Which warriors from the Pandava army did Duryo...
4,1,5,सञ्जय,धृष्टकेतुश्चेकितानः काशिराजश्च वीर्यवान्| पुरु...,"Dhrishtaketu, Chekitana, the valiant king of K...",Who was the king from Kashi who fought in the ...


In [15]:
# Cast every column to str: the values become Document metadata, and chapter/verse
# are later compared against strings parsed from the model's reply.
df_gita = df_gita.astype(str)

In [16]:
# One record per verse: a reduced record whose fields are turned into the embedded
# text, and the full row which is attached to the document as metadata.
to_embed_dict_gita = df_gita.loc[:, ['question', 'answer', 'chapter', 'verse']].to_dict(orient='records')
to_metadata_dict_gita = df_gita.to_dict(orient='records')

## RAG chain for Bhagavad Gita

In [17]:
# Build one Document per verse: page_content is "field: value" lines for the
# embedded fields, metadata is the complete row.
docs_gita = []
for embed_fields, row_metadata in zip(to_embed_dict_gita, to_metadata_dict_gita):
    page_text = "\n".join(f"{field}: {value}" for field, value in embed_fields.items())
    docs_gita.append(Document(page_content=page_text, metadata=row_metadata))

In [18]:
# Inspect the first Gita document: metadata first, then the text that gets embedded.
print(docs_gita[0].metadata, docs_gita[0].page_content, sep="\n")

{'chapter': '1', 'verse': '1', 'speaker': 'धृतराष्ट्र', 'sanskrit': 'धर्मक्षेत्रे कुरुक्षेत्रे समवेता युयुत्सवः| मामकाः पाण्डवाश्चैव किमकुर्वत सञ्जय  || 1.1 || ', 'answer': 'Dhritarashtra said, "What did my people and the sons of Pandu do when they had assembled together, eager for battle, on the holy plain of Kurukshetra, O Sanjaya?"', 'question': 'How does the Gita start?'}
question: How does the Gita start?
answer: Dhritarashtra said, "What did my people and the sons of Pandu do when they had assembled together, eager for battle, on the holy plain of Kurukshetra, O Sanjaya?"
chapter: 1
verse: 1


In [19]:
# Chunk the Gita documents (1000-character chunks, 200-character overlap) and
# record each chunk's start offset in its metadata.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits_gita = text_splitter.split_documents(docs_gita)

In [20]:
# Sentence-embedding model used to build both the Gita and the Yoga Sutra vector stores.
# Alternative model, kept for reference:
# embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [21]:
# Build a FAISS vector store from the Gita chunks using the shared embedding model.
df_vectorstore_gita = FAISS.from_documents(documents=all_splits_gita, embedding=embeddings)

In [22]:
# Similarity retriever over the Gita store, configured with k=3 results per query.
df_retriever_gita = df_vectorstore_gita.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [23]:
# Sanity-check retrieval with a question that appears verbatim in the dataset.
df_retrieved_docs_gita = df_retriever_gita.invoke("How does the Gita start?")

In [24]:
# Show each retrieved Gita chunk followed by its metadata.
for retrieved_doc in df_retrieved_docs_gita:
    print(retrieved_doc.page_content, retrieved_doc.metadata, sep="\n")

question: How does the Gita start?
answer: Dhritarashtra said, "What did my people and the sons of Pandu do when they had assembled together, eager for battle, on the holy plain of Kurukshetra, O Sanjaya?"
chapter: 1
verse: 1
{'chapter': '1', 'verse': '1', 'speaker': 'धृतराष्ट्र', 'sanskrit': 'धर्मक्षेत्रे कुरुक्षेत्रे समवेता युयुत्सवः| मामकाः पाण्डवाश्चैव किमकुर्वत सञ्जय  || 1.1 || ', 'answer': 'Dhritarashtra said, "What did my people and the sons of Pandu do when they had assembled together, eager for battle, on the holy plain of Kurukshetra, O Sanjaya?"', 'question': 'How does the Gita start?', 'start_index': 0}
question: By whose grace did Sanjaya get to hear the Gita?
answer: Through the grace of Vyasa, I have heard this supreme and most secret Yoga, directly from Krishna, the Lord of Yoga, Himself declaring it.
chapter: 18
verse: 75
{'chapter': '18', 'verse': '75', 'speaker': 'सञ्जय', 'sanskrit': 'व्यासप्रसादाच्छ्रुतवानेतद्गुह्यमहं परम्| योगं योगेश्वरात्कृष्णात्साक्षात्कथयतः स्वय

## Patanjali Yoga Sutras

In [25]:
# Load the Yoga Sutra verse/question dataset and expose the English translation
# under the column name 'answer'.
df_yoga = (
    pd.read_csv("https://raw.githubusercontent.com/atmabodha/Vedanta_Datasets/refs/heads/main/Patanjali_Yoga_Sutras/Patanjali_Yoga_Sutras_Verses_English_Questions.csv")
    .rename(columns={'translation': 'answer'})
)
print(df_yoga.shape)
df_yoga.head()

(195, 5)


Unnamed: 0,chapter,verse,sanskrit,answer,question
0,1,1,अथ योगानुशासनम्,"Now, the teachings of yoga are presented since...",When does Yoga commence? When should I start d...
1,1,2,योगश्चित्तवृत्तिनिरोधः,The purpose of yoga is to quieten the fluctuat...,What is the purpose of Yoga?
2,1,3,तदा द्रष्टुः स्वरूपेऽवस्थानम्,"Once the state of Yoga is achieved, pure consc...",What is the end result of Yoga practice?
3,1,4,वृत्तिसारूप्यमितरत्र,"Till the state of Yoga is achieved, consciousn...",What is the usual state of our awareness? How ...
4,1,5,वृत्तयः पञ्चतय्यः क्लिष्टाक्लिष्टाः,These mental patterns can be classified into f...,What are the different types of thoughts? How ...


In [26]:
# Cast every column to str: the values become Document metadata, and chapter/verse
# are later compared against strings parsed from the model's reply.
df_yoga  = df_yoga.astype(str)

In [27]:
# One record per sutra: a reduced record whose fields are turned into the embedded
# text, and the full row which is attached to the document as metadata.
to_embed_dict_yoga = df_yoga.loc[:, ['question', 'answer', 'chapter', 'verse']].to_dict(orient='records')
to_metadata_dict_yoga = df_yoga.to_dict(orient='records')

## RAG chain for Patanjali Yoga Sutras

In [28]:
# Build one Document per sutra: page_content is "field: value" lines for the
# embedded fields, metadata is the complete row.
docs_yoga = []
for embed_fields, row_metadata in zip(to_embed_dict_yoga, to_metadata_dict_yoga):
    page_text = "\n".join(f"{field}: {value}" for field, value in embed_fields.items())
    docs_yoga.append(Document(page_content=page_text, metadata=row_metadata))

In [29]:
# Inspect the first Yoga Sutra document: metadata first, then the text that gets embedded.
print(docs_yoga[0].metadata, docs_yoga[0].page_content, sep="\n")

{'chapter': '1', 'verse': '1', 'sanskrit': 'अथ योगानुशासनम्', 'answer': 'Now, the teachings of yoga are presented since the student is ready to received these teachings.', 'question': 'When does Yoga commence? When should I start doing Yoga?'}
question: When does Yoga commence? When should I start doing Yoga?
answer: Now, the teachings of yoga are presented since the student is ready to received these teachings.
chapter: 1
verse: 1


In [30]:
# Chunk the Yoga Sutra documents (1000-character chunks, 200-character overlap)
# and record each chunk's start offset in its metadata.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits_yoga = text_splitter.split_documents(docs_yoga)

In [31]:
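# The Yoga Sutra index reuses the `embeddings` object created earlier for the Gita index.
# Alternative model, kept for reference:
# embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")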

In [32]:
# Build a FAISS vector store from the Yoga Sutra chunks using the shared embedding model.
df_vectorstore_yoga = FAISS.from_documents(documents=all_splits_yoga, embedding=embeddings)

In [33]:
# Similarity retriever over the Yoga Sutra store, configured with k=3 results per query.
df_retriever_yoga = df_vectorstore_yoga.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [34]:
# Sanity-check retrieval with a question that appears verbatim in the dataset.
df_retrieved_docs_yoga = df_retriever_yoga.invoke("What is the purpose of Yoga?")

In [35]:
# Show the text of each retrieved Yoga Sutra chunk.
for retrieved_doc in df_retrieved_docs_yoga:
    print(retrieved_doc.page_content)

question: What is the purpose of Yoga?
answer: The purpose of yoga is to quieten the fluctuations of the chitta (mind).
chapter: 1
verse: 2
question: What are the methods to achieve the goal of Yoga? What are the methods to quieten the fluctuations of the mind? What are the means to calm your thoughts? What is the importance of practice and dispassion?
answer: In order to quiet the fluctuations of the mind, one needs - practice and dispassion.
chapter: 1
verse: 12
question: What does a (successful) practice of Kriya Yoga leads to? What are the effects of Kriya Yoga? What are the fruits of Kriya Yoga? How can we eliminate suffering?
answer: The aim of this path of action is to eliminate the roots of suffering and accomplish samadhi* (an advanced meditative state).
chapter: 2
verse: 2


## Microsoft Phi-3.5-mini-instruct

In [10]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Seed torch's RNG so that any sampling performed during generation is repeatable.
torch.random.manual_seed(0)

# Single source of truth for the checkpoint used by both the model and the tokenizer.
PHI_MODEL_ID = "microsoft/Phi-3.5-mini-instruct"

# Prefer the GPU, but fall back to the CPU so this cell still runs on CPU-only
# machines (the install cell already offers a faiss-cpu alternative).
PHI_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForCausalLM.from_pretrained(
    PHI_MODEL_ID,
    device_map=PHI_DEVICE,
    torch_dtype="auto",
    # NOTE(review): trust_remote_code runs Python files downloaded from the model
    # repository; consider pinning a `revision=` so the executed code cannot change.
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(PHI_MODEL_ID)

# Chat-style text-generation pipeline; called later as
# pipe(messages, max_new_tokens=..., return_full_text=False).
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

config.json:   0%|          | 0.00/3.45k [00:00<?, ?B/s]

configuration_phi3.py:   0%|          | 0.00/11.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3.5-mini-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py:   0%|          | 0.00/73.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3.5-mini-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/195 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.98k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

## ROUGE score calculation for accuracy

In [36]:
import evaluate
# Load the ROUGE metric; it is used later to score answers against the reference translation.
rouge = evaluate.load("rouge")

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

## Master Class

In [37]:
class bot:
    """Answer a question about the Bhagvad Gita or the Patanjali Yoga Sutras.

    ``run`` first asks the language model whether the question names an explicit
    chapter/verse/source; if so the verse is looked up directly in the verse
    tables. Otherwise the question is classified (Gita / Yoga / unrelated) and
    answered from the retrieved context, or politely declined.

    The class depends on objects defined at notebook level rather than passed
    in: ``pipe`` (text-generation pipeline), ``df_retriever_gita`` and
    ``df_retriever_yoga`` (retrievers), ``df_gita`` and ``df_yoga`` (verse
    tables) and ``rouge`` (ROUGE metric).
    """

    def __init__(self, prompt):
        """Store the user's question; no model call happens until ``run``."""
        self.prompt = prompt

    def invoke_response(self):
        """Send ``self.messages`` to ``pipe`` and store the text in ``self.response``.

        ``self.messages`` and ``self.max_tokens`` must already have been set by
        one of the prompt-building methods.
        """
        generation_args = {
            "max_new_tokens": self.max_tokens,
            "return_full_text": False,
        }
        output = pipe(self.messages, **generation_args)
        self.response = output[0]['generated_text']

    def chapter_verse_extractor(self):
        """Ask the model to extract ``chapter:C,verse:V,source:S`` from the question.

        The few-shot prompt instructs the model to reply ``not applicable`` when
        the question does not reference a specific verse.
        """
        self.messages=[
            {"role": "system", "content": f"""You are a helpful AI assistant who extracts chapter,verse and the source from the user's questions.
            ###Example:
            question: What is chapter 1 verse 45 of patanjali yoga sutra?
            answer: chapter:1,verse:45,source:patanjali yoga sutra

            question: What is chapter 2 verse 10 of bhagvad gita?
            answer: chapter:2,verse:10,source:bhagvad gita

            question: What are the various paths to realisation?
            answer: not applicable
            
            """},
            {"role": "user","content": f"question: {self.prompt}\nanswer: "}]
        self.max_tokens = 25
        self.invoke_response()

    @staticmethod
    def _summarise_hits(docs):
        """Return (top-hit metadata copy, list of page contents, joined context) for retrieved docs.

        The metadata is copied so that adding ``correct_answer`` does not modify
        the Document objects returned by the retriever. Raises ``IndexError``
        when ``docs`` is empty.
        """
        top_result = dict(docs[0].metadata)
        top_result['correct_answer'] = top_result['answer']
        contents = [doc.page_content for doc in docs]
        return top_result, contents, "\n\n".join(contents)

    def prompt_classifier(self):
        """Retrieve context from both stores and ask the model to label the question.

        After the call ``self.response`` holds the model's one-token label; the
        prompt asks for ``a`` (Gita), ``b`` (Yoga Sutras) or ``c`` (unrelated).
        The retrieved documents and derived context strings are kept on the
        instance for the answer-generation step.
        """
        self.df_retrieved_docs_gita = df_retriever_gita.invoke(self.prompt)
        (self.gita_top_result,
         self.qa_results_gita_all,
         self.qa_results_gita) = self._summarise_hits(self.df_retrieved_docs_gita)

        self.df_retrieved_docs_yoga = df_retriever_yoga.invoke(self.prompt)
        (self.yoga_top_result,
         self.qa_results_yoga_all,
         self.qa_results_yoga) = self._summarise_hits(self.df_retrieved_docs_yoga)

        self.messages = [
        {"role": "system", "content": f"""You are a helpful AI assistant who answers only Gita or Yoga related questions.
        If the question is similar to the following question: {self.gita_top_result['question']}, answer a.
        If the question is relevant to Bhagvad Gita, Hinduism, Spirituality and the characters in Mahabharatha, answer a.
        If the question is similar to the following question: {self.yoga_top_result['question']}, answer b.
        If the question is relevant to Patanjali Yoga Sutras, answer b.
        If the question is not relevant to Bhagvad Gita or Patanjali Yoga Sutras, answer c."""},
            {"role": "user","content": f"question:{self.prompt}\n\nanswer:"
            }]
        # A single token is enough because the expected reply is one letter.
        self.max_tokens = 1
        self.invoke_response()

    def gita_relevant_prompt(self):
        """Generate a short answer from the retrieved Gita context into ``self.response``."""
        self.messages = [
        {"role": "system", "content": """You are a helpful AI assistant who answers only Gita related questions.
        For the following question, answer in less than 100 words as a religious scholar along with sanskrit slogas explicityly without mentioning you are a religious scholar"""},
            {"role": "user","content": f"Context:\n{self.qa_results_gita}\n\nQuestion:{self.prompt}\n\nAnswer:"
            }]
        self.max_tokens = 512
        self.invoke_response()

    def yoga_relevant_prompt(self):
        """Generate a short answer from the retrieved Yoga Sutra context into ``self.response``."""
        self.messages = [
        {"role": "system", "content": """You are a helpful AI assistant who answers only Patanjali Yoga Sutras related questions.
        For the following question, answer in less than 100 words as an expert in Patanjali Yoga Sutras"""},
            {"role": "user","content": f"Context:\n{self.qa_results_yoga}\n\nQuestion:{self.prompt}\n\nAnswer:"
            }]
        self.max_tokens = 512
        self.invoke_response()

    def non_gita_relevant_prompt(self):
        """Generate a brief explanation that the question is out of scope into ``self.response``."""
        self.messages = [
        {"role": "system", "content": """You are a helpful AI assistant who answers only Gita related questions.
        The following question is not relevant to Bhagvad Gita, answer of its non relevancy to Gita or Patanjali Yoga Sutras with appropriate explanation in less than 100 words."""},
            {"role": "user","content": f"Question:{self.prompt}\n\nAnswer:"
            }]
        self.max_tokens = 128
        self.invoke_response()

    @staticmethod
    def _parse_reference(text):
        """Parse the extractor reply into ``(chapter, verse, source)`` strings, or ``None``.

        Only the first non-empty line after an optional ``"answer: "`` marker is
        considered. ``None`` is returned for ``not applicable`` replies and for
        anything that is not exactly three comma-separated fields.
        """
        marker = "answer: "
        # str.find returns -1 when the marker is absent; only slice when it was found.
        marker_at = text.find(marker)
        if marker_at != -1:
            text = text[marker_at + len(marker):]
        lines = text.strip().splitlines()
        if not lines:
            return None
        candidate = lines[0].strip()
        if "not applicable" in candidate.lower():
            return None
        fields = candidate.split(",")
        if len(fields) != 3:
            return None
        chapter, verse, source = (field.split(":")[-1].strip() for field in fields)
        if not (chapter and verse and source):
            return None
        return chapter, verse, source

    def _lookup_verse(self, chapter, verse, source):
        """Return the verse row for chapter/verse from the table named by ``source``, or ``None``."""
        source = source.lower()
        if 'gita' in source:
            table = df_gita
        elif 'yoga' in source:
            table = df_yoga
        else:
            return None
        # Compare as strings so the lookup works whatever dtype the columns have.
        matches = table.loc[(table['chapter'].astype(str) == chapter)
                            & (table['verse'].astype(str) == verse)]
        if matches.empty:
            return None
        return matches.to_dict(orient='records')[0]

    def _record_rag_answer(self, top_result):
        """Merge the top retrieved record, the generated answer and ROUGE scores into ``final_result``."""
        self.final_result.update(top_result)
        self.final_result.update({'long_answer': self.response})
        # NOTE(review): 'correct_answer' is a copy of the retrieved 'answer', so
        # 'answer_accuracy' compares a string with itself; kept for compatibility.
        self.final_result.update({'answer_accuracy': rouge.compute(
            predictions=[self.final_result['answer']],
            references=[self.final_result['correct_answer']])})
        self.final_result.update({'long_answer_accuracy': rouge.compute(
            predictions=[self.final_result['long_answer']],
            references=[self.final_result['correct_answer']])})

    def run(self):
        """Answer ``self.prompt`` and return the result as a dict.

        Returns:
            The matching verse row when the question names a chapter, verse and
            source that exist in the tables; otherwise the top retrieved record
            plus ``long_answer`` and ROUGE scores for Gita/Yoga questions; or a
            dict with only ``long_answer`` for unrelated questions.
        """
        self.final_result = {}
        self.chapter_verse_extractor()

        reference = self._parse_reference(self.response)
        if reference is not None:
            verse_record = self._lookup_verse(*reference)
            if verse_record is not None:
                self.final_result = verse_record
                return self.final_result

        # No usable verse reference (or no matching row): fall back to
        # classification and retrieval-augmented generation.
        self.prompt_classifier()
        label = self.response.strip().lower()
        if label == 'a':
            self.gita_relevant_prompt()
            self._record_rag_answer(self.gita_top_result)
        elif label == 'b':
            self.yoga_relevant_prompt()
            self._record_rag_answer(self.yoga_top_result)
        else:
            self.non_gita_relevant_prompt()
            self.final_result.update({'long_answer': self.response})

        return self.final_result

In [39]:
# Read a question interactively, route it through the bot and display the result dict.
prompt = input("Enter your prompts here: ")
x = bot(str(prompt))
final_result = x.run()
final_result

Enter your prompts here:  Sanskrit sloga for chapter 3 verse 22 of bhagavad gita


{'chapter': '3',
 'verse': '22',
 'speaker': 'भगवान',
 'sanskrit': 'न मे पार्थास्ति कर्तव्यं त्रिषु लोकेषु किञ्चन| नानवाप्तमवाप्तव्यं वर्त एव च कर्मणि || 3.22 ||  ',
 'answer': 'There is nothing in the three worlds, O Arjuna, that needs to be done by Me, nor is there anything unattained that needs to be attained; yet I engage Myself in action.',
 'question': 'Is God also bound by any duties towards the world?'}