In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
import warnings
warnings.filterwarnings("ignore")

## Dataset Fetching

In [18]:
import pandas as pd

In [None]:
df = pd.read_csv("Bhagwad_Gita.csv")

In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 701 entries, 0 to 700
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   ID               701 non-null    object
 1   Chapter          701 non-null    int64 
 2   Verse            701 non-null    int64 
 3   Shloka           701 non-null    object
 4   Transliteration  701 non-null    object
 5   HinMeaning       701 non-null    object
 6   EngMeaning       701 non-null    object
 7   WordMeaning      701 non-null    object
dtypes: int64(2), object(6)
memory usage: 43.9+ KB


In [21]:
df.head()

Unnamed: 0,ID,Chapter,Verse,Shloka,Transliteration,HinMeaning,EngMeaning,WordMeaning
0,BG1.1,1,1,धृतराष्ट्र उवाच |\nधर्मक्षेत्रे कुरुक्षेत्रे स...,dhṛtarāṣṭra uvāca .\ndharmakṣetre kurukṣetre s...,।।1.1।।धृतराष्ट्र ने कहा -- हे संजय ! धर्मभूमि...,1.1 Dhritarashtra said What did my people and...,1.1 धर्मक्षेत्रे on the holy plain? कुरुक्षेत्...
1,BG1.2,1,2,सञ्जय उवाच |\nदृष्ट्वा तु पाण्डवानीकं व्यूढं द...,sañjaya uvāca .\ndṛṣṭvā tu pāṇḍavānīkaṃ vyūḍha...,।।1.2।।संजय ने कहा -- पाण्डव-सैन्य की व्यूह रच...,1.2. Sanjaya said Having seen the army of the...,1.2 दृष्ट्वा having seen? तु indeed? पाण्डवानी...
2,BG1.3,1,3,पश्यैतां पाण्डुपुत्राणामाचार्य महतीं चमूम् |\n...,paśyaitāṃ pāṇḍuputrāṇāmācārya mahatīṃ camūm .\...,।।1.3।।हे आचार्य ! आपके बुद्धिमान शिष्य द्रुपद...,"1.3. ""Behold, O Teacher! this mighty army of t...",1.3 पश्य behold? एताम् this? पाण्डुपुत्राणाम् ...
3,BG1.4,1,4,अत्र शूरा महेष्वासा भीमार्जुनसमा युधि |\nयुयुध...,atra śūrā maheṣvāsā bhīmārjunasamā yudhi .\nyu...,।।1.4।।इस सेना में महान् धनुर्धारी शूर योद्धा ...,"1.4. Here are heroes, mighty archers, eal in b...",1.4 अत्र here? शूराः heroes? महेष्वासाः mighty...
4,BG1.5,1,5,धृष्टकेतुश्चेकितानः काशिराजश्च वीर्यवान् |\nपु...,dhṛṣṭaketuścekitānaḥ kāśirājaśca vīryavān .\np...,"।।1.5।।धृष्टकेतु, चेकितान, बलवान काशिराज, पुर...","1.5. ""Dhrishtaketu, chekitana and the valiant ...",1.5 धृष्टकेतुः Dhrishtaketu? चेकितानः Chekitan...


In [22]:
print(df.columns)

Index(['ID', 'Chapter', 'Verse', 'Shloka', 'Transliteration', 'HinMeaning',
       'EngMeaning', 'WordMeaning'],
      dtype='object')


In [23]:
df.isnull().sum()

ID                 0
Chapter            0
Verse              0
Shloka             0
Transliteration    0
HinMeaning         0
EngMeaning         0
WordMeaning        0
dtype: int64

In [24]:
# See how many verses we actually have
print("Total verses:", len(df))

# Distribution of chapters
df['Chapter'].value_counts().sort_index()

Total verses: 701


Chapter
1     47
2     72
3     43
4     42
5     29
6     47
7     30
8     28
9     34
10    42
11    55
12    20
13    35
14    27
15    20
16    24
17    28
18    78
Name: count, dtype: int64

In [25]:
df.columns = df.columns.str.lower()

# Combine chapter, verse and English meaning for chatbot context
df['full_text'] = (
    "Chapter " + df['chapter'].astype(str) +
    " Verse " + df['verse'].astype(str) + ": " +
    df['engmeaning']
)

print(df['full_text'].head())

0    Chapter 1 Verse 1: 1.1 Dhritarashtra said  Wha...
1    Chapter 1 Verse 2: 1.2. Sanjaya said  Having s...
2    Chapter 1 Verse 3: 1.3. "Behold, O Teacher! th...
3    Chapter 1 Verse 4: 1.4. Here are heroes, might...
4    Chapter 1 Verse 5: 1.5. "Dhrishtaketu, chekita...
Name: full_text, dtype: object


In [None]:
print("Total verses:", len(df))

Total verses: 701


## Embeddings Creation

In [26]:
from sentence_transformers import SentenceTransformer
import numpy as np

In [27]:
model = SentenceTransformer('all-MiniLM-L6-v2')


In [30]:
df['embedding'] = df['full_text'].apply(lambda x: model.encode(str(x), convert_to_numpy=True, show_progress_bar=True))

print("Embeddings created for all verses.")

Batches: 100%|██████████| 1/1 [00:00<00:00,  2.79it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 101.34it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 114.07it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 120.80it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 54.15it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 57.87it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 100.91it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 143.53it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 99.91it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 96.04it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 171.49it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 76.86it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 66.86it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 63.01it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 81.06it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 78.02it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 139.70it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 82.10it/s]
Bat

Embeddings created for all verses.


In [29]:
#storing embeddings for later use
import pickle

with open("gita_embeddings.pkl", "wb") as f:
    pickle.dump(df, f)

print("Embeddings saved to gita_embeddings.pkl")

Embeddings saved to gita_embeddings.pkl


## Vector Storage