In [1]:
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
import re
import numpy as np
import pandas as pd
import pickle
import numpy.linalg as lin
import unicodedata
import warnings 
from sklearn.cluster import KMeans
stop_words = set(stopwords.words('english'))
warnings.filterwarnings('ignore')

In [2]:
file=open('sentence.pkl','rb')
verse_embeddings_sentence=pickle.load(file)
file.close()
file=open('max.pkl','rb')
verse_embeddings_max=pickle.load(file)
file.close()
file=open('mean.pkl','rb')
verse_embeddings_mean=pickle.load(file)
file.close()

In [3]:
file=open('whole.pkl','rb')
verse_embeddings_whole=pickle.load(file)
file.close()

In [4]:
file=open('glove.pkl','rb')
glove=pickle.load(file)
file.close()

In [5]:
def remove_special_characters(text):
    regex = re.compile('[^a-zA-Z\s]')
    text_returned = re.sub(regex,' ',text)
    return text_returned

In [6]:
def strip_accents(s):
    return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')

In [7]:
def word_embedding(word):
    word=word.lower()
    if word in glove.keys():
        return np.array(glove[word],dtype=np.float64)
    return np.array([0.0]*100)

In [8]:
def cosine_similarity(a,b):
    try:
        return a.dot(b)/(lin.norm(a)*lin.norm(b))
    except:
        return -1.0

In [9]:
def ed(a,b):
    return lin.norm(a-b)

In [10]:
def sentence_embedding(sentence):
    sentence=strip_accents(sentence)
    sentence=remove_special_characters(sentence)
    words=word_tokenize(sentence)
    if len(words)>0:
        words=[word  for word in words if word not in stop_words]
        sentence_embedding=[word_embedding(word) for word in words]
        return np.array(list(map(lambda x: sum(x)/len(x), zip(*sentence_embedding))))
    return np.array([0]*100)

In [11]:
data=pd.read_csv('../English.csv')

In [12]:
query='Importance of bhagavad gita'
query_embedding=sentence_embedding(query)
query_embedding_list=query_embedding.reshape(1,-1).astype(np.float64)

# Max Strategy

## Cosine Similarity

In [13]:
def max_cosine(query_embedding,counter=10):
    scoring=[cosine_similarity(query_embedding,i) for i in verse_embeddings_max]
    verse_index=np.argsort(scoring)[-1::-1][:counter]
    return verse_index

In [14]:
verse_index=max_cosine(query_embedding)

In [15]:
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
583,18,64,सर्वगुह्यतमं भूय: श‍ृणु मे परमं वच: ।इष्टोऽसि ...,The Lord has given Arjuna knowledge that is co...
276,7,30,साधिभूताधिदैवं मां साधियज्ञं च ये विदु: ।प्रया...,Persons acting in Kṛṣṇa consciousness are neve...
29,1,46,सञ्जय उवाचएवमुक्त्वार्जुनः संख्ये रथोपस्थ उपाव...,"While observing the situation of his enemy, Ar..."
540,18,11,न हि देहभृता शक्यं त्यक्तुं कर्माण्यशेषत: ।यस्...,It is said in Bhagavad-gītā that one can never...
68,2,39,एषा तेऽभिहिता सांख्ये बुद्धिर्योगे त्विमां श‍ृ...,"According to the Nirukti, or the Vedic diction..."
373,10,40,नान्तोऽस्ति मम दिव्यानां विभूतीनां परन्तप ।एष ...,"As stated in the Vedic literature, although th..."
124,3,26,न बुद्धिभेदं जनयेदज्ञानां कर्मसङ्गिनाम् ।जोषये...,Vedaiś ca sarvair aham eva vedyaḥ. That is the...
487,15,15,सर्वस्य चाहं हृदि सन्निविष्टोमत्त: स्मृतिर्ज्ञ...,The Supreme Lord is situated as Paramātmā in e...
586,18,67,इदं ते नातपस्काय नाभक्ताय कदाचन ।न चाश‍ुश्रूषव...,Persons who have not undergone the austerities...
474,14,27,ब्रह्मणो हि प्रतिष्ठाहममृतस्याव्ययस्य च ।शाश्व...,"The constitution of Brahman is immortality, im..."


## Euclidean Distance

In [16]:
def max_ed(query_embedding,counter=10):
    scoring=[ed(query_embedding,i) for i in verse_embeddings_max]
    verse_index=np.argsort(scoring)[:counter]
    return verse_index

In [17]:
verse_index=max_ed(query_embedding)

In [18]:
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
29,1,46,सञ्जय उवाचएवमुक्त्वार्जुनः संख्ये रथोपस्थ उपाव...,"While observing the situation of his enemy, Ar..."
373,10,40,नान्तोऽस्ति मम दिव्यानां विभूतीनां परन्तप ।एष ...,"As stated in the Vedic literature, although th..."
289,8,13,ॐ इत्येकाक्षरं ब्रह्म व्याहरन्मामनुस्मरन् ।य: ...,"It is clearly stated here that oṁ, Brahman and..."
374,10,41,यद्यद्विभूतिमत्सत्त्वं श्रीमदूर्जितमेव वा ।तत्...,Any glorious or beautiful existence should be ...
586,18,67,इदं ते नातपस्काय नाभक्ताय कदाचन ।न चाश‍ुश्रूषव...,Persons who have not undergone the austerities...
395,11,35,सञ्जय उवाचएतच्छ्रुत्वा वचनं केशवस्यकृताञ्जलिर्...,"As we have already explained, because of the s..."
583,18,64,सर्वगुह्यतमं भूय: श‍ृणु मे परमं वच: ।इष्टोऽसि ...,The Lord has given Arjuna knowledge that is co...
222,6,19,यथा दीपो निवातस्थो नेङ्गते सोपमा स्मृता ।योगिन...,"A truly Kṛṣṇa conscious person, always absorbe..."
391,11,21,अमी हि त्वां सुरसङ्घा विशन्तिकेचिद्भ‍ीता: प्रा...,The demigods in all the planetary systems fear...
276,7,30,साधिभूताधिदैवं मां साधियज्ञं च ये विदु: ।प्रया...,Persons acting in Kṛṣṇa consciousness are neve...


# Mean Pooling

## Cosine Similarity

In [19]:
def mean_cosine(query_embedding,counter=10):
    scoring=[cosine_similarity(query_embedding,i) for i in verse_embeddings_mean]
    verse_index=np.argsort(scoring)[-1::-1][:counter]
    return verse_index

In [20]:
verse_index=mean_cosine(query_embedding)

In [21]:
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
587,18,68,य इदं परमं गुह्यं मद्भ‍क्तेष्वभिधास्यति ।भक्ति...,Generally it is advised that Bhagavad-gītā be ...
586,18,67,इदं ते नातपस्काय नाभक्ताय कदाचन ।न चाश‍ुश्रूषव...,Persons who have not undergone the austerities...
347,10,14,सर्वमेतदृतं मन्ये यन्मां वदसि केशव ।न हि ते भग...,Arjuna herein confirms that persons of faithle...
589,18,72,कच्च‍िदेतच्छ्रुतं पार्थ त्वयैकाग्रेण चेतसा ।कच...,The Lord was acting as the spiritual master of...
142,4,1,श्रीभगवानुवाचइमं विवस्वते योगं प्रोक्तवानहमव्य...,Herein we find the history of the Bhagavad-gīt...
530,18,1,सन्न्यासस्य महाबाहो तत्त्वमिच्छामि वेदितुम् ।त...,Actually the Bhagavad-gītā is finished in seve...
179,4,38,न हि ज्ञानेन सदृशं पवित्रमिह विद्यते ।तत्स्वयं...,"When we speak of transcendental knowledge, we ..."
98,2,72,एषा ब्राह्मी स्थितिःपार्थ नैनां प्राप्य विमुह्...,One can attain Kṛṣṇa consciousness or divine l...
358,10,25,महर्षीणां भृगुरहं गिरामस्म्येकमक्षरम् ।यज्ञाना...,"Brahmā, the first living creature within the u..."
592,18,75,व्यासप्रसादाच्छ्रुतवानेतद्‍गुह्यमहं परम् ।योगं...,"Vyāsa was the spiritual master of Sañjaya, and..."


## Euclidean Distance

In [22]:
def mean_ed(query_embedding,counter=10):
    scoring=[ed(query_embedding,i) for i in verse_embeddings_mean]
    verse_index=np.argsort(scoring)[:counter]
    return verse_index

In [23]:
verse_index=mean_ed(query_embedding)

In [24]:
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
587,18,68,य इदं परमं गुह्यं मद्भ‍क्तेष्वभिधास्यति ।भक्ति...,Generally it is advised that Bhagavad-gītā be ...
586,18,67,इदं ते नातपस्काय नाभक्ताय कदाचन ।न चाश‍ुश्रूषव...,Persons who have not undergone the austerities...
347,10,14,सर्वमेतदृतं मन्ये यन्मां वदसि केशव ।न हि ते भग...,Arjuna herein confirms that persons of faithle...
589,18,72,कच्च‍िदेतच्छ्रुतं पार्थ त्वयैकाग्रेण चेतसा ।कच...,The Lord was acting as the spiritual master of...
358,10,25,महर्षीणां भृगुरहं गिरामस्म्येकमक्षरम् ।यज्ञाना...,"Brahmā, the first living creature within the u..."
527,17,24,तस्माद् ॐ इत्युदाहृत्य यज्ञदानतप:क्रिया: ।प्रव...,Oṁ tad viṣṇoḥ paramaṁ padam (Ṛg Veda 1.22.20)....
142,4,1,श्रीभगवानुवाचइमं विवस्वते योगं प्रोक्तवानहमव्य...,Herein we find the history of the Bhagavad-gīt...
592,18,75,व्यासप्रसादाच्छ्रुतवानेतद्‍गुह्यमहं परम् ।योगं...,"Vyāsa was the spiritual master of Sañjaya, and..."
243,6,44,पूर्वाभ्यासेन तेनैव ह्रियते ह्यवशोऽपि स: ।जिज्...,Advanced yogīs are not very much attracted to ...
530,18,1,सन्न्यासस्य महाबाहो तत्त्वमिच्छामि वेदितुम् ।त...,Actually the Bhagavad-gītā is finished in seve...


# Line Matching

## Cosine Similarity

In [25]:
def line_cosine(query_embeddings,counter=10):
    scoring=[]
    for embeddings in verse_embeddings_sentence:
        score=[cosine_similarity(query_embedding,embed) for embed in embeddings]
        scoring.append(max(score))
    verse_index=np.argsort(scoring)[-1::-1][:counter]
    return verse_index

In [26]:
verse_index=line_cosine(query_embedding)
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
265,7,19,बहूनां जन्मनामन्ते ज्ञानवान्मां प्रपद्यते ।वास...,"The living entity, while executing devotional ..."
0,1,1,धृतराष्ट्र उवाचधर्मक्षेत्रे कुरुक्षेत्रे समवेत...,Bhagavad-gītā is the widely read theistic scie...
542,18,13,पञ्चैतानि महाबाहो कारणानि निबोध मे ।सांख्ये कृ...,A question may be raised that since any activi...
346,10,11,तेषामेवानुकम्पार्थमहमज्ञानजं तम: ।नाशयाम्यात्म...,When Lord Caitanya was in Benares promulgating...
595,18,78,यत्र योगेश्वर: कृष्णो यत्र पार्थो धनुर्धर: ।तत...,The Bhagavad-gītā began with an inquiry of Dhṛ...
36,2,7,कार्पण्यदोषोपहतस्वभावःपृच्छामि त्वां धर्मसम्मू...,By nature’s own way the complete system of mat...
37,2,8,न हि प्रपश्यामि ममापनुद्याद् -यच्छोकमुच्छोषणमि...,Although Arjuna was putting forward so many ar...
531,18,2,काम्यानां कर्मणां न्यासं सन्न्यासं कवयो विदु: ...,The performance of activities for results has ...
587,18,68,य इदं परमं गुह्यं मद्भ‍क्तेष्वभिधास्यति ।भक्ति...,Generally it is advised that Bhagavad-gītā be ...
413,11,55,मत्कर्मकृन्मत्परमो मद्भ‍क्त: सङ्गवर्जित: ।निर्...,Anyone who wants to approach the supreme of al...


## Euclidean Distance

In [27]:
def line_ed(query_embeddings,counter=10):
    scoring=[]
    for embeddings in verse_embeddings_sentence:
        score=[ed(query_embedding,embed) for embed in embeddings]
        scoring.append(max(score))
    verse_index=np.argsort(scoring)[:counter]
    return verse_index

In [28]:
verse_index=line_ed(query_embedding)
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
373,10,40,नान्तोऽस्ति मम दिव्यानां विभूतीनां परन्तप ।एष ...,"As stated in the Vedic literature, although th..."
289,8,13,ॐ इत्येकाक्षरं ब्रह्म व्याहरन्मामनुस्मरन् ।य: ...,"It is clearly stated here that oṁ, Brahman and..."
29,1,46,सञ्जय उवाचएवमुक्त्वार्जुनः संख्ये रथोपस्थ उपाव...,"While observing the situation of his enemy, Ar..."
395,11,35,सञ्जय उवाचएतच्छ्रुत्वा वचनं केशवस्यकृताञ्जलिर्...,"As we have already explained, because of the s..."
222,6,19,यथा दीपो निवातस्थो नेङ्गते सोपमा स्मृता ।योगिन...,"A truly Kṛṣṇa conscious person, always absorbe..."
391,11,21,अमी हि त्वां सुरसङ्घा विशन्तिकेचिद्भ‍ीता: प्रा...,The demigods in all the planetary systems fear...
374,10,41,यद्यद्विभूतिमत्सत्त्वं श्रीमदूर्जितमेव वा ।तत्...,Any glorious or beautiful existence should be ...
93,2,67,इन्द्रियाणां हि चरतां यन्मनोऽनुविधीयते ।तदस्य ...,Unless all of the senses are engaged in the se...
527,17,24,तस्माद् ॐ इत्युदाहृत्य यज्ञदानतप:क्रिया: ।प्रव...,Oṁ tad viṣṇoḥ paramaṁ padam (Ṛg Veda 1.22.20)....
552,18,23,नियतं सङ्गरहितमरागद्वेषत: कृतम् ।अफलप्रेप्सुना...,"Regulated occupational duties, as prescribed i..."


# Whole Part

## Cosine Similarity

In [29]:
def whole_cosine(query_embedding,counter=10):
    scoring=[cosine_similarity(query_embedding,i) for i in verse_embeddings_whole]
    verse_index=np.argsort(scoring)[-1::-1][:counter]
    return verse_index

In [30]:
verse_index=whole_cosine(query_embedding)
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
587,18,68,य इदं परमं गुह्यं मद्भ‍क्तेष्वभिधास्यति ।भक्ति...,Generally it is advised that Bhagavad-gītā be ...
347,10,14,सर्वमेतदृतं मन्ये यन्मां वदसि केशव ।न हि ते भग...,Arjuna herein confirms that persons of faithle...
586,18,67,इदं ते नातपस्काय नाभक्ताय कदाचन ।न चाश‍ुश्रूषव...,Persons who have not undergone the austerities...
589,18,72,कच्च‍िदेतच्छ्रुतं पार्थ त्वयैकाग्रेण चेतसा ।कच...,The Lord was acting as the spiritual master of...
98,2,72,एषा ब्राह्मी स्थितिःपार्थ नैनां प्राप्य विमुह्...,One can attain Kṛṣṇa consciousness or divine l...
358,10,25,महर्षीणां भृगुरहं गिरामस्म्येकमक्षरम् ।यज्ञाना...,"Brahmā, the first living creature within the u..."
592,18,75,व्यासप्रसादाच्छ्रुतवानेतद्‍गुह्यमहं परम् ।योगं...,"Vyāsa was the spiritual master of Sañjaya, and..."
243,6,44,पूर्वाभ्यासेन तेनैव ह्रियते ह्यवशोऽपि स: ।जिज्...,Advanced yogīs are not very much attracted to ...
530,18,1,सन्न्यासस्य महाबाहो तत्त्वमिच्छामि वेदितुम् ।त...,Actually the Bhagavad-gītā is finished in seve...
142,4,1,श्रीभगवानुवाचइमं विवस्वते योगं प्रोक्तवानहमव्य...,Herein we find the history of the Bhagavad-gīt...


## Euclidean Distance

In [31]:
def whole_ed(query_embedding,counter=10):
    scoring=[cosine_similarity(query_embedding,i) for i in verse_embeddings_whole]
    verse_index=np.argsort(scoring)[:counter]
    return verse_index

In [32]:
verse_index=whole_ed(query_embedding)
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
204,5,23,शक्न‍ोतीहैव य: सोढुं प्राक्शरीरविमोक्षणात् ।का...,If one wants to make steady progress on the pa...
66,2,37,हतो वा प्राप्स्यसि स्वर्ग जित्वा वा भोक्ष्यसे ...,Even though there was no certainty of victory ...
463,14,12,लोभ: प्रवृत्तिरारम्भ: कर्मणामशम: स्पृहा ।रजस्य...,One in the mode of passion is never satisfied ...
6,1,10,अपर्याप्त‍ं तदस्माकं बलं भीष्माभिरक्षितम् ।पर्...,Herein an estimation of comparative strength i...
7,1,11,अयनेषु च सर्वेषु यथाभागवमस्थिताः ।भीष्ममेवाभिर...,"Duryodhana, after praising the prowess of Bhīṣ..."
326,9,23,येऽप्यन्यदेवताभक्ता यजन्ते श्रद्धयान्विता: ।ते...,“Persons who are engaged in the worship of dem...
538,18,9,कार्यमित्येव यत्कर्म नियतं क्रियतेऽर्जुन ।सङ्ग...,Prescribed duties must be performed with this ...
13,1,23,योत्स्यमानानवेक्षेऽहं य एतेऽत्र समागताः ।धार्त...,It was an open secret that Duryodhana wanted t...
369,10,36,द्यूतं छलयतामस्मि तेजस्तेजस्विनामहम् ।जयोऽस्मि...,There are many kinds of cheaters all over the ...
545,18,16,तत्रैवं सति कर्तारमात्मानं केवलं तु य: ।पश्यत्...,A foolish person cannot understand that the Su...


# Clustering

## Max Pooling

In [33]:
kmeans_max = KMeans(n_clusters=60, random_state=0).fit(verse_embeddings_max)

In [34]:
def kmeans_max_cluster(query_embedding_list):
    label=kmeans_max.predict(query_embedding_list)[0]
    verse_index=[i for i in range(len(data)) if kmeans_max.labels_[i]==label]
    return verse_index

In [35]:
verse_index=kmeans_max_cluster(query_embedding_list)
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
4,1,8,भवान्भीष्मश्च कर्णश्च कृपश्च समितिंजयः ।अश्वत्...,Duryodhana mentions the exceptional heroes in ...
29,1,46,सञ्जय उवाचएवमुक्त्वार्जुनः संख्ये रथोपस्थ उपाव...,"While observing the situation of his enemy, Ar..."
93,2,67,इन्द्रियाणां हि चरतां यन्मनोऽनुविधीयते ।तदस्य ...,Unless all of the senses are engaged in the se...
116,3,18,नैव तस्य कृतेनार्थो नाकृतेनेह कश्चन ।न चास्य स...,A self-realized man is no longer obliged to pe...
129,3,31,ये मे मतमिदं नित्यमनुतिष्ठन्ति मानवाः ।श्रद्धा...,The injunction of the Supreme Personality of G...
135,3,37,श्री भगवानुवाचकाम एष क्रोध एष रजोगुणसमुद्भ‍वः ...,When a living entity comes in contact with the...
222,6,19,यथा दीपो निवातस्थो नेङ्गते सोपमा स्मृता ।योगिन...,"A truly Kṛṣṇa conscious person, always absorbe..."
226,6,27,प्रशान्तमनसं ह्येनं योगिनं सुखमुत्तमम् ।उपैति ...,Brahma-bhūta is the state of being free from m...
244,6,45,प्रयत्‍नाद्यतमानस्तु योगी संश‍ुद्धकिल्बिष: ।अन...,"A person born in a particularly righteous, ari..."
289,8,13,ॐ इत्येकाक्षरं ब्रह्म व्याहरन्मामनुस्मरन् ।य: ...,"It is clearly stated here that oṁ, Brahman and..."


## Mean Pooling

In [36]:
kmeans_mean = KMeans(n_clusters=60, random_state=0).fit(verse_embeddings_mean)

In [37]:
def kmeans_mean_cluster(query_embedding_list):
    label=kmeans_mean.predict(query_embedding_list)[0]
    verse_index=[i for i in range(len(data)) if kmeans_mean.labels_[i]==label]
    return verse_index

In [38]:
verse_index=kmeans_mean_cluster(query_embedding_list)
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
226,6,27,प्रशान्तमनसं ह्येनं योगिनं सुखमुत्तमम् ।उपैति ...,Brahma-bhūta is the state of being free from m...
289,8,13,ॐ इत्येकाक्षरं ब्रह्म व्याहरन्मामनुस्मरन् ।य: ...,"It is clearly stated here that oṁ, Brahman and..."
527,17,24,तस्माद् ॐ इत्युदाहृत्य यज्ञदानतप:क्रिया: ।प्रव...,Oṁ tad viṣṇoḥ paramaṁ padam (Ṛg Veda 1.22.20)....


## Whole

In [39]:
kmeans_whole = KMeans(n_clusters=60, random_state=0).fit(verse_embeddings_whole)

In [40]:
def kmeans_whole_cluster(query_embedding_list):
    label=kmeans_whole.predict(query_embedding_list)[0]
    verse_index=[i for i in range(len(data)) if kmeans_whole.labels_[i]==label]
    return verse_index

In [41]:
verse_index=kmeans_whole_cluster(query_embedding_list)
data.loc[verse_index]

Unnamed: 0,Chapter,Verse,Sloke,Commentary
281,8,5,अन्तकाले च मामेव स्मरन्मुक्त्वा कलेवरम् ।य: प्...,In this verse the importance of Kṛṣṇa consciou...
289,8,13,ॐ इत्येकाक्षरं ब्रह्म व्याहरन्मामनुस्मरन् ।य: ...,"It is clearly stated here that oṁ, Brahman and..."
358,10,25,महर्षीणां भृगुरहं गिरामस्म्येकमक्षरम् ।यज्ञाना...,"Brahmā, the first living creature within the u..."
