In [1]:
import numpy as np
import pandas as pd
import ast
import pymongo
from pymongo import MongoClient
import json
import nltk
# nltk.download('wordnet')
# nltk.download('omw-1.4')
# nltk.download('stopwords')
# nltk.download('punkt')
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from bson import ObjectId
from sklearn.metrics.pairwise import cosine_similarity

# Database Connection

In [2]:
CONNECTION_STRING = "mongodb://prathamrasal:pratham@movies-shard-00-00.3vhge.mongodb.net:27017,movies-shard-00-01.3vhge.mongodb.net:27017,movies-shard-00-02.3vhge.mongodb.net:27017/sihdb?ssl=true&replicaSet=atlas-13vdfc-shard-0&authSource=admin&retryWrites=true&w=majority"
client = MongoClient(CONNECTION_STRING)
db = client['sihdb']
resources = db['resources']
institutes =db['institutes']

In [3]:
resInfo=resources.find()
resInfo= list(resInfo)
resDF = pd.DataFrame(resInfo)
# title, description, cost , reputation point
# resDF.head(2)

In [4]:
insInfo=institutes.find()
insInfo= list(insInfo)
insDF = pd.DataFrame(insInfo)
street = []
state = []
city = []
for i in range(len(insDF)): 
    street.append(insDF.iloc[i].address['street'])
    city.append(insDF.iloc[i].address['city'])
    state.append(insDF.iloc[i].address['state'])
insDF['street'] = street
insDF['city'] = city
insDF['state'] = state
insDF = insDF[['_id','instituteName','aisheCode','naac','street','city','state']]
# insDF.tail(5)

# Data Selection

In [5]:
resDF = pd.merge(resDF,insDF,left_on="lendingInstitute", right_on = "_id", how="inner")
# resDF.tail(3)

In [6]:
resDF = resDF[['_id_x','title','description','cost','city','street','state','naac']]
# resDF.head(5)

# Data Cleaning

In [7]:
resDF.dropna(inplace=True)
resDF.isnull().sum()

_id_x          0
title          0
description    0
cost           0
city           0
street         0
state          0
naac           0
dtype: int64

In [8]:
lematizer = WordNetLemmatizer()
def getCleanText(text):
    text = str(text)
    words = word_tokenize(text)
    wordd = []
    for w in words:
        word = lematizer.lemmatize(w)
        if not word in set(stopwords.words('english')):
            wordd.append(word)
    cleanText = ' '.join(wordd)
    return cleanText

def toLowerCase(text):
    text = str(text)
    return text.lower()

In [9]:
resDF['description'] = resDF['description'].apply(getCleanText)

# Data Processing

In [10]:
resDF['tags'] = ''
# resDF.head(2)

In [11]:
def generateTag():
    resDF['tags'] = resDF['title'] + " " + resDF['description']+ " "  + resDF['cost']+ " " + resDF['city']+ " " +resDF['street']+ " " +resDF['state']+ " " +resDF['naac']
    resDF['tags'] = resDF['tags'].apply(lambda x: x.lower())
# resDF['tags'].head(2)
generateTag()

# Vectorization

In [12]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000,stop_words='english')

In [13]:
def vectorize(narr):
    vector = cv.fit_transform(narr.astype('str')).toarray()
    similarity = cosine_similarity(vector)
    return similarity    

In [14]:
def recommend_search(instituteId,title):
    instituteId = ObjectId(instituteId)
    institute = institutes.find_one({"_id": instituteId})
    institute = pd.DataFrame(institute)
    city = institute['address']['city']
    street = institute['address']['street']
    state = institute['address']['state']
    naac = institute['naac'][0]
    tags = title + city + street +state + naac
    myDict = {
         "title":title,
         "description":" ", 
         "cost":" ", 
         "city": city,
         "street":street,
         "state":state,
         "naac": naac,
         "tags": tags.lower()
    }
    newDF = resDF.append(myDict,ignore_index = True)
    newarr = newDF['tags'].values
    vector = vectorize(newarr)
    recommended_list = sorted(list(enumerate(vector[-1])),reverse=True,key= lambda x:x[1])
    recList = []
    for i in recommended_list[1:]:
        recList.append(resDF.iloc[i[0]]._id_x)
    return recList

In [15]:
def getInstitute(instituteId):
    instituteId = ObjectId(instituteId)
    institute = institutes.find_one({"_id": instituteId})
    institute = pd.DataFrame(institute)
    city = institute['address']['city']
    street = institute['address']['street']
    state = institute['address']['state']
    naac = institute['naac'][0]
    tags = city + street +state + naac
    myDict = {
         "title":" ",
         "description":" ", 
         "cost":" ", 
         "city": city,
         "street":street,
         "state":state,
         "naac": naac,
         "tags": tags.lower()
    }
    newDF = resDF.append(myDict,ignore_index = True)
    return newDF

In [20]:
# def dashboard(id):
#     newDF = getInstitute(id)
#     newarr = newDF['tags'].values
#     vector = vectorize(newarr)
#     recommended_list = sorted(list(enumerate(vector[-1])),reverse=True,key= lambda x:x[1])
#     recList = []
#     for i in recommended_list[1:]:
#         recList.append(resDF.iloc[i[0]].title)
#     return recList

In [30]:
def dashboard(instituteId):
#     resDF = recommendation_foundation()
    # instituteId = ObjectId(instituteId)
    # instituteId = json.loads(json_util.dumps(instituteId))
    print(instituteId)
    institute = institutes.find_one({"_id": ObjectId(instituteId)})
    print(institute, instituteId)
    institute = pd.DataFrame(institute)
    city = institute['address']['city']
    street = institute['address']['street']
    state = institute['address']['state']
    naac = institute['naac'][0]
    tags = city + street +state + naac
    myDict = {
         "title":" ",
         "description":" ", 
         "cost":" ", 
         "city": city,
         "street":street,
         "state":state,
         "naac": naac,
         "tags": tags.lower()
    }
    newDF = resDF.append(myDict,ignore_index = True)
    newarr = newDF['tags'].values
    vector = vectorize(newarr)
    recommended_list = sorted(list(enumerate(vector[-1])),reverse=True,key= lambda x:x[1])
    recList = []
    for i in recommended_list[1:]:
        recList.append(resDF.iloc[i[0]]._id_x)
    # print(recList)
    return recList

In [31]:
dashboard('62f412466fc0348badda5bab')

62f412466fc0348badda5bab
{'_id': ObjectId('62f412466fc0348badda5bab'), 'instituteName': 'DEVI AHILYA VISHWAVIDYALAYA', 'address': {'street': ' 452001 , Devi Ahilya Vishwavidyala, Nalanda Parisar, R N T Marg', 'city': ' Indore ', 'state': 'Madhya Pradesh'}, 'aisheCode': 'U-0270', 'naac': 'A+', 'reputationPoint': '5.596294454098518', '__v': 0} 62f412466fc0348badda5bab


  newDF = resDF.append(myDict,ignore_index = True)


[ObjectId('62f4155f4d4f5fe937b401a5'),
 ObjectId('62f415ac4d4f5fe937b411c7'),
 ObjectId('62f4157d4d4f5fe937b40805'),
 ObjectId('62f415a44d4f5fe937b41021'),
 ObjectId('62f4156f4d4f5fe937b404f9'),
 ObjectId('62f415a74d4f5fe937b410b5'),
 ObjectId('62f4159c4d4f5fe937b40ead'),
 ObjectId('62f415a34d4f5fe937b40ffd'),
 ObjectId('62f415654d4f5fe937b402e1'),
 ObjectId('62f415ab4d4f5fe937b41179'),
 ObjectId('62f415764d4f5fe937b40691'),
 ObjectId('62f415944d4f5fe937b40d37'),
 ObjectId('62f4155f4d4f5fe937b401d5'),
 ObjectId('62f4156c4d4f5fe937b4047d'),
 ObjectId('62f4157a4d4f5fe937b40757'),
 ObjectId('62f4157f4d4f5fe937b4089f'),
 ObjectId('62f4159e4d4f5fe937b40f13'),
 ObjectId('62f4156b4d4f5fe937b40433'),
 ObjectId('62f415674d4f5fe937b4036b'),
 ObjectId('62f415954d4f5fe937b40d3f'),
 ObjectId('62f415734d4f5fe937b405f7'),
 ObjectId('62f415ae4d4f5fe937b4124b'),
 ObjectId('62f415b54d4f5fe937b413c1'),
 ObjectId('62f4158c4d4f5fe937b40b31'),
 ObjectId('62f415974d4f5fe937b40db9'),
 ObjectId('62f415904d4f5f

In [18]:
# recommend_search('62f412426fc0348badda5ae9',"Aries Gold G 729 S-BK Analog Watch  - For Men, Boys")