# Using data from MongoDB

### Connecting to Mongo

In [1]:
import pymongo
import os
import pandas as pd
from dotenv import load_dotenv
load_dotenv()

mongo_uri = os.getenv('MONGO_URI')

client = pymongo.MongoClient(mongo_uri)
db = client["TripPlanner"]

### Retrieve data from collections

In [2]:
places_data = db["new_places"].find()
ratings_data = db["ratings"].find()

### Create dataframes using retrieved data

In [3]:
new_places_df = pd.DataFrame(list(places_data))
new_places_df

Unnamed: 0,_id,features__id,features__geometry__coordinates__001,features__geometry__coordinates__002,features__properties__name,features__properties__kinds
0,6404770215f2cc1ab48c7fbc,1880294,73.858543,18.518969,Chavhan Shriram Mandir,"religion,hindu_temples,interesting_places"
1,6404770215f2cc1ab48c7fb3,1880288,73.855309,18.520626,Bajirao I statue,"historic,monuments_and_memorials,interesting_p..."
2,6404770215f2cc1ab48c7fb9,5167808,73.857071,18.518755,Kala Datta Mandir,"religion,hindu_temples,interesting_places"
3,6404770215f2cc1ab48c7fba,5686924,73.856079,18.518808,Shrimant Peshwe Ganesh Mandir,"religion,hindu_temples,interesting_places"
4,6404770215f2cc1ab48c7fb6,5167809,73.857101,18.518833,Phani Ali Ganesh Mandir,"religion,hindu_temples,interesting_places"
...,...,...,...,...,...,...
405,6404770215f2cc1ab48c810e,5167799,73.778374,18.626381,Morya Gosavi Temple,"religion,hindu_temples,interesting_places"
406,6404770215f2cc1ab48c8115,1879994,73.806526,18.658690,Vittal Mandir,"religion,hindu_temples,interesting_places"
407,6404770215f2cc1ab48c811a,7989632,73.808846,18.660667,Saraswati Mandir,"religion,hindu_temples,interesting_places"
408,6404770215f2cc1ab48c812a,1879536,73.708687,18.598501,Bajranj Bali Mandir,"religion,hindu_temples,interesting_places"


In [4]:
ratings_df = pd.DataFrame(list(ratings_data))
ratings_df

Unnamed: 0,_id,userid,placeid,rating
0,6404770515f2cc1ab48c8161,1,4982514,1
1,6404770515f2cc1ab48c816a,1,6189253,1
2,6404770515f2cc1ab48c816b,1,7982633,5
3,6404770515f2cc1ab48c816f,1,14698880,2
4,6404770515f2cc1ab48c8188,1,11375902,4
...,...,...,...,...
50574,6404770515f2cc1ab48d46ca,2090,5102357,5
50575,6404770515f2cc1ab48d46cc,2090,6604083,1
50576,6404770515f2cc1ab48d46cd,2090,4906168,1
50577,6404770515f2cc1ab48d46d9,2090,4982507,4


# CBR

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

### Creating TF*IDF matrix for place kinds

In [6]:
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
tfidf_matrix_places = tf.fit_transform(new_places_df['features__properties__kinds'])

### Creating dataframe for a particular user

In [7]:
userid = 5

# Selecting all the rows of this userid
user_ratings_df = ratings_df[ratings_df['userid'] == userid]
user_places_df = pd.merge(user_ratings_df, new_places_df, left_on='placeid', right_on='features__id')
user_df = user_places_df[['userid', 'placeid', 'features__properties__name', 'features__properties__kinds', 'rating']]
user_df

Unnamed: 0,userid,placeid,features__properties__name,features__properties__kinds,rating
0,5,5102381,Gurudawara Guru Nanak Darbar,"religion,other_temples,interesting_places",3
1,5,7979616,ISKCON NVCC Temple,"religion,hindu_temples,interesting_places",2
2,5,7978452,Ganesh Mandir,"religion,hindu_temples,interesting_places",2
3,5,5102356,Mahalakshmi temple,"religion,other_temples,interesting_places",2
4,5,7989626,Maruti Mandir,"religion,hindu_temples,interesting_places",1
5,5,1880215,Savarkar Memorial,"historic,monuments_and_memorials,interesting_p...",4
6,5,1880239,Farog E Nabavi Masjid,"religion,other_temples,interesting_places",1
7,5,1880089,Shani Maruti Temple,"religion,hindu_temples,interesting_places",4
8,5,16244529,Kirkee Memorial,"historic,burial_places,interesting_places,war_...",2
9,5,9958520,Kubera Estate,"other,unclassified_objects,interesting_places,...",4


In [8]:
cosine_similarities = linear_kernel(tfidf_matrix_places, tfidf_matrix_places)
results = {}
cosine_similarities

array([[1.        , 0.02165328, 1.        , ..., 1.        , 1.        ,
        0.01628604],
       [0.02165328, 1.        , 0.02165328, ..., 0.02165328, 0.02165328,
        0.00570797],
       [1.        , 0.02165328, 1.        , ..., 1.        , 1.        ,
        0.01628604],
       ...,
       [1.        , 0.02165328, 1.        , ..., 1.        , 1.        ,
        0.01628604],
       [1.        , 0.02165328, 1.        , ..., 1.        , 1.        ,
        0.01628604],
       [0.01628604, 0.00570797, 0.01628604, ..., 0.01628604, 0.01628604,
        1.        ]])

In [9]:
for idx, row in new_places_df.iterrows():
    similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
    similar_items = [(cosine_similarities[idx][i], new_places_df['features__id'][i]) for i in similar_indices]

    results[row['features__id']] = similar_items[1:]
    
print('done!')
results

done!


{1880294: [(1.0000000000000002, 7989640),
  (1.0000000000000002, 7979443),
  (1.0000000000000002, 5167810),
  (1.0000000000000002, 7982646),
  (1.0000000000000002, 7982642),
  (1.0000000000000002, 1879536),
  (1.0000000000000002, 7978447),
  (1.0000000000000002, 7979617),
  (1.0000000000000002, 1879972),
  (1.0000000000000002, 5686851),
  (1.0000000000000002, 7989628),
  (1.0000000000000002, 7977845),
  (1.0000000000000002, 7982628),
  (1.0000000000000002, 7978454),
  (1.0000000000000002, 7989626),
  (1.0000000000000002, 1880577),
  (1.0000000000000002, 1880137),
  (1.0000000000000002, 5165268),
  (1.0000000000000002, 1880085),
  (1.0000000000000002, 7978449),
  (1.0000000000000002, 5686736),
  (1.0000000000000002, 1879355),
  (1.0000000000000002, 1880093),
  (1.0000000000000002, 11381988),
  (1.0000000000000002, 5686932),
  (1.0000000000000002, 7982639),
  (1.0000000000000002, 1879983),
  (1.0000000000000002, 7982640),
  (1.0000000000000002, 1880201),
  (1.0000000000000002, 9958522),


In [10]:
def item(id):
    return new_places_df.loc[new_places_df['features__id'] == id]['features__properties__name'].tolist()[0]

In [11]:
# set to avoid duplicate recommedations
recs_set = set()
# print(type(set))

In [12]:
def recommend(item_id, num):
    recs = results[item_id][:num]
    for rec in recs:
        if rec[1] not in recs_set:
            recs_set.add(rec[1])
            print("Recommended: " +
                  item(rec[1]) + " (score:" + str(rec[0]) + ")" + 'id: ', (rec[1]))

In [13]:
for idx, row in user_df.iterrows():
    recs_set.add(row['placeid'])
    # Only making recommendations for places rated 4 or above by user
    if row['rating'] >= 4:
        print(f"PLACES RECOMMENDED FOR {row['features__properties__name']}:")
        recommend(row['placeid'], 5)
        print()

PLACES RECOMMENDED FOR Savarkar Memorial:
Recommended: Buddha Statue (score:1.0000000000000004)id:  1879370
Recommended: Pramod Mahale (score:1.0000000000000004)id:  1880314
Recommended: Hutatma Chaphekar Bandhu Memorial (score:1.0000000000000004)id:  1880347
Recommended: General Arunkumar Vaidya (score:1.0000000000000004)id:  1880420
Recommended: National War Memorial Southern Command (score:1.0000000000000004)id:  7299651

PLACES RECOMMENDED FOR Shani Maruti Temple:
Recommended: Shri Mahadev Mandir (score:1.0000000000000002)id:  7989640
Recommended: Lakerya Maruti (score:1.0000000000000002)id:  7979443
Recommended: Dulya Maruti (score:1.0000000000000002)id:  5167810
Recommended: Guruji Talim (score:1.0000000000000002)id:  7982646
Recommended: Navagraha Shani Mandir (score:1.0000000000000002)id:  7982642

PLACES RECOMMENDED FOR Kubera Estate:
Recommended: Shinde Chhatri (score:1.0000000000000002)id:  11447701
Recommended: Peacock bay (score:1.0000000000000002)id:  1879227
Recommended: