# Cleaning

In [2]:
import pandas as pd
df = pd.read_csv('../Datasets/places.csv')

In [3]:
df

Unnamed: 0,features__id,features__geometry__coordinates__001,features__geometry__coordinates__002,features__properties__name,features__properties__kinds
0,7982642,73.856285,18.519789,Navagraha Shani Mandir,"religion,hindu_temples,interesting_places"
1,1880291,73.855774,18.519611,,"architecture,historic_architecture,interesting..."
2,7989636,73.855408,18.520498,,"religion,hindu_temples,interesting_places"
3,8119763,73.855835,18.519409,,"architecture,historic_architecture,interesting..."
4,4982515,73.855301,18.520477,Shaniwar Wada Amphitheatre,"architecture,historic_architecture,fortificati..."
...,...,...,...,...,...
495,5167813,73.860718,18.754732,,"religion,hindu_temples,interesting_places"
496,7989644,73.869949,18.754507,,"religion,hindu_temples,interesting_places"
497,1880659,73.849441,18.755674,,"religion,hindu_temples,interesting_places"
498,1880658,73.848267,18.756159,,"religion,hindu_temples,interesting_places"


In [4]:
df.isnull().sum()

features__id                             0
features__geometry__coordinates__001     0
features__geometry__coordinates__002     0
features__properties__name              79
features__properties__kinds              0
dtype: int64

In [5]:
new_df = df.dropna()
new_df = new_df[~new_df['features__properties__name'].str.contains('\d', na=False)]

In [7]:
new_df.to_csv("../Datasets/new_places.csv", index=False)

# Using data from MongoDB

### Connecting to Mongo

In [263]:
import pymongo
import os
import pandas as pd
from dotenv import load_dotenv
load_dotenv()

mongo_uri = os.getenv('MONGO_URI')

client = pymongo.MongoClient(mongo_uri)
db = client["TripPlanner"]

### Retrieve data from collections

In [264]:
places_data = db["new_places"].find()
ratings_data = db["ratings"].find()

### Create dataframes using retrieved data

In [265]:
new_places_df = pd.DataFrame(list(places_data))
new_places_df

Unnamed: 0,_id,features__id,features__geometry__coordinates__001,features__geometry__coordinates__002,features__properties__name,features__properties__kinds
0,6404770215f2cc1ab48c7fb1,7982642,73.856285,18.519789,Navagraha Shani Mandir,"religion,hindu_temples,interesting_places"
1,6404770215f2cc1ab48c7fb2,4982515,73.855301,18.520477,Shaniwar Wada Amphitheatre,"architecture,historic_architecture,fortificati..."
2,6404770215f2cc1ab48c7fb3,1880288,73.855309,18.520626,Bajirao I statue,"historic,monuments_and_memorials,interesting_p..."
3,6404770215f2cc1ab48c7fb4,5102363,73.856308,18.519026,Prem Vitthal,"religion,other_temples,interesting_places"
4,6404770215f2cc1ab48c7fb5,11471114,73.857239,18.519030,Kasba Ganpati,"religion,hindu_temples,interesting_places"
...,...,...,...,...,...,...
405,6404770215f2cc1ab48c8146,15609194,73.704056,18.692251,Ghorawadi Caves,"interesting_places,natural,geological_formatio..."
406,6404770215f2cc1ab48c8147,5102381,73.880508,18.747219,Gurudawara Guru Nanak Darbar,"religion,other_temples,interesting_places"
407,6404770215f2cc1ab48c8148,15605364,74.059166,18.645556,Battle of Koregaon,"battlefields,historic,historical_places,intere..."
408,6404770215f2cc1ab48c8149,7980742,74.023514,18.352192,om sai mitra mandal saisamrajya chowk saswad,"religion,hindu_temples,interesting_places"


In [266]:
ratings_df = pd.DataFrame(list(ratings_data))
ratings_df

Unnamed: 0,_id,userid,placeid,rating
0,6404770515f2cc1ab48c814b,1,5167809,3
1,6404770515f2cc1ab48c814c,1,1879958,3
2,6404770515f2cc1ab48c814d,1,4982513,2
3,6404770515f2cc1ab48c814e,1,4906168,5
4,6404770515f2cc1ab48c814f,1,15832604,3
...,...,...,...,...
50574,6404770515f2cc1ab48d46d9,2090,4982507,4
50575,6404770515f2cc1ab48d46da,2090,7989627,2
50576,6404770515f2cc1ab48d46db,2090,4982508,1
50577,6404770515f2cc1ab48d46dc,2090,7979443,2


# CBR

In [267]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

### Creating TF*IDF matrix for place kinds

In [268]:
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
tfidf_matrix_places = tf.fit_transform(new_places_df['features__properties__kinds'])

### Creating TF*IDF matrix for a particular user

In [269]:
userid = 5

# Selecting all the rows of this userid
user_ratings_df = ratings_df[ratings_df['userid'] == userid]
user_places_df = pd.merge(user_ratings_df, new_places_df, left_on='placeid', right_on='features__id')
user_df = user_places_df[['userid', 'placeid', 'features__properties__name', 'features__properties__kinds', 'rating']]
user_df

Unnamed: 0,userid,placeid,features__properties__name,features__properties__kinds,rating
0,5,1880089,Shani Maruti Temple,"religion,hindu_temples,interesting_places",4
1,5,11474381,BJ Medical College,"architecture,historic_architecture,interesting...",2
2,5,1880261,Sarvajanik Kaka,"historic,monuments_and_memorials,interesting_p...",3
3,5,5102381,Gurudawara Guru Nanak Darbar,"religion,other_temples,interesting_places",3
4,5,7979616,ISKCON NVCC Temple,"religion,hindu_temples,interesting_places",2
5,5,1880049,Kanifnath Mandir,"religion,hindu_temples,interesting_places",4
6,5,1879983,Ganapati mandir,"religion,hindu_temples,interesting_places",3
7,5,7978452,Ganesh Mandir,"religion,hindu_temples,interesting_places",2
8,5,11289895,Gokhale Institute of Politics and Economics,"architecture,historic_architecture,interesting...",1
9,5,1879703,Bhairavnath Temple,"religion,hindu_temples,interesting_places",1


In [270]:
tfidf_matrix_user = tf.transform(user_df['features__properties__kinds'])

In [271]:
cosine_similarities = linear_kernel(tfidf_matrix_places, tfidf_matrix_user)
results = {}
cosine_similarities

array([[1.        , 0.01902982, 0.02165328, ..., 0.02165328, 0.02472961,
        0.01870452],
       [0.00926597, 0.17100294, 0.04285378, ..., 0.04285378, 0.00370895,
        0.0028053 ],
       [0.02165328, 0.00666962, 1.        , ..., 1.        , 0.00866729,
        0.00655561],
       ...,
       [0.01545904, 0.00476168, 0.07149586, ..., 0.07149586, 0.00618789,
        0.00468028],
       [1.        , 0.01902982, 0.02165328, ..., 0.02165328, 0.02472961,
        0.01870452],
       [0.13187855, 0.01304329, 0.01484144, ..., 0.01484144, 0.01695   ,
        0.01282032]])

In [272]:
for idx, row in new_places_df.iterrows():
    similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
    similar_items = [(cosine_similarities[idx][i], new_places_df['features__id'][i]) for i in similar_indices]

    results[row['features__id']] = similar_items[1:]
    
print('done!')
results

done!


{7982642: [(1.0000000000000002, 1880294),
  (1.0000000000000002, 11471114),
  (1.0000000000000002, 5167809),
  (1.0000000000000002, 7230246),
  (1.0000000000000002, 1880287),
  (1.0000000000000002, 7982642),
  (1.0000000000000002, 1880290),
  (0.13187855228501733, 7977880),
  (0.13187855228501733, 7989635),
  (0.13187855228501733, 5102363),
  (0.024729610275076017, 7982647),
  (0.021653278897932053, 7087223),
  (0.021653278897932053, 1880288),
  (0.01902982386351372, 5167808),
  (0.01902982386351372, 4982515),
  (0.018704521128545558, 1880357),
  (0.016034443638444373, 1880283),
  (0.015559453584506994, 5102362),
  (0.0138073020686974, 11422000)],
 4982515: [(0.17100293801616712, 4982515),
  (0.17100293801616712, 5167808),
  (0.042853777413597416, 7087223),
  (0.042853777413597416, 1880288),
  (0.030793551578525258, 5102362),
  (0.01581169254413773, 11422000),
  (0.009265967961997898, 5686924),
  (0.009265967961997898, 1880287),
  (0.009265967961997898, 11471114),
  (0.0092659679619978

In [273]:
def item(id):
    return new_places_df.loc[new_places_df['features__id'] == id]['features__properties__name'].tolist()[0]

In [277]:
# set to avoid duplicate recommedations
recs_set = set()
# print(type(set))

TypeError: 'set' object is not callable

In [255]:
def recommend(item_id, num):
    recs = results[item_id][:num]
    for rec in recs:
        if rec[1] not in recs_set:
            recs_set.add(rec[1])
            print("Recommended: " +
                  item(rec[1]) + " (score:" + str(rec[0]) + ")" + 'id: ', (rec[1]))

TypeError: 'set' object is not callable

In [None]:
for idx, row in user_df.iterrows():
    recs_set.add(row['placeid'])
    # Only making recommendations for places rated 4 or above by user
    if row['rating'] >= 4:
        print(f"PLACES RECOMMENDED FOR {row['features__properties__name']}:")
        recommend(row['placeid'], 5)
        print()