# Cleaning

In [2]:
import pandas as pd
df = pd.read_csv('../Datasets/places.csv')

In [3]:
df

Unnamed: 0,features__id,features__geometry__coordinates__001,features__geometry__coordinates__002,features__properties__name,features__properties__kinds
0,7982642,73.856285,18.519789,Navagraha Shani Mandir,"religion,hindu_temples,interesting_places"
1,1880291,73.855774,18.519611,,"architecture,historic_architecture,interesting..."
2,7989636,73.855408,18.520498,,"religion,hindu_temples,interesting_places"
3,8119763,73.855835,18.519409,,"architecture,historic_architecture,interesting..."
4,4982515,73.855301,18.520477,Shaniwar Wada Amphitheatre,"architecture,historic_architecture,fortificati..."
...,...,...,...,...,...
495,5167813,73.860718,18.754732,,"religion,hindu_temples,interesting_places"
496,7989644,73.869949,18.754507,,"religion,hindu_temples,interesting_places"
497,1880659,73.849441,18.755674,,"religion,hindu_temples,interesting_places"
498,1880658,73.848267,18.756159,,"religion,hindu_temples,interesting_places"


In [4]:
df.isnull().sum()

features__id                             0
features__geometry__coordinates__001     0
features__geometry__coordinates__002     0
features__properties__name              79
features__properties__kinds              0
dtype: int64

In [5]:
new_df = df.dropna()
new_df = new_df[~new_df['features__properties__name'].str.contains('\d', na=False)]

In [7]:
new_df.to_csv("../Datasets/new_places.csv", index=False)

# Using data from MongoDB

### Connecting to Mongo

In [16]:
import pymongo
import os
from dotenv import load_dotenv
load_dotenv()

mongo_password = os.getenv('PASSWORD')

client = pymongo.MongoClient(f"mongodb+srv://adwaitkulkarni2211:{mongo_password}@tripplanner.merlit0.mongodb.net/?retryWrites=true&w=majority")
db = client["TripPlanner"]

### Retrieve data from collections

In [17]:
places_data = db["new_places"].find()

### Create dataframes using retrieved data

In [19]:
ds = pd.DataFrame(list(places_data))

# CBR

In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [21]:
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(ds['features__properties__kinds'])

In [22]:
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)
results = {}

In [23]:
for idx, row in ds.iterrows():
    similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
    similar_items = [(cosine_similarities[idx][i], ds['features__id'][i]) for i in similar_indices]

    results[row['features__id']] = similar_items[1:]
    
print('done!')

done!


In [24]:
def item(id):
    return ds.loc[ds['features__id'] == id]['features__properties__name'].tolist()[0]

In [25]:
def recommend(item_id, num):
    print("Recommending " + str(num) + " places similar to " + item(item_id) + "...")
    print("-------")
    recs = results[item_id][:num]
    for rec in recs:
        print("Recommended: " + item(rec[1]) + " (score:" + str(rec[0]) + ")")

In [26]:
recommend(item_id=1880355, num=10)

Recommending 10 places similar to COEP Auditorium...
-------
Recommended: Ankushrao Landge Natyagruha (score:0.9999999999999999)
Recommended: Dhanwantari Auditorium (score:0.9999999999999999)
Recommended: Amphitheatre (score:0.9999999999999999)
Recommended: Amphitheater (score:0.9999999999999999)
Recommended: Nehru Memorial Hall (score:0.9999999999999999)
Recommended: Mini Auditorium COEP (score:0.9999999999999999)
Recommended: COEP Auditorium (score:0.9999999999999999)
Recommended: Auditorium (score:0.9999999999999999)
Recommended: Pandit Bhimsen Joshi Kalamandir (score:0.9999999999999999)
Recommended: Bharat Naatya Mandir (score:0.9999999999999999)
