## Recommender Engine ##

**Import Libraries**

In [None]:
import spacy
import pandas as pd
import re
from decimal import Decimal
import numpy as np
import nltk
from nltk import word_tokenize, pos_tag
from nltk.corpus import stopwords
import inspect
from spacy import displacy

In [None]:
# Load spaCy model
nlp = spacy.load("en_core_web_lg")

**Import Combined Dataset & Reformat Data**

In [None]:
df = pd.read_csv('combined_data.csv', encoding="ISO-8859-1")
df = df.drop(df.columns[0], axis=1)
df.head()

Unnamed: 0,region,location,activity,title,description,price,rating,review_count,duration,url,description_clean,general
0,Luzon,Palawan,Day Trips,Underground River Day Trips from Puerto Prince...,Discover Puerto Princesa Subterranean River Na...,2820.81,4.5,104,6.0,https://www.tripadvisor.com.ph/AttractionProdu...,Discover Puerto Princesa Subterranean River Na...,Tours
1,Visayas,Cebu,Snorkeling,Whale Shark Encounter and Sumilon Sandbar with...,"An amazing experience. It was a very long day,...",8922.98,4.5,83,6.0,https://www.tripadvisor.com.ph/AttractionProdu...,amazing experience long day 330am pickup 2hr h...,Water Activities
2,Luzon,Metro Manila,Day Trips,Amazing Manila - Tagaytay Full Day Sightseeing...,Thanks again to the amazing staff Sean (Lead T...,7483.79,5.0,47,7.0,https://www.tripadvisor.com.ph/AttractionProdu...,Thanks amazing staff Sean Lead Tour Guide Dan ...,Tours
3,Luzon,Palawan,Ziplining,"3-in-1 Adventure: Underground River, Zipline r...",Get the most out of your trip to the world-fam...,4029.73,4.5,9,8.0,https://www.tripadvisor.com.ph/AttractionProdu...,Get trip worldfamous Underground River booking...,Outdoor Activities
4,Luzon,Palawan,Day Trips,El Nido Island Hopping Day Tour from Puerto Pr...,Enjoy an island hopping experience in El Nido ...,11455.96,5.0,1,18.0,https://www.tripadvisor.com.ph/AttractionProdu...,Enjoy island hopping experience El Nido even h...,Tours


<br>
<br>

*Convert descriptions to string type*

In [None]:
df['description'] = df['description'].astype(str)
df['description_clean'] = df['description_clean'].astype(str)

<br>

*Clean descriptions*

In [None]:
df['description_clean'] = df['description'].str.replace('See more', '')
df['description_clean'].head()

0    Discover Puerto Princesa Subterranean River Na...
1    An amazing experience. It was a very long day,...
2    Thanks again to the amazing staff Sean (Lead T...
3    Get the most out of your trip to the world-fam...
4    Enjoy an island hopping experience in El Nido ...
Name: description_clean, dtype: object

<br>

*Remove punctuations and stop words*

In [None]:
import string

# Download the NLTK stopwords corpus (only required once)
nltk.download('stopwords')

# Get the set of English stopwords
stopwords_set = set(stopwords.words('english'))

# Function to remove stopwords and punctuation from a text
def remove_stopwords_punctuation(text):
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    
    # Tokenize the text
    tokens = nltk.word_tokenize(text)
    
    # Remove stopwords
    filtered_tokens = [token for token in tokens if token.lower() not in stopwords_set]
    
    # Join the filtered tokens back into a string
    filtered_text = ' '.join(filtered_tokens)
    
    return filtered_text

# Apply the function to the 'text_column' column
df['description_clean'] = df['description'].apply(remove_stopwords_punctuation)

df['description_clean'].head()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


0    Discover Puerto Princesa Subterranean River Na...
1    amazing experience long day 330am pickup 2hr h...
2    Thanks amazing staff Sean Lead Tour Guide Dan ...
3    Get trip worldfamous Underground River booking...
4    Enjoy island hopping experience El Nido even h...
Name: description_clean, dtype: object

<br>

*Separate sentences with spaces*

In [None]:
# Function to insert spaces before capital letters inside a word
def insert_spaces(text):
    # Use regular expression to find capital letters inside a word
    modified_text = re.sub(r"(\w)([A-Z])", r"\1 \2", text)
    return modified_text

# Apply the function to the 'text_column' column
df['description_clean'] = df['description'].apply(insert_spaces)

df['description_clean'].head()

0    Discover Puerto Princesa Subterranean River Na...
1    An amazing experience. It was a very long day,...
2    Thanks again to the amazing staff Sean (Lead T...
3    Get the most out of your trip to the world-fam...
4    Enjoy an island hopping experience in El Nido ...
Name: description_clean, dtype: object

<br>

*Remove unnecessary characters*

In [None]:
df['description_clean'] = df['description_clean'].str.replace('[¯Â\x7f]', '', regex=True)
df['description_clean']

0       Discover Puerto Princesa Subterranean River Na...
1       An amazing experience. It was a very long day,...
2       Thanks again to the amazing staff Sean (Lead T...
3       Get the most out of your trip to the world-fam...
4       Enjoy an island hopping experience in El Nido ...
                              ...                        
1000    Donsol is best known for its whale watching wh...
1001    Experience the New Zealand of Tanay Rizal and ...
1002    Start your morning by hiking to Mt. Batolusong...
1003    Enjoy this Ultimate Gigantes Islands All In Pa...
1004    Experience camping in the sky only an hour awa...
Name: description_clean, Length: 1005, dtype: object

<br>
<br>

**Create data frames based on generic activities**

In [None]:
unique_generic = df['general'].unique()
unique_generic

array(['Tours', 'Water Activities', 'Outdoor Activities', 'Recreational'],
      dtype=object)

In [None]:
tours_df = df[(df['general'] == 'Tours') & (df['region'] == 'Mindanao')]

water_df = df[(df['general'] == 'Water Activities') & (df['region'] == 'Mindanao')]

outdoor_df = df[(df['general'] == 'Outdoor Activities') & (df['region'] == 'Mindanao')]

rec_df = df[(df['general'] == 'Recreational') & (df['region'] == 'Mindanao')]

water_df

Unnamed: 0,region,location,activity,title,description,price,rating,review_count,duration,url,description_clean,general
95,Mindanao,Surigao del Norte,Boat Tours,Sugba Lagoon Tour and Magpupungko Rock Pools,We'll pick you from where you're staying in th...,3454.06,1.0,1,6.0,https://www.tripadvisor.com.ph/AttractionProdu...,We'll pick you from where you're staying in th...,Water Activities
337,Mindanao,Surigao del Norte,Boat Tours,Siargao Island Hopping,"Overall, this was a well-organized Private tou...",3338.92,4.5,2,4.0,https://www.tripadvisor.com.ph/AttractionProdu...,"Overall, this was a well-organized Private tou...",Water Activities
451,Mindanao,Surigao del Norte,surfing,Quality Surfing Lessons in Siargao Island,Highly recommend Ulap Siyam for anybody that w...,2590.54,5.0,7,1.0,https://www.tripadvisor.com.ph/AttractionProdu...,Highly recommend Ulap Siyam for anybody that w...,Water Activities
474,Mindanao,Surigao del Norte,Boat Tours,Siargao Island Hopping & land Tour,The tri island tour is one of the best things ...,6159.74,4.5,4,8.0,https://www.tripadvisor.com.ph/AttractionProdu...,The tri island tour is one of the best things ...,Water Activities
725,Mindanao,Surigao del Norte,Boat Tours,Siargao 3 Island hopping + Corregidor Island,You will experience the crystal clear water su...,5346.02,4.5,8,7.0,https://www.tripadvisor.com.ph/AttractionProdu...,You will experience the crystal clear water su...,Water Activities
736,Mindanao,Surigao del Norte,Boat Tours,Siargao 3 Island hopping + Mam On Island,,5001.12,4.5,8,7.0,https://www.tripadvisor.com.ph/AttractionProdu...,,Water Activities
858,Mindanao,Siargao,Boat Tours,Tri Island Tour in Siargao,Go on a tri-island day trip in Siargao and exp...,1450.0,4.3,396,24.0,https://www.klook.com/en-PH/activity/27656-dak...,Go on a tri-island day trip in Siargao and exp...,Water Activities
953,Mindanao,Siargao,Surfing,Siargao Surfing Lessons,SIARGAO is the top surfing spot in the Philipp...,700.0,4.5,4,1.0,https://www.klook.com/en-PH/activity/76411-sia...,S IA RG AO is the top surfing spot in the Phil...,Water Activities
954,Mindanao,Siargao,Surfing,Siargao Surfing Lessons,SIARGAO is the top surfing spot in the Philipp...,700.0,4.5,4,1.0,https://www.klook.com/en-PH/activity/76411-sia...,S IA RG AO is the top surfing spot in the Phil...,Water Activities


<br>

**Get dataframe for Luzon & Visayas**

In [None]:
luzvis_df = df[df['region'] != 'Mindanao']
print(luzvis_df.shape)
luzvis_df.head()

(971, 12)


Unnamed: 0,region,location,activity,title,description,price,rating,review_count,duration,url,description_clean,general
0,Luzon,Palawan,Day Trips,Underground River Day Trips from Puerto Prince...,Discover Puerto Princesa Subterranean River Na...,2820.81,4.5,104,6.0,https://www.tripadvisor.com.ph/AttractionProdu...,Discover Puerto Princesa Subterranean River Na...,Tours
1,Visayas,Cebu,Snorkeling,Whale Shark Encounter and Sumilon Sandbar with...,"An amazing experience. It was a very long day,...",8922.98,4.5,83,6.0,https://www.tripadvisor.com.ph/AttractionProdu...,"An amazing experience. It was a very long day,...",Water Activities
2,Luzon,Metro Manila,Day Trips,Amazing Manila - Tagaytay Full Day Sightseeing...,Thanks again to the amazing staff Sean (Lead T...,7483.79,5.0,47,7.0,https://www.tripadvisor.com.ph/AttractionProdu...,Thanks again to the amazing staff Sean (Lead T...,Tours
3,Luzon,Palawan,Ziplining,"3-in-1 Adventure: Underground River, Zipline r...",Get the most out of your trip to the world-fam...,4029.73,4.5,9,8.0,https://www.tripadvisor.com.ph/AttractionProdu...,Get the most out of your trip to the world-fam...,Outdoor Activities
4,Luzon,Palawan,Day Trips,El Nido Island Hopping Day Tour from Puerto Pr...,Enjoy an island hopping experience in El Nido ...,11455.96,5.0,1,18.0,https://www.tripadvisor.com.ph/AttractionProdu...,Enjoy an island hopping experience in El Nido ...,Tours


## User Input

In [None]:
destination = 'Cebu'

In [None]:
activity = 'Boat tours'

In [None]:
budget = 5000

*Determine General Activity of user input*

In [None]:
act_df = pd.read_csv('act.csv', encoding="ISO-8859-1")
act_df.head()

Unnamed: 0,activity,general
0,Canyoneering,Outdoor Activities
1,Rock climbing,Outdoor Activities
2,Scooter rentals,Outdoor Activities
3,ATV,Outdoor Activities
4,Gliding,Outdoor Activities


In [None]:
gen_act = act_df.loc[act_df['activity'] == activity, 'general']
gen_act = gen_act.tolist()
gen_act

['Tours']

In [None]:
gen_act = ''.join(gen_act)
gen_act

'Tours'

<br>
<br>

**Recommender Engine**

In [None]:
%%time

# get all rows with destination = user input and general = gen_act
des_user_df = luzvis_df[(luzvis_df['location'] == destination) & (luzvis_df['general'] == 'Tours')]
    
# combine clean descriptions
concat_desc = ' '.join(des_user_df['description_clean'])

#filter based on budget
tours_df = tours_df[tours_df['price'] <= budget]
water_df = water_df[water_df['price'] <= budget]
outdoor_df = outdoor_df[outdoor_df['price'] <= budget]
rec_df = rec_df[rec_df['price'] <= budget]


#if else statement for different generic activities, similiarity of activity & destination
if gen_act == 'Tours':
    tours_df['activity_score'] = [nlp(activity).similarity(nlp(text)) for text in tours_df['activity']]
    tours_df['des_score'] = [nlp(concat_desc).similarity(nlp(text)) for text in tours_df['description_clean']]
    reco_df = tours_df.sort_values(by='activity_score', ascending=False)
elif  gen_act == 'Water Activities':
    water_df['activity_score'] = [nlp(activity).similarity(nlp(text)) for text in water_df['activity']]
    water_df['des_score'] = [nlp(concat_desc).similarity(nlp(text)) for text in water_df['description_clean']]
    reco_df = water_df.sort_values(by='activity_score', ascending=False)
elif  gen_act == 'Outdoor Activities':
    outdoor_df['activity_score'] = [nlp(activity).similarity(nlp(text)) for text in outdoor_df['activity']]
    outdoor_df['des_score'] = [nlp(concat_desc).similarity(nlp(text)) for text in outdoor_df['description_clean']]
    reco_df = outdoor_df.sort_values(by='activity_score', ascending=False)
else:
    rec_df['activity_score'] = [nlp(activity).similarity(nlp(text)) for text in rec_df['activity']]
    rec_df['des_score'] = [nlp(concat_desc).similarity(nlp(text)) for text in rec_df['description_clean']]
    reco_df = rec_df.sort_values(by='activity_score', ascending=False)   
    
    
# Get the average of both activity_score and des_score
reco_df['average_sim'] = reco_df[['activity_score', 'des_score']].mean(axis=1)

#filter based on budget
#reco_df = reco_df[reco_df['price'] <= 5000]

#Get top recommendations
top_reco = reco_df.sort_values('average_sim', ascending=False).head(5)

import time
time.sleep(2)

top_reco


CPU times: total: 25.7 s
Wall time: 41.2 s


Unnamed: 0,region,location,activity,title,description,price,rating,review_count,duration,url,description_clean,general,activity_score,des_score,average_sim
604,Mindanao,Surigao del Norte,Boat tours,Siargao Island Hopping,Can't believe how crap this tour was. So borin...,3334.08,3.5,3,4.0,https://www.tripadvisor.com.ph/AttractionProdu...,Can't believe how crap this tour was. So borin...,Tours,1.0,0.909698,0.954849
878,Mindanao,Siargao,Day trips,Sohoton-Bucas Grande Tour in Siargao,Have the ultimate beach day in the Philippines...,3000.0,3.0,84,24.0,https://www.klook.com/en-PH/activity/78141-soh...,Have the ultimate beach day in the Philippines...,Tours,0.46795,0.968028,0.717989
899,Mindanao,Davao,Day trips,Samal Island Tour in Davao,Explore unique beaches and attractions of Sama...,2450.0,5.0,137,24.0,https://www.klook.com/en-PH/activity/79805-sam...,Explore unique beaches and attractions of Sama...,Tours,0.46795,0.934903,0.701426
977,Mindanao,Davao,Day trips,Highlands Tour in Davao,Enjoy breathtaking views in Davao with this 1 ...,2050.0,3.0,0,24.0,https://www.klook.com/en-PH/activity/79893-hig...,Enjoy breathtaking views in Davao with this 1 ...,Tours,0.46795,0.933663,0.700807
608,Mindanao,Surigao del Norte,multi-Day Tours,"3 Islands, Secret beach and Coral Garden Full-...","Naked island Siargao, Guyam island and Daku is...",3161.63,4.5,8,6.0,https://www.tripadvisor.com.ph/AttractionProdu...,"Naked island Siargao, Guyam island and Daku is...",Tours,0.432601,0.958438,0.695519
