In [1]:
# General
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# NLP
import spacy
# Load the spaCy English pipeline once for the whole notebook; the
# recommender functions below call nlp(...) and Doc.similarity on its output.
# NOTE(review): similarity scores are presumably driven by the word vectors
# shipped with the "lg" model — confirm before swapping in a smaller model.
nlp = spacy.load("en_core_web_lg")

In [8]:
# Read the combined dataset and discard its first column in one chain,
# then preview the first rows.
df = (
    pd.read_csv('Data/combined_data.csv')
    .pipe(lambda frame: frame.drop(columns=frame.columns[0]))
)
df.head()

Unnamed: 0,Region,Location,Activity,Title,Description,Price,Rating,Review,Duration
0,Visayas,Palawan,day trips,Coron Super Ultimate Day Tour,Visit the highlights around Coron's breathtaki...,1590.0,4.7,684,9.0
1,Visayas,Cebu,canyoning,Oslob Whale Shark Snorkeling and Badian Canyon...,Learn more about this activity's Enhanced Heal...,3635.0,4.7,798,16.0
2,Visayas,Boracay,boat tours,Boracay Tour Package (Island Hopping),Book this island tour package and discover Bor...,900.0,4.5,3947,
3,Visayas,Boracay,sightseeing cruises,"Boracay Sunset Cruise with Kayak, Paddle Board...",Catch the spectacular Boracay sunset while cru...,675.0,4.7,1517,2.5
4,Visayas,Palawan,boat tours,Puerto Princesa Underground River Tour in Palawan,Explore the Puerto Princesa Underground River ...,1850.0,4.6,2478,9.5


In [29]:
# Make every Description a string (float values were identified during
# analysis — presumably NaN for missing descriptions). A plain astype(str)
# would turn a missing value into the literal word 'nan', which would later be
# joined into the query text and scored as if it were real content, so missing
# values are replaced with an empty string before the cast.
df['Description'] = df['Description'].fillna('').astype(str)

In [30]:
# Candidate pool: only the Mindanao rows. reset_index() renumbers the rows
# from 0 and keeps the original row labels in a new 'index' column.
df_min = df.loc[df['Region'].eq('Mindanao')].reset_index()
print(df_min.shape)
df_min.head(20)

(20, 10)


Unnamed: 0,index,Region,Location,Activity,Title,Description,Price,Rating,Review,Duration
0,25,Mindanao,Siargao,sightseeing cruises,Tri Island Tour in Siargao,Go on a tri-island day trip in Siargao and exp...,1450.0,4.3,396,24.0
1,30,Mindanao,Siargao,day trips,Siargao Land Tour,Dedicate your vacation pleasures with thrillin...,1900.0,4.6,229,24.0
2,45,Mindanao,Siargao,day trips,Sohoton-Bucas Grande Tour in Siargao,Have the ultimate beach day in the Philippines...,3000.0,3.0,84,24.0
3,48,Mindanao,Davao,day trips,Nature Tour in Davao,Go on an adventure in Davao with this Day Dava...,2550.0,5.0,183,24.0
4,66,Mindanao,Davao,day trips,Samal Island Tour in Davao,Explore unique beaches and attractions of Sama...,2450.0,5.0,137,24.0
5,83,Mindanao,Davao,day trips,Davao City Tour,Explore the wonderful sights of Davao with thi...,1300.0,5.0,2,24.0
6,120,Mindanao,Siargao,surfing,Siargao Surfing Lessons,SIARGAO is the top surfing spot in the Philipp...,700.0,4.5,4,1.0
7,121,Mindanao,Siargao,surfing,Siargao Surfing Lessons,SIARGAO is the top surfing spot in the Philipp...,700.0,4.5,4,1.0
8,144,Mindanao,Davao,day trips,Highlands Tour in Davao,Enjoy breathtaking views in Davao with this 1 ...,2050.0,3.0,0,24.0
9,213,Mindanao,Surigao del Norte,tour islands beach garden,"3 Islands, Secret beach and Coral Garden Full-...","Naked island Siargao, Guyam island and Daku is...",3166.22,4.6,8,6.0


In [31]:
# Reference pool: every row whose Region is not Mindanao (original row labels
# are kept as-is).
df_rest = df.loc[df['Region'].ne('Mindanao')]
print(df_rest.shape)
df_rest.head()

(738, 9)


Unnamed: 0,Region,Location,Activity,Title,Description,Price,Rating,Review,Duration
0,Visayas,Palawan,day trips,Coron Super Ultimate Day Tour,Visit the highlights around Coron's breathtaki...,1590.0,4.7,684,9.0
1,Visayas,Cebu,canyoning,Oslob Whale Shark Snorkeling and Badian Canyon...,Learn more about this activity's Enhanced Heal...,3635.0,4.7,798,16.0
2,Visayas,Boracay,boat tours,Boracay Tour Package (Island Hopping),Book this island tour package and discover Bor...,900.0,4.5,3947,
3,Visayas,Boracay,sightseeing cruises,"Boracay Sunset Cruise with Kayak, Paddle Board...",Catch the spectacular Boracay sunset while cru...,675.0,4.7,1517,2.5
4,Visayas,Palawan,boat tours,Puerto Princesa Underground River Tour in Palawan,Explore the Puerto Princesa Underground River ...,1850.0,4.6,2478,9.5


Recommender engine, version 1: for the entered location, combine the Activity text and the Description text of all of its entries, and use the combined text to find the most similar Mindanao activities.

In [34]:
def rec_eng(location):
    """Recommend the three Mindanao activities most similar to a location's offerings.

    The ``Activity`` values of all ``df_rest`` rows whose ``Location`` equals
    ``location`` are joined into one text, and likewise their ``Description``
    values. Every row of ``df_min`` is scored against those two texts with
    spaCy's ``Doc.similarity`` and the two scores are averaged.

    Side effect: the ``act_sim``, ``desc_sim`` and ``ave_sim`` columns of the
    notebook-level ``df_min`` are (re)written on every call.

    Parameters
    ----------
    location : str
        Value matched exactly against ``df_rest['Location']``.

    Returns
    -------
    pandas.DataFrame
        The three ``df_min`` rows with the highest ``ave_sim``, best first.

    Raises
    ------
    ValueError
        If no row of ``df_rest`` has the given location. Without this check
        the query text would be empty and the returned "top 3" meaningless.
    """
    # Filter the df
    filtered_df = df_rest[df_rest['Location'] == location]
    if filtered_df.empty:
        raise ValueError(f"No entries found for location {location!r}")

    # Combine the text of all entries of the location and parse each combined
    # text ONCE; the parsed query does not change from row to row, so
    # re-parsing it inside the comprehensions is wasted work.
    act_doc = nlp(' '.join(filtered_df['Activity']))
    desc_doc = nlp(' '.join(filtered_df['Description']))

    # Get the similarity for location activity
    df_min['act_sim'] = [act_doc.similarity(nlp(text)) for text in df_min['Activity']]

    # Get the similarity for location description
    df_min['desc_sim'] = [desc_doc.similarity(nlp(text)) for text in df_min['Description']]

    # Get the average of both similarities
    df_min['ave_sim'] = df_min[['act_sim', 'desc_sim']].mean(axis=1)

    # Get the top 3
    top3 = df_min.sort_values('ave_sim', ascending=False).head(3)

    return top3


In [35]:
rec_eng('Cebu')

Unnamed: 0,index,Region,Location,Activity,Title,Description,Price,Rating,Review,Duration,act_sim,desc_sim,ave_sim
9,213,Mindanao,Surigao del Norte,tour islands beach garden,"3 Islands, Secret beach and Coral Garden Full-...","Naked island Siargao, Guyam island and Daku is...",3166.22,4.6,8,6.0,0.837293,0.963216,0.900254
15,558,Mindanao,Surigao del Norte,tour islands beach garden,"3 Islands, Secret beach and Coral Garden Full-...","Naked island Siargao, Guyam island and Daku is...",3166.22,4.6,8,6.0,0.837293,0.963216,0.900254
17,656,Mindanao,Surigao del Norte,tour land hopping island,Siargao Island Hopping & land Tour,The tri island tour is one of the best things ...,6159.74,4.5,4,8.0,0.818924,0.960268,0.889596


Recommender engine, version 2: still combines the Description text of all entries for the entered location, but the activity is entered by the user instead of being taken from the data.

In [36]:
def rec_eng_act(location, activity):
    """Recommend the three Mindanao activities closest to a location and a chosen activity.

    The ``Description`` values of all ``df_rest`` rows whose ``Location``
    equals ``location`` are joined into one text. Every row of ``df_min`` is
    scored with spaCy's ``Doc.similarity`` twice: its ``Activity`` against the
    caller-supplied ``activity`` text, and its ``Description`` against the
    combined description text. The two scores are averaged.

    Side effect: the ``act_sim``, ``desc_sim`` and ``ave_sim`` columns of the
    notebook-level ``df_min`` are (re)written on every call.

    Parameters
    ----------
    location : str
        Value matched exactly against ``df_rest['Location']``.
    activity : str
        Free-text activity to look for, e.g. ``'whale watching'``.

    Returns
    -------
    pandas.DataFrame
        The three ``df_min`` rows with the highest ``ave_sim``, best first.

    Raises
    ------
    ValueError
        If no row of ``df_rest`` has the given location. Without this check
        the description query would be empty and its scores meaningless.
    """
    # Filter the df
    filtered_df = df_rest[df_rest['Location'] == location]
    if filtered_df.empty:
        raise ValueError(f"No entries found for location {location!r}")

    # The activity query is the caller's text as-is; the description query is
    # the combined text of all entries of the location. Parse each ONCE: the
    # parsed queries do not change from row to row.
    act_doc = nlp(activity)
    desc_doc = nlp(' '.join(filtered_df['Description']))

    # Get the similarity for the requested activity
    df_min['act_sim'] = [act_doc.similarity(nlp(text)) for text in df_min['Activity']]

    # Get the similarity for location description
    df_min['desc_sim'] = [desc_doc.similarity(nlp(text)) for text in df_min['Description']]

    # Get the average of both similarities
    df_min['ave_sim'] = df_min[['act_sim', 'desc_sim']].mean(axis=1)

    # Get the top 3
    top3 = df_min.sort_values('ave_sim', ascending=False).head(3)

    return top3


In [37]:
rec_eng_act('Cebu', 'whale watching')

Unnamed: 0,index,Region,Location,Activity,Title,Description,Price,Rating,Review,Duration,act_sim,desc_sim,ave_sim
13,514,Mindanao,Surigao del Norte,hopping island,Siargao Island Hopping,"Overall, this was a well-organized Private tou...",3338.92,4.5,2,4.0,0.453957,0.9756,0.714778
0,25,Mindanao,Siargao,sightseeing cruises,Tri Island Tour in Siargao,Go on a tri-island day trip in Siargao and exp...,1450.0,4.3,396,24.0,0.465414,0.941016,0.703215
11,292,Mindanao,Surigao del Norte,tour cove jellyfish,Jellyfish sanctuary and Sohoton cove Full-Day ...,"In order to grab the best possible experience,...",4893.25,2.0,1,6.0,0.423426,0.982806,0.703116
