# Travel Planner location recommendation algorithm prototype

Step 1: Set up our environment and load in required packages

In [1]:
import pandas as pd
import numpy as np
import time as tm
import datetime
import geopy.distance

In [2]:
# Global Constant

opening = ['', 'mon_o', 'tue_o', 'wed_o','thurs_o', 'fri_o', 'sat_o', 'sun_o']
closing = ['', 'mon_c', 'tue_c', 'wed_c','thurs_c', 'fri_c', 'sat_c', 'sun_c']

Step 2: Read in data

In [3]:
data_pwd = "../../Data/Sydney.xlsx"
db = pd.read_excel(data_pwd, sheet_name = 0, header = 0)
db.head(5)

Unnamed: 0,name,category,location,timezone_type,timezone,latitude,longtitude,postcode,free_f,cost_min,...,wed_c,thurs_o,thurs_o.1,fri_o,fri_c,sat_o,sat_c,sun_o,sun_c,ready_f
0,Mogo Zoo,Theme Park,Sydney,UTC,10,-35.78217,150.11169,2536.0,0.0,19.0,...,17:00:00,09:00:00,17:00:00,09:00:00,17:00:00,09:00:00,17:00:00,09:00:00,17:00:00,1
1,Sydney Aquarium,Theme Park,Sydney,UTC,10,-33.86993,151.20203,2000.0,0.0,39.0,...,18:00:00,10:00:00,18:00:00,10:00:00,18:00:00,10:00:00,18:00:00,10:00:00,18:00:00,1
2,University of Sydney,Culture,Sydney,UTC,10,-33.88778,151.18722,2006.0,1.0,,...,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,1
3,The University of New South Wales,Culture,Sydney,UTC,10,-33.9235,151.2262,2052.0,1.0,,...,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,1
4,Chifley Tower,Shopping,Sydney,UTC,10,-33.8659,151.212,2000.0,1.0,,...,18:00:00,09:30:00,18:00:00,09:30:00,18:00:00,09:30:00,18:00:00,09:30:00,18:00:00,1


In [4]:
db.dtypes

name              object
category          object
location          object
timezone_type     object
timezone           int64
latitude         float64
longtitude       float64
postcode         float64
free_f           float64
cost_min         float64
cost_max         float64
indoor_f         float64
outdoor_f        float64
family_f         float64
mon_o             object
mon_c             object
tues_o            object
tues_c            object
wed_o             object
wed_c             object
thurs_o           object
thurs_o.1         object
fri_o             object
fri_c             object
sat_o             object
sat_c             object
sun_o             object
sun_c             object
ready_f            int64
dtype: object

Step 3: Prepare our dataset for usage

In [5]:
db = db[db['ready_f'] == 1]
db[['name', 'category', 'location','timezone_type']] = db[['name', 'category', 'location','timezone_type']].astype(str)
db[['timezone', 'postcode']] = db[['timezone', 'postcode']].astype(int)
db[['latitude', 'longtitude', 'cost_min', 'cost_max']] = db[['latitude', 'longtitude', 'cost_min', 'cost_max']].astype(float)
db[['free_f', 'indoor_f', 'outdoor_f', 'family_f']] = db[['free_f', 'indoor_f', 'outdoor_f', 'family_f']].astype(bool)
db = db.reset_index(drop=True)

In [6]:
db.head(10)

Unnamed: 0,name,category,location,timezone_type,timezone,latitude,longtitude,postcode,free_f,cost_min,...,wed_c,thurs_o,thurs_o.1,fri_o,fri_c,sat_o,sat_c,sun_o,sun_c,ready_f
0,Mogo Zoo,Theme Park,Sydney,UTC,10,-35.78217,150.11169,2536,False,19.0,...,17:00:00,09:00:00,17:00:00,09:00:00,17:00:00,09:00:00,17:00:00,09:00:00,17:00:00,1
1,Sydney Aquarium,Theme Park,Sydney,UTC,10,-33.86993,151.20203,2000,False,39.0,...,18:00:00,10:00:00,18:00:00,10:00:00,18:00:00,10:00:00,18:00:00,10:00:00,18:00:00,1
2,University of Sydney,Culture,Sydney,UTC,10,-33.88778,151.18722,2006,True,,...,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,1
3,The University of New South Wales,Culture,Sydney,UTC,10,-33.9235,151.2262,2052,True,,...,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,1
4,Chifley Tower,Shopping,Sydney,UTC,10,-33.8659,151.212,2000,True,,...,18:00:00,09:30:00,18:00:00,09:30:00,18:00:00,09:30:00,18:00:00,09:30:00,18:00:00,1
5,Sydney Tower,Culture,Sydney,UTC,10,-33.8705,151.209,2000,False,20.0,...,21:00:00,09:00:00,21:00:00,09:00:00,21:00:00,09:00:00,21:00:00,09:00:00,21:00:00,1
6,Central Station,Culture,Sydney,UTC,10,-33.88274,151.20646,2000,True,,...,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,1
7,Parliament House,Culture,Sydney,UTC,10,-33.8673,151.213,2000,True,,...,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,1
8,Parliament House Tour,Culture,Sydney,UTC,10,-33.8732,151.206,2000,True,,...,00:00:00,00:00:00,00:00:00,13:30:00,13:30:00,00:00:00,00:00:00,00:00:00,00:00:00,1
9,Sydney Town Hall,Culture,Sydney,UTC,10,-33.8732,151.206,2000,True,,...,18:00:00,08:00:00,18:00:00,08:00:00,18:00:00,00:00:00,00:00:00,00:00:00,00:00:00,1


In [7]:
db['wed_c'][1].hour

18

Step 4: Building our functions

In [8]:
# Our web app or mobile app should auto detect user lat long and ask for a number of inputs from user
def usr_init(usr_lat, usr_long):
    usr_init_ls = []
    print("Tell us something about how you would like your trip to be set up!")
    print("Which day are you planning for?")
    day = input()
    print("Your trip will start from:")
    begin_hr = input()
    print("Your trip will end at:")
    end_hr = input()
    print("How many locations would you like to visit?")
    loc_count = input()
    print("How much time would you allow for travelling in total (in hours)?")
    trv_time = input()
    print("Thanks for the information! We will now crunch some numbers and find locations that best suit your requirements!")
    usr_init_ls = [float(usr_lat), float(usr_long), int(day), int(begin_hr), int(end_hr), int(loc_count), int(trv_time)]
    return(usr_init_ls)

In [9]:
# We save our results from usr_init into usr_init_result which is a list
usr_init_result = usr_init(-33.7704301, 151.0753451)

Tell us something about how you would like your trip to be set up!
Which day are you planning for?
3
Your trip will start from:
9
Your trip will end at:
15
How many locations would you like to visit?
3
How much time would you allow for travelling in total (in hours)?
2
Thanks for the information! We will now crunch some numbers and find locations that best suit your requirements!


In [10]:
# Demonstration of expected usr_init_result
usr_init_result

[-33.7704301, 151.0753451, 3, 9, 15, 3, 2]

In [11]:
# Demonstration of distance calculation
coords_1 = (usr_init_result[0], usr_init_result[1])
coords_2 = (db['latitude'][1], db['longtitude'][1])
geopy.distance.geodesic(coords_1, coords_2).km

16.104578372537055

In [12]:
# For first location, we should include a travel distance parameter scaled 1-3 to find locations for user
# Should it be a random parameter, or default 1?
def first_loc(usr_init_result, trav_dist = 1):
    db_temp = db
    db_temp['usr_trav_dist'] = np.nan
    for i in range(0, len(db_temp.index)):
        db_temp.iat[i,db_temp.columns.get_loc("usr_trav_dist")] = geopy.distance.geodesic((usr_init_result[0], usr_init_result[1]), 
                                                                         (db_temp['latitude'][i], db_temp['longtitude'][i])).km
    if trav_dist == 1:
        rcmd1 = db_temp[db_temp['usr_trav_dist'] <= 10]
    elif trav_dist == 2:
        rcmd1 = db_temp[db_temp['usr_trav_dist'] <= 30]
    else:
        rcmd1 = db_temp
    return(rcmd1)
    
    

In [13]:
# Demonstration of first location finder function
location1 = first_loc(usr_init_result, 1)
location1.head()

Unnamed: 0,name,category,location,timezone_type,timezone,latitude,longtitude,postcode,free_f,cost_min,...,thurs_o,thurs_o.1,fri_o,fri_c,sat_o,sat_c,sun_o,sun_c,ready_f,usr_trav_dist
92,"Bicentennial Park, Glebe",Nature,Sydney,UTC,132,-33.8493,151.078,2037,True,,...,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,1,8.75162
96,Macquarie Centre,Shopping,Sydney,UTC,136,-33.776366,151.117856,2113,False,0.0,...,09:30:00,21:00:00,09:30:00,18:00:00,09:30:00,18:00:00,10:00:00,18:00:00,1,3.992465
99,DFO Homebush,Shopping,Sydney,UTC,139,-33.855473,151.076857,2140,False,0.0,...,10:00:00,20:00:00,10:00:00,18:00:00,10:00:00,18:00:00,10:00:00,18:00:00,1,9.433936


The way the code is set up, it has the following assumptions and logical flow:
<ol><li>user first input their preference 
</li><li>we will recommend a first location based on their pref and travel dist metric
</li><li>we recommend the remaining locations based on the first loc chosen
</li></ol>
When we integrate the code with web, the users will simply click buttons or turn some dials to set their preference which will be captured and fed into backend python code

In [14]:
# We need to filter out locations that are not available during the hours specified by the user
def loc_filter(rcmd, usr_init_result):
    cols = []
    if usr_init_result[2] == 1:
        cols = ["mon_o", "mon_c"]
    elif usr_init_result[2] == 2:
        cols = ["tues_o", "tues_c"]
    elif usr_init_result[2] == 3:
        cols = ["wed_o", "wed_c"]
    elif usr_init_result[2] == 4:
        cols = ["thurs_o", "thurs_c"]
    elif usr_init_result[2] == 5:
        cols = ["fri_o", "fri_c"]
    elif usr_init_result[2] == 6:
        cols = ["sat_o", "sat_c"]
    elif usr_init_result[2] == 7:
        cols = ["sun_o", "sun_c"]
    rcmd_fil = rcmd[(rcmd[cols[0]].apply(lambda x: x.hour) < int(usr_init_result[3])) | 
                (rcmd[cols[1]].apply(lambda x: x.hour) > int(usr_init_result[4]))]
    return(rcmd_fil)

In [15]:
# We need to filter out locations that are not available during the hours specified by the user

def loc_filter_mod(rcmd, usr_init_result):

    rcmd_fil = rcmd[(rcmd[opening[usr_init_result[2]]].apply(lambda x: x.hour) < int(usr_init_result[3])) | 
            (rcmd[closing[usr_init_result[2]]].apply(lambda x: x.hour) > int(usr_init_result[4]))]
    
    return(rcmd_fil)


In [16]:
# Demonstration of the loc_filter_mod function
location1_fil_mod = loc_filter_mod(location1, usr_init_result)
location1_fil_mod

Unnamed: 0,name,category,location,timezone_type,timezone,latitude,longtitude,postcode,free_f,cost_min,...,thurs_o,thurs_o.1,fri_o,fri_c,sat_o,sat_c,sun_o,sun_c,ready_f,usr_trav_dist
92,"Bicentennial Park, Glebe",Nature,Sydney,UTC,132,-33.8493,151.078,2037,True,,...,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,1,8.75162
96,Macquarie Centre,Shopping,Sydney,UTC,136,-33.776366,151.117856,2113,False,0.0,...,09:30:00,21:00:00,09:30:00,18:00:00,09:30:00,18:00:00,10:00:00,18:00:00,1,3.992465
99,DFO Homebush,Shopping,Sydney,UTC,139,-33.855473,151.076857,2140,False,0.0,...,10:00:00,20:00:00,10:00:00,18:00:00,10:00:00,18:00:00,10:00:00,18:00:00,1,9.433936


In [17]:
# Demonstration of the loc_filter function
location1_fil_mod = loc_filter(location1, usr_init_result)
location1_fil_mod

Unnamed: 0,name,category,location,timezone_type,timezone,latitude,longtitude,postcode,free_f,cost_min,...,thurs_o,thurs_o.1,fri_o,fri_c,sat_o,sat_c,sun_o,sun_c,ready_f,usr_trav_dist
92,"Bicentennial Park, Glebe",Nature,Sydney,UTC,132,-33.8493,151.078,2037,True,,...,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,00:00:00,23:59:00,1,8.75162
96,Macquarie Centre,Shopping,Sydney,UTC,136,-33.776366,151.117856,2113,False,0.0,...,09:30:00,21:00:00,09:30:00,18:00:00,09:30:00,18:00:00,10:00:00,18:00:00,1,3.992465
99,DFO Homebush,Shopping,Sydney,UTC,139,-33.855473,151.076857,2140,False,0.0,...,10:00:00,20:00:00,10:00:00,18:00:00,10:00:00,18:00:00,10:00:00,18:00:00,1,9.433936
