## to-do
2. Get POI data for Haarlem
    - Define area manually or use admin boundaries?
    - Start with OSM, see if we can find ratings there or some metric for popularity.
    - Then see if we need google maps
    - use saturated places algorithm for Google Maps
    - Confirm POI list that needs to pulled for either

2. Routing Algo
    - Look up relevant routing algorithms
    - Only walking paths? Or where walking is possible?
    - focus on FOSS
    - See how we can optimize for different types of POI
    - See how we can optimize for time for e.g. I don't walk to walk for more than 4 hours
    - Factor in time taken to see each location - smart algo or fixed time per attraction?
    - Factor in popularity of the location
    - Some compromise between popularity of the location vs time taken to get there? Basicall assign weights to edges? 
    - Starting location: Central train station?
    - Visualization: folium or kepler.gl?


In [63]:
import geopandas as gpd
import pandas as pd
import numpy as np
import requests as re
import osmnx as ox
import h3
import os
import ast
import tqdm
import shapely as shp
import time
import json
import folium
from shapely.ops import unary_union, transform


In [None]:
file_path = '../config/API_KEY.json'
with open(file=file_path,mode= 'r') as f:
    json_data = json.load(f)
API_KEY = json_data['apiKey']



In [12]:
def func_visualize_hexagons(hexagons, color="red", folium_map=None):
    """
    original source: https://nbviewer.org/github/uber/h3-py-notebooks/blob/master/notebooks/usage.ipynb

    hexagons is a list of hexcluster. Each hexcluster is a list of hexagons. 
    eg. [[hex1, hex2], [hex3, hex4]]
    """
    polylines = []
    lat = []
    lng = []
    for hex in hexagons:
        polygons = h3.h3_set_to_multi_polygon([hex], geo_json=False)
        # Flatten polygons into loops.
        outlines = [loop for polygon in polygons for loop in polygon]
        polyline = [outline + [outline[0]] for outline in outlines][0]
        lat.extend(map(lambda v:v[0],polyline))
        lng.extend(map(lambda v:v[1],polyline))
        polylines.append(polyline)
    
    if folium_map is None:
        m = folium\
            .Map(location=[sum(lat)/len(lat),
                        sum(lng)/len(lng)],
                        zoom_start=9, 
                        tiles='cartodbpositron')
    else:
        m = folium_map
    for polyline in polylines:
        my_PolyLine=folium.PolyLine(locations=polyline,weight=2,color=color)
        m.add_child(my_PolyLine)
    return m

def func_get_hex_radius(row,BUFFER):
    edge_length = h3.edge_length(row.hex_resolution,'m')
    radius = edge_length
    radius = radius+edge_length*BUFFER
    return radius


In [None]:
def func_get_places_poi(lat,lng,resolution,type,api_key):
    
    # Nearby sarch API URL
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

    params = {
    'location': f'{lat},{lng}',
    'radius': resolution,
    'types': type,
    'key': api_key}

    response = re.get(url, params=params)
    places_df = []

    results = json.loads(response.content)

    places_df.append(results['results'])

    while 'next_page_token' in results:
        time.sleep(1)
        params['pagetoken'] = results['next_page_token']
        response = re.get(url,params=params)
        results = json.loads(response.content)
        places_df.append(results['results'])
        
    results_df = pd.concat([pd.DataFrame(df) for df in places_df])
    results_df = results_df.reset_index(drop=True)
    
    return results_df

# Location for Haarlem near Haarlem centraal
# lat,lng = 52.387111, 4.638286

# See all POI types for places API here: https://developers.google.com/maps/documentation/places/web-service/supported_types
# results_df = func_get_places_poi(lat=lat,lng=lng,
#                                  resolution=2000,
#                                  type='tourist_attraction', 
#                                  api_key=API_KEY)

# results_df[['place_id','name','types','user_ratings_total','vicinity']].head()
# results_df

Unnamed: 0,business_status,geometry,icon,icon_background_color,icon_mask_base_uri,name,opening_hours,photos,place_id,plus_code,rating,reference,scope,types,user_ratings_total,vicinity
0,OPERATIONAL,"{'location': {'lat': 52.39047799999999, 'lng':...",https://maps.gstatic.com/mapfiles/place_api/ic...,#13B5C7,https://maps.gstatic.com/mapfiles/place_api/ic...,Het Dolhuys,{'open_now': False},"[{'height': 3120, 'html_attributions': ['<a hr...",ChIJxfcBs3PvxUcReOojnqq1zx4,"{'compound_code': '9JRQ+56 Haarlem, Netherland...",4.0,ChIJxfcBs3PvxUcReOojnqq1zx4,GOOGLE,"[museum, tourist_attraction, cafe, food, store...",1080.0,"Schotersingel 2, Haarlem"
1,OPERATIONAL,"{'location': {'lat': 52.3810706, 'lng': 4.6373...",https://maps.gstatic.com/mapfiles/place_api/ic...,#7B9EB0,https://maps.gstatic.com/mapfiles/place_api/ic...,The St. Bavo Church in Haarlem,{'open_now': False},"[{'height': 3024, 'html_attributions': ['<a hr...",ChIJFUIK6mvvxUcRqw1Oz3TX7yw,"{'compound_code': '9JJP+CW Haarlem, Netherland...",4.6,ChIJFUIK6mvvxUcRqw1Oz3TX7yw,GOOGLE,"[tourist_attraction, church, place_of_worship,...",3916.0,"Grote Markt 22, Haarlem"
2,OPERATIONAL,"{'location': {'lat': 52.3765894, 'lng': 4.6336...",https://maps.gstatic.com/mapfiles/place_api/ic...,#13B5C7,https://maps.gstatic.com/mapfiles/place_api/ic...,Frans Hals Museum,{'open_now': False},"[{'height': 2976, 'html_attributions': ['<a hr...",ChIJO9v2L0DvxUcRdU0VRVnyzUg,"{'compound_code': '9JGM+JF Haarlem, Netherland...",4.5,ChIJO9v2L0DvxUcRdU0VRVnyzUg,GOOGLE,"[tourist_attraction, museum, point_of_interest...",2890.0,"Groot Heiligland 62, Haarlem"
3,OPERATIONAL,"{'location': {'lat': 52.38115209999999, 'lng':...",https://maps.gstatic.com/mapfiles/place_api/ic...,#13B5C7,https://maps.gstatic.com/mapfiles/place_api/ic...,De Hallen Haarlem,{'open_now': False},"[{'height': 1670, 'html_attributions': ['<a hr...",ChIJwXIDmGvvxUcR-sbrPP5SCEs,"{'compound_code': '9JJP+FC Haarlem, Netherland...",3.9,ChIJwXIDmGvvxUcR-sbrPP5SCEs,GOOGLE,"[museum, tourist_attraction, point_of_interest...",495.0,"Grote Markt 16, Haarlem"
4,OPERATIONAL,"{'location': {'lat': 52.3803511, 'lng': 4.6403...",https://maps.gstatic.com/mapfiles/place_api/ic...,#13B5C7,https://maps.gstatic.com/mapfiles/place_api/ic...,Teylers Museum,{'open_now': False},"[{'height': 9000, 'html_attributions': ['<a hr...",ChIJbeZoEmnvxUcRLdbE56r1Ufo,"{'compound_code': '9JJR+44 Haarlem, Netherland...",4.5,ChIJbeZoEmnvxUcRLdbE56r1Ufo,GOOGLE,"[tourist_attraction, museum, point_of_interest...",4920.0,"Spaarne 16, Haarlem"
5,OPERATIONAL,"{'location': {'lat': 52.3773563, 'lng': 4.6308...",https://maps.gstatic.com/mapfiles/place_api/ic...,#7B9EB0,https://maps.gstatic.com/mapfiles/place_api/ic...,Proveniershuis,{'open_now': True},"[{'height': 3672, 'html_attributions': ['<a hr...",ChIJz59WVRXvxUcRWWkx0fZuBh8,"{'compound_code': '9JGJ+W8 Haarlem, Netherland...",4.2,ChIJz59WVRXvxUcRWWkx0fZuBh8,GOOGLE,"[tourist_attraction, point_of_interest, establ...",157.0,"Grote Houtstraat 142D, Haarlem"
6,OPERATIONAL,"{'location': {'lat': 52.3910141, 'lng': 4.6531...",https://maps.gstatic.com/mapfiles/place_api/ic...,#13B5C7,https://maps.gstatic.com/mapfiles/place_api/ic...,Barrel Organ Museum Haarlem,{'open_now': False},"[{'height': 2976, 'html_attributions': ['<a hr...",ChIJjwIPdnzvxUcRr0xJo-uq0Wo,"{'compound_code': '9MR3+C6 Haarlem, Netherland...",4.5,ChIJjwIPdnzvxUcRr0xJo-uq0Wo,GOOGLE,"[museum, tourist_attraction, point_of_interest...",111.0,"Küppersweg 3, Haarlem"
7,OPERATIONAL,"{'location': {'lat': 52.3892523, 'lng': 4.6379...",https://maps.gstatic.com/mapfiles/place_api/ic...,#4DB546,https://maps.gstatic.com/mapfiles/place_api/ic...,De Bolwerken,{'open_now': True},"[{'height': 1600, 'html_attributions': ['<a hr...",ChIJa-SPXXLvxUcR7SESkcnvoJ0,"{'compound_code': '9JQQ+P5 Haarlem, Netherland...",4.6,ChIJa-SPXXLvxUcR7SESkcnvoJ0,GOOGLE,"[park, tourist_attraction, point_of_interest, ...",476.0,"De Bolwerken, MK, Haarlem"
8,OPERATIONAL,"{'location': {'lat': 52.3818564, 'lng': 4.6429...",https://maps.gstatic.com/mapfiles/place_api/ic...,#7B9EB0,https://maps.gstatic.com/mapfiles/place_api/ic...,Teylers Hofje,{'open_now': False},"[{'height': 3024, 'html_attributions': ['<a hr...",ChIJc6_fzWjvxUcRClPQo6H1g4A,"{'compound_code': '9JJV+P5 Haarlem, Netherland...",4.4,ChIJc6_fzWjvxUcRClPQo6H1g4A,GOOGLE,"[tourist_attraction, point_of_interest, establ...",110.0,"Koudenhorn 64, Haarlem"
9,OPERATIONAL,"{'location': {'lat': 52.38087639999999, 'lng':...",https://maps.gstatic.com/mapfiles/place_api/ic...,#13B5C7,https://maps.gstatic.com/mapfiles/place_api/ic...,Stadsklooster Haarlem,{'open_now': False},,ChIJ6_9Du6jvxUcR8yRYqo2_-Cg,"{'compound_code': '9JJP+9M Haarlem, Netherland...",,ChIJ6_9Du6jvxUcR8yRYqo2_-Cg,GOOGLE,"[tourist_attraction, point_of_interest, establ...",,"Oude Groenmarkt 3-5, Haarlem"


In [4]:
place_name = "Haarlem, Netherlands"
# Get the boundary of the place
gdf = ox.geocode_to_gdf(place_name)
# Get the boundary polygon
haarlem_boundary = gdf.geometry.values[0]

In [5]:
centroid_haarlem = haarlem_boundary.centroid
m_ = folium.Map(location=[centroid_haarlem.y, centroid_haarlem.x], zoom_start=13)
# Add the boundary to the map
folium.GeoJson(
    haarlem_boundary.__geo_interface__,
    style_function=lambda x: {'color': 'blue', 'weight': 2, 'fillOpacity': 0.1}
).add_to(m_)
m_

In [6]:
# flip coordinate sequence from longitude, latitude to latitude, longitude as this is how the h3 API reads it
def flip(x,y):
    return y,x
haarlem_boundary = transform(flip,haarlem_boundary)

In [7]:
# Convert to plan geojson object
haarlem_boundary_geojson = gpd.GeoSeries([haarlem_boundary])\
    .__geo_interface__['features'][0]['geometry']

# Get h3 cell IDs for cells within the bounding polygon / geojson
res8_haarlem_hex = h3.polyfill(haarlem_boundary_geojson,8)

In [8]:
m = func_visualize_hexagons(hexagons=res8_haarlem_hex)
display(m)

In [9]:
res8_haarlem_hex = h3.polyfill(haarlem_boundary_geojson,8)
res9_haarlem_hex = h3.polyfill(haarlem_boundary_geojson,9)
res10_haarlem_hex = h3.polyfill(haarlem_boundary_geojson,10)

res8_haarlem_hex = pd.DataFrame({'hex_id':list(res8_haarlem_hex),'hex_resolution':8})
res9_haarlem_hex = pd.DataFrame({'hex_id':list(res9_haarlem_hex),'hex_resolution':9})
res10_haarlem_hex = pd.DataFrame({'hex_id':list(res10_haarlem_hex),'hex_resolution':10})
# Concatenate the three dataframes
haarlem_hex_df = pd.concat([res8_haarlem_hex,res9_haarlem_hex,res10_haarlem_hex],ignore_index=True)


In [None]:
haarlem_hex_df['hex_area'] = haarlem_hex_df.hex_id.apply(lambda x: h3.cell_area(x,unit='m^2'))
haarlem_hex_df['hex_radius_places_api'] = haarlem_hex_df.apply(lambda row: func_get_hex_radius(row,0.15),axis=1)
haarlem_hex_df['centroid'] = haarlem_hex_df.hex_id.apply(lambda x: h3.h3_to_geo(x))

In [14]:
haarlem_hex_df.head()

Unnamed: 0,hex_id,hex_resolution,hex_area,hex_radius_places_api,centroid
0,8819682513fffff,8,615821.403749,530.557887,"(52.391097862889424, 4.670125339200063)"
1,88196825adfffff,8,615941.870444,530.557887,"(52.36678421945318, 4.635951467369167)"
2,881968251dfffff,8,615587.827867,530.557887,"(52.40471474475745, 4.662419418478945)"
3,8819682507fffff,8,615414.625032,530.557887,"(52.40617422858309, 4.6376171249224445)"
4,88196825a1fffff,8,616058.623742,530.557887,"(52.35997342987185, 4.639804615709505)"


In [25]:
max_haarlem_hex_def = haarlem_hex_df[haarlem_hex_df.hex_resolution==haarlem_hex_df.hex_resolution.min()]

# We created a seperate folder to store all output, in case the code is interrupted we can use
# this list to make sure we're not repeating any cells.

# searched_cells = os.listdir(f'../data/h3cell_output_cafe/')
# searched_cells = set([x.replace('.csv','') for x in searched_cells])

# Setting POI type for cafe.
# type = 'cafe'
ALL_TYPES = ['tourist_attraction','cafe','museum','art_gallery','park','restaurant','zoo',
             'art_gallery','theme_park', 'library','church','shopping_mall']

for type in ALL_TYPES:
    # Create a directory for each type if it doesn't exist
    if not os.path.exists(f'../data/h3cell_output_{type}/'):
        os.makedirs(f'../data/h3cell_output_{type}/')

    print(type)

    searched_cells = os.listdir(f'../data/h3cell_output_{type}/')
    # If the directory is empty, we can start from scratch
    if(len(searched_cells)>0):
        searched_cells = set([x.replace('.csv','') for x in searched_cells])

    # Loop through the hexagons and get the POI data
    for ind,row in tqdm.tqdm(max_haarlem_hex_def.iterrows(),
                            total=max_haarlem_hex_def.shape[0]):
        
        # Parsing the highest resolution cells and saving outputs in a csv file
        h3_cell = row.hex_id
        if(h3_cell not in searched_cells):
            lat,lng = row.centroid[0],row.centroid[1]
            resolution = row.hex_radius_places_api
            # Call the Places using the the function we wrote earlier
            df = func_get_places_poi(lat=lat,lng=lng,resolution=resolution,type=type,api_key=API_KEY)
            df['hex_id'] = h3_cell
            cell_save_path = f'../data/h3cell_output_{type}/{h3_cell}.csv'
            df.to_csv(cell_save_path,index=False)
            
            # Key part of the algorithm, if the API returns equal or more than 60 results
            if(len(df)>=60):
                cell_children = h3.h3_to_children(df['hex_id'][0])
                child_df = haarlem_hex_df[haarlem_hex_df.hex_id.isin(cell_children)]
                
                # res 7 ~ 1400m
                for ind,row in child_df.iterrows():
                    h3_cell = row.hex_id
                    lat,lng = row.centroid[0],row.centroid[1]
                    resolution = row.hex_radius_places_api
                    df = func_get_places_poi(lat=lat,lng=lng,resolution=resolution,type=type,api_key=API_KEY)
                    df['hex_id'] = h3_cell
                    cell_save_path = f'../data/h3cell_output_{type}/{h3_cell}.csv'
                    df.to_csv(cell_save_path,index=False)

                    # Repeat same logic as above and all other resolutions  
                    if(len(df)>=60):
                        cell_children = h3.h3_to_children(df['hex_id'][0])
                        child_df = haarlem_hex_df[haarlem_hex_df.hex_id.isin(cell_children)]

                        # res 8 ~ 500m
                        for ind,row in child_df.iterrows():
                            h3_cell = row.hex_id
                            lat,lng = row.centroid[0],row.centroid[1]
                            resolution = row.hex_radius_places_api
                            df = func_get_places_poi(lat=lat,lng=lng,resolution=resolution,type=type,api_key=API_KEY)
                            df['hex_id'] = h3_cell
                            cell_save_path = f'../data/h3cell_output_{type}/{h3_cell}.csv'
                            df.to_csv(cell_save_path,index=False)


tourist_attraction


  0%|          | 0/49 [00:00<?, ?it/s]

100%|██████████| 49/49 [00:09<00:00,  5.00it/s]


cafe


100%|██████████| 49/49 [00:15<00:00,  3.11it/s]


museum


100%|██████████| 49/49 [00:08<00:00,  5.95it/s]


art_gallery


100%|██████████| 49/49 [00:11<00:00,  4.42it/s]


park


100%|██████████| 49/49 [00:09<00:00,  5.15it/s]


restaurant


100%|██████████| 49/49 [00:22<00:00,  2.19it/s]


zoo


100%|██████████| 49/49 [00:08<00:00,  5.97it/s]


art_gallery


100%|██████████| 49/49 [00:00<00:00, 7410.16it/s]


theme_park


100%|██████████| 49/49 [01:13<00:00,  1.50s/it]


library


100%|██████████| 49/49 [00:08<00:00,  5.64it/s]


church


100%|██████████| 49/49 [00:09<00:00,  5.35it/s]


shopping_mall


100%|██████████| 49/49 [00:08<00:00,  5.71it/s]


In [None]:
# read all the dataframes and concatenate them
paths = [] 
for type in ALL_TYPES:
    paths += [f'../data/h3cell_output_{type}/{x}' for x in os.listdir(f'../data/h3cell_output_{type}/')]
n_df = pd.concat([pd.read_csv(path) for path in paths])
n_df.place_id.nunique() # number of unique places

# write
# n_df.to_csv(f'../data/h3cell_output_all.csv',index=False)
# n_df.to_pickle(f'../data/h3cell_output_all.pkl')

1399

In [None]:
n_df['geometry'] = n_df['geometry'].apply(lambda x: ast.literal_eval(x))
n_df['lat'] = n_df['geometry'].apply(lambda x: x['location']['lat'])
n_df['lng'] = n_df['geometry'].apply(lambda x: x['location']['lng'])
n_gdf = n_df.copy()
n_gdf['geometry'] = n_gdf['geometry'].apply(lambda row: shp.geometry.Point(row['location']['lat'],row['location']['lng']))
n_gdf = gpd.GeoDataFrame(n_gdf, geometry='geometry', crs='EPSG:4326')

In [92]:
map_ = folium.Map(location=[n_gdf.lat.mean(),
                            n_gdf.lng.mean()],
        zoom_start=13,
        tiles='cartodbpositron')
for i in range(len(n_gdf)):
    folium.CircleMarker(location=[n_gdf.iloc[i]['lat'], n_gdf.iloc[i]['lng']],
                  popup=n_gdf.iloc[i]['name'],
                  radius=5,
                  fill=True,
                  weight=1,
                  fill_opacity=1).add_to(map_)

In [93]:
map_