In [2]:
# Import required modules
import json
import folium
import os
import pandas as pd

from math import cos, asin, sqrt
#import io
#from PIL import Image
#from geopy.distance import geodesic

In [12]:
# Input files, given relative to the working directory the notebook is run from:
# the boundary GeoJSON and the attendance CSV.
geojson = "./projects/geo_data/calgary.json"
attendance = "./projects/geo_data/DecemberAttendance.csv"

In [7]:
# Parse the GeoJSON file. The GeoJSON specification mandates UTF-8, so the
# encoding is stated explicitly instead of relying on the platform default
# (which is not UTF-8 on every system).
with open(geojson, encoding="utf-8") as data_file:
    data = json.load(data_file)

# Flatten to one row per entry of the top-level 'features' list. Every column
# that comes from a feature is prefixed with 'locations_' (nested keys are
# joined with '.', e.g. 'locations_properties.name'). The top-level
# 'properties' key is requested as metadata; errors='ignore' keeps the call
# from failing when that key is absent.
df = pd.json_normalize(
    data,
    'features',
    ['properties'],
    errors='ignore',
    record_prefix='locations_',
)

In [14]:
# Select only locations_properties.name and locations_geometry.coordinates from df
clean_df = df[['locations_properties.name', 'locations_geometry.coordinates']]

# Convert to dictionary (location name -> list holding its coordinates entry)
clean_df_dict = clean_df.set_index('locations_properties.name').T.to_dict('list')

# Expand each location's coordinates into one row per point, tagged with the
# location name. The per-location frames are collected in a list and
# concatenated once: DataFrame.append was removed in pandas 2.0, and growing a
# frame inside a loop copies all previous rows on every iteration.
boundary_frames = []
for location_name, coordinates in zip(clean_df['locations_properties.name'],
                                      clean_df['locations_geometry.coordinates']):
    # NOTE(review): coordinates[0][0] is read as a list of [lon, lat] pairs,
    # i.e. only the first ring of the first polygon is used. This presumes
    # MultiPolygon-style nesting - confirm against the GeoJSON file.
    ring_df = pd.DataFrame(coordinates[0][0], columns=['lon', 'lat'])
    ring_df['location'] = location_name
    boundary_frames.append(ring_df)

if boundary_frames:
    new_df = pd.concat(boundary_frames)
else:
    # No features at all: keep the expected columns so the selection below works.
    new_df = pd.DataFrame(columns=['lon', 'lat', 'location'])

# Separate these out into a list of point records, as well as a list of just
# the neighbourhood names (one entry per point, so names repeat)
new_df = new_df[['lat', 'lon', 'location']]
new_dictionary = new_df.to_dict('records')
new_dict_list = new_df['location'].tolist()

# Read only the coordinate and attendance columns from the attendance CSV,
# force the 'lon' column to a numeric dtype, and keep a list-of-dicts copy
# (one dict per CSV row) for the nearest-location lookup.
fields = ['lat', 'lon', 'attendance']
attendance_df = pd.read_csv(attendance, usecols=fields).assign(
    lon=lambda frame: pd.to_numeric(frame["lon"])
)
attendance_dict = attendance_df.to_dict(orient='records')

In [17]:
def distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points via the haversine formula.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in decimal degrees.
        lat2, lon2: Latitude and longitude of the second point, in decimal degrees.

    Returns:
        The distance as a float, in kilometres on a sphere of diameter 12742 km
        (radius 6371 km).
    """
    p = 0.017453292519943295  # pi / 180: converts degrees to radians
    hav = 0.5 - cos((lat2-lat1)*p)/2 + cos(lat1*p)*cos(lat2*p) * (1-cos((lon2-lon1)*p)) / 2
    # Floating-point rounding can leave hav marginally outside [0, 1] (for
    # example for near-antipodal points), which would make sqrt/asin raise
    # ValueError. Clamp it back into the valid domain.
    hav = min(1.0, max(0.0, hav))
    return 12742 * asin(sqrt(hav))

def closest(data, v):
    """Return the entry of ``data`` that lies nearest to the point ``v``.

    Args:
        data: Iterable of mappings, each with 'lat' and 'lon' keys.
        v: Mapping with 'lat' and 'lon' keys describing the reference point.

    Returns:
        The element of ``data`` with the smallest ``distance`` to ``v``; the
        first such element when several tie.

    Raises:
        ValueError: If ``data`` is empty (raised by ``min``).
    """
    def separation(candidate):
        # Distance from the reference point to this candidate.
        return distance(v['lat'], v['lon'], candidate['lat'], candidate['lon'])

    return min(data, key=separation)

# Candidate points to search: the per-point location records.
tempDataList = new_dictionary

# For every attendance record, look up the nearest candidate point and keep
# the name of the location that point belongs to.
places = [closest(tempDataList, record)['location'] for record in attendance_dict]

# Attach the matched location names to the attendance dataframe as a new column.
attendance_df['location'] = places

In [20]:
# Locations that no attendance record was matched to. Sorted so the row order
# is reproducible between runs (set iteration order is not); key=str keeps the
# sort from failing if a name is missing/NaN.
to_zero = sorted(set(new_dict_list).difference(places), key=str)

# Placeholder rows for those locations: zero coordinates and zero attendance.
# The frame is built in a single constructor call because assigning a scalar
# to a column of an empty DataFrame creates an empty column, which then turns
# into NaN once the 'location' list gives the frame its rows.
to_zero_df = pd.DataFrame({
    'lat': 0.0,
    'lon': 0.0,
    'location': to_zero,
    'attendance': 0,
})

# Concat the two
total_attendance_df = pd.concat([attendance_df, to_zero_df])

# Total attendance per location (a Series indexed by location name)
plot_df = total_attendance_df.groupby("location")["attendance"].sum()

In [22]:
# Base map centred on the given [lat, lon] at zoom level 10.
geo_map = folium.Map(location=[51.0447, -114.0719], zoom_start=10)

# Choropleth layer: shades each GeoJSON feature by its total attendance,
# matching features to rows through the feature's properties.name.
attendance_layer = folium.Choropleth(
    geo_data=geojson,
    data=plot_df,
    columns=['location', 'attendance'],
    key_on='feature.properties.name',
    fill_color='YlGnBu',
    fill_opacity=0.8,
    line_opacity=0.7,
    legend_name='Q4 2022 Attendance',
)
attendance_layer.add_to(geo_map)

# Additional tile layer, added after the choropleth.
base_tiles = folium.TileLayer('cartodbpositron')
base_tiles.add_to(geo_map)

# Last expression of the cell: renders the map in the notebook.
geo_map