In [44]:
import pandas as pd #importing the Pandas Library as 'pd'
from keplergl import KeplerGl #importing KeplerGl
import geopandas as gpd #importing geopandas as 'gpd'
from shapely.geometry import Point
import xml.etree.ElementTree as ET

from geopy.geocoders import Nominatim # Imports the geopy for the geocoding

In [45]:

map = KeplerGl(height=600, width=800)

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


In [46]:
filename = "Liverpool.geojson"
file = open(filename)
df = gpd.read_file(file)

In [130]:
list(df)

['LSOA11CD',
 'LSOA11NM',
 'LSOA code (2011)',
 'LSOA name (2011)_x',
 'Local Authority District code (2019)_x',
 'Local Authority District name (2019)_x',
 'Total population: mid 2015 (excluding prisoners)',
 'Dependent Children aged 0-15: mid 2015 (excluding prisoners)',
 'Population aged 16-59: mid 2015 (excluding prisoners)',
 'Older population aged 60 and over: mid 2015 (excluding prisoners)',
 'Working age population 18-59/64: for use with Employment Deprivation Domain (excluding prisoners)',
 'LSOA name (2011)_y',
 'Local Authority District code (2019)_y',
 'Local Authority District name (2019)_y',
 'Income Domain numerator',
 'Income Deprivation Affecting Children Index (IDACI) numerator',
 'Income Deprivation Affecting Older People Index (IDAOPI) numerator',
 'Employment Domain numerator',
 'Staying on in education post 16 indicator',
 'Entry to higher education indicator',
 'Adult skills and English language proficiency indicator',
 'Years of potential life lost indicator',
 

In [143]:
metrics=['Income Domain numerator',
 'Income Deprivation Affecting Children Index (IDACI) numerator',
 'Income Deprivation Affecting Older People Index (IDAOPI) numerator',
 'Employment Domain numerator',
 'Staying on in education post 16 indicator',
 'Entry to higher education indicator',
 'Adult skills and English language proficiency indicator',
 'Years of potential life lost indicator',
 'Comparative illness and disability ratio indicator',
 'Acute morbidity indicator',
 'Mood and anxiety disorders indicator',
 'Road distance to a post office indicator (km)',
 'Road distance to a primary school indicator (km)',
 'Road distance to general store or supermarket indicator (km)',
 'Road distance to a GP surgery indicator (km)',
 'Household overcrowding indicator',
 'Homelessness indicator (rate per 1000 households)',
 'Owner-occupation affordability (component of housing affordability indicator)',
 'Private rental affordability (component of housing affordability indicator)',
 'Housing affordability indicator',
 'Housing in poor condition indicator',
 'Houses without central heating indicator',
 'Road traffic accidents indicator',
 'Nitrogen dioxide (component of air quality indicator)',
 'Benzene (component of air quality indicator)',
 'Sulphur dioxide (component of air quality indicator)',
 'Particulates (component of air quality indicator)',
 'Air quality indicator',
        ]

In [142]:
import numpy as np
for elem in metrics:   
    df[elem+"_rank"]=np.ceil(df[elem].rank( pct=True).mul(5)).astype('Int64')
    

In [146]:
df.head(n=5)

# Write out the food outlets location as a geojson for use in the mapbox map
df.to_file("Liverpool.geojson", driver='GeoJSON')

In [48]:
map.add_data(data=df, name="Liverpool_lsoa")


In [49]:
! pip3 install geopy



In [50]:
# Notes on terms
# Select a type polygon
# Select geo as geometry
# Strokes refer to the width of the line
# Fill color changes the color of the polygons
# Edit the 'new layer' to change the names of the
# Press the eye icon in the exported widget to bring the layer to the top


In [51]:
# Initialises the geocoding
geolocator = Nominatim(user_agent="Liverpool_analysis")


In [52]:
# The geocoding function

def long_lat_func(x):
  try:
    location = geolocator.geocode(str(x))
    # Parse the tuple
    return location.latitude , location.longitude
  except:
    return None, None

# Adding in the restaurant locations

In [54]:
# Example of the xml that is being parsed
'''
<Header>
    <ExtractDate>2021-01-27</ExtractDate>
    <ItemCount>3959</ItemCount>
    <ReturnCode>Success</ReturnCode>
  </Header>
  <EstablishmentCollection>
    <EstablishmentDetail>
      <FHRSID>1264123</FHRSID>
      <LocalAuthorityBusinessID>78306</LocalAuthorityBusinessID>
      <BusinessName>@Desserts</BusinessName>
      <BusinessType>Takeaway/sandwich shop</BusinessType>
      <BusinessTypeID>7844</BusinessTypeID>
      <AddressLine1>17, Central Shopping Centre Ranelagh Street</AddressLine1>
      <AddressLine3>Liverpool</AddressLine3>
      <AddressLine4>Liverpool</AddressLine4>
      <PostCode>L1 1QE</PostCode>
      <RatingValue>AwaitingInspection</RatingValue>
      <RatingKey>fhrs_awaitinginspection_en-GB</RatingKey>
      <RatingDate xsi:nil="true"/>
      <LocalAuthorityCode>414</LocalAuthorityCode>
      <LocalAuthorityName>Liverpool</LocalAuthorityName>
      <LocalAuthorityWebSite>http://www.liverpool.gov.uk</LocalAuthorityWebSite>
      <LocalAuthorityEmailAddress>environmental.health@liverpool.gov.uk</LocalAuthorityEmailAddress>
      <Scores/>
      <SchemeType>FHRS</SchemeType>
      <NewRatingPending>False</NewRatingPending>
      <Geocode>
      
      '''

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 6)

In [55]:
tree = ET.parse('Liverpool.xml')
root = tree.getroot()



df_cols = ["LocalAuthorityBusinessID", "BusinessName","BusinessAddress","BusinessType","BusinessTypeID", "RatingKey", "Postcode"]
rows = []

food_outlets_df  = pd.DataFrame(rows, columns = df_cols)

for neighbor in root.iter('EstablishmentCollection'):
   for node in neighbor:
       bus_laid=node.find("LocalAuthorityBusinessID").text  if node is not None else None
       bus_rate=node.find("RatingKey").text if node is not None else None  
       bus_address=""
       try:
        bus_address=bus_address+" "+node.find('AddressLine1').text 
       except:
        bus_address=bus_address     
       try:
        bus_address=bus_address+" "+node.find('AddressLine2').text 
       except:
        bus_address=bus_address 
       try:
        bus_address=bus_address+" "+node.find('AddressLine3').text 
       except:
        bus_address=bus_address
       try:
         bus_postcode=node.find("PostCode").text if node is not None else None
         bus_address=bus_address+" "+bus_postcode+ " "+"UK" 
       except:
          bus_postcode=("missing")    
       bus_type=node.find("BusinessType").text if node is not None else None
       bus_name=node.find("BusinessName").text  if node is not None else None
       bus_type_id=node.find("BusinessTypeID").text if node is not None else None
       try:
         bus_postcode=node.find("PostCode").text if node is not None else None
       except:
          bus_postcode=("missing") 
       rows.append({"LocalAuthorityBusinessID": bus_laid, "BusinessName": bus_name, "BusinessAddress": bus_address,
                  "BusinessType": bus_type,"BusinessTypeID":bus_type_id, "Postcode": bus_postcode, "RatingValue": bus_rate})

food_outlets_df = pd.DataFrame(rows, columns = df_cols)

food_outlets_df.head(n=5)

Unnamed: 0,LocalAuthorityBusinessID,BusinessName,BusinessAddress,BusinessType,BusinessTypeID,RatingKey,Postcode
0,78306,@Desserts,"17, Central Shopping Centre Ranelagh Street L...",Takeaway/sandwich shop,7844,,L1 1QE
1,71900,08 Burger Unit,,Mobile caterer,7846,,missing
2,79155,109 Allerton,109 Allerton Road Mossley Hill Liverpool L18 ...,Restaurant/Cafe/Canteen,1,,L18 2DD
3,78939,14forty,100 Wavertree Boulevard Liverpool L7 9PT UK,Restaurant/Cafe/Canteen,1,,L7 9PT
4,PI/000272350,2 in 1,54 Durning Road Liverpool L7 5NG UK,Takeaway/sandwich shop,7844,,L7 5NG


In [120]:
pd.DataFrame(food_outlets_df["BusinessType"].value_counts())

Unnamed: 0,BusinessType
Restaurant/Cafe/Canteen,775
Retailers - other,549
Takeaway/sandwich shop,539
Pub/bar/nightclub,272
Hospitals/Childcare/Caring Premises,97
School/college/university,56
Other catering premises,52
Hotel/bed & breakfast/guest house,50
Retailers - supermarkets/hypermarkets,49
Mobile caterer,32


In [57]:
# Applies the geocoding function

food_outlets_df["lat"] , food_outlets_df["lon"] = zip(*food_outlets_df["BusinessAddress"].apply(long_lat_func))

In [58]:
food_outlets_df.head(n=5)

Unnamed: 0,LocalAuthorityBusinessID,BusinessName,BusinessAddress,BusinessType,BusinessTypeID,RatingKey,Postcode,lat,lon
0,78306,@Desserts,"17, Central Shopping Centre Ranelagh Street L...",Takeaway/sandwich shop,7844,,L1 1QE,,
1,71900,08 Burger Unit,,Mobile caterer,7846,,missing,,
2,79155,109 Allerton,109 Allerton Road Mossley Hill Liverpool L18 ...,Restaurant/Cafe/Canteen,1,,L18 2DD,53.3818,-2.90548
3,78939,14forty,100 Wavertree Boulevard Liverpool L7 9PT UK,Restaurant/Cafe/Canteen,1,,L7 9PT,53.4057,-2.93406
4,PI/000272350,2 in 1,54 Durning Road Liverpool L7 5NG UK,Takeaway/sandwich shop,7844,,L7 5NG,53.4064,-2.94843


In [128]:
# Subsets to drop the na terms
food_outlets_df=food_outlets_df.dropna(subset=['lon', 'lat'])


# Subsets to drop the na terms
food_outlets_df =food_outlets_df[food_outlets_df["Postcode"]!="Missing"]

#Converts the longitude and latitude as points data to give the geocoordinates for the data frame
geometry = [Point(xy) for xy in zip(food_outlets_df['lon'], food_outlets_df['lat'])]



# Create a GeoDataFrame from art and verify the type
food_outlets_geo = gpd.GeoDataFrame(food_outlets_df, crs = 'epsg:4326' , geometry = geometry)


# Write out the food outlets location as a geojson for use in the mapbox map
food_outlets_geo.to_file("Food_outlets_geo.geojson", driver='GeoJSON')



In [129]:
map.add_data(data=food_outlets_geo, name="food_outlets")

# Adding in the foodbank information

In [60]:
foodbanks=pd.read_csv("Liverpool_foodbanks_geocode_input.csv")

In [107]:
foodbanks.head(n=3)

Unnamed: 0,Category,Company Name,Region,name,Street Address,City,State,Zip,Mobile Number,Website Address,Email,Reviews Average,Count Of Review,full_address,lat,lon,geometry
3,Non-profit organisation,Micah Liverpool,Liverpool,St James Mt Liverpool L1 7AZ,St James MtLiverpool L1 7AZ,St James Mt,LiverpoolL1,7.0,0151 702 7206,micahliverpool.com,hannah.clarke@liverpool.org.uk,,,"St James Mt Liverpool L1 7AZ, Liverpool, UK",53.3943,-2.97483,POINT (-2.97483 53.39430)
5,Cathedral,Liverpool Cathedral,Liverpool,St James Mt Liverpool L1 7AZ,St James MtLiverpool L1 7AZ,St James Mt,LiverpoolL1,7.0,0151 709 6271,liverpoolcathedral.org.uk,jenny.moran@liverpoolcathedral.org.uk,4.7,8503.0,"St James Mt Liverpool L1 7AZ, Liverpool, UK",53.3943,-2.97483,POINT (-2.97483 53.39430)
6,Non-profit organisation,British Red Cross Liverpool,Liverpool,36 Renshaw St Liverpool L1 4EF,36 Renshaw StLiverpool L1 4EF,36 Renshaw St,LiverpoolL1,4.0,0151 707 1074,redcross.org.uk,contactus@redcross.org.uk.,5.0,1.0,"36 Renshaw St Liverpool L1 4EF, Liverpool, UK",53.4039,-2.9773,POINT (-2.97730 53.40389)


In [62]:
# Adjust the coding of the address to make the geocoding more accurate

def complete_address(x):
  if "Bootle" in x:
    return x+", Bootle, UK"
  if "Waterloo" in x:
    return x+", Waterloo, UK"
  if "Speke" in x:
    return x+", Speke, UK"
  else:
    return x+", Liverpool, UK"


foodbanks["full_address"]=foodbanks["name"].apply(complete_address)

In [63]:
# Applies the geocoding function

foodbanks["lat"] , foodbanks["lon"] = zip(*foodbanks["full_address"].apply(long_lat_func))

In [108]:
# Subsets to drop the na terms
foodbanks=foodbanks.dropna(subset=['lon', 'lat'])

# Foodbanks drop Bootle
foodbanks=foodbanks[~foodbanks["full_address"].str.contains('Bootle')]

#Converts the longitude and latitude as points data to give the geocoordinates for the data frame
geometry = [Point(xy) for xy in zip(foodbanks['lon'], foodbanks['lat'])]



# Create a GeoDataFrame from art and verify the type
foodbanks_geo = gpd.GeoDataFrame(foodbanks, crs = 'epsg:4326' , geometry = geometry)


# Foodbanks drop Bootle



In [109]:
# Selects the points that fall within Liverpool

PointInPoly = gpd.sjoin(foodbanks_geo,df, how='left',op='within') 

In [115]:
! ls

[34mFood_map[m[m
Geocoder.R
Liverpool.geojson
Liverpool.html
Liverpool.xml
Liverpool2.html
Liverpool3.html
Liverpool4.html
Liverpool_14_12_20.html
Liverpool_15_12_20.html
Liverpool_foodbanks.csv
Liverpool_foodbanks_geocode_input.csv
Liverpool_foodbanks_geocode_input.csv_geocoded5.rds
Liverpool_foodbanks_geocode_input.xlsm
Liverpool_map.ipynb
[34mcitymap_env[m[m
config.py
hex_config.py
requirements.txt
test_map.html


In [116]:
# Create a GeoDataFrame from art and verify the type
foodbanks_geo = gpd.GeoDataFrame(PointInPoly, crs = 'epsg:4326' , geometry = geometry)


foodbanks_geo.to_file("Foodbanks.geojson", driver='GeoJSON')

In [106]:
map.add_data(data=foodbanks_geo, name="foodbanks")



AttributeError: 'str' object has no attribute '_geom'

# The map

In [68]:
map

KeplerGl(data={'Liverpool_lsoa': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, …

In [41]:
# Saves the map to an html format that can be rendered in the browser
map.save_to_html(file_name='Liverpool_15_12_20.html')

Map saved to Liverpool_15_12_20.html!


In [None]:
The map can also be served as a flask app

from flask import Flask

app = Flask(__name__)

@app.route('/')
def index():
    return map_1._repr_html_()

if __name__ == '__main__':
    app.run(debug=True)

In [42]:
# The code below is a way to speed up the geocoding

In [None]:
import multiprocessing
from geopy.geocoders import Nominatim
geocoder = Nominatim()
def worker(address):
    return geocoder.geocode(address)
pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
addresses = ['58 Victoria Embankment, London', 'Ferry Road, Edinburgh']
locations = pool.map(worker, addresses)