In [1]:
import json
import os
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import osmnx as ox
import random

In [2]:
# Olomouc inputs: the OSM-derived geopackage, the official supermarket layer,
# and the attribute filter applied to the official layer.
ol_osm_buffer_gpkg_path = "data/olomouc/olomouc_cz_2019_1600m_buffer.gpkg"
ol_official_dests_filepath = "data/olomouc/olomouc_supermarkets_2018"
ol_destinations_column = "OBJECTID"
# OBJECTID values "1" through "63", kept as strings exactly as in the original literal list.
ol_destinations_values = [str(object_id) for object_id in range(1, 64)]
# Target CRS (WGS84) that the clipped layers are converted to before their coordinates are printed.
# The authority string replaces the {'init': 'epsg:4326'} dict, which pyproj has deprecated
# and which triggered a FutureWarning on every to_crs(crs) call.
# NOTE(review): geopandas is expected to keep lon/lat (x, y) coordinate order with this form — confirm
# against the installed geopandas/pyproj versions.
crs = "EPSG:4326"

In [3]:
# ---- 1. Study area ---------------------------------------------------------
# The first geometry of the "urban_study_region" layer serves as the clip boundary.
ol_gdf_study_area = gpd.read_file(ol_osm_buffer_gpkg_path, layer="urban_study_region")
ol_study_area = ol_gdf_study_area["geometry"].iloc[0]
print(ox.ts(), "loaded study area boundary")

# ---- 2. Geopackage read without a layer name -------------------------------
# NOTE(review): no `layer=` is passed, so this presumably reads only the file's
# default layer rather than every layer — confirm.
ol_geopackage = gpd.read_file(ol_osm_buffer_gpkg_path)

# ---- 3. Official destinations ----------------------------------------------
# Keep only rows whose ol_destinations_column value appears in ol_destinations_values.
ol_official_unfiltered = gpd.read_file(ol_official_dests_filepath)
ol_mask = ol_official_unfiltered[ol_destinations_column].isin(ol_destinations_values)
ol_gdf_official_destinations = ol_official_unfiltered.loc[ol_mask]
print(ox.ts(), "loaded and filtered official destinations shapefile")

# ---- 4. OSM destinations ---------------------------------------------------
# From the "destinations" layer keep the rows tagged as fresh food markets.
ol_gdf_osm = gpd.read_file(ol_osm_buffer_gpkg_path, layer="destinations")
ol_is_fresh_food_market = ol_gdf_osm["dest_name"] == "fresh_food_market"
ol_gdf_osm_destinations = ol_gdf_osm[ol_is_fresh_food_market]
print(ox.ts(), "loaded osm destinations shapefile")

# ---- 5. Bring every layer to the study area's CRS --------------------------
ol_crs = ol_gdf_study_area.crs


def _ol_match_study_area_crs(gdf, message):
    """Return gdf in ol_crs; reproject and log `message` only when its CRS differs."""
    if gdf.crs != ol_crs:
        gdf = gdf.to_crs(ol_crs)
        print(ox.ts(), message)
    return gdf


ol_geopackage = _ol_match_study_area_crs(ol_geopackage, "projected geopackage")
ol_gdf_official_destinations = _ol_match_study_area_crs(
    ol_gdf_official_destinations, "projected official destinations"
)
ol_gdf_osm_destinations = _ol_match_study_area_crs(
    ol_gdf_osm_destinations, "projected osm destinations"
)

# ---- 6. Clip both destination layers to the study area ---------------------
import warnings

# Temporary suppression of the "GeoSeries.notna" UserWarning while clipping.
warnings.filterwarnings(action="ignore", message="GeoSeries.notna", category=UserWarning)
ol_gdf_osm_destinations_clipped = gpd.clip(ol_gdf_osm_destinations, ol_study_area)
ol_gdf_official_destinations_clipped = gpd.clip(ol_gdf_official_destinations, ol_study_area)
print(ox.ts(), "clipped osm/official destinations to study area boundary")

# ---- 7. Sanity check: all four layers share one CRS ------------------------
ol_layer_crs_values = [
    ol_gdf_study_area.crs,
    ol_geopackage.crs,
    ol_gdf_osm_destinations_clipped.crs,
    ol_gdf_official_destinations_clipped.crs,
]
assert all(left == right for left, right in zip(ol_layer_crs_values, ol_layer_crs_values[1:]))
# Names left for later cells: ol_study_area, ol_geopackage,
# ol_gdf_osm_destinations_clipped, ol_gdf_official_destinations_clipped

2020-09-28 00:52:14 loaded study area boundary
2020-09-28 00:52:15 loaded and filtered official destinations shapefile
2020-09-28 00:52:15 loaded osm destinations shapefile
2020-09-28 00:52:15 projected official destinations
2020-09-28 00:52:15 clipped osm/official destinations to study area boundary


In [4]:
# Convert both clipped Olomouc layers to the CRS held in the module-level `crs`
# so the coordinates printed further down are directly comparable.
ol_gdf_osm_destinations_clipped = ol_gdf_osm_destinations_clipped.to_crs(crs=crs)
ol_gdf_official_destinations_clipped = ol_gdf_official_destinations_clipped.to_crs(crs=crs)

  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))


In [5]:
# Renumber the rows 0..n-1 so the .loc lookups below can use the sampled integers as labels.
# The previous index is kept as a column (drop=False is the default behaviour).
# NOTE(review): re-running this cell on its own adds another index column each time — run it once per load.
ol_gdf_osm_destinations_clipped = ol_gdf_osm_destinations_clipped.reset_index(drop=False)
print(ol_gdf_osm_destinations_clipped.shape)

(60, 6)


In [6]:
# Select 10 more OSM-derived destinations for Olomouc, bringing the validation sample to 20.
# The first 10 were drawn with random.choices WITHOUT fixing a seed, so that draw cannot be
# reproduced; their row labels are recorded here instead.
ol_osm_first_picks = {22, 49, 51, 57, 47, 48, 5, 36, 3, 17}
ol_osm_extra_needed = 10

# random.randint includes BOTH endpoints, so the largest valid row label is len - 1.
# (The earlier upper bound equalled the row count and could return a label that does not exist.)
ol_osm_last_label = len(ol_gdf_osm_destinations_clipped) - 1
assert ol_osm_last_label + 1 - len(ol_osm_first_picks) >= ol_osm_extra_needed, (
    "not enough unsampled rows left to draw from"
)

random.seed(3)
ol_osm_set = set()
# Draw until exactly 10 new labels are collected: repeats and labels already in the first
# batch are redrawn, instead of relying on a hand-tuned number of loop iterations.
# NOTE(review): for the recorded 60-row frame this is expected to reproduce the previously
# printed selection, since none of those draws hit the removed upper bound — confirm on re-run.
while len(ol_osm_set) < ol_osm_extra_needed:
    dest = random.randint(0, ol_osm_last_label)
    if dest not in ol_osm_first_picks:
        ol_osm_set.add(dest)
print(ol_osm_set)

{34, 4, 37, 38, 8, 40, 15, 23, 58, 30}


In [7]:
# Row labels of the 20 sampled OSM destinations for Olomouc
# (first 10 picks plus the 10 seeded picks), in ascending order.
ol_osm_sample_labels = [
    3, 4, 5, 8, 15, 17, 22, 23, 30, 34,
    36, 37, 38, 40, 47, 48, 49, 51, 57, 58,
]
# Print each sampled point's geometry, one per line.
for ol_osm_label in ol_osm_sample_labels:
    print(ol_gdf_osm_destinations_clipped.loc[ol_osm_label, "geometry"])

POINT (17.247804 49.57584719999998)
POINT (17.2369917 49.58190269999999)
POINT (17.2494973 49.5746043)
POINT (17.2494893 49.57467209999999)
POINT (17.2521229 49.5922138)
POINT (17.2582162 49.58705129999999)
POINT (17.2484482 49.59638829999999)
POINT (17.2437152 49.60243169999999)
POINT (17.2511981 49.59444139999999)
POINT (17.2500296 49.59516049999999)
POINT (17.2578628 49.601745)
POINT (17.2690025 49.59485)
POINT (17.2667728 49.5954673)
POINT (17.2719467 49.5883662)
POINT (17.2780985 49.5923773)
POINT (17.2728728 49.59919669999999)
POINT (17.2783607 49.6016576)
POINT (17.23491530227623 49.59111024959994)
POINT (17.27325686072833 49.59914789669003)
POINT (17.28454369789085 49.59358171700822)


In [8]:
# Renumber the rows 0..n-1 so the .loc lookups below can use the sampled integers as labels.
# The previous index is kept as a column (drop=False is the default behaviour).
# NOTE(review): re-running this cell on its own adds another index column each time — run it once per load.
ol_gdf_official_destinations_clipped = ol_gdf_official_destinations_clipped.reset_index(drop=False)
print(ol_gdf_official_destinations_clipped.shape)

(50, 6)


In [23]:
# Select 10 more destinations from the official Olomouc dataset, bringing the validation sample to 20.
# The first 10 were drawn with random.choices WITHOUT fixing a seed, so that draw cannot be
# reproduced; their row labels are recorded here instead.
ol_off_first_picks = {15, 47, 4, 14, 17, 39, 23, 29, 40, 8}
ol_off_extra_needed = 10

# random.randint includes BOTH endpoints, so the largest valid row label is len - 1.
ol_off_last_label = len(ol_gdf_official_destinations_clipped) - 1
assert ol_off_last_label + 1 - len(ol_off_first_picks) >= ol_off_extra_needed, (
    "not enough unsampled rows left to draw from"
)

random.seed(6)
ol_off_set = set()
# Draw until exactly 10 new labels are collected: repeats and labels already in the first
# batch are redrawn, so the result is always 10 distinct, previously unused rows.
# NOTE(review): for the recorded 50-row frame this is expected to reproduce the previously
# printed selection (those 10 draws were already distinct and unused) — confirm on re-run.
while len(ol_off_set) < ol_off_extra_needed:
    dest = random.randint(0, ol_off_last_label)
    if dest not in ol_off_first_picks:
        ol_off_set.add(dest)
print(ol_off_set)

{0, 2, 36, 5, 37, 9, 42, 48, 16, 31}


In [24]:
# Row labels of the 20 sampled official destinations for Olomouc
# (first 10 picks plus the 10 seeded picks), in ascending order.
ol_off_sample_labels = [
    0, 2, 4, 5, 8, 9, 14, 15, 16, 17,
    23, 29, 31, 36, 37, 39, 40, 42, 47, 48,
]
# Print each sampled point's geometry, one per line.
for ol_off_label in ol_off_sample_labels:
    print(ol_gdf_official_destinations_clipped.loc[ol_off_label, "geometry"])

POINT (17.2485335602552 49.60071812234299)
POINT (17.24561824232677 49.59735297747774)
POINT (17.2491618074927 49.59315871639684)
POINT (17.22909333527297 49.5929938283056)
POINT (17.27325348973117 49.59919483208895)
POINT (17.27182509764802 49.58839795008866)
POINT (17.27856878233937 49.59892950049041)
POINT (17.27856878233937 49.59892950049041)
POINT (17.28460614162956 49.59350857042033)
POINT (17.28460614162956 49.59350857042033)
POINT (17.27140486011946 49.58313360097501)
POINT (17.2285914589908 49.59758264132959)
POINT (17.23485561128411 49.59112131177946)
POINT (17.24187669285135 49.59497907362115)
POINT (17.23517431667831 49.57563574595006)
POINT (17.25667615144096 49.57687776846497)
POINT (17.25667615144096 49.57687776846497)
POINT (17.25016263433868 49.59497598113982)
POINT (17.24674659832209 49.58128741594175)
POINT (17.24847334435308 49.58220919421118)


# Sao Paulo

In [13]:
# Sao Paulo inputs: the OSM-derived geopackage, the official fresh-food layer,
# and the attribute filter applied to the official layer.
sp_osm_buffer_gpkg_path = "data/sao_paulo/sao_paulo_br_2019_1600m_buffer.gpkg"
sp_official_dests_filepath = "data/sao_paulo/SP_Freshfood_merged"
sp_destinations_column = "eq_classe"
# Classes of official destinations to keep.
# NOTE(review): the second value looks like a mis-decoded "SACOLÃO"; it is left untouched because
# it may match how the attribute table is actually decoded at read time — verify against the data.
sp_destinations_values = [
    "MERCADOS MUNICIPAIS",
    "SACOLÃƒO",
]

In [14]:
# ---- 1. Study area ---------------------------------------------------------
# The first geometry of the "urban_study_region" layer serves as the clip boundary.
sp_gdf_study_area = gpd.read_file(sp_osm_buffer_gpkg_path, layer="urban_study_region")
sp_study_area = sp_gdf_study_area["geometry"].iloc[0]
print(ox.ts(), "loaded study area boundary")

# ---- 2. Geopackage read without a layer name -------------------------------
# NOTE(review): no `layer=` is passed, so this presumably reads only the file's
# default layer rather than every layer — confirm.
sp_geopackage = gpd.read_file(sp_osm_buffer_gpkg_path)

# ---- 3. Official destinations ----------------------------------------------
# Keep only rows whose sp_destinations_column value appears in sp_destinations_values.
sp_official_unfiltered = gpd.read_file(sp_official_dests_filepath)
sp_mask = sp_official_unfiltered[sp_destinations_column].isin(sp_destinations_values)
sp_gdf_official_destinations = sp_official_unfiltered.loc[sp_mask]
print(ox.ts(), "loaded and filtered official destinations shapefile")

# ---- 4. OSM destinations ---------------------------------------------------
# From the "destinations" layer keep the rows tagged as fresh food markets.
sp_gdf_osm = gpd.read_file(sp_osm_buffer_gpkg_path, layer="destinations")
sp_is_fresh_food_market = sp_gdf_osm["dest_name"] == "fresh_food_market"
sp_gdf_osm_destinations = sp_gdf_osm[sp_is_fresh_food_market]
print(ox.ts(), "loaded osm destinations shapefile")

# ---- 5. Bring every layer to the study area's CRS --------------------------
sp_crs = sp_gdf_study_area.crs


def _sp_match_study_area_crs(gdf, message):
    """Return gdf in sp_crs; reproject and log `message` only when its CRS differs."""
    if gdf.crs != sp_crs:
        gdf = gdf.to_crs(sp_crs)
        print(ox.ts(), message)
    return gdf


sp_geopackage = _sp_match_study_area_crs(sp_geopackage, "projected geopackage")
sp_gdf_official_destinations = _sp_match_study_area_crs(
    sp_gdf_official_destinations, "projected official destinations"
)
sp_gdf_osm_destinations = _sp_match_study_area_crs(
    sp_gdf_osm_destinations, "projected osm destinations"
)

# ---- 6. Clip both destination layers to the study area ---------------------
import warnings

# Temporary suppression of the "GeoSeries.notna" UserWarning while clipping.
warnings.filterwarnings(action="ignore", message="GeoSeries.notna", category=UserWarning)
sp_gdf_osm_destinations_clipped = gpd.clip(sp_gdf_osm_destinations, sp_study_area)
sp_gdf_official_destinations_clipped = gpd.clip(sp_gdf_official_destinations, sp_study_area)
print(ox.ts(), "clipped osm/official destinations to study area boundary")

# ---- 7. Sanity check: all four layers share one CRS ------------------------
sp_layer_crs_values = [
    sp_gdf_study_area.crs,
    sp_geopackage.crs,
    sp_gdf_osm_destinations_clipped.crs,
    sp_gdf_official_destinations_clipped.crs,
]
assert all(left == right for left, right in zip(sp_layer_crs_values, sp_layer_crs_values[1:]))
# Names left for later cells: sp_study_area, sp_geopackage,
# sp_gdf_osm_destinations_clipped, sp_gdf_official_destinations_clipped

2020-09-28 01:09:22 loaded study area boundary
2020-09-28 01:09:29 loaded and filtered official destinations shapefile
2020-09-28 01:09:30 loaded osm destinations shapefile
2020-09-28 01:09:30 projected official destinations
2020-09-28 01:09:30 clipped osm/official destinations to study area boundary


In [15]:
# Convert both clipped Sao Paulo layers to the CRS held in the module-level `crs`
# (assigned in the Olomouc configuration cell, so that cell must have been run first).
sp_gdf_osm_destinations_clipped = sp_gdf_osm_destinations_clipped.to_crs(crs=crs)
sp_gdf_official_destinations_clipped = sp_gdf_official_destinations_clipped.to_crs(crs=crs)

  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))


In [16]:
# Renumber the rows 0..n-1 so the .loc lookups below can use the sampled integers as labels.
# The previous index is kept as a column (drop=False is the default behaviour).
# NOTE(review): re-running this cell on its own adds another index column each time — run it once per load.
sp_gdf_osm_destinations_clipped = sp_gdf_osm_destinations_clipped.reset_index(drop=False)
print(sp_gdf_osm_destinations_clipped.shape)

(1562, 6)


In [17]:
# Select 10 more OSM-derived destinations for Sao Paulo, bringing the validation sample to 20.
# The first 10 were drawn with random.choices WITHOUT fixing a seed, so that draw cannot be
# reproduced; their row labels are recorded here instead.
sp_osm_first_picks = {475, 1269, 179, 808, 440, 19, 686, 289, 708, 1063}
sp_osm_extra_needed = 10

# random.randint includes BOTH endpoints, so the largest valid row label is len - 1.
# (The earlier upper bound equalled the row count and could return a label that does not exist.)
sp_osm_last_label = len(sp_gdf_osm_destinations_clipped) - 1
assert sp_osm_last_label + 1 - len(sp_osm_first_picks) >= sp_osm_extra_needed, (
    "not enough unsampled rows left to draw from"
)

random.seed(1)
sp_osm_set = set()
# Draw until exactly 10 new labels are collected: repeats and labels already in the first
# batch are redrawn, so the result is always 10 distinct, previously unused rows.
# NOTE(review): for the recorded 1562-row frame this is expected to reproduce the previously
# printed selection, since none of those draws hit the removed upper bound — confirm on re-run.
while len(sp_osm_set) < sp_osm_extra_needed:
    dest = random.randint(0, sp_osm_last_label)
    if dest not in sp_osm_first_picks:
        sp_osm_set.add(dest)
print(sp_osm_set)

{129, 967, 522, 1165, 241, 275, 1014, 1558, 920, 1334}


In [18]:
# Row labels of the 20 sampled OSM destinations for Sao Paulo
# (first 10 picks plus the 10 seeded picks), in ascending order.
sp_osm_sample_labels = [
    19, 129, 179, 241, 275, 289, 440, 475, 522, 686,
    708, 808, 920, 967, 1014, 1063, 1165, 1269, 1334, 1558,
]
# Print each sampled point's geometry, one per line.
for sp_osm_label in sp_osm_sample_labels:
    print(sp_gdf_osm_destinations_clipped.loc[sp_osm_label, "geometry"])

POINT (-46.7819685 -23.6580963)
POINT (-46.7048266 -23.6111952)
POINT (-46.6923831 -23.5672164)
POINT (-46.6360213 -23.61760680000001)
POINT (-46.6047713 -23.63076639999999)
POINT (-46.6402969 -23.5974519)
POINT (-46.58094 -23.59129689999999)
POINT (-46.4819764 -23.60342589999999)
POINT (-46.6704075 -23.53567250000001)
POINT (-46.5953604 -23.5502743)
POINT (-46.6176659 -23.52998279999998)
POINT (-46.6218692 -23.4815398)
POINT (-46.5815645 -23.48931629999999)
POINT (-46.4541466 -23.5147727)
POINT (-46.77058315000711 -23.68497145002759)
POINT (-46.69984489874574 -23.65016003306899)
POINT (-46.61851197964082 -23.61899347715126)
POINT (-46.69708051161243 -23.49417908854859)
POINT (-46.62970289984809 -23.54206214767827)
POINT (-46.38775618882573 -23.52180978440611)


In [19]:
# Renumber the rows 0..n-1 so the .loc lookups below can use the sampled integers as labels.
# The previous index is kept as a column (drop=False is the default behaviour).
# NOTE(review): re-running this cell on its own adds another index column each time — run it once per load.
sp_gdf_official_destinations_clipped = sp_gdf_official_destinations_clipped.reset_index(drop=False)
print(sp_gdf_official_destinations_clipped.shape)

(34, 24)


In [20]:
# Select 10 more destinations from the official Sao Paulo dataset, bringing the validation sample to 20.
# The first 10 were drawn with random.choices WITHOUT fixing a seed, so that draw cannot be
# reproduced; their row labels are recorded here instead.
sp_off_first_picks = {20, 17, 28, 7, 29, 26, 11, 24, 30, 6}
sp_off_extra_needed = 10

# random.randint includes BOTH endpoints, so the largest valid row label is len - 1.
# (The earlier upper bound equalled the row count and could return a label that does not exist.)
sp_off_last_label = len(sp_gdf_official_destinations_clipped) - 1
assert sp_off_last_label + 1 - len(sp_off_first_picks) >= sp_off_extra_needed, (
    "not enough unsampled rows left to draw from"
)

random.seed(2)
sp_off_set = set()
# Draw until exactly 10 new labels are collected: repeats and labels already in the first
# batch are redrawn, replacing the hand-tuned 12 iterations that happened to yield 10 values.
# NOTE(review): for the recorded 34-row frame this is expected to reproduce the previously
# printed selection, since none of those draws hit the removed upper bound — confirm on re-run.
while len(sp_off_set) < sp_off_extra_needed:
    dest = random.randint(0, sp_off_last_label)
    if dest not in sp_off_first_picks:
        sp_off_set.add(dest)
print(sp_off_set)

{2, 3, 5, 10, 13, 16, 19, 23, 25, 27}


In [21]:
# Row labels of the 20 sampled official destinations for Sao Paulo
# (first 10 picks plus the 10 seeded picks), in ascending order.
sp_off_sample_labels = [
    2, 3, 5, 6, 7, 10, 11, 13, 16, 17,
    19, 20, 23, 24, 25, 26, 27, 28, 29, 30,
]
# Print each sampled point's geometry, one per line.
for sp_off_label in sp_off_sample_labels:
    print(sp_gdf_official_destinations_clipped.loc[sp_off_label, "geometry"])

POINT (-46.69248703101302 -23.56565530129838)
POINT (-46.43968056681558 -23.49357070584416)
POINT (-46.60040132979196 -23.5972277401266)
POINT (-46.54565477619595 -23.56641375048255)
POINT (-46.61189898025206 -23.48147023661034)
POINT (-46.70008929237225 -23.65058769324573)
POINT (-46.41081158721061 -23.54280009501386)
POINT (-46.52262365415542 -23.5968509429958)
POINT (-46.76067057259279 -23.57081716163365)
POINT (-46.76058029229463 -23.48934579771526)
POINT (-46.74230177310341 -23.6736266747205)
POINT (-46.69794971612233 -23.47434591257144)
POINT (-46.43912461284087 -23.49429175409668)
POINT (-46.77614003675547 -23.66768654891171)
POINT (-46.67479183245619 -23.56469638564992)
POINT (-46.64681055100298 -23.55100540556737)
POINT (-46.64558758305883 -23.56347946547715)
POINT (-46.69818786150953 -23.51602109851548)
POINT (-46.70038334358041 -23.65101669679036)
POINT (-46.73817144274712 -23.58792930436906)
