In [1]:
import json
import os
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import osmnx as ox
import random

%matplotlib inline

In [2]:
cities = ['adelaide',
'auckland',
'baltimore',
'bangkok',
'barcelona',
'belfast',
'bern',
'chennai',
'mexico_city',
'cologne',
'ghent',
'graz',
'hanoi',
'hong_kong',
'lisbon',
'melbourne',
'odense',
'olomouc',
'sao_paulo',          
'phoenix',
'seattle',
'sydney',
'valencia',
'vic'
         ]

In [6]:
process_folder = '../../process'
pop_col = ["pop_ghs_2015"]
dest_col = ["destinations"]

In [14]:
selected_destinations = {}
for city in cities:
    # Access City-Specific Config
    process_config_path = f"../../process/configuration/{city}.json"
    
    # Retrieve Data Paths
    with open(process_config_path) as json_file:
        config = json.load(json_file)

    input_folder = os.path.join(process_folder, config['folder'])

    gpkg_input = os.path.join(input_folder, config['geopackagePath'])
    
    # Extract Data
    pop = gpd.read_file(gpkg_input, layer='pop_ghs_2015' )
    
    dests = gpd.read_file(gpkg_input, layer='destinations' )
    
    fresh_food = dests[dests['dest_name_full'].str.contains('Fresh Food / Market')]
    
    gdf_study_area = gpd.read_file(gpkg_input, layer="urban_study_region")
    study_area = gdf_study_area["geometry"].iloc[0]

    # Project Data
    crs = gdf_study_area.crs
    if pop.crs != crs:
        pop = pop.to_crs(crs)
    if fresh_food.crs != crs:
        fresh_food = fresh_food.to_crs(crs)
        
    # Clip Data to Study Area
    import warnings

    warnings.filterwarnings("ignore", "GeoSeries.notna", UserWarning)  # temp warning suppression
    pop_clipped = gpd.clip(pop, study_area)
    fresh_food_clipped = gpd.clip(fresh_food, study_area)
    
    # Create Population Quantiles
    pop_clipped['pop_quintile'] = pd.qcut(pop_clipped['pop_est'], 5, labels=False)
    
    # Join Pop Hexagons and Destinatinos
    joined_freshfood = gpd.sjoin(fresh_food_clipped, pop_clipped, how='right', op='within')
    
    # Remove Values with No Fresh Food Destination
    cleaned_joined_freshfood = joined_freshfood[~joined_freshfood['dest_name_full'].isnull()]
    
    # Set a Seed and Randomly Select 10 Destinations from each Quintile for each City
    random.seed(24)
    final_destiations = cleaned_joined_freshfood.sample(frac=1).groupby('pop_quintile', sort=False).head(10)
    
    print(ox.ts(), f"calculated destination for {city}")
    
    selected_destinations[f"final_destiations_{city}"] = final_destiations
    
df_sds = pd.DataFrame(selected_destinations).T

2020-10-19 16:28:48 calculated destination for adelaide
2020-10-19 16:54:26 calculated destination for auckland
2020-10-19 16:55:02 calculated destination for baltimore
2020-10-19 16:57:56 calculated destination for bangkok
2020-10-19 16:59:46 calculated destination for barcelona
2020-10-19 17:00:14 calculated destination for belfast
2020-10-19 17:00:17 calculated destination for bern
2020-10-19 17:00:44 calculated destination for chennai
2020-10-19 17:06:49 calculated destination for mexico_city
2020-10-19 17:08:18 calculated destination for cologne
2020-10-19 17:08:28 calculated destination for ghent
2020-10-19 17:08:35 calculated destination for graz
2020-10-19 17:12:18 calculated destination for hanoi
2020-10-19 17:27:42 calculated destination for hong_kong
2020-10-19 17:27:58 calculated destination for lisbon
2020-10-19 17:32:44 calculated destination for melbourne
2020-10-19 17:33:09 calculated destination for odense
2020-10-19 17:33:10 calculated destination for olomouc
2020-10-

ValueError: If using all scalar values, you must pass an index

In [13]:
df_sds

NameError: name 'df_sds' is not defined