In [1]:
import json
import os
import random
import warnings

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
import pandas as pd

%matplotlib inline

In [2]:
# Cities to process, in processing order. Each name is interpolated into a
# per-city configuration path of the form "<name>.json" by the loops below.
cities = [
    'adelaide', 'auckland', 'baltimore', 'bangkok',
    'barcelona', 'belfast', 'bern', 'chennai',
    'mexico_city', 'cologne', 'ghent', 'graz',
    'hanoi', 'hong_kong', 'lisbon', 'melbourne',
    'odense', 'olomouc', 'sao_paulo', 'phoenix',
    'seattle', 'sydney', 'valencia', 'vic',
]

In [3]:
# Seed NumPy's global random generator first. The DataFrame.sample() calls
# further down pass no random_state of their own.
np.random.seed(24)

# Root folder of the processing outputs; city folders from each JSON config
# are joined onto it.
process_folder = '../../process'

# Destination of the CSV written by the final cell.
filenames_filepath = "./filenames.csv"

# Layer-name lists. The visible cells hard-code these layer names in their
# read_file() calls and do not reference these two variables.
pop_col, dest_col = ["pop_ghs_2015"], ["destinations"]

In [4]:
def _allocate_quota(available, target=50):
    """Split ``target`` samples across groups as evenly as capacity allows.

    Parameters
    ----------
    available : list of int
        Number of rows that can be drawn from each group (here: fresh-food
        destinations per population quintile, Q1 first).
    target : int, default 50
        Total number of samples wanted across all groups.

    Returns
    -------
    list of int
        One quota per group, never larger than that group's availability.
        The quotas sum to ``min(target, sum(available))``.

    Notes
    -----
    Every group that still has spare rows receives an equal share of what is
    left; whatever a group cannot absorb is re-split among the others on the
    next round. When fewer samples remain than there are open groups, the
    leftovers go one each to the last (highest-quintile) open groups, which
    keeps the original cell's habit of handing the remainder to Q5.

    Example: ``_allocate_quota([3, 20, 20, 20, 20])`` gives
    ``[3, 11, 12, 12, 12]``.
    """
    quotas = [0] * len(available)
    remaining = min(target, sum(available))
    while remaining > 0:
        # Groups that can still give more rows. Never empty here because
        # `remaining` cannot exceed the total spare capacity.
        open_groups = [i for i, cap in enumerate(available) if quotas[i] < cap]
        share, leftover = divmod(remaining, len(open_groups))
        if share == 0:
            # Fewer samples left than open groups: one each, highest first.
            for i in open_groups[-leftover:]:
                quotas[i] += 1
            break
        for i in open_groups:
            granted = min(share, available[i] - quotas[i])
            quotas[i] += granted
            remaining -= granted
    return quotas


# Registering the filter is loop-invariant, so do it once instead of per city.
warnings.filterwarnings("ignore", "GeoSeries.notna", UserWarning)  # temp warning suppression

for city in cities:

    print(f"start {city}")

    # Access City-Specific Config (same location as before, built from process_folder)
    process_config_path = f"{process_folder}/configuration/{city}.json"

    # Retrieve Data Paths
    with open(process_config_path) as json_file:
        config = json.load(json_file)

    input_folder = os.path.join(process_folder, config['folder'])
    gpkg_input = os.path.join(input_folder, config['geopackagePath'])

    # Extract Data
    pop = gpd.read_file(gpkg_input, layer='pop_ghs_2015')
    dests = gpd.read_file(gpkg_input, layer='destinations')
    # na=False: rows with a missing name are treated as "not fresh food"
    # instead of producing a NaN mask entry.
    fresh_food = dests[dests['dest_name_full'].str.contains('Fresh Food / Market', na=False)]
    gdf_study_area = gpd.read_file(gpkg_input, layer="urban_study_region")
    study_area = gdf_study_area["geometry"].iloc[0]

    # Project Data onto the study region's CRS
    crs = gdf_study_area.crs
    if pop.crs != crs:
        pop = pop.to_crs(crs)
    if fresh_food.crs != crs:
        fresh_food = fresh_food.to_crs(crs)

    # Clip Data to Study Area
    pop_clipped = gpd.clip(pop, study_area)
    fresh_food_clipped = gpd.clip(fresh_food, study_area)

    # Create Population Quintiles (labels=False gives integer codes 0..4)
    pop_clipped['pop_quintile'] = pd.qcut(pop_clipped['pop_est'], 5, labels=False)

    # Join Pop Hexagons and Destinations
    # NOTE(review): newer GeoPandas releases rename `op` to `predicate`;
    # switch the keyword when the environment is upgraded.
    joined_freshfood = gpd.sjoin(fresh_food_clipped, pop_clipped, how='left', op='within')

    # Remove Values with No Fresh Food Destination
    cleaned_joined_freshfood = joined_freshfood[~joined_freshfood['dest_name_full'].isnull()]

    # One frame per quintile code. Rows whose pop_quintile is missing (the
    # left join found no hexagon) match none of the codes and are left out.
    quintile_frames = [
        cleaned_joined_freshfood.loc[cleaned_joined_freshfood['pop_quintile'] == code]
        for code in range(5)
    ]

    # Decide how many destinations to draw from each quintile. This replaces
    # the previous two-pass arithmetic, which divided by zero when no quintile
    # held 10+ destinations and could request more rows than Q5 contained.
    quotas = _allocate_quota([len(frame) for frame in quintile_frames], target=50)

    # Sample in Q1..Q5 order, as before.
    sample_dests = [frame.sample(quota) for frame, quota in zip(quintile_frames, quotas)]

    final_sample_dests = pd.concat(sample_dests)

    print(f"shape of dataframe for {city} below")
    print(final_sample_dests.shape)

start adelaide
shape of dataframe for adelaide below
(50, 28)
start auckland
shape of dataframe for auckland below
(50, 28)
start baltimore
shape of dataframe for baltimore below
(50, 28)
start bangkok
shape of dataframe for bangkok below
(50, 28)
start barcelona
shape of dataframe for barcelona below
(50, 28)
start belfast
shape of dataframe for belfast below
(50, 28)
start bern
shape of dataframe for bern below
(45, 28)
start chennai
shape of dataframe for chennai below
(50, 28)
start mexico_city
shape of dataframe for mexico_city below
(50, 28)
start cologne
shape of dataframe for cologne below
(50, 28)
start ghent
shape of dataframe for ghent below
(50, 28)
start graz
shape of dataframe for graz below
(48, 27)
start hanoi
shape of dataframe for hanoi below
(50, 28)
start hong_kong
shape of dataframe for hong_kong below
(50, 28)
start lisbon
shape of dataframe for lisbon below
(50, 28)
start melbourne
shape of dataframe for melbourne below
(50, 28)
start odense
shape of dataframe fo

In [None]:
# Registering the filter is loop-invariant, so do it once instead of per city.
warnings.filterwarnings("ignore", "GeoSeries.notna", UserWarning)  # temp warning suppression

for city in cities:

    print(f"start {city}")

    # Access City-Specific Config (same location as before, built from process_folder)
    process_config_path = f"{process_folder}/configuration/{city}.json"

    # Retrieve Data Paths
    with open(process_config_path) as json_file:
        config = json.load(json_file)

    input_folder = os.path.join(process_folder, config['folder'])
    gpkg_input = os.path.join(input_folder, config['geopackagePath'])

    # Extract Data
    pop = gpd.read_file(gpkg_input, layer='pop_ghs_2015')
    dests = gpd.read_file(gpkg_input, layer='destinations')
    # na=False: rows with a missing name are treated as "not fresh food"
    # instead of producing a NaN mask entry.
    fresh_food = dests[dests['dest_name_full'].str.contains('Fresh Food / Market', na=False)]
    gdf_study_area = gpd.read_file(gpkg_input, layer="urban_study_region")
    study_area = gdf_study_area["geometry"].iloc[0]

    # Project Data onto the study region's CRS
    crs = gdf_study_area.crs
    if pop.crs != crs:
        pop = pop.to_crs(crs)
    if fresh_food.crs != crs:
        fresh_food = fresh_food.to_crs(crs)

    # Clip Data to Study Area
    pop_clipped = gpd.clip(pop, study_area)
    fresh_food_clipped = gpd.clip(fresh_food, study_area)

    # Create Population Quintiles (labels=False gives integer codes 0..4)
    pop_clipped['pop_quintile'] = pd.qcut(pop_clipped['pop_est'], 5, labels=False)

    # Join Pop Hexagons and Destinations
    # NOTE(review): newer GeoPandas releases rename `op` to `predicate`;
    # switch the keyword when the environment is upgraded.
    joined_freshfood = gpd.sjoin(fresh_food_clipped, pop_clipped, how='left', op='within')

    # Remove Values with No Fresh Food Destination
    cleaned_joined_freshfood = joined_freshfood[~joined_freshfood['dest_name_full'].isnull()]

    # One frame per quintile code, Q1 (code 0) first.
    quintile_frames = [
        cleaned_joined_freshfood.loc[cleaned_joined_freshfood['pop_quintile'] == code]
        for code in range(5)
    ]
    available = [len(frame) for frame in quintile_frames]

    # Forward cascade: each of Q1..Q4 aims for 10 plus whatever earlier
    # quintiles could not supply. A shortfall is split evenly over the
    # quintiles that follow; the indivisible remainder is saved for Q5.
    # Shortfalls only move forward, so surplus in an earlier quintile is
    # never used to cover a later one.
    quotas = []
    carried_extra = 0  # extra rows every later quintile is asked for
    count = 0          # remainders that could not be split evenly
    for position, n_available in enumerate(available[:-1]):
        wanted = 10 + carried_extra
        if n_available < wanted:
            deficit = wanted - n_available
            later_quintiles = len(available) - 1 - position  # 4, 3, 2, 1
            carried_extra += deficit // later_quintiles
            count += deficit % later_quintiles
            quotas.append(n_available)
        else:
            quotas.append(wanted)

    # Q5 absorbs its own share plus the saved remainders. Previously the
    # short branch left q5_dests unassigned (NameError, or a stale value from
    # the previous city) and the remainder was added without checking that
    # Q5 actually had that many rows.
    q5_wanted = 10 + carried_extra + count
    if available[-1] < q5_wanted:
        print("fewer than 50 destinations exist")
        quotas.append(available[-1])
    else:
        quotas.append(q5_wanted)

    # Sample in Q1..Q5 order, as before.
    sample_dests = [frame.sample(quota) for frame, quota in zip(quintile_frames, quotas)]

    final_sample_dests = pd.concat(sample_dests)

    print(f"shape of dataframe for {city} below")
    print(final_sample_dests.shape)

In [None]:
# One entry per sampled destination, keyed "<city>_<n>"; becomes one CSV row.
filenames = {}

# Registering the filter is loop-invariant, so do it once instead of per city.
warnings.filterwarnings("ignore", "GeoSeries.notna", UserWarning)  # temp warning suppression

for city in cities:

    print(f"start {city}")

    # Access City-Specific Config (same location as before, built from process_folder)
    process_config_path = f"{process_folder}/configuration/{city}.json"

    # Retrieve Data Paths
    with open(process_config_path) as json_file:
        config = json.load(json_file)

    input_folder = os.path.join(process_folder, config['folder'])
    gpkg_input = os.path.join(input_folder, config['geopackagePath'])

    # Extract Data
    pop = gpd.read_file(gpkg_input, layer='pop_ghs_2015')
    dests = gpd.read_file(gpkg_input, layer='destinations')
    # na=False: rows with a missing name are treated as "not fresh food"
    # instead of producing a NaN mask entry.
    fresh_food = dests[dests['dest_name_full'].str.contains('Fresh Food / Market', na=False)]
    gdf_study_area = gpd.read_file(gpkg_input, layer="urban_study_region")
    study_area = gdf_study_area["geometry"].iloc[0]

    # Project Data onto the study region's CRS
    crs = gdf_study_area.crs
    if pop.crs != crs:
        pop = pop.to_crs(crs)
    if fresh_food.crs != crs:
        fresh_food = fresh_food.to_crs(crs)

    # Clip Data to Study Area
    pop_clipped = gpd.clip(pop, study_area)
    fresh_food_clipped = gpd.clip(fresh_food, study_area)

    # Create Population Quintiles (labels=False gives integer codes 0..4)
    pop_clipped['pop_quintile'] = pd.qcut(pop_clipped['pop_est'], 5, labels=False)

    # Join Pop Hexagons and Destinations
    # NOTE(review): newer GeoPandas releases rename `op` to `predicate`;
    # switch the keyword when the environment is upgraded.
    joined_freshfood = gpd.sjoin(fresh_food_clipped, pop_clipped, how='left', op='within')

    # Remove Values with No Fresh Food Destination
    cleaned_joined_freshfood = joined_freshfood[~joined_freshfood['dest_name_full'].isnull()]

    # One frame per quintile code, Q1 (code 0) first.
    quintile_frames = [
        cleaned_joined_freshfood.loc[cleaned_joined_freshfood['pop_quintile'] == code]
        for code in range(5)
    ]
    available = [len(frame) for frame in quintile_frames]

    # Forward cascade: each of Q1..Q4 aims for 10 plus whatever earlier
    # quintiles could not supply. A shortfall is split evenly over the
    # quintiles that follow; the indivisible remainder is saved for Q5.
    # Shortfalls only move forward, so surplus in an earlier quintile is
    # never used to cover a later one.
    quotas = []
    carried_extra = 0  # extra rows every later quintile is asked for
    count = 0          # remainders that could not be split evenly
    for position, n_available in enumerate(available[:-1]):
        wanted = 10 + carried_extra
        if n_available < wanted:
            deficit = wanted - n_available
            later_quintiles = len(available) - 1 - position  # 4, 3, 2, 1
            carried_extra += deficit // later_quintiles
            count += deficit % later_quintiles
            quotas.append(n_available)
        else:
            quotas.append(wanted)

    # Q5 absorbs its own share plus the saved remainders. Previously the
    # short branch left q5_dests unassigned (NameError, or a stale value from
    # the previous city) and the remainder was added without checking that
    # Q5 actually had that many rows.
    q5_wanted = 10 + carried_extra + count
    if available[-1] < q5_wanted:
        print("fewer than 50 destinations exist")
        quotas.append(available[-1])
    else:
        quotas.append(q5_wanted)

    # Sample in Q1..Q5 order, as before.
    sample_dests = [frame.sample(quota) for frame, quota in zip(quintile_frames, quotas)]

    final_sample_dests = pd.concat(sample_dests)

    # Reproject to EPSG:4326 so x/y are longitude/latitude. The epsg keyword
    # replaces the deprecated {'init': 'epsg:4326'} dictionary form.
    final_sample_dests = final_sample_dests.to_crs(epsg=4326)

    final_sample_dests['lat'] = final_sample_dests.geometry.y
    final_sample_dests['lon'] = final_sample_dests.geometry.x

    # Build one CSV row per sampled destination
    print(ox.ts(), f"add {city} to excel")

    # Walk the sampled rows. The earlier version iterated the frame itself,
    # which yields column names, and used a dict as the dictionary key.
    sampled_rows = zip(
        final_sample_dests['pop_quintile'],
        final_sample_dests['lat'],
        final_sample_dests['lon'],
    )
    for row_number, (quintile, latitude, longitude) in enumerate(sampled_rows):
        destination = f"{city}_{row_number}"  # unique, hashable row key

        # Screenshot base names; the date and assessment fields are left
        # blank in the CSV.
        filenames[destination] = {
            "City_Name": city,
            "Hexagon_Pop_Quintile": int(quintile),
            "Latitude": latitude,
            "Longitude": longitude,
            "Google_Maps_Date": "",
            "Google_Maps_Screenshot": f"{latitude}_{longitude}_{city}_google_maps_image",
            "Google_Satellite_Date": "",
            "Google_Satellite_Screenshot": f"{latitude}_{longitude}_{city}_google_satellite_image",
            "Google_Street_View_Date": "",
            "Google_Street_View_Screenshot": f"{latitude}_{longitude}_{city}_google_streetview_image",
            "Assessment": "",
        }

    print(ox.ts(), f"finshed names for {city}")

# turn the collected rows into a dataframe (one row per destination) and save to disk
df_filenames = pd.DataFrame(filenames).T
df_filenames.to_csv(filenames_filepath, index=True, encoding="utf-8")
print(ox.ts(), f'all done, saved filenames to disk at "{filenames_filepath}"')