In [1]:
import json
import os
import random
import warnings

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
import pandas as pd

%matplotlib inline

In [2]:
# Study regions to process, in processing order. Each entry is used verbatim
# to build the name of that city's JSON configuration file.
cities = [
    "adelaide", "auckland", "baltimore", "bangkok",
    "barcelona", "belfast", "bern", "chennai",
    "mexico_city", "cologne", "ghent", "graz",
    "hanoi", "hong_kong", "lisbon", "melbourne",
    "odense", "olomouc", "sao_paulo", "phoenix",
    "seattle", "sydney", "valencia", "vic",
]

In [3]:
# Seed NumPy's global random generator so the random draws made further down
# are repeatable from one run to the next.
np.random.seed(24)

# Root of the processing tree that holds the per-city inputs.
process_folder = "../../process"

# Where the ground-truthing table is written at the end of the run.
filenames_filepath = "./groundtruthing.csv"

# Names of the population and destination layers.
pop_col = ["pop_ghs_2015"]
dest_col = ["destinations"]

In [4]:
# Sampling design: fresh-food destinations are ranked by the population
# estimate of the cell they fall in, cut into N_QUANTILES equal-sized bands,
# and SAMPLES_PER_QUANTILE destinations are drawn at random from every band.
N_QUANTILES = 5
SAMPLES_PER_QUANTILE = 10


def _sample_by_pop_quantile(joined, n_quantiles=N_QUANTILES, n_per_quantile=SAMPLES_PER_QUANTILE):
    """Rank rows by ``pop_est``, split them into bands and sample each band.

    Parameters
    ----------
    joined : GeoDataFrame
        Destinations carrying a ``pop_est`` column.
    n_quantiles : int
        Number of equal-sized bands to cut the ranked rows into.
    n_per_quantile : int
        Rows to draw from each band. A band holding fewer rows is returned
        in full instead of raising.

    Returns
    -------
    GeoDataFrame
        The sampled rows of all bands, concatenated from the lowest band to
        the highest, with an integer ``quantile`` column (1 = lowest band).

    Notes
    -----
    ``sample`` is called without ``random_state``, so the draws follow NumPy's
    global random state; bands are sampled in ascending order.
    """
    ordered = joined.sort_values('pop_est')

    # Split row *positions* and slice with iloc instead of handing the frame
    # itself to np.array_split: the band boundaries are the same, but every
    # band is a proper sub-frame rather than a slice we would assign into.
    position_chunks = np.array_split(np.arange(len(ordered)), n_quantiles)

    samples = []
    for quantile, positions in enumerate(position_chunks, start=1):
        # assign() returns a new frame, so the ranked frame is never mutated.
        band = ordered.iloc[positions].assign(quantile=quantile)
        if len(band) < n_per_quantile:
            print(f"quantile {quantile}: only {len(band)} destinations available, sampling all of them")
        samples.append(band.sample(min(n_per_quantile, len(band))))

    return pd.concat(samples)


# Register the filter once, before the loop, rather than on every iteration.
warnings.filterwarnings("ignore", "GeoSeries.notna", UserWarning)  # temp warning suppression

# NOTE(review): entries are keyed by osm_id alone, so an id that occurs more
# than once (within a city or across cities) overwrites the earlier entry.
filenames = {}

for city in cities:

    print(f"start {city}")

    # Build the config path from process_folder so it cannot drift from the
    # folder used for the data inputs below.
    process_config_path = os.path.join(process_folder, "configuration", f"{city}.json")
    with open(process_config_path) as json_file:
        config = json.load(json_file)

    input_folder = os.path.join(process_folder, config['folder'])
    gpkg_input = os.path.join(input_folder, config['geopackagePath'])

    pop = gpd.read_file(gpkg_input, layer='pop_ghs_2015')
    dests = gpd.read_file(gpkg_input, layer='destinations')

    # Literal (non-regex) match; rows with a missing name are treated as non-matches.
    is_fresh_food = dests['dest_name_full'].str.contains('Fresh Food / Market', regex=False, na=False)
    fresh_food = dests[is_fresh_food]

    gdf_study_area = gpd.read_file(gpkg_input, layer="urban_study_region")
    study_area = gdf_study_area["geometry"].iloc[0]

    # Bring both layers into the study region's CRS before any spatial operation.
    crs = gdf_study_area.crs
    if pop.crs != crs:
        pop = pop.to_crs(crs)
    if fresh_food.crs != crs:
        fresh_food = fresh_food.to_crs(crs)

    pop_clipped = gpd.clip(pop, study_area)
    fresh_food_clipped = gpd.clip(fresh_food, study_area)

    # how='left' keeps destinations that fall inside no population cell; their
    # pop_est is NaN, and sort_values puts NaN last, so they end up in the top
    # band. NOTE(review): confirm that is intended.
    # NOTE(review): newer geopandas releases spell the `op` keyword `predicate`.
    joined_freshfood = gpd.sjoin(fresh_food_clipped, pop_clipped, how='left', op='within')

    final_sample_dests = _sample_by_pop_quantile(joined_freshfood)

    # Use the epsg keyword instead of the deprecated {'init': 'epsg:4326'}
    # dict, which triggered a pyproj warning for every city.
    final_sample_dests = final_sample_dests.to_crs(epsg=4326)

    final_sample_dests['lat'] = final_sample_dests.geometry.y
    final_sample_dests['lon'] = final_sample_dests.geometry.x

    final_sample_dests = final_sample_dests.set_index('osm_id')

    print(f"{city} shape below")
    print(final_sample_dests.shape)

    # One ground-truthing record per sampled destination: where it is, which
    # population band it came from, the screenshot names to use, and blank
    # fields to be filled in by hand.
    for index, row in final_sample_dests.iterrows():
        latitude = row['lat']
        longitude = row['lon']

        filenames[index] = {
            "Hexagon_Pop_Quintile": row['quantile'],
            "City_Name": city,
            "Latitude": latitude,
            "Longitude": longitude,
            "Google_Maps_Date": "",
            "Google_Maps_Screenshot": f"{latitude}_{longitude}_{city}_google_maps_image",
            "Google_Satellite_Date": "",
            "Google_Satellite_Screenshot": f"{latitude}_{longitude}_{city}_google_satellite_image",
            "Google_Street_View_Date": "",
            "Google_Street_View_Screenshot": f"{latitude}_{longitude}_{city}_google_street_view_image",
            "Assessment": "",
            "Comments": "",
        }

    print(ox.ts(), f"finshed names for {city}")

# turn the collected records into a dataframe (one row per destination) and save to disk
df_filenames = pd.DataFrame(filenames).T
df_filenames.to_csv(filenames_filepath, index=True, encoding="utf-8")
print(ox.ts(), f'all done, saved filenames to disk at "{filenames_filepath}"')


start adelaide


  return _prepare_from_string(" ".join(pjargs))


adelaide shape below
(50, 29)
2020-11-02 04:32:31 finshed names for adelaide
start auckland


  return _prepare_from_string(" ".join(pjargs))


auckland shape below
(50, 29)
2020-11-02 04:44:55 finshed names for auckland
start baltimore


  return _prepare_from_string(" ".join(pjargs))


baltimore shape below
(50, 29)
2020-11-02 04:45:19 finshed names for baltimore
start bangkok


  return _prepare_from_string(" ".join(pjargs))


bangkok shape below
(50, 29)
2020-11-02 04:47:13 finshed names for bangkok
start barcelona


  return _prepare_from_string(" ".join(pjargs))


barcelona shape below
(50, 29)
2020-11-02 04:48:14 finshed names for barcelona
start belfast


  return _prepare_from_string(" ".join(pjargs))


belfast shape below
(50, 29)
2020-11-02 04:48:27 finshed names for belfast
start bern


  return _prepare_from_string(" ".join(pjargs))


bern shape below
(50, 29)
2020-11-02 04:48:30 finshed names for bern
start chennai


  return _prepare_from_string(" ".join(pjargs))


chennai shape below
(50, 29)
2020-11-02 04:48:48 finshed names for chennai
start mexico_city


  return _prepare_from_string(" ".join(pjargs))


mexico_city shape below
(50, 29)
2020-11-02 04:51:29 finshed names for mexico_city
start cologne


  return _prepare_from_string(" ".join(pjargs))


cologne shape below
(50, 29)
2020-11-02 04:51:59 finshed names for cologne
start ghent


  return _prepare_from_string(" ".join(pjargs))


ghent shape below
(50, 29)
2020-11-02 04:52:05 finshed names for ghent
start graz


  return _prepare_from_string(" ".join(pjargs))


graz shape below
(50, 28)
2020-11-02 04:52:09 finshed names for graz
start hanoi


  return _prepare_from_string(" ".join(pjargs))


hanoi shape below
(50, 29)
2020-11-02 04:53:50 finshed names for hanoi
start hong_kong


  return _prepare_from_string(" ".join(pjargs))


hong_kong shape below
(50, 29)
2020-11-02 05:03:21 finshed names for hong_kong
start lisbon


  return _prepare_from_string(" ".join(pjargs))


lisbon shape below
(50, 29)
2020-11-02 05:03:31 finshed names for lisbon
start melbourne


  return _prepare_from_string(" ".join(pjargs))


melbourne shape below
(50, 29)
2020-11-02 05:08:07 finshed names for melbourne
start odense


  return _prepare_from_string(" ".join(pjargs))


odense shape below
(50, 28)
2020-11-02 05:08:35 finshed names for odense
start olomouc


  return _prepare_from_string(" ".join(pjargs))


olomouc shape below
(50, 29)
2020-11-02 05:08:37 finshed names for olomouc
start sao_paulo


  return _prepare_from_string(" ".join(pjargs))


sao_paulo shape below
(50, 29)
2020-11-02 05:09:16 finshed names for sao_paulo
start phoenix


  return _prepare_from_string(" ".join(pjargs))


phoenix shape below
(50, 29)
2020-11-02 05:10:42 finshed names for phoenix
start seattle


  return _prepare_from_string(" ".join(pjargs))


seattle shape below
(50, 29)
2020-11-02 05:12:33 finshed names for seattle
start sydney


  return _prepare_from_string(" ".join(pjargs))


sydney shape below
(50, 29)
2020-11-02 06:06:58 finshed names for sydney
start valencia


  return _prepare_from_string(" ".join(pjargs))


valencia shape below
(50, 29)
2020-11-02 06:07:12 finshed names for valencia
start vic


  return _prepare_from_string(" ".join(pjargs))


vic shape below
(50, 28)
2020-11-02 06:07:15 finshed names for vic
2020-11-02 06:07:15 all done, saved filenames to disk at "./groundtruthing.csv"
