In [1]:
import json
import os
import random
import warnings

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
import pandas as pd

%matplotlib inline

## Load and Clean Data

In [2]:
# --- Notebook configuration ---------------------------------------------
# Seed NumPy's global random generator once, before any other work is done.
np.random.seed(seed=24)

# Root of the processing outputs and the Adelaide configuration file.
process_folder = "../../process"
process_config_path = "../../process/configuration/adelaide.json"

# Names associated with the population and destination data.
pop_col = ["pop_ghs_2015"]
dest_col = ["destinations"]

# Path of the CSV written by the final cell of the notebook.
filenames_filepath = "./filenames.csv"

In [3]:
# Load the city configuration. The file is decoded as UTF-8 explicitly so the
# result does not depend on the platform's default text encoding.
with open(process_config_path, encoding="utf-8") as json_file:
    config = json.load(json_file)

# The configuration names the city's folder inside the process folder ...
input_folder = os.path.join(process_folder, config['folder'])

# ... and the geopackage path relative to that folder.
gpkg_input = os.path.join(input_folder, config['geopackagePath'])

In [4]:
# Read the 'pop_ghs_2015' layer from the geopackage.
# NOTE(review): presumably this is the hexagon population grid that supplies
# `pop_est` further down — confirm against the geopackage schema.
pop = gpd.read_file(
    gpkg_input,
    layer="pop_ghs_2015",
)

In [5]:
# Read the 'destinations' layer from the same geopackage.
dests = gpd.read_file(
    gpkg_input,
    layer="destinations",
)

In [6]:
# Keep only the destinations labelled as fresh food / market.
# regex=False matches the label as literal text; na=False treats a missing
# name as "no match" instead of producing a NaN entry in the mask, which
# could not be used to index the frame.
fresh_food = dests[
    dests['dest_name_full'].str.contains('Fresh Food / Market', regex=False, na=False)
]

In [7]:
# Read the urban study region layer.
gdf_study_area = gpd.read_file(
    gpkg_input,
    layer="urban_study_region",
)

# Only the first row's geometry is used as the study area; any further rows
# in the layer are ignored.
study_area = gdf_study_area["geometry"].iloc[0]

In [8]:
# The study region's CRS is the common reference; a layer is reprojected
# only when its CRS differs from it.
crs = gdf_study_area.crs
pop = pop.to_crs(crs) if pop.crs != crs else pop
fresh_food = fresh_food.to_crs(crs) if fresh_food.crs != crs else fresh_food

In [9]:
# Clip both layers to the study-area geometry.
# The "GeoSeries.notna" UserWarning is silenced only for the duration of the
# clip calls: catch_warnings() restores the previous warning filters on exit,
# so the suppression does not leak into the rest of the notebook.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", "GeoSeries.notna", UserWarning)
    pop_clipped = gpd.clip(pop, study_area)
    fresh_food_clipped = gpd.clip(fresh_food, study_area)

## Assign Hexagon-Data to Destinations

In [10]:
# Attach the attributes of the containing population polygon to every
# fresh-food destination. how="left" keeps each destination even when no
# polygon contains it (its joined columns are then empty).
# NOTE(review): newer geopandas releases rename `op` to `predicate` —
# confirm against the installed version before upgrading.
joined_freshfood = gpd.sjoin(
    fresh_food_clipped,
    pop_clipped,
    how="left",
    op="within",
)

## Create Density-Based Quintiles

In [11]:
# Order destinations from lowest to highest `pop_est`.
# NOTE(review): rows with a missing `pop_est` presumably sort last and would
# therefore land in the top group — confirm whether they should be dropped.
ordered_joined_freshfood = joined_freshfood.sort_values(by="pop_est", ascending=True)

In [12]:
# Split the ordered destinations into five consecutive, near-equal groups.
# The split is computed on row positions and applied with .iloc instead of
# handing the frame itself to np.array_split: the group sizes are identical,
# but NumPy no longer has to swap axes on a DataFrame (which newer pandas
# deprecates), and .copy() makes each group an independent frame rather than
# a slice of the sorted one.
split_joined_freshfood = [
    ordered_joined_freshfood.iloc[positions].copy()
    for positions in np.array_split(np.arange(len(ordered_joined_freshfood)), 5)
]

In [13]:
# Give the five groups their own names, in the order they were split
# (q1 is the first group, q5 the last).
q1_dests, q2_dests, q3_dests, q4_dests, q5_dests = split_joined_freshfood

In [14]:
# Label every group with its rank (1-5) in a 'quantile' column.
# .assign returns a new frame instead of writing a column into an object
# that may be a slice of the sorted frame, which avoids pandas'
# SettingWithCopyWarning and keeps the cell safe to re-run.
q1_dests = q1_dests.assign(quantile=1)
q2_dests = q2_dests.assign(quantile=2)
q3_dests = q3_dests.assign(quantile=3)
q4_dests = q4_dests.assign(quantile=4)
q5_dests = q5_dests.assign(quantile=5)

## Select Destinations

In [15]:
# Draw 10 destinations from each group, in q1..q5 order.
# No random_state is passed to sample().
# NOTE(review): the draws presumably come from NumPy's global generator
# seeded in the configuration cell — confirm; re-running this cell on its own
# would then give a different sample.
(
    q1_sample_dests,
    q2_sample_dests,
    q3_sample_dests,
    q4_sample_dests,
    q5_sample_dests,
) = [
    quintile_dests.sample(n=10)
    for quintile_dests in (q1_dests, q2_dests, q3_dests, q4_dests, q5_dests)
]

In [16]:
# Collect the five per-group samples, lowest group first.
sample_dests = [
    q1_sample_dests,
    q2_sample_dests,
    q3_sample_dests,
    q4_sample_dests,
    q5_sample_dests,
]

In [17]:
# Stack the per-group samples row-wise into a single frame.
final_sample_dests = pd.concat(sample_dests, axis=0)

In [18]:
# Reproject the sampled destinations to EPSG:4326 (longitude/latitude).
# The EPSG code is passed directly; the former {'init': 'epsg:4326'} dict is
# the deprecated PROJ "init" syntax and makes pyproj emit a warning.
final_sample_dests = final_sample_dests.to_crs(epsg=4326)

  return _prepare_from_string(" ".join(pjargs))


In [19]:
# Expose the point coordinates as plain columns: y becomes 'lat' and
# x becomes 'lon' (added in that order).
sample_points = final_sample_dests.geometry
final_sample_dests['lat'] = sample_points.y
final_sample_dests['lon'] = sample_points.x

In [24]:
# Index the sample by OSM id. The guard makes the cell idempotent: once
# 'osm_id' has become the index it is no longer a column, so calling
# set_index('osm_id') a second time would raise a KeyError.
if final_sample_dests.index.name != 'osm_id':
    final_sample_dests = final_sample_dests.set_index('osm_id')

In [25]:
# Preview the first rows only instead of rendering the whole sample frame.
final_sample_dests.head(10)

Unnamed: 0_level_0,dest_oid,dest_name,dest_name_full,edge_ogc_fid,n1,n2,n1_distance,n2_distance,match_point_distance,geometry,...,count_restaurant,count_convenience,count_bar,count_fast_food,count_food_court,count_fresh_food_market,count_pt_any,quantile,lat,lon
osm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
554005639,387,fresh_food_market,Fresh Food / Market,117664,5394607770,5394607769,44,43,15,POINT (138.51536 -35.14344),...,,,,,,1.0,2.0,1,-35.143438,138.51536
4836645274,305,fresh_food_market,Fresh Food / Market,230128,3272076757,3272063320,281,94,58,POINT (138.58036 -34.86515),...,,,,,,1.0,,1,-34.865148,138.580356
2424960625,350,fresh_food_market,Fresh Food / Market,116821,1194009719,1194009719,44,44,7,POINT (138.63968 -34.76358),...,,1.0,,1.0,,1.0,,1,-34.763579,138.63968
5330994352,66,fresh_food_market,Fresh Food / Market,133298,893101717,893101711,10,60,27,POINT (138.52096 -35.10699),...,,,,,,1.0,,1,-35.106989,138.52096
2029087875,71,fresh_food_market,Fresh Food / Market,124730,2919983042,2919983007,35,92,39,POINT (138.51633 -35.07838),...,,,,,,1.0,1.0,1,-35.078378,138.516327
5344476192,18,fresh_food_market,Fresh Food / Market,18952,5414495749,892366909,72,269,13,POINT (138.49892 -35.14358),...,,,,,,2.0,,1,-35.143583,138.498924
4835897220,34,fresh_food_market,Fresh Food / Market,191114,632219155,903731072,80,35,8,POINT (138.49080 -34.88076),...,,,,,,2.0,,1,-34.880764,138.490802
4504321790,332,fresh_food_market,Fresh Food / Market,144749,6983846555,2632190448,36,900,25,POINT (138.69151 -34.83023),...,,,,,,1.0,,1,-34.830226,138.691505
653722296,424,fresh_food_market,Fresh Food / Market,23076,6126342899,6126342897,32,34,24,POINT (138.52331 -34.93503),...,,,,,,1.0,,1,-34.935031,138.523309
5411249887,73,fresh_food_market,Fresh Food / Market,224636,2643422143,2643422139,43,26,25,POINT (138.54054 -35.08980),...,,,,,,1.0,,1,-35.089796,138.540536


In [20]:
cities = ['adelaide']
filenames = {}

# Build one record per sampled destination, keyed by the frame's index label
# (the OSM id once the frame has been indexed on it).
# NOTE(review): every city iterates the same `final_sample_dests` and records
# are keyed by the index label alone, so a second entry in `cities` would
# overwrite the first city's records — revisit before adding cities.
for city in cities:
    for index, row in final_sample_dests.iterrows():
        latitude = row['lat']
        longitude = row['lon']

        # Screenshot names are derived from the coordinates and the city; the
        # date, assessment and comment fields are written as empty strings.
        filenames[index] = {
            "Hexagon_Pop_Quintile": row['quantile'],
            "City_Name": city,
            "Latitude": latitude,
            "Longitude": longitude,
            "Google_Maps_Date": "",
            "Google_Maps_Screenshot": f"{latitude}_{longitude}_{city}_google_maps_image",
            "Google_Satellite_Date": "",
            "Google_Satellite_Screenshot": f"{latitude}_{longitude}_{city}_google_satellite_image",
            "Google_Street_View_Date": "",
            "Google_Street_View_Screenshot": f"{latitude}_{longitude}_{city}_google_street_view_image",
            "Assessment": "",
            "Comments": "",
        }

    # Report progress once per city. Inside the loop this also no longer
    # fails with a NameError on `city` when `cities` is empty.
    print(ox.ts(), f"finshed names for {city}")

# Turn the records into a dataframe (one row per destination) and save to disk.
df_filenames = pd.DataFrame(filenames).T
df_filenames.to_csv(filenames_filepath, index=True, encoding="utf-8")
print(ox.ts(), f'all done, saved filenames to disk at "{filenames_filepath}"')

2020-11-01 18:57:03 finshed names for adelaide
2020-11-01 18:57:03 all done, saved filenames to disk at "./filenames.csv"
