In [2]:
import json
import os
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import osmnx as ox
import random
import numpy as np

%matplotlib inline

## Load and Clean Data

In [3]:
# Seed NumPy's global random generator up front so later draws from it are repeatable.
np.random.seed(24)

# Where the processing outputs live, and the configuration file for this region.
process_folder = "../../process"
process_config_path = "../../process/configuration/vic.json"

# Names associated with the population and destination data.
pop_col = ["pop_ghs_2015"]
dest_col = ["destinations"]

# CSV file that receives the generated screenshot filenames.
filenames_filepath = "./filenames.csv"

In [4]:
# Read the region configuration (JSON) into a dict.
with open(process_config_path) as config_file:
    config = json.load(config_file)

# The region's folder sits under the process folder; the GeoPackage path from the
# configuration is resolved relative to that folder.
input_folder = os.path.join(process_folder, config['folder'])
gpkg_input = os.path.join(input_folder, config['geopackagePath'])

In [5]:
# Load the population layer from the GeoPackage.
pop = gpd.read_file(
    gpkg_input,
    layer="pop_ghs_2015",
)

In [6]:
# Show the column names of the population layer.
pop.columns

Index(['index', 'pop_est', 'area_sqkm', 'pop_per_sqkm', 'intersection_count',
       'intersections_per_sqkm', 'count_fast_food', 'count_fresh_food_market',
       'count_restaurant', 'count_bar', 'count_pt_any', 'count_pub',
       'count_cafe', 'count_convenience', 'geometry'],
      dtype='object')

In [7]:
# Load the destinations layer from the same GeoPackage.
dests = gpd.read_file(
    gpkg_input,
    layer="destinations",
)

In [8]:
# Show the column names of the destinations layer.
dests.columns

Index(['dest_oid', 'osm_id', 'dest_name', 'dest_name_full', 'edge_ogc_fid',
       'n1', 'n2', 'n1_distance', 'n2_distance', 'match_point_distance',
       'geometry'],
      dtype='object')

In [9]:
# Keep only the destinations whose full category name mentions 'Fresh Food / Market'.
# regex=False matches the text literally instead of parsing it as a pattern;
# na=False treats rows with a missing 'dest_name_full' as non-matches, because a
# mask containing NaN cannot be used for boolean indexing.
is_fresh_food = dests['dest_name_full'].str.contains('Fresh Food / Market', regex=False, na=False)
fresh_food = dests[is_fresh_food]

In [10]:
# Load the urban study region; the geometry of its first row is used as the boundary.
gdf_study_area = gpd.read_file(gpkg_input, layer='urban_study_region')
study_region_geometries = gdf_study_area['geometry']
study_area = study_region_geometries.iloc[0]

In [11]:
# The study region's CRS is the reference; a layer is reprojected only when its CRS differs.
crs = gdf_study_area.crs
pop = pop if pop.crs == crs else pop.to_crs(crs)
fresh_food = fresh_food if fresh_food.crs == crs else fresh_food.to_crs(crs)

In [12]:
import warnings

# Temporary: ignore UserWarnings whose message begins with "GeoSeries.notna".
warnings.filterwarnings(action="ignore", message="GeoSeries.notna", category=UserWarning)

# Cut both layers down to the study area boundary.
pop_clipped = gpd.clip(pop, study_area)
fresh_food_clipped = gpd.clip(fresh_food, study_area)

## Create Density-Based Quintiles

In [13]:
# Split the population estimates into five equal-count bins; labels=False stores the
# integer bin number (0 for the lowest bin) instead of an interval label.
pop_clipped['pop_quintile'] = pd.qcut(x=pop_clipped['pop_est'], q=5, labels=False)

## Assign Hexagon Data to Destinations

In [14]:
# Attach to every destination the attributes of the population cell it lies within
# (left join: destinations without a containing cell are kept with missing values).
# GeoPandas renamed sjoin's `op` keyword to `predicate` (0.10) and later dropped `op`,
# so use the current spelling and fall back to `op` only on installations that
# predate the rename and reject the unknown keyword with TypeError.
try:
    joined_freshfood = gpd.sjoin(fresh_food_clipped, pop_clipped, how='left', predicate='within')
except TypeError:
    joined_freshfood = gpd.sjoin(fresh_food_clipped, pop_clipped, how='left', op='within')

In [15]:
# Show the (rows, columns) size of the joined table.
joined_freshfood.shape

(58, 27)

In [16]:
# Drop joined rows that have no full destination name.
has_dest_name = joined_freshfood['dest_name_full'].notna()
cleaned_joined_freshfood = joined_freshfood[has_dest_name]

In [17]:
# Show the columns available after the join and clean-up.
cleaned_joined_freshfood.columns

Index(['dest_oid', 'osm_id', 'dest_name', 'dest_name_full', 'edge_ogc_fid',
       'n1', 'n2', 'n1_distance', 'n2_distance', 'match_point_distance',
       'geometry', 'index_right', 'index', 'pop_est', 'area_sqkm',
       'pop_per_sqkm', 'intersection_count', 'intersections_per_sqkm',
       'count_fast_food', 'count_fresh_food_market', 'count_restaurant',
       'count_bar', 'count_pt_any', 'count_pub', 'count_cafe',
       'count_convenience', 'pop_quintile'],
      dtype='object')

## Select Destinations

In [18]:
# One list of row values per population quintile (labels 0-4 map to q1-q5).
q1_count_list, q2_count_list, q3_count_list, q4_count_list, q5_count_list = [
    cleaned_joined_freshfood[cleaned_joined_freshfood['pop_quintile'] == quintile_label].values.tolist()
    for quintile_label in range(5)
]

In [19]:
# First pass: aim for 10 destinations per quintile, or all of them when a quintile
# holds fewer than 10.
quintile_sizes = [
    len(q1_count_list),
    len(q2_count_list),
    len(q3_count_list),
    len(q4_count_list),
    len(q5_count_list),
]
first_pass = [min(available, 10) for available in quintile_sizes]
q1_dests, q2_dests, q3_dests, q4_dests, q5_dests = first_pass

# count: destinations allocated so far; good_quant: quintiles that reached the full 10.
count = sum(first_pass)
good_quant = sum(1 for available in quintile_sizes if available >= 10)

In [20]:
# Second pass: share the destinations still missing from the target of 50 among the
# quintiles that filled their first-pass quota of 10.
extra_dests = 50 - count

# good_quant is 0 when no quintile holds 10 destinations; guard the division so this
# cell does not stop with ZeroDivisionError in that case.
extra_dests_per_quant = extra_dests // good_quant if good_quant else 0

# Start from the first-pass results so that count_2 and good_quant_2 always exist,
# including when no second pass is needed (all 50 already allocated) or possible.
count_2 = count
good_quant_2 = good_quant

if extra_dests > 0 and good_quant > 0:
    second_pass_quota = 10 + extra_dests_per_quant
    # Whatever does not divide evenly is added to the last quintile (q5).
    # NOTE(review): this bonus is not checked against the number of destinations q5 holds.
    q5_bonus = extra_dests % good_quant
    quintile_sizes = [
        len(q1_count_list),
        len(q2_count_list),
        len(q3_count_list),
        len(q4_count_list),
        len(q5_count_list),
    ]

    second_pass = []
    good_quant_2 = 0
    for position, available in enumerate(quintile_sizes):
        if available < second_pass_quota:
            # Too small for the raised quota: take everything the quintile has.
            second_pass.append(available)
            continue
        is_q5 = position == len(quintile_sizes) - 1
        second_pass.append(second_pass_quota + (q5_bonus if is_q5 else 0))
        good_quant_2 += 1

    q1_dests, q2_dests, q3_dests, q4_dests, q5_dests = second_pass
    count_2 = sum(second_pass)

In [21]:
# Third pass: if the second pass still left destinations unassigned, raise the quota
# once more for the quintiles that met the second-pass quota.
extra_dests_2 = 50 - count_2

# good_quant_2 is 0 when no quintile could meet the second-pass quota; guard the
# division so this cell does not stop with ZeroDivisionError.
extra_dests_per_quant_2 = extra_dests_2 // good_quant_2 if good_quant_2 else 0

# Default to the second-pass results so count_3 is defined on every path
# (it equals 50 when nothing is left to distribute).
count_3 = count_2
good_quant_3 = good_quant_2

if extra_dests_2 > 0 and good_quant_2 > 0:
    third_pass_quota = 10 + extra_dests_per_quant + extra_dests_per_quant_2
    # q5 receives the remainders of both redistribution rounds.
    # NOTE(review): this bonus is not checked against the number of destinations q5 holds.
    q5_bonus = (extra_dests % good_quant if good_quant else 0) + (extra_dests_2 % good_quant_2)
    quintile_sizes = [
        len(q1_count_list),
        len(q2_count_list),
        len(q3_count_list),
        len(q4_count_list),
        len(q5_count_list),
    ]

    third_pass = []
    good_quant_3 = 0
    for position, available in enumerate(quintile_sizes):
        if available < third_pass_quota:
            # Too small for the raised quota: take everything the quintile has.
            third_pass.append(available)
            continue
        is_q5 = position == len(quintile_sizes) - 1
        third_pass.append(third_pass_quota + (q5_bonus if is_q5 else 0))
        good_quant_3 += 1

    q1_dests, q2_dests, q3_dests, q4_dests, q5_dests = third_pass
    count_3 = sum(third_pass)

In [22]:
# Final reconciliation of the per-quintile sample sizes against the target of 50.
quintile_sizes = [
    len(q1_count_list),
    len(q2_count_list),
    len(q3_count_list),
    len(q4_count_list),
    len(q5_count_list),
]
requested = [q1_dests, q2_dests, q3_dests, q4_dests, q5_dests]

# The remainder bonus given to q5 in the earlier passes is not checked against what
# q5 holds. Cap every request at the quintile's size, since sampling more rows than
# exist (without replacement) raises ValueError.
final_allocation = [min(wanted, available) for wanted, available in zip(requested, quintile_sizes)]

# Destinations still missing from the target, measured on the capped allocation.
extra_dests_3 = 50 - sum(final_allocation)

# Hand the leftovers out one at a time, starting with q5 (which is where a single
# leftover went before), and only to quintiles that still have unselected destinations.
still_missing = extra_dests_3
while still_missing > 0:
    open_positions = [
        position
        for position in reversed(range(len(final_allocation)))
        if final_allocation[position] < quintile_sizes[position]
    ]
    if not open_positions:
        # Every quintile is exhausted: fewer than 50 destinations exist in total.
        break
    for position in open_positions:
        if still_missing == 0:
            break
        final_allocation[position] += 1
        still_missing -= 1

q1_dests, q2_dests, q3_dests, q4_dests, q5_dests = final_allocation

if still_missing != 0:
    print('did not distribute 50 destinations to quitiles')

In [23]:
# One frame of candidate destinations per population quintile (labels 0-4 map to q1-q5).
q1_count, q2_count, q3_count, q4_count, q5_count = [
    cleaned_joined_freshfood.loc[cleaned_joined_freshfood['pop_quintile'] == quintile_label]
    for quintile_label in range(5)
]

In [24]:
# Draw the allocated number of destinations from each quintile, q1 first so the
# random draws happen in the same order.
candidates_and_sizes = [
    (q1_count, q1_dests),
    (q2_count, q2_dests),
    (q3_count, q3_dests),
    (q4_count, q4_dests),
    (q5_count, q5_dests),
]
q1_sample_dests, q2_sample_dests, q3_sample_dests, q4_sample_dests, q5_sample_dests = [
    candidates.sample(n=sample_size) for candidates, sample_size in candidates_and_sizes
]

In [25]:
# Gather the five per-quintile samples, q1 first.
sample_dests = [
    q1_sample_dests,
    q2_sample_dests,
    q3_sample_dests,
    q4_sample_dests,
    q5_sample_dests,
]

In [26]:
# Stack the per-quintile samples into a single table.
final_sample_dests = pd.concat(sample_dests)

In [27]:
# Discard the existing row labels and renumber the rows from 0.
final_sample_dests = final_sample_dests.reset_index(drop=True)

In [28]:
# Preview the first sampled destination.
final_sample_dests.head(1)

Unnamed: 0,dest_oid,osm_id,dest_name,dest_name_full,edge_ogc_fid,n1,n2,n1_distance,n2_distance,match_point_distance,...,intersections_per_sqkm,count_fast_food,count_fresh_food_market,count_restaurant,count_bar,count_pt_any,count_pub,count_cafe,count_convenience,pop_quintile
0,66,315822627,fresh_food_market,Fresh Food / Market,5133,3270502273,3270502271,2,8,18,...,98.648515,,2.0,,,,,,,3


In [29]:
# Reproject to EPSG:4326 so the point coordinates can be read as longitude (x) and
# latitude (y). The {'init': 'epsg:4326'} dict form is deprecated and emits a
# FutureWarning, so the EPSG code is passed directly instead.
final_sample_dests = final_sample_dests.to_crs(epsg=4326)

  return _prepare_from_string(" ".join(pjargs))


In [30]:
# Store each point's y coordinate as 'lat' and its x coordinate as 'lon'.
sample_points = final_sample_dests.geometry
final_sample_dests['lat'] = sample_points.y
final_sample_dests['lon'] = sample_points.x

In [31]:
from collections import defaultdict

In [32]:
cities = ['vic']
filenames = {}

# Build one record per sampled destination with the names of the screenshots to take.
# NOTE(review): records are keyed by the destination's row index alone, so with more
# than one entry in `cities` later cities would overwrite earlier ones.
for city in cities:
    for index, row in final_sample_dests.iterrows():
        latitude = row['lat']
        longitude = row['lon']
        # All three screenshot names share the "<lat>_<lon>_<city>" stem.
        name_stem = f"{latitude}_{longitude}_{city}"

        # The date and assessment fields are written as empty strings.
        filenames[index] = {
            "City_Name": city,
            "Hexagon_Pop_Quintile": row['pop_quintile'],
            "Latitude": latitude,
            "Longitude": longitude,
            "Google_Maps_Date": "",
            "Google_Maps_Screenshot": f"{name_stem}_google_maps_image",
            "Google_Satellite_Date": "",
            "Google_Satellite_Screenshot": f"{name_stem}_google_satellite_image",
            "Google_Street_View_Date": "",
            "Google_Street_View_Screenshot": f"{name_stem}_google_street_view_image",
            "Assessment": "",
        }

    # Report progress once per city, inside the city loop.
    print(ox.ts(), f"finshed names for {city}")

# turn the records into a dataframe (one row per destination) and save to disk
df_filenames = pd.DataFrame(filenames).T
df_filenames.to_csv(filenames_filepath, index=True, encoding="utf-8")
print(ox.ts(), f'all done, saved filenames to disk at "{filenames_filepath}"')

2020-10-26 16:29:23 finshed names for vic
2020-10-26 16:29:23 all done, saved filenames to disk at "./filenames.csv"


# OLD: Earlier Version of the Destination Allocation

In [None]:
# Earlier allocation attempt; this cell has no execution count.
# It walks the quintiles in order: a quintile with fewer destinations than its quota
# keeps everything it has and hands its shortfall, split evenly, to the quintiles
# after it. Remainders of those splits accumulate in `count` and are added to q5.
# NOTE(review): running this cell reassigns `count` and q1_dests..q4_dests (and
# q5_dests when q5 is large enough).
count = 0
if len(q1_count_list) < 10:
    # q1 is short: its deficit is split across the 4 remaining quintiles.
    q1_sampledeficit = 10 - len(q1_count_list)
    if q1_sampledeficit % 4 == 0:
        count = count
    else:
        count = count + (q1_sampledeficit % 4)
    extra_q1 = (int(q1_sampledeficit/4))
    q1_dests = len(q1_count_list) 
else:
    extra_q1 = 0
    q1_dests = 10
if len(q2_count_list) < (extra_q1 + 10):
    # q2 is short of its raised quota: split its deficit across the 3 remaining quintiles.
    q2_sampledeficit = (extra_q1 + 10) - len(q2_count_list)
    if q2_sampledeficit % 3 == 0:
        count = count
    else:
        count = count +(q2_sampledeficit % 3)
    extra_q2 = (int(q2_sampledeficit/3))
    q2_dests = len(q2_count_list)
else:
    extra_q2 = 0
    q2_dests = 10 + extra_q1
if len(q3_count_list) < (extra_q1 + extra_q2 + 10):
    # q3 is short of its raised quota: split its deficit across the 2 remaining quintiles.
    q3_sampledeficit = (extra_q1 + extra_q2 + 10) - len(q3_count_list)
    if q3_sampledeficit % 2 == 0:
        count = count
    else:
        count = count + (q3_sampledeficit % 2)
    extra_q3 = (int(q3_sampledeficit/2))
    q3_dests = len(q3_count_list)
else:
    extra_q3 = 0
    q3_dests = 10 + extra_q1 + extra_q2
if len(q4_count_list) < (extra_q1 + extra_q2 + extra_q3 + 10):
    # q4 is short of its raised quota: its whole deficit is passed on to q5.
    q4_sampledeficit = (extra_q1 + extra_q2 + extra_q3 + 10) - len(q4_count_list)
    extra_q4 = (int(q4_sampledeficit))
    q4_dests = len(q4_count_list)
else:
    extra_q4 = 0
    q4_dests = 10 + extra_q1 + extra_q2 + extra_q3
if len(q5_count_list) < (extra_q1 + extra_q2 + extra_q3 + extra_q4 + 10):
    # NOTE(review): only a message is printed here; q5_dests is not assigned in this branch.
    print("fewer than 50 destinations exist")
else:
    # q5 takes its quota plus the accumulated remainders.
    q5_dests = 10 + extra_q1 + extra_q2 + extra_q3 + extra_q4 + count