In [1]:
import json
import os
import random
import warnings

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
import pandas as pd

# Jupyter line magic: render matplotlib figures inline in the cell output.
%matplotlib inline

## Load and Clean Data

In [2]:
# --- Notebook configuration ---------------------------------------------------
# Fix NumPy's global random state first. The later `.sample()` calls pass no
# random_state of their own.
np.random.seed(24)

# Processing workspace and the JSON configuration file that is opened below.
process_folder = "../../process"
process_config_path = "../../process/configuration/vic.json"

# Single-element name lists; not referenced by the cells visible in this notebook.
pop_col = ["pop_ghs_2015"]
dest_col = ["destinations"]

# Output path of the CSV written by the last cell.
filenames_filepath = "./filenames.csv"

In [3]:
# Read the JSON processing configuration. JSON text is UTF-8 by specification,
# so the encoding is stated explicitly instead of relying on the platform default.
with open(process_config_path, encoding="utf-8") as json_file:
    config = json.load(json_file)

# Folder named by the configuration's 'folder' entry, inside the workspace.
input_folder = os.path.join(process_folder, config['folder'])

# GeoPackage path; the layers read in the following cells all come from it.
gpkg_input = os.path.join(input_folder, config['geopackagePath'])

In [4]:
# Load the 'pop_ghs_2015' layer of the input GeoPackage.
pop = gpd.read_file(
    gpkg_input,
    layer="pop_ghs_2015",
)

In [5]:
# Load the 'destinations' layer of the input GeoPackage.
dests = gpd.read_file(
    gpkg_input,
    layer="destinations",
)

In [6]:
# Keep only the destinations whose full name contains the fresh-food label.
# regex=False matches the label literally. na=False treats a missing name as
# "no match"; without it a NaN in the mask makes the boolean indexing raise.
is_fresh_food = dests['dest_name_full'].str.contains(
    'Fresh Food / Market', regex=False, na=False
)
fresh_food = dests[is_fresh_food]

In [7]:
# Read the study-region layer and take its first geometry as the clipping shape.
gdf_study_area = gpd.read_file(gpkg_input, layer="urban_study_region")
study_region_geoms = gdf_study_area["geometry"]
# NOTE(review): only row 0 is used; confirm the layer holds a single feature.
study_area = study_region_geoms.iloc[0]

In [8]:
# The study area's CRS is the reference; a layer is reprojected only when its
# own CRS differs from it.
crs = gdf_study_area.crs
pop = pop.to_crs(crs) if pop.crs != crs else pop
fresh_food = fresh_food.to_crs(crs) if fresh_food.crs != crs else fresh_food

In [9]:
# Clip both layers to the study-area geometry. The "GeoSeries.notna"
# UserWarning is silenced only while the clipping runs: catch_warnings()
# restores the previous warning filters on exit, so the suppression really is
# temporary and does not hide the warning in later cells.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", "GeoSeries.notna", UserWarning)
    pop_clipped = gpd.clip(pop, study_area)
    fresh_food_clipped = gpd.clip(fresh_food, study_area)

## Assign Hexagon-Data to Destinations

In [10]:
# Attach the attributes of the containing population polygon to each clipped
# fresh-food destination. how='left' keeps every destination row.
# NOTE(review): newer geopandas releases rename `op` to `predicate`; confirm
# against the installed version before upgrading.
joined_freshfood = gpd.sjoin(
    left_df=fresh_food_clipped,
    right_df=pop_clipped,
    how='left',
    op='within',
)

## Create Density-Based Quintiles

In [11]:
# Order the joined destinations by the 'pop_est' column (default sort options).
ordered_joined_freshfood = joined_freshfood.sort_values(by='pop_est')

In [12]:
# Cut the sorted destinations into 5 consecutive, near-equal groups.
# The row positions are split (not the frame itself), so the group sizes are
# exactly those np.array_split would give, while each group is taken as an
# explicit independent copy. A later cell adds a column to every group, which
# must not act on a view of the sorted frame.
row_chunks = np.array_split(np.arange(len(ordered_joined_freshfood)), 5)
split_joined_freshfood = [
    ordered_joined_freshfood.iloc[rows].copy() for rows in row_chunks
]

In [13]:
# Give each of the five groups its own name (first group -> q1, last -> q5).
q1_dests, q2_dests, q3_dests, q4_dests, q5_dests = (
    split_joined_freshfood[position] for position in range(5)
)

In [14]:
# Write the group number (1-5) into a 'quantile' column of each group.
quintile_frames = (q1_dests, q2_dests, q3_dests, q4_dests, q5_dests)
for quintile_number, quintile_frame in enumerate(quintile_frames, start=1):
    quintile_frame['quantile'] = quintile_number

## Select Destinations

In [15]:
# Draw up to 10 destinations at random from every quintile. The request is
# capped at the group size because `.sample(n)` raises ValueError when n
# exceeds the number of rows. With 10 or more rows the call is unchanged.
# No random_state is passed, so the draws depend on NumPy's global random state.
n_per_quintile = 10
q1_sample_dests = q1_dests.sample(min(n_per_quintile, len(q1_dests)))
q2_sample_dests = q2_dests.sample(min(n_per_quintile, len(q2_dests)))
q3_sample_dests = q3_dests.sample(min(n_per_quintile, len(q3_dests)))
q4_sample_dests = q4_dests.sample(min(n_per_quintile, len(q4_dests)))
q5_sample_dests = q5_dests.sample(min(n_per_quintile, len(q5_dests)))

In [16]:
# Collect the per-quintile samples, lowest quintile first.
sample_dests = [
    q1_sample_dests,
    q2_sample_dests,
    q3_sample_dests,
    q4_sample_dests,
    q5_sample_dests,
]

In [17]:
# Stack the per-quintile samples into one table, keeping their row labels.
final_sample_dests = pd.concat(objs=sample_dests)

In [19]:
# Reproject the sampled destinations to EPSG:4326 so that geometry x/y can be
# read as longitude/latitude. The EPSG code is passed directly: the
# {'init': 'epsg:...'} dictionary form is deprecated and emits a warning.
final_sample_dests = final_sample_dests.to_crs(epsg=4326)

  return _prepare_from_string(" ".join(pjargs))


In [20]:
# Copy the point coordinates into plain columns: y -> 'lat', x -> 'lon'.
sample_points = final_sample_dests.geometry
final_sample_dests['lat'] = sample_points.y
final_sample_dests['lon'] = sample_points.x

In [22]:
# Build one row per (city, sampled destination) holding the screenshot file
# names and blank columns (dates, assessment, comments), then save the table
# as CSV.
cities = ['vic']

# Rows are collected in a list rather than a dict keyed by the row label:
# repeated labels, or more than one city, would overwrite earlier entries
# in a dict.
filename_records = []
record_labels = []  # row label of each sampled destination, reused as CSV index

for city in cities:
    for index, row in final_sample_dests.iterrows():
        latitude = row['lat']
        longitude = row['lon']
        # All three screenshot names share the "<lat>_<lon>_<city>" stem.
        name_stem = f"{latitude}_{longitude}_{city}"

        record_labels.append(index)
        filename_records.append({
            "Hexagon_Pop_Quintile": row['quantile'],
            "City_Name": city,
            "Latitude": latitude,
            "Longitude": longitude,
            "Google_Maps_Date": "",
            "Google_Maps_Screenshot": f"{name_stem}_google_maps_image",
            "Google_Satellite_Date": "",
            "Google_Satellite_Screenshot": f"{name_stem}_google_satellite_image",
            "Google_Street_View_Date": "",
            "Google_Street_View_Screenshot": f"{name_stem}_google_street_view_image",
            "Assessment": "",
            "Comments": "",
        })

    # Reported once per city, inside the loop, so `city` is always defined
    # here (also when `cities` is empty) and every city is reported.
    print(ox.ts(), f"finshed names for {city}")

# Turn the collected rows into a dataframe and save it to disk.
df_filenames = pd.DataFrame(filename_records, index=record_labels)
df_filenames.to_csv(filenames_filepath, index=True, encoding="utf-8")
print(ox.ts(), f'all done, saved filenames to disk at "{filenames_filepath}"')

2020-10-26 22:20:54 finshed names for vic
2020-10-26 22:20:55 all done, saved filenames to disk at "./filenames.csv"
