# Generate test data for City of Cape Town demand estimation

 * Author: ejwillemse
 * Date: 2022-03-014

In [2]:
%reload_kedro
import pandas as pd
import geopandas as gpd
import numpy as np
from GPSOdyssey import Kepler

# Raise pandas' display limits so wide/long frames are shown without truncation.
for _display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(_display_option, 1000)

from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

2022-05-17 10:44:39,377 - kedro.framework.session.store - INFO - `read()` not implemented for `BaseSessionStore`. Assuming empty store.
2022-05-17 10:44:39,443 - root - INFO - ** Kedro project Demand estimation and waste collection routing optimisation for the City of Cape Town
2022-05-17 10:44:39,444 - root - INFO - Defined global variable `context`, `session` and `catalog`
2022-05-17 10:44:39,453 - root - INFO - Registered line magic `run_viz`


## Load data

In [3]:
# Take the data catalog from the Kedro `context` global and load the dataset
# registered as "cape_town_geojson" (a GeoJSONDataSet per the recorded log).
# NOTE(review): the `%reload_kedro` startup log reports that `catalog` is already
# defined as a global, so this rebinding looks redundant — confirm before removing.
catalog = context.catalog
cape_town_boundary = catalog.load("cape_town_geojson")

2022-05-17 10:44:41,944 - kedro.io.data_catalog - INFO - Loading data from `cape_town_geojson` (GeoJSONDataSet)...


In [75]:
# Load the "households" dataset (a CSVDataSet per the recorded log). Later cells
# read its "id" column and look up "parcelLon"/"parcelLat" coordinate columns.
households = catalog.load("households")

2022-04-04 22:57:23,859 - kedro.io.data_catalog - INFO - Loading data from `households` (CSVDataSet)...


In [None]:
# Load the "population" dataset from the catalog.
# NOTE(review): this cell has no execution count and `population` is not referenced
# by any other cell visible here — confirm whether it is still needed.
population = catalog.load("population")

## Generate random demand

In [42]:
# Synthetic weekly waste demand: one row per household id with a mass in kg drawn
# from a normal distribution (mean 7, standard deviation 2).
# A locally seeded generator keeps the generated test data identical across re-runs
# without touching NumPy's global random state.
rng = np.random.default_rng(42)
waste_random = households[["id"]].copy()
waste_random["wasteInKgPerWeek"] = rng.normal(7, 2, len(waste_random))
# Floor the waste column at 0.01 kg. The previous `.loc[mask] = 0.01` had no column
# selector, so it overwrote *every* column of the matching rows — including "id" —
# which corrupted the household key for those rows.
waste_random["wasteInKgPerWeek"] = waste_random["wasteInKgPerWeek"].clip(lower=0.01)

In [43]:
# Persist the synthetic per-household waste table under the "waste_generation"
# catalog entry (a CSVDataSet per the recorded log).
catalog.save("waste_generation", waste_random)

2022-04-04 22:49:43,261 - kedro.io.data_catalog - INFO - Saving data to `waste_generation` (CSVDataSet)...


## Load gap zones

In [26]:
# Load the "gap_zones" dataset (a GeoJSONDataSet per the recorded log); a later
# cell filters it by its "OBJECTID" column.
gap_zones = catalog.load("gap_zones")

2022-04-04 22:47:23,471 - kedro.io.data_catalog - INFO - Loading data from `gap_zones` (GeoJSONDataSet)...


## Assign gap-zones to parcels

In [27]:
%%time
# Turn the households table into a GeoDataFrame with one point per row, built from
# the "parcelLon" (x) and "parcelLat" (y) columns and tagged as EPSG:4326.
# NOTE(review): the recorded output of this cell is `KeyError: 'parcelLon'`, i.e. the
# households table loaded at execution time had no such column — confirm the
# coordinate column names against the "households" dataset before relying on this.
households_geojson = gpd.GeoDataFrame(
    households,
    geometry=gpd.points_from_xy(households["parcelLon"], households["parcelLat"]),
    crs="EPSG:4326",
)

KeyError: 'parcelLon'

In [73]:
# Write the household points to the "households_geojson" catalog entry
# (a GeoJSONDataSet per the recorded log).
# NOTE(review): the recorded output ends in KeyboardInterrupt, so the saved file may
# be incomplete — TODO confirm the save finished in a later run.
catalog.save("households_geojson", households_geojson)

2022-03-04 12:30:30,071 - kedro.io.data_catalog - INFO - Saving data to `households_geojson` (GeoJSONDataSet)...


KeyboardInterrupt: 

In [88]:
# Keep only the zone row(s) whose "OBJECTID" is in the hard-coded list (currently
# just 24645); this subset is what the later spatial join uses.
gap_zone_sample = gap_zones.loc[gap_zones["OBJECTID"].isin([24645])]

# Build a Kepler map with a single "zone" layer and render it.
# NOTE(review): the map is given the full `gap_zones` table, not `gap_zone_sample` —
# presumably to browse all zones when picking an OBJECTID; confirm this is intended.
map_zone = Kepler(data={"zone": gap_zones})
map_zone.get_render()

2022-03-04 12:36:59,330 - root - INFO - Start preparation of render parameters...
2022-03-04 12:36:59,332 - root - INFO - No map configs specified!
2022-03-04 12:36:59,341 - root - INFO - Next columns are converted to string: ['IsMetroLM', 'NSDF_URB_N', 'SA_FTT_Cat', 'MunCode16', 'FuncTownAr', 'ProvCode16', 'MESO_ID', 'DistCode16', 'DistName16', 'MunName16', 'SA_FTT_Typ']
2022-03-04 12:36:59,343 - root - INFO - Start renderding KeplerGL map...
User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'zone':     OBJECTID     MESO_ID ProvCode16 MunCode16          MunName16 DistCode16  \
0      2…

In [90]:
%%time
# Left spatial join with the sampled zone(s) on the left and the household points on
# the right; the joined frame carries the households' "id" column, which the
# sub-sampling cell uses to select households.
households_gap_join = gpd.sjoin(
    gap_zone_sample,
    households_geojson,
    how="left",
)

CPU times: user 33.3 s, sys: 1.51 s, total: 34.8 s
Wall time: 36.6 s


## Save sub-sample

In [95]:
# Select the households whose "id" appears in the spatial-join result, then show
# the (rows, columns) shape of the selection as the cell output.
in_sampled_zone = households_geojson["id"].isin(households_gap_join["id"])
households_geojson_sample = households_geojson.loc[in_sampled_zone]
households_geojson_sample.shape

(13024, 15)

In [101]:
# Save the household sub-sample to the "population_sample" catalog entry
# (a GeoJSONDataSet per the recorded log).
# NOTE(review): the frame written holds households, while the entry is named
# "population_sample" — confirm the dataset name is the intended target.
catalog.save("population_sample", households_geojson_sample)

2022-03-04 12:43:11,419 - kedro.io.data_catalog - INFO - Saving data to `population_sample` (GeoJSONDataSet)...
