# Results combination and analysis

In [2]:
%%html
<style>
/* Float every rendered table to the left. */
table {float:left}
</style>

| Document info | |
| --- | --- | 
| Area of interest: | Cape Town |
| Planning type: | All REL type producers |
| Prepared by: | Waste Labs (wastelabs.co) |
| Prepared for: | Johan W. Joubert |
| Contact: | elias@wastelabs.co |

In [3]:
import csv
import glob
import io
import logging
import os
import subprocess
from datetime import datetime

import plotly.express as px
import plotly.io as pio
import requests

pio.renderers.default = "iframe"

import re

import boto3
import geopandas as gpd
import numpy as np
import pandas as pd
import requests
import yaml
from GPSOdyssey import Kepler
from IPython.core.interactiveshell import InteractiveShell

# Ipython configs
from IPython.display import HTML, display
from shapely.geometry import LineString

# Stretch the notebook's `.container` element to the full available width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Echo every top-level expression of a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# pandas display limits: up to 120 rows/columns, no width or column-width cap.
for option_name, option_value in (
    ("display.max_rows", 120),
    ("display.max_columns", 120),
    ("display.width", None),
    ("display.max_colwidth", None),
):
    pd.set_option(option_name, option_value)

## Find partition keys

In [4]:
# Load the three catalog entries; each one is indexed by scenario key below.
df_producer_geo_all = catalog.load("syn_pop_scenarios_local_20220601")
waste_all = catalog.load("waste_gen_scenarios_local_20220601")
results_all = catalog.load("resource_summary_table_20220601")

# Scenario keys are the partition names of the first dataset, skipping any
# name that starts with "." (presumably hidden files -- TODO confirm).
keys = [
    partition
    for partition in list(df_producer_geo_all)
    if partition[0] != "."
]

2022-06-03 00:21:08,515 - kedro.io.data_catalog - INFO - Loading data from `syn_pop_scenarios_local_20220601` (PartitionedDataSet)...
2022-06-03 00:21:08,529 - kedro.io.data_catalog - INFO - Loading data from `waste_gen_scenarios_local_20220601` (PartitionedDataSet)...
2022-06-03 00:21:08,543 - kedro.io.data_catalog - INFO - Loading data from `resource_summary_table_20220601` (PartitionedDataSet)...


## Extract and summarise results

In [5]:
# NOTE(review): `i` is not referenced by any other cell visible in this
# notebook -- looks like a leftover; confirm and delete the cell.
i = 0

In [6]:
# Collect the "Total" summary row of every scenario into a single table.
# Each entry of `results_all` is a zero-argument loader, hence the call `()`.
scenario_totals = []
for key in keys:
    results = results_all[key]()
    # `.assign` returns a new frame, so the scenario label is never written
    # into a slice of the loaded table (this avoids SettingWithCopyWarning).
    results = results.loc[results["Unnamed: 0"] == "Total"].assign(scenario=key)
    scenario_totals.append(results)

# `ignore_index=True` gives the combined frame a unique 0..n-1 index; without
# it every row keeps the row label its "Total" line had in the source table.
all_results = pd.concat(scenario_totals, ignore_index=True).drop(
    columns=["Unnamed: 0", "Collection day"]
)

# Missing values in the offload tonnage columns are replaced with 0 so the
# columns stay numeric and complete.
offload_columns = ["Tons disposed at @ Offload 3", "Tons disposed at @ Offload 2"]
all_results[offload_columns] = all_results[offload_columns].fillna(0)

## Store results

In [7]:
# Re-run the Kedro line magic; per its log output it (re)defines the
# `context`, `session` and `catalog` globals before the save below.
%reload_kedro
# Persist the combined table under the `resource_scenario_results_20220601`
# catalog entry.
catalog.save("resource_scenario_results_20220601", all_results)

2022-06-03 00:21:13,203 - kedro.framework.session.store - INFO - `read()` not implemented for `BaseSessionStore`. Assuming empty store.
2022-06-03 00:21:13,276 - root - INFO - ** Kedro project Demand estimation and waste collection routing optimisation for the City of Cape Town
2022-06-03 00:21:13,277 - root - INFO - Defined global variable `context`, `session` and `catalog`
2022-06-03 00:21:13,286 - root - INFO - Registered line magic `run_viz`
2022-06-03 00:21:13,287 - kedro.io.data_catalog - INFO - Saving data to `resource_scenario_results_20220601` (CSVDataSet)...


In [8]:
# Display the combined scenario totals table.
all_results

Unnamed: 0,Vehicle,Route,Offloads,Bins collected,Units served,Demand collected (ton),Time collecting (h),Time travelling (h),Time at treatment facility (h),Route duration (h),Traveling distance (km),Collecting distance (km),Total route distance (km),Electrical consumption (kWh),Tons disposed at @ Offload 2,Tons disposed at @ Offload 3,scenario
10,2,10,47,38603,38603,432.33,78.31,11.71,11.75,101.72,524.76,419.25,944.02,2116.72,176.14,256.18,households_001
10,2,10,47,38655,38655,432.51,78.39,11.63,11.75,101.72,520.93,419.04,939.96,2111.95,146.97,285.54,households_002
10,2,10,46,38714,38714,433.33,78.5,11.71,11.5,101.66,524.68,419.27,943.95,2118.14,148.68,284.64,households_003
10,2,10,46,38693,38693,433.33,78.46,11.86,11.5,101.75,530.67,419.02,949.69,2124.84,162.01,271.33,households_004
10,2,10,47,38674,38674,435.89,78.43,12.07,11.75,102.2,540.78,419.21,959.99,2141.19,160.76,275.14,households_005
10,2,10,47,38666,38666,433.58,78.4,12.07,11.75,102.17,541.08,418.62,959.71,2136.91,157.55,276.03,households_006
10,2,10,48,38652,38652,435.64,78.35,11.94,12.0,102.24,535.02,417.87,952.89,2131.22,170.15,265.5,households_007
10,2,10,46,38677,38677,435.55,78.42,12.05,11.5,101.89,538.6,418.84,957.44,2137.33,149.83,285.74,households_008
10,2,10,47,38660,38660,434.58,78.41,12.1,11.75,102.2,541.87,419.33,961.2,2140.78,181.31,253.27,households_009
10,2,10,46,38685,38685,434.72,78.44,11.69,11.5,101.59,524.26,419.08,943.34,2119.35,149.05,285.65,households_010


## Early graphs

In [7]:
# Histogram of the demand collected (ton) per scenario.
fig = px.histogram(data_frame=all_results, x="Demand collected (ton)")
fig

# Histogram of the units served per scenario.
fig = px.histogram(data_frame=all_results, x="Units served")
fig

In [8]:
# Histogram of the route duration (h) per scenario.
fig = px.histogram(
    data_frame=all_results,
    x="Route duration (h)",
)
fig

In [9]:
# Histogram of the total route distance (km) per scenario.
fig = px.histogram(
    data_frame=all_results,
    x="Total route distance (km)",
)
fig

In [10]:
# Units served against route duration.
fig = px.scatter(
    data_frame=all_results,
    x="Units served",
    y="Route duration (h)",
)
fig
# Units served against total route distance.
fig = px.scatter(
    data_frame=all_results,
    x="Units served",
    y="Total route distance (km)",
)
fig

In [12]:
# Demand collected against route duration.
fig = px.scatter(
    data_frame=all_results,
    x="Demand collected (ton)",
    y="Route duration (h)",
)
fig
# Demand collected against total route distance.
fig = px.scatter(
    data_frame=all_results,
    x="Demand collected (ton)",
    y="Total route distance (km)",
)
fig

In [13]:
# Units served against demand collected.
fig = px.scatter(
    data_frame=all_results,
    x="Units served",
    y="Demand collected (ton)",
)
fig

In [14]:
# Demand collected against total route distance, coloured by units served.
fig = px.scatter(
    data_frame=all_results,
    x="Demand collected (ton)",
    y="Total route distance (km)",
    color="Units served",
)
fig

In [15]:
# Demand collected against route duration, coloured by units served.
fig = px.scatter(
    data_frame=all_results,
    x="Demand collected (ton)",
    y="Route duration (h)",
    color="Units served",
)
fig

In [16]:
from pandas_profiling import ProfileReport

2022-06-03 00:14:15,477 - visions.backends - INFO - Pandas backend loaded 1.4.2
2022-06-03 00:14:15,489 - visions.backends - INFO - Numpy backend loaded 1.22.3
2022-06-03 00:14:15,491 - visions.backends - INFO - Pyspark backend NOT loaded
2022-06-03 00:14:15,492 - visions.backends - INFO - Python backend loaded


In [17]:
# Build the profiling report object for the combined scenario table.
profile = ProfileReport(
    all_results,
    title="Waste generation scenario analysis - 20220601",
)

In [18]:
# Export the profiling report to an HTML file under ../data/08_reporting.
profile.to_file(
    "../data/08_reporting/Waste generation scenario analysis - 20220601.html"
)

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


The 'b' parameter of grid() has been renamed 'visible' since Matplotlib 3.5; support for the old name will be dropped two minor releases later.



Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]