In [None]:
# Load the nb_black extension (Black-based formatting of code cells).
%load_ext nb_black
# Load autoreload and set it to mode 2, so imported modules are reloaded
# before code is executed and helper-module edits are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
from glob import glob
from io import StringIO
from IPython.display import display
from pathlib import Path

from azure.storage.blob import BlobServiceClient
from pandas import read_csv, set_option, to_datetime
from plotly.subplots import make_subplots

In [None]:
# The project root is taken to be the kernel's current working directory.
PROJECT_ROOT = Path.cwd()

# Make the two helper source directories importable. The membership check keeps
# this cell idempotent: re-running it no longer appends duplicate entries to
# ``sys.path``.
for src_dir in (
    PROJECT_ROOT / "aci-dash" / "app" / "src",
    PROJECT_ROOT / "pyviz_panel" / "app" / "src",
):
    if str(src_dir) not in sys.path:
        sys.path.append(str(src_dir))

In [None]:
# Plotting / geodata helpers used by the cells below.
# NOTE(review): presumably resolved through the "src" directories appended to
# sys.path in the previous cell - confirm which directory holds each module.
# ``%aimport`` registers the module with the autoreload extension.
%aimport dash_helpers
from dash_helpers import (
    plot_heatmap,
    load_add_district_or_side_to_geojson,
    filter_geodata_and_merge,
    plot_choro,
)

# Helper used in the load-data cell to read and prepare the geodata file for
# the chosen division.
%aimport visualization_helpers_altair
from visualization_helpers_altair import load_prep_geodata

In [None]:
# Widen pandas' display limits so large frames are shown without truncation.
for option_name, option_value in {
    "display.max_rows": 500,
    "display.max_columns": 500,
    "display.width": 1000,
}.items():
    set_option(option_name, option_value)

<a id="toc"></a>

## [Table of Contents](#toc)
0. [About](#about)
1. [User Inputs](#user-inputs)
2. [Load data](#load-data)
3. [Generate choropleth maps](#generate-choropleth-maps)
4. [Generate heatmaps](#generate-heatmaps)

<a id="about"></a>

## 0. [About](#about)

In this notebook, we will use [Plotly](https://plot.ly/) to generate the plots to be shown on a dashboard, using the date-time and geographical data exported to `data/processed/heat_mapping_inputs.csv` and `data/processed/choro_mapping_inputs.csv`.

<a id="user-inputs"></a>

## 1. [User Inputs](#user-inputs)

We'll define below the variables that are to be used throughout the code.

First, we'll specify the variables that will be picked up from dashboard inputs. Next, we'll specify inputs related to the location of input and output files relevant to the dashboard. We'll then specify plotting preferences.

In [None]:
# --- File locations ----------------------------------------------------------
# Every location is stored as a plain string rooted at the current working
# directory.
PROJECT_DIR = str(Path.cwd())
data_dir = str(Path(PROJECT_DIR, "data"))
dash_data_dir = str(Path(PROJECT_DIR, "aci-dash", "app", "data"))
figs_dir = str(Path(PROJECT_DIR, "reports", "figures"))

# Despite the ``_dir`` suffix, these two point at the processed CSV *files*.
choro_data_dir = str(Path(dash_data_dir, "processed", "choro_mapping_inputs.csv"))
heat_data_dir = str(Path(dash_data_dir, "processed", "heat_mapping_inputs.csv"))

# --- Selections --------------------------------------------------------------
# One choropleth and one heatmap are generated per primary type.
primary_types = ["CRIMINAL_DISTURBANCE", "VIOLENCE_TO_HUMAN", "PROPERTY_DAMAGE"]
# Only the first entry is used. Alternatives noted by the original author:
# "beat" (or "district"), "community_area".
da_choices = ["district"]

# Plotting preferences
# Glob fragments used further down to locate the raw boundary files of each
# division type.
pf, ca, nb = "Police*", "Community*", "Neighborhoods"
# Column name -> aggregation name(s).
# NOTE(review): not referenced by any cell shown in this notebook - confirm use.
agg_dict = {"arrest": ["sum"], "datetime": ["count"]}

# Per-division column settings, one row per division type. The "file" and
# "geojson" entries are not set here; the cell that resolves the input
# locations adds them when working from local files.
da = {
    division: {
        "basic_view_cols": view_cols,
        "pre-post-explosition-compare": compare_col,
        "left_join_col": join_col,
    }
    for division, view_cols, compare_col, join_col in (
        ("neighbourhood", "pri|sec|geometry", "pri_neigh", "pri_neigh_x"),
        ("district", "district|sect|geometry", "district", "district"),
        ("community_area", "area_num_1|community|geometry", "comarea", "area_num_1_x"),
    )
}

# Plot-wide settings. The choropleth cell reads the projection type, colour
# column, colour scheme and choropleth figure size from here.
general_plot_specs = dict(
    choromap_projectiontype="mercator",
    color_by_column="datetime|count",
    colorscheme="YlOrRd",
    choro_map_figsize=dict(width=800, height=600),
    legend_title=["Occurrences"],
    heatmap_xy=dict(x="month:O", y="day:O", yscale="linear"),
    heat_map_figsize=dict(width=300, height=535),
)

# Hover-data specification handed to ``plot_heatmap``: one entry per slot
# (x, y, z and two extras), each naming a column, its display title, a type
# label and a format value.
dt_hmap = {
    slot: {"value": column, "title": label, "type": kind, "format": fmt}
    for slot, column, label, kind, fmt in (
        ("x", "month", "Month", "int", 0),
        ("y", "day", "Day", "int", 0),
        ("z", "datetime|count", "Occurrences", "int", 0),
        ("e1", "arrest|sum", "Arrests", "int", 0),
        ("e2", "probability_of_max_class|mean", "Probability (Avg.)", "float", 2),
    )
}
# Tooltip specification handed to ``plot_choro`` (as ``choro_tooltip_dict``):
# maps a column name to the title shown for it.
# The "datetime|count" title was previously misspelled "Ocurrences"; it now
# matches the "Occurrences" label used elsewhere in this notebook.
dt_choro = {
    "district": {"title": "District"},
    "area": {"title": "Area (sq. km)"},
    "side": {"title": "Side"},
    "datetime|count": {"title": "Occurrences"},
    "arrest|sum": {"title": "Arrests"},
    "probability_of_max_class|mean": {"title": "Probability (Avg.)"},
}

# Side -> district numbers belonging to it.
_districts_by_side = {
    "North": [11, 14, 15, 16, 17, 19, 20, 24, 25],
    "Central": [1, 2, 3, 8, 9, 10, 12, 13, 18],
    "South": [4, 5, 6, 7, 22],
}

# Invert the grouping into a flat district number -> side lookup.
district_to_side = {}
for _side, _district_numbers in _districts_by_side.items():
    district_to_side.update(dict.fromkeys(_district_numbers, _side))

In [None]:
def _first_match(pattern):
    """Return the first path, in sorted order, that matches glob ``pattern``.

    Sorting makes the choice deterministic when several paths match.

    Raises
    ------
    FileNotFoundError
        If nothing matches. The message names the pattern, unlike the bare
        ``IndexError`` that ``glob(pattern)[0]`` produces.
    """
    matches = sorted(glob(pattern))
    if not matches:
        raise FileNotFoundError(f"No file matches pattern: {pattern}")
    return matches[0]


if choro_data_dir == "cloud":
    # Cloud mode: download both mapping CSVs from Azure blob storage and wrap
    # their text in StringIO so they can be passed to ``read_csv``.
    # Credentials are read from environment variables, never hardcoded.
    # NOTE(review): this branch does not populate da[...]["file"] or
    # da[...]["geojson"], which the load-data cell reads - confirm how the
    # boundary files are meant to be supplied in cloud mode.
    az_storage_container_name = "myconedesx7"
    conn_str = (
        "DefaultEndpointsProtocol=https;"
        f"AccountName={os.getenv('AZURE_STORAGE_ACCOUNT')};"
        f"AccountKey={os.getenv('AZURE_STORAGE_KEY')};"
        f"EndpointSuffix={os.getenv('ENDPOINT_SUFFIX')}"
    )
    blob_service_client = BlobServiceClient.from_connection_string(conn_str=conn_str)
    choro_blob_client = blob_service_client.get_blob_client(
        container=az_storage_container_name, blob="blobedesz4"
    )
    choro_data_dir = StringIO(choro_blob_client.download_blob().content_as_text())
    heat_blob_client = blob_service_client.get_blob_client(
        container=az_storage_container_name, blob="blobedesz5"
    )
    heat_data_dir = StringIO(heat_blob_client.download_blob().content_as_text())
else:
    # Local mode: locate the shapefile and GeoJSON boundary file of every
    # division type under <data_dir>/raw.
    raw_dir = Path(data_dir) / "raw"
    da["neighbourhood"]["file"] = _first_match(str(raw_dir / nb / "*.shp"))
    da["district"]["file"] = _first_match(str(raw_dir / pf / "*.shp"))
    da["community_area"]["file"] = _first_match(str(raw_dir / ca / "*.shp"))
    da["neighbourhood"]["geojson"] = _first_match(
        str(raw_dir / f"Boundaries - {nb}*.geojson")
    )
    da["district"]["geojson"] = _first_match(str(raw_dir / "*istricts.geojson"))
    da["community_area"]["geojson"] = _first_match(
        str(raw_dir / f"Boundaries - {ca}.geojson")
    )

# Normalise the lookup keys to int (a no-op when they already are ints).
district_to_side = {int(k): v for k, v in district_to_side.items()}
# Only the first requested division type is plotted.
da_choice = da_choices[0]
# Both kinds of figure are written to the same figures directory.
heatmap_dir_path = figs_dir
choromap_dir_path = figs_dir

<a id="load-data"></a>

## 2. [Load data](#load-data)

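We'll start by loading the two mapping files into separate `DataFrame`s. We'll then load the boundary geodata for the chosen division and merge it with the choropleth inputs.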

In [None]:
# Choropleth inputs; the column named after the chosen division is read as int.
df_ch = read_csv(choro_data_dir, dtype={da_choice: int})

# Heatmap inputs: replace month numbers with month names and multiply the
# probability column by 100.
df_h = read_csv(heat_data_dir)
df_h = df_h.assign(
    **{
        "month": to_datetime(df_h["month"], format="%m").dt.month_name(),
        "probability_of_max_class|mean": df_h["probability_of_max_class|mean"] * 100,
    }
)

# Settings (file locations, column names) of the division being plotted.
division_cfg = da[da_choice]

# GeoJSON boundaries, passed later to ``plot_choro`` as ``geodata``.
data = load_add_district_or_side_to_geojson(
    district_geojson_file_path=division_cfg["geojson"],
    key="dist_num",
    division_type=da_choice,
    district_to_side=district_to_side,
)

# Geodata from the shapefile, merged with the choropleth inputs.
gdf_out = load_prep_geodata(gpd_path=division_cfg["file"], da_choice=da_choice)
df_choro_data = filter_geodata_and_merge(
    gdf_out=gdf_out,
    df_ch=df_ch,
    da_choice=da_choice,
    district_to_side=district_to_side,
)

<a id="generate-choropleth-maps"></a>

## 3. [Generate choropleth maps](#generate-choropleth-maps)

Next, we'll generate each version of the choropleth map
- one version per user specification for `primary_type`

In [None]:
# Arguments shared by every choropleth, resolved once outside the loop.
choro_figsize = general_plot_specs["choro_map_figsize"]
choro_common_kwargs = {
    "geodata": data,
    "color_by_col": general_plot_specs["color_by_column"],
    "colorscheme": general_plot_specs["colorscheme"],
    "da_choice": da_choice,
    "choro_tooltip_dict": dt_choro,
    "projection_type": general_plot_specs["choromap_projectiontype"],
    "figsize": (choro_figsize["width"], choro_figsize["height"]),
    "save_to_html": True,
}

# primary type -> choropleth figure; each figure is also written to
# <figs_dir>/choromap_<primary_type>_dash.html.
d = {}
for primary_type in primary_types:
    df_mapping_choro = df_choro_data[df_choro_data["primary_type"] == primary_type]
    d[primary_type] = plot_choro(
        df=df_mapping_choro,
        file_path=Path(figs_dir) / f"choromap_{primary_type}_dash.html",
        **choro_common_kwargs,
    )

Display each version of the choropleth map

In [None]:
# Render every choropleth figure, in the order in which it was generated.
for choro_fig in d.values():
    choro_fig.show()

<a id="generate-heatmaps"></a>

## 4. [Generate heatmaps](#generate-heatmaps)

Next, we'll generate each version of the datetime heatmap
- one version per user specification for `primary_type`

In [None]:
# primary type -> heatmap figure; each figure is also written to
# <figs_dir>/heatmap_<primary_type>_dash.html.
dh = {}
for primary_type in primary_types:
    # Use a dedicated name for the heatmap subset: the original reused
    # ``df_mapping_choro`` here, silently overwriting the choropleth cell's
    # variable with unrelated data.
    df_mapping_heat = df_h.loc[df_h["primary_type"] == primary_type]
    dh[primary_type] = plot_heatmap(
        df=df_mapping_heat,
        x="month",
        y="day",
        z="datetime|count",
        xtitle="month",
        ytitle="day",
        xautorange=True,
        yautorange="reversed",
        c="YlOrRd",
        hover_data=dt_hmap,
        viz=True,
        margins={"r": 50, "t": 0, "l": 75, "b": 0, "pad": 0},
        fig_size=(400, 535),
        file_path=Path(figs_dir) / f"heatmap_{primary_type}_dash.html",
        save_to_html=True,
    )

Display each version of the heatmap

In [None]:
# Render every heatmap figure, in the order in which it was generated.
for heatmap_fig in dh.values():
    heatmap_fig.show()