In [None]:
%load_ext nb_black
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
from IPython.display import display
from glob import glob
from io import StringIO
from pathlib import Path

import altair as alt
from azure.storage.blob import BlobServiceClient
from pandas import read_csv, set_option, to_datetime

In [None]:
# Treat the current working directory as the project root and make the
# helper modules under pyviz_panel/app/src importable from this notebook.
PROJECT_ROOT = Path.cwd()
sys.path.append(str(PROJECT_ROOT.joinpath("pyviz_panel", "app", "src")))

In [None]:
%aimport visualization_helpers_altair
from visualization_helpers_altair import gen_choro_map, gen_heat_map

In [None]:
# Raise pandas' display limits (rows, columns, line width) for this notebook.
for option_name, option_value in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    set_option(option_name, option_value)
# alt.renderers.enable("default")
# Turn off Altair's limit on the number of rows a chart's data may contain.
alt.data_transformers.disable_max_rows()

<a id="toc"></a>

## [Table of Contents](#table-of-contents)
0. [About](#about)
1. [User Inputs](#user-inputs)
2. [Load data](#load-data)
3. [Generate choropleth maps](#generate-choropleth-maps)
4. [Generate heatmaps](#generate-heatmaps)
5. [Generate compound plots](#generate-compound-plots)

<a id="about"></a>

## 0. [About](#about)

In this notebook, we will use [Altair](https://altair-viz.github.io/) to generate the plots to be shown on a dashboard using the date-time and geographical data exported to `data/processed/heat_mapping_inputs.csv` and `data/processed/choro_mapping_inputs.csv`

<a id="user-inputs"></a>

## 1. [User Inputs](#user-inputs)

We'll define below the variables that are to be used throughout the code.

First, we'll specify the variables that will be picked up from dashboard inputs. Next, we'll specify inputs related to the location of input and output files relevant to the dashboard. We'll then specify plotting preferences.

In [None]:
# Filesystem locations. Everything in this cell is kept as a plain string;
# the values are converted to Path objects further down the notebook.
PROJECT_DIR = str(Path.cwd())  # type: str
data_dir = str(Path(PROJECT_DIR) / "data")
dash_data_dir = str(Path(PROJECT_DIR) / "aci-dash" / "app" / "data")
figs_dir = str(Path(PROJECT_DIR) / "reports" / "figures")

# Despite the "_dir" suffix these two point at CSV files, not directories.
# Setting choro_data_dir to the literal "cloud" makes the notebook download
# both inputs from Azure blob storage instead of reading them from disk.
choro_data_dir = str(Path(dash_data_dir) / "processed" / "choro_mapping_inputs.csv")
heat_data_dir = str(Path(dash_data_dir) / "processed" / "heat_mapping_inputs.csv")

# Crime categories to plot: one choropleth / heatmap version is generated per entry.
primary_types = ["CRIMINAL_DISTURBANCE", "VIOLENCE_TO_HUMAN", "PROPERTY_DAMAGE"]
# Geographic boundary types to map; each entry must be a key of `da`.
da_choices = ["district"]  # "beat" (or "district"), "community_area"
# Both kinds of figure share the same output folder.
heatmap_dir_path = figs_dir
choromap_dir_path = figs_dir

# Folder-name patterns of the raw shapefile directories (used with glob).
pf, ca, nb = "Police*", "Community*", "Neighborhoods"

# Column settings per geographic boundary type, keyed by the names that may
# appear in da_choices. No "file" entry is defined here; the shapefile path
# is filled in per boundary type later, when inputs are read from local disk.
# For police boundaries the "beat" variant can be swapped in for "district":
#   basic_view_cols "beat|sect|geometry", compare column "beat",
#   left_join_col "beat_num_x" (or "district_x").
da = {
    boundary: {
        "basic_view_cols": view_cols,
        "pre-post-explosition-compare": compare_col,
        "left_join_col": join_col,
    }
    for boundary, view_cols, compare_col, join_col in (
        ("neighbourhood", "pri|sec|geometry", "pri_neigh", "pri_neigh_x"),
        ("district", "district|sect|geometry", "district", "district"),
        (
            "community_area",
            "area_num_1|community|geometry",
            "comarea",
            "area_num_1_x",
        ),
    )
}

# Plotting preferences
# Aggregations per column: arrests are summed, datetime rows are counted.
agg_dict = dict(arrest=["sum"], datetime=["count"])

# Settings shared by the choropleth and heatmap generators.
general_plot_specs = dict(
    choromap_projectiontype="mercator",
    color_by_column=["datetime|count"],
    colorscheme="yelloworangered",
    choro_map_figsize=dict(width=400, height=600),
    legend_title=["Occurrences"],
    heatmap_xy=dict(x="month:O", y="day:O", yscale="linear"),
    heat_map_figsize=dict(width=300, height=535),
)

# Heatmap tooltip fields: Altair shorthand -> display settings. Every entry
# is a quantitative value shown with two decimals; only the title differs.
dt_hmap = {
    shorthand: {"title": title, "type": "quantitative", "format": ".2f"}
    for shorthand, title in (
        ("sum(datetime|count):Q", "Occurrences"),
        ("sum(arrest|sum):Q", "Arrests"),
        ("mean(probability_of_max_class|mean):Q", "Probability (Avg.)"),
    )
}

# Choropleth tooltip fields: GeoJSON property path -> display settings.
# "format" is optional; entries without it are passed on without a number
# format when the tooltip dictionaries are assembled.
dt_choro = {
    "properties.sector": {"title": "Sector", "type": "nominal"},
    "properties.beat_num": {"title": "Beat", "type": "nominal"},
    "properties.area": {
        "title": "Area (sq. km)",
        "type": "quantitative",
        "format": ".2f",
    },
    # Title spelling matches the heatmap tooltip and the legend title.
    "properties.datetime|count": {"title": "Occurrences", "type": "quantitative"},
    "properties.arrest|sum": {"title": "Arrests", "type": "quantitative"},
    "properties.probability_of_max_class|mean": {
        "title": "Probability (Avg.)",
        "type": "quantitative",
        "format": ".2f",
    },
}

In [None]:
def _first_shapefile(folder_pattern):
    """Return the first ``*.shp`` file found under ``<data_dir>/raw/<folder_pattern>``.

    Parameters
    ----------
    folder_pattern : str
        Folder name or glob pattern of the shapefile directory inside
        ``<data_dir>/raw``.

    Returns
    -------
    str
        The first path returned by ``glob`` for the pattern.

    Raises
    ------
    FileNotFoundError
        If nothing matches, so the failing pattern is named instead of
        surfacing as a bare ``IndexError``.
    """
    pattern = str(Path(data_dir) / "raw" / folder_pattern / "*.shp")
    matches = glob(pattern)
    if not matches:
        raise FileNotFoundError(f"No shapefile found matching {pattern!r}")
    return matches[0]


# Pick the data source. When choro_data_dir is the literal "cloud", both CSVs
# are downloaded from Azure blob storage into in-memory text buffers;
# otherwise the local file paths are used and the shapefiles are located.
if choro_data_dir == "cloud":
    az_storage_container_name = "myconedesx7"
    # Account name, key and endpoint suffix are read from environment variables.
    conn_str = (
        "DefaultEndpointsProtocol=https;"
        f"AccountName={os.getenv('AZURE_STORAGE_ACCOUNT')};"
        f"AccountKey={os.getenv('AZURE_STORAGE_KEY')};"
        f"EndpointSuffix={os.getenv('ENDPOINT_SUFFIX')}"
    )
    blob_service_client = BlobServiceClient.from_connection_string(conn_str=conn_str)
    choro_blob_client = blob_service_client.get_blob_client(
        container=az_storage_container_name, blob="blobedesz4"
    )
    choro_data_dir = StringIO(choro_blob_client.download_blob().content_as_text())
    heat_blob_client = blob_service_client.get_blob_client(
        container=az_storage_container_name, blob="blobedesz5"
    )
    heat_data_dir = StringIO(heat_blob_client.download_blob().content_as_text())
    # NOTE(review): this branch never sets da[...]["file"]; confirm the
    # choropleth generator does not need the shapefile paths in cloud mode.
else:
    # Record the shapefile of every boundary type in the `da` settings.
    da["neighbourhood"]["file"] = _first_shapefile(nb)
    da["district"]["file"] = _first_shapefile(pf)
    da["community_area"]["file"] = _first_shapefile(ca)
    (choro_data_dir, heat_data_dir) = (
        Path(choro_data_dir),
        Path(heat_data_dir),
    )

In [None]:
# Turn the two figure output folders into Path objects.
heatmap_dir_path = Path(heatmap_dir_path)
choromap_dir_path = Path(choromap_dir_path)

Next, we'll programmatically assemble a list of dictionaries to be used as fields in Altair hover tooltips

In [None]:
# Build one list of tooltip dictionaries per chosen boundary type. Each list
# starts with the boundary's own name field, followed by every dt_choro field.
tooltips_choro_map = []
for da_choice in da_choices:
    # Community areas are labelled through the "community_x" property.
    tooltip_field = "community_x" if da_choice == "community_area" else da_choice
    tooltip_list = [
        {
            "title": da_choice.title(),
            "field": f"properties.{tooltip_field}",
            "type": "nominal",
        }
    ]
    for field_name, field_spec in dt_choro.items():
        tooltip_entry = {
            "title": field_spec["title"],
            "field": field_name,
            "type": field_spec["type"],
        }
        # A number format is only included for fields that define one.
        if "format" in field_spec:
            tooltip_entry["format"] = field_spec["format"]
        tooltip_list.append(tooltip_entry)
    tooltips_choro_map.append(tooltip_list)

# One alt.Tooltip per heatmap field, using the title, type and format
# configured for it in dt_hmap.
tooltip_hmap = [
    alt.Tooltip(shorthand, **{key: spec[key] for key in ("title", "type", "format")})
    for shorthand, spec in dt_hmap.items()
]

Finally, we'll define a helper function to be used for
- [setting Altair plotting styles](https://github.com/altair-viz/altair/issues/1021#issuecomment-406145719)
- checking if a point (specified by its Latitude and Longitude) occurs inside a list of [Polygons](http://geopandas.org/data_structures.html#geoseries)

In [None]:
def no_cell():
    """Return a theme configuration that sets the ``cell`` style's stroke opacity to 0.

    Returns
    -------
    dict
        A nested ``{"config": {"style": {"cell": ...}}}`` dictionary.
    """
    cell_style = {"strokeOpacity": 0}
    return {"config": {"style": {"cell": cell_style}}}

In [None]:
# Register the custom theme under the function's own name, then switch it on.
theme_name = no_cell.__name__
alt.themes.register(theme_name, no_cell)
alt.themes.enable(theme_name)

<a id="load-data"></a>

## 2. [Load data](#load-data)

We'll start by loading the two mapping files into separate `DataFrame`s

In [None]:
# Read the choropleth and heatmap inputs into separate DataFrames.
df_ch, df_h = read_csv(choro_data_dir), read_csv(heat_data_dir)
# df_h["month"] = to_datetime(df_h["month"], format="%m").dt.month_name()
# Scale the mean class probability by 100 (presumably fraction -> percent; confirm).
df_h["probability_of_max_class|mean"] = df_h["probability_of_max_class|mean"].mul(100)
# Preview the first rows of each frame, heatmap data first.
display(df_h.head(), df_ch.head())

<a id="generate-choropleth-maps"></a>

## 3. [Generate choropleth maps](#generate-choropleth-maps)

Next, we'll generate each version of the choropleth map
- one version per user specification for `primary_type`

In [None]:
# Collect the inputs of the choropleth generator, then build the maps.
# save_to_html=True is passed so the generator is asked to export HTML as well.
choro_map_kwargs = dict(
    primary_types=primary_types,
    df_ch=df_ch,
    da=da,
    da_choices=da_choices,
    tooltips_choro_map=tooltips_choro_map,
    general_plot_specs=general_plot_specs,
    figs_dir=figs_dir,
    save_to_html=True,
)
d = gen_choro_map(**choro_map_kwargs)

Display each version of the choropleth map

In [None]:
# Show the k-th generated choropleth for each requested primary type.
choro_charts = list(d.values())
for k in range(len(primary_types)):
    alt_maps = alt.hconcat(choro_charts[k]).resolve_scale(color="independent")
    display(alt_maps)

<a id="generate-heatmaps"></a>

## 4. [Generate heatmaps](#generate-heatmaps)

Next, we'll generate each version of the datetime heatmap
- one version per user specification for `primary_type`

In [None]:
# Axis encodings and y-scale come from the shared plot settings.
heatmap_axes = general_plot_specs["heatmap_xy"]
heat_map_kwargs = dict(
    x=heatmap_axes["x"],
    y=heatmap_axes["y"],
    yscale=heatmap_axes["yscale"],
    primary_types=primary_types,
    df_h=df_h,
    tooltip_hmap=tooltip_hmap,
    general_plot_specs=general_plot_specs,
    figs_dir=figs_dir,
    save_to_html=False,
)
dh = gen_heat_map(**heat_map_kwargs)

Display each version of the heatmap

In [None]:
# Show the k-th generated heatmap for each requested primary type.
heat_charts = list(dh.values())
for k in range(len(primary_types)):
    alt_maps = alt.hconcat(heat_charts[k]).resolve_scale(color="independent")
    display(alt_maps)

<a id="generate-compound-plots"></a>

## 5. [Generate compound plots](#generate-compound-plots)

Next, we'll generate each view of the pairs of plots to be displayed on the dashboard
- one view per user specification for `primary_type`

In [None]:
# Place each choropleth next to the heatmap at the same position in `dh`,
# giving every chart its own colour scale.
# Only the values are iterated: binding the unused keys to `_` at the top
# level of a notebook would shadow IPython's last-output variable.
# NOTE(review): the pairing relies on `d` and `dh` having the same ordering
# (presumably one entry per primary type) - confirm against the generators.
for choro_map, heat_map in zip(d.values(), dh.values()):
    dash = alt.hconcat(choro_map, heat_map).resolve_scale(color="independent")
    display(dash)