In [None]:
import sys

# Append the directory containing the project-local `law` module to sys.path
# so the imports in the next cell can find it. The path is relative, so it
# resolves against the kernel's current working directory.
sys.path.append('../modules/')

In [None]:
import json
import re
from textwrap import wrap

import altair as alt
import altair_reveal as reveal
import arrest
import law
import numpy as np
import pandas as pd
import requests
from altair.expr import datum
from altair_saver import save
from scipy.stats import chi2_contingency
from scipy.stats.contingency import expected_freq

# Register the theme supplied by `altair_reveal` under the name 'reveal' and
# make it the active theme (it must be registered before it can be enabled).
alt.themes.register('reveal', reveal.theme)
alt.themes.enable('reveal')
# Select the 'mimetype' renderer for displaying charts in this notebook.
alt.renderers.enable('mimetype')

## Bunch o' code you don't need to read!

In [None]:
def load_chart_json(file):
    """Rebuild an Altair chart from a JSON specification stored on disk.

    Parameters
    ----------
    file : str or path-like
        Path of the JSON file to read.

    Returns
    -------
    The chart produced by ``alt.Chart.from_json`` for the file's contents.
    """
    with open(file) as handle:
        spec = json.load(handle)
    # The parsed spec is serialised back to a JSON string because
    # ``from_json`` takes a string rather than a dict.
    return alt.Chart.from_json(json.dumps(spec))

In [None]:
def custom_wrap(text, max_width):
    """Wrap ``text`` so that its last line is not a single orphaned word.

    The text is first wrapped at ``max_width``. While the final line contains
    no space (i.e. it is a lone word), the width is reduced by one and the
    text is wrapped again; the first layout whose final line contains a space
    is returned.

    Parameters
    ----------
    text : str
        Text to wrap.
    max_width : int
        Maximum line width. Must be positive; ``textwrap.wrap`` raises
        ``ValueError`` otherwise.

    Returns
    -------
    list of str
        The wrapped lines. Text that wraps to zero or one line at
        ``max_width`` is returned as is, since there is nothing to rebalance.
        If no narrower width removes the orphan, the ``max_width`` layout is
        returned instead of raising.
    """
    widest = wrap(text, max_width)
    # Empty text gives [] and short text gives a single line: neither has a
    # trailing orphan to fix, and indexing [-1] on [] would raise.
    if len(widest) < 2:
        return widest

    candidate = widest
    width = max_width
    while ' ' not in candidate[-1]:
        width -= 1
        if width < 1:
            # Every width was tried without success; wrap() rejects width 0,
            # so fall back to the layout at the requested width.
            return widest
        candidate = wrap(text, width)
    return candidate

In [None]:
def facet_and_config(
    base,
    city_sort,
    title_str='Draft/Reference',
    subtitle_str=None,
    title_size=28,
    subtitle_size=20,
):
    """Facet ``base`` into one row per city and apply the chart-wide styling.

    This effectively reproduces the config in the template,
    but I make it explicit here in case we need to make adjustments.

    Parameters
    ----------
    base : Altair chart
        Chart to repeat for every city. It is wrapped in ``alt.layer`` before
        faceting, and its data must provide a ``city`` field.
    city_sort : list
        Passed as ``sort`` to the ``city:N`` row facet.
    title_str : str or list of str
        Title text.
    subtitle_str : str or list of str, optional
        Subtitle text. When ``None`` the title is set without a subtitle.
    title_size : int
        Font size of the title.
    subtitle_size : int
        Font size of the subtitle.

    Returns
    -------
    The faceted and configured chart with its title applied.
    """
    chart = (
        alt.layer(base)
        .facet(
            row=alt.Row(
                'city:N',
                sort=city_sort,
                title=None,
                header=alt.Header(
                    labelFontSize=15,
                    labelFont='Tenon',
                    labelOrient='top',
                    labelAlign='left',
                    labelAnchor='start',
                    labelPadding=5,
                ),
            )
        )
        # Each facet row gets its own x axis instead of a shared one.
        .resolve_axis(x='independent')
        .configure_title(
            font='Tenon',
            fontSize=title_size,
            color='#222222',
            fontWeight=500,
            align='left',
            anchor='start',
            subtitleFont='Tenon',
            subtitleColor='#222222',
            subtitleFontSize=subtitle_size,
            subtitleFontWeight=300,
            subtitlePadding=10,
            subtitleLineHeight=24,
            offset=22,
        )
        .configure_axis(
            gridColor='#dddddd',
            title=None,
            titleColor='#666666',
            titleFontWeight=300,
            labelColor='#666666',
            labelFont='Tenon',
            labelFontSize=13,
            labelFontWeight=400,
            labelFlush=False,
            labelPadding=5,
            tickSize=6,
        )
        .configure_axisX(
            # labels=False,
            domainColor='#666666',
            tickColor='#666666',
        )
        .configure_axisY(labels=False, domainColor='#f9f9f9', tickColor='#f9f9f9')
        .configure_legend(
            title=None,
            orient='top',
            direction='horizontal',
            offset=40,
            columnPadding=20,
            titleFont='Tenon',
            titleFontSize=16,
            titleFontWeight=400,
            labelAlign='left',
            labelFont='Tenon',
            labelFontSize=15,
            labelFontWeight=300,
            labelColor='#222222',
            labelBaseline='middle',
            rowPadding=10,
            symbolType='square',
        )
    )

    # Build the title once; the subtitle key is only added when one is given.
    # `is not None` (identity) is used because `== None` can be overridden by
    # the operand's type.
    title = {'text': title_str}
    if subtitle_str is not None:
        title['subtitle'] = subtitle_str
    return chart.properties(title=title)

### Importing data

In [None]:
# Read c05_nibrs_charge_sets_merged.csv with every column kept as a string.
story_df = pd.read_csv(
    '../US/04_outputs/c05_nibrs_charge_sets_merged.csv',
    dtype=str,
)

In [None]:
# Read only the four columns used below from the Seattle file. Unlike
# story_df, no dtype is forced here, so pandas infers the column types.
seattle_df = pd.read_csv(
    '../US/04_outputs/a01_seattle.csv',
    usecols=[
        '_arrest_id',
        '_arrest_date',
        '_housing_status',
        '_city',
    ],
)

In [None]:
# Stack the two sources into one frame with a fresh 0..n-1 index.
df = pd.concat(
    [story_df, seattle_df],
    ignore_index=True,
)

In [None]:
# Strip a single leading underscore from each column name
# (e.g. '_arrest_id' becomes 'arrest_id').
df.columns = [
    re.sub('^_', '', column_name)
    for column_name in df.columns
]

In [None]:
# Title-case the housing status so its spelling matches the capitalised
# labels used as lookup keys and scale domain further down.
df['housing_status'] = df['housing_status'].str.title()

In [None]:
# Collapse the two "no usable address" statuses into one shared label;
# every other status is carried over unchanged.
df['simplified_housing_status'] = df['housing_status'].replace(
    dict.fromkeys(
        ('No Information', 'Unknown'),
        'Address missing or unknown',
    )
)

### Plot

In [None]:
# Number of distinct arrest ids per (city, simplified housing status).
arrests_by_simplified_housing = (
    df
    .groupby(['city', 'simplified_housing_status'])
    .agg(arrests=('arrest_id', 'nunique'))
)

In [None]:
# Number of distinct arrest ids per (city, original housing status).
arrests_by_housing = (
    df
    .groupby(['city', 'housing_status'])
    .agg(arrests=('arrest_id', 'nunique'))
)

In [None]:
# Display the per-city, per-status arrest counts for a visual check.
arrests_by_housing

#### Aggregation

In [None]:
# Distinct arrest ids per (city, simplified housing status). This repeats the
# aggregation already run in the "Plot" section above.
arrests_by_simplified_housing = (
    df
    .groupby(['city', 'simplified_housing_status'])
    .agg(arrests=('arrest_id', 'nunique'))
)

In [None]:
# Distinct arrest ids per (city, original housing status). This repeats the
# aggregation already run in the "Plot" section above.
arrests_by_housing = (
    df
    .groupby(['city', 'housing_status'])
    .agg(arrests=('arrest_id', 'nunique'))
)

In [None]:
# Distinct arrest ids per city: the denominator for the shares computed next.
arrests_by_city = (
    df
    .groupby(['city'])
    .agg(arrests=('arrest_id', 'nunique'))
)

In [None]:
# Turn the counts into each status's share of its city's arrests, then move
# the index levels back into ordinary columns.
# NOTE(review): this relies on pandas aligning the (city, housing_status)
# index with the city-only index on the shared `city` level — confirm.
percent_df = (
    arrests_by_housing
    .div(arrests_by_city)
    .reset_index()
)

In [None]:
# Same share computation for the simplified statuses.
# NOTE(review): this relies on pandas aligning the two frames on the shared
# `city` index level — confirm.
simplified_percent_df = (
    arrests_by_simplified_housing
    .div(arrests_by_city)
    .reset_index()
)

#### Generate field to sort by housing status

In [None]:
# Stacking order for each housing status label. The three "no usable
# address" spellings share position 3.
c = {
    'Unhoused': 1,
    'Housed': 2,
    'No Information': 3,
    'Unknown': 3,
    'Address missing or unknown': 3,
}

In [None]:
# Translate each housing status into its numeric stacking position via the
# lookup `c`; statuses that are not keys of `c` are left unchanged by replace.
percent_df['_order'] = percent_df['housing_status'].replace(c)

In [None]:
# Numeric stacking position for the simplified statuses, looked up in `c`;
# statuses that are not keys of `c` are left unchanged by replace.
simplified_status = simplified_percent_df['simplified_housing_status']
simplified_percent_df['_order'] = simplified_status.replace(c)
del simplified_status

In [None]:
# Display the shares and their stacking order for a visual check.
simplified_percent_df

#### Chart

In [None]:
# Stacked bar of arrest shares by simplified housing status, with segments
# ordered by the `_order` field.
simplified_arrests_by_housing = (
    alt.Chart(simplified_percent_df)
    .mark_bar(size=25)
    .encode(
        x=alt.X(
            'arrests:Q',
            axis=None,
            title=None,
            stack='zero'
        ),
        order='_order:Q',
        fill=alt.Color(
            'simplified_housing_status',
            legend=alt.Legend(
                orient='top',
                title=None,
                # Legend entries must be values of the scale domain below.
                # The previous third entry, 'No information/Unknown', is not
                # a value in the data or the domain.
                values=[
                    'Unhoused',
                    'Housed',
                    'Address missing or unknown',
                ],
                titleLimit=0,
                labelLimit=0,
            ),
            scale=alt.Scale(
                domain=['Unhoused', 'Housed', 'Address missing or unknown'],
                range=['#004488', '#349AC2', '#CCCCCC'],
            ),
        ),
        # Seattle bars are drawn at half opacity, all other cities at full
        # opacity. This was previously written with Python `or` between two
        # expressions; `or` returns its first truthy operand, so only
        # `datum.city == 'Seattle'` ever reached the chart. Use `|`, `&`, `~`
        # to combine Altair expressions.
        opacity=alt.condition(
            datum.city == 'Seattle',
            alt.value(0.5),
            alt.value(1)),
    )
)

#### Text

In [None]:
# Percentage labels placed at the right end of each stacked segment
# (right-aligned, nudged 5px left). Labels are black on the
# 'Address missing or unknown' segment and white elsewhere; segments with a
# share of 0.04 or less get no label.
simplified_arrests_text = (
    alt.Chart(simplified_percent_df)
    .mark_text(
        font='Tenon',
        fontSize=14,
        align='right',
        dx=-5,
    )
    .encode(
        x=alt.X(
            'arrests:Q',
            title=None,
            stack='zero',
        ),
        order='_order:Q',
        color=alt.condition(
            datum.simplified_housing_status == 'Address missing or unknown',
            alt.value('black'),
            alt.value('white'),
        ),
        text=alt.Text(
            'arrests:Q',
            format='.0%',
        ),
    )
    .transform_filter(datum.arrests > 0.04)
)

#### Base

In [None]:
# Story version: bars plus labels, with the Seattle rows filtered out.
# NOTE(review): the title text is the expression object `datum.city`, not a
# string — confirm it renders as intended.
arrests_base_story = (
    (simplified_arrests_by_housing + simplified_arrests_text)
    .properties(
        width=400,
        height=35,
        title=alt.TitleParams(text=datum.city),
    )
    .transform_filter(datum.city != 'Seattle')
)

In [None]:
# Reference version: bars plus labels for every city, Seattle included.
# NOTE(review): the title text is the expression object `datum.city`, not a
# string — confirm it renders as intended.
arrests_base_seattle = (
    (simplified_arrests_by_housing + simplified_arrests_text)
    .properties(
        width=400,
        height=35,
        title=alt.TitleParams(text=datum.city),
    )
)

#### Title, subtitle

In [None]:
# Headline for the story chart (wrapped into lines in the next cell).
all_arrests_title = 'Police disproportionately arrest unhoused people in West Coast cities'

In [None]:
# Break the title into lines of at most 30 characters; custom_wrap narrows
# the width further when the last line would otherwise be a single word.
all_arrests_title_formatted = custom_wrap(all_arrests_title, 30)

In [None]:
# Subtitle for the story chart (wrapped into lines in the next cell).
all_arrests_subtitle = 'From 2017 through 2020, unhoused people made up at most an estimated 2% of the population in each of the following cities.'

In [None]:
# Break the subtitle into lines of at most 40 characters; custom_wrap narrows
# the width further when the last line would otherwise be a single word.
all_arrests_subtitle_formatted = custom_wrap(all_arrests_subtitle, 40)

In [None]:
# Reference chart with Seattle included. No title or subtitle is passed, so
# facet_and_config falls back to its default title and omits the subtitle.
facet_and_config(
    arrests_base_seattle,
    city_sort=[
        'Portland',
        'Sacramento',
        'Los Angeles',
        'Seattle',
        'San Diego',
        'Oakland',
    ],
    title_size=28,
    subtitle_size=20,
)

#### [In story draft](https://docs.google.com/document/d/13YtdcIQttSUras5WUCrisBa8xG8waVq6OkOUpPaENZE/edit#bookmark=id.ilojw9v0ijv8)

In [None]:
# Story chart: the five cities other than Seattle, with the wrapped title and
# subtitle built above.
all_arrests = facet_and_config(
    arrests_base_story,
    city_sort=[
        'Portland',
        'Sacramento',
        'Los Angeles',
        'San Diego',
        'Oakland',
    ],
    title_str=all_arrests_title_formatted,
    subtitle_str=all_arrests_subtitle_formatted,
    title_size=28,
    subtitle_size=20,
)

# Last expression of the cell, so the chart is rendered as the cell output.
all_arrests

***