In [1]:
!pip install censusgeocode
!pip install sodapy
!pip install geopandas
!pip install nbconvert

Collecting censusgeocode
  Downloading https://files.pythonhosted.org/packages/86/6f/84434d2f5e382dd224b6f1035cb78a718ce31a9c2125803ac03a94357d6b/censusgeocode-0.4.4-py2.py3-none-any.whl
Collecting requests-toolbelt<1,>=0.8.0
[?25l  Downloading https://files.pythonhosted.org/packages/60/ef/7681134338fc097acef8d9b2f8abe0458e4d87559c689a8c306d0957ece5/requests_toolbelt-0.9.1-py2.py3-none-any.whl (54kB)
[K     |████████████████████████████████| 61kB 5.3MB/s 
Collecting cryptography>=1.3.4; extra == "security"
[?25l  Downloading https://files.pythonhosted.org/packages/c9/de/7054df0620b5411ba45480f0261e1fb66a53f3db31b28e3aa52c026e72d9/cryptography-3.3.1-cp36-abi3-manylinux2010_x86_64.whl (2.6MB)
[K     |████████████████████████████████| 2.6MB 14.5MB/s 
[?25hCollecting pyOpenSSL>=0.14; extra == "security"
[?25l  Downloading https://files.pythonhosted.org/packages/b2/5e/06351ede29fd4899782ad335c2e02f1f862a887c20a3541f17c3fa1a3525/pyOpenSSL-20.0.1-py2.py3-none-any.whl (54kB)
[K     |███

# Grab and clean ISR data
Below, I grab Investigatory Stop Report (ISR) data from the Chicago Police Department's website which is available for stops over time periods [January 1, 2016 to January 16, 2018](https://colab.research.google.com/drive/1IM5yhAozHbsHFVq51h8hBecuCdR9nh6w?authuser=1#scrollTo=PrW0ftWIfhgb&line=2&uniqifier=1) and [January 1, 2018 to December 31, 2018](https://home.chicagopolice.org/isr-data-2018/). I then deduplicate records and do some data type conversions to prepare for the visualizations. <br><br>
Note: I load data from the web and clean all within this file.

In [2]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [3]:
import json
import pandas as pd
import altair as alt
import censusgeocode as cg
from io import BytesIO
from zipfile import ZipFile
import urllib
from urllib.request import urlopen, Request
import io
from sodapy import Socrata
import time
import geopandas as gpd
import requests

In [4]:
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [5]:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}

urls = ['http://home.chicagopolice.org/wp-content/uploads/2019/06/ISR-Data-2016-2017.zip',
'http://home.chicagopolice.org/wp-content/uploads/2019/06/ISR-Data-2018.zip']

for url in urls:
	req = Request(url=url, headers=headers)
	html=urllib.request.urlopen(req)
	zipfile = ZipFile(BytesIO(html.read()))
	zipfile.extractall()

ISR_2018 = pd.read_csv("01-JAN-18 thru 01-JAN-19 - ISR- JUV Redacted.csv")
ISR_2016 = pd.read_csv("01-JAN-2016 to 28-FEB-2017 - ISR - JUV Redacted.csv")
ISR_2017 = pd.read_csv("29-FEB-2016 thru 16-JAN-2018 - ISR - JUV Redacted.csv")
ALL_ISRS = [ISR_2018, ISR_2017, ISR_2016]

  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)


In [6]:
def grab_isrs():
  '''
  This function grabs ISR data, cleans and dedupes it, and returns a dataframe
  of ISRs from 2016-2018.
  '''
  try:
    isrs = pd.read_csv('isrs_16-18.csv')
    isrs['CONTACT_DATE'] = pd.to_datetime(isrs['CONTACT_DATE'])
    isrs['month_year'] = pd.to_datetime(isrs['month_year'])
    return isrs
  except:
    isrs_all = pd.concat(ALL_ISRS, sort=True)
    isrs_all.drop_duplicates(inplace=True)
    isrs_all = isrs_all.loc[isrs_all.CARD_NO != 'REDACTED']
    isrs_all['CONTACT_DATE'] = pd.to_datetime(isrs_all.CONTACT_DATE,
                            format='%m/%d/%y %H:%M',
                            infer_datetime_format=True)
    isrs_all['year'] = isrs_all.apply(lambda x: x['CONTACT_DATE'].year, axis=1)
    isrs_all['month'] = isrs_all.apply(lambda x: x['CONTACT_DATE'].month, axis=1)

    #DROPPING DUPLICATE CONTACT_CARD BASED ON MODIFIED_DATE FIELD
    isrs_all['MODIFIED_DATE'] = pd.to_datetime(isrs_all.MODIFIED_DATE,
                                              format='%m/%d/%y %H:%M',
                                              infer_datetime_format=True)
    max_dates = pd.DataFrame(
      isrs_all.groupby('CARD_NO')['MODIFIED_DATE'].max()).reset_index()
    isrs_max = isrs_all.merge(max_dates, on=['CARD_NO', 'MODIFIED_DATE'])
    isrs_unique = isrs_max.drop_duplicates('CARD_NO')
    isrs_unique['month_year'] = \
      pd.to_datetime(isrs_unique['CONTACT_DATE']).dt.to_period('M')
    isrs_unique['month_year'] = \
      isrs_unique.apply(lambda x: x['month_year'].to_timestamp(), axis=1)
    isrs_unique.to_csv("isrs_16-18.csv")
    return isrs_unique

In [7]:
isrs = grab_isrs()
isrs.head()
isrs.to_csv("isrs_clean.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [8]:
def group_month(df, col_name, column=None):
    '''
    Takes the isrs dataframe and a column name and returns a dataframe
    with row counts grouped by month_year and the optional column.
    '''
    if column:
      column.append('month_year')
      return pd.DataFrame(df.groupby(column).size()).reset_index().rename(
          {0: col_name}, axis=1)
    else:
        return pd.DataFrame(df.groupby(
            'month_year').size()).reset_index().rename(
            {0: col_name}, axis=1)

#Set themes

In [9]:
def krista_theme():
    font = "Helvetica"
    labelFont = "Helvetica" 
    sourceFont = "Helvetica"
    axisColor = "#000000"
    gridColor = "#DEDDDD"
    main_palette = ["#7F3C8D",
                    "#11A579",
                    "#3969AC",
                    "#F2B701",
                    "#E73F74",
                    "#80BA5A",
                    "#E68310",
                    "#008695",
                    "#CF1C90",
                    "#f97b72",
                    "#4b4b8f",
                    "#A5AA99"
                   ]
    sequential_palette = ["#ebf0f1",
                          "#ffffff",
                          "#c3d2d4",
                          "#96b0b4",
                          "#76999d",
                          "#678e93",
                          "#588489",
                          "#4a7a7f",
                          "#3a6f76",
                          "#29656c"
                         ]
    return {
        "config": {
            "title": {
                "fontSize": 18,
                "font": font,
                "anchor": "start",
                "fontColor": "#000000"
            },
            "axisX": {
                "domain": True,
                "domainColor": axisColor,
                "grid": False,
                "labelFont": labelFont,
                "tickColor": axisColor,
                "tickSize": 5,
                "titleFont": font,
                "title": "X Axis Title (units)", 
                "stroke": "transparent"
            },
            "axisY": {
                "domain": False,
                "grid": True,
                "gridColor": gridColor,
                "gridWidth": 1,
                "labelFont": labelFont,
                "labelFontSize": 12,
                "labelAngle": 0, 
                "ticks": False,
                "titleFont": font,
                "titleFontSize": 12,
                "titlePadding": 10,
                "title": "Y Axis Title (units)",  
                "titleAngle": 0,
                "titleY": -10,
                "titleX": 18,
            },
            "range": {
                "category": main_palette,
                "diverging": sequential_palette,
            },
            "view": {
                "stroke": "transparent"
            }
}
    }

In [10]:
def alternate_theme():
    font = "Helvetica"
    labelFont = "Helvetica" 
    sourceFont = "Helvetica"
    axisColor = "#000000"
    gridColor = "#DEDDDD"
    main_palette = ["#7F3C8D",
                    "#11A579",
                    "#3969AC",
                    "#F2B701",
                    "#E73F74",
                    "#80BA5A",
                    "#E68310",
                    "#008695",
                    "#CF1C90",
                    "#f97b72",
                    "#4b4b8f",
                    "#A5AA99"
                   ]
    sequential_palette = ["#80ba5a",
                          "#8fb15d",
                          "#a3a462",
                          "#b19865",
                          "#bf8969",
                          "#c3846a",
                          "#cd766d",
                          "#d36d6e",
                          "#e15072",
                          "#ea3475"
                         ]
    return {
        "config": {
            "title": {
                "fontSize": 18,
                "font": font,
                "anchor": "start",
                "fontColor": "#000000"
            },
            "axisX": {
                "domain": True,
                "domainColor": axisColor,
                "grid": False,
                "labelFont": labelFont,
                "tickColor": axisColor,
                "tickSize": 5,
                "titleFont": font,
                "title": "X Axis Title (units)", 
                "stroke": "transparent"
            },
            "axisY": {
                "domain": False,
                "grid": True,
                "gridColor": gridColor,
                "gridWidth": 1,
                "labelFont": labelFont,
                "labelFontSize": 12,
                "labelAngle": 0, 
                "ticks": False,
                "titleFont": font,
                "titleFontSize": 12,
                "titlePadding": 10,
                "title": "Y Axis Title (units)",  
                "titleAngle": 0,
                "titleY": -10,
                "titleX": 18,
            },
            "range": {
                "category": main_palette,
                "diverging": sequential_palette,
            },
            "view": {
                "stroke": "transparent"
            }
}
    }

In [11]:
alt.themes.register("krista_theme", krista_theme)
alt.themes.register("alternate_theme", alternate_theme)
alt.themes.enable("krista_theme")

ThemeRegistry.enable('krista_theme')

#Time Trends


I begin with a basic line chart that demonstrates the total ISRs by month over time. Seasonal dips in the number of stops per month occur towards the end of fall and tend to increase again in late winter/early spring. There is a noticeable increase in the volume of stops during the high season of 2018 as compared to the prior two years. In later charts, I continue to examine trends over time to see if there are patterns which may explain this increase.

In [12]:
by_month = group_month(isrs, 'isr_count')
alt.Chart(by_month, width=500).mark_line().encode(
    alt.X('month_year:T', title='Month', axis=alt.Axis(format="%b %Y")),
    alt.Y('isr_count', title="Total ISRs per Month"),
    color=alt.value("teal")
    ).properties(
title={
  "text": ["Monthly ISRs 2016-2018"], 
  "subtitle": ["ISRs have fluctuated seasonally and noticeably increased in 2018.",
               "Source: Chicago Police Department Investigatory Stop Reports*"],
  }
)

#District Trends

After observing how stops have fluctuated over time, I want to examine whether these trends demonstrate any differences by police district. Below, it is apparent that District 7 likely made up the lion's share of the spike in ISRs in mid-2016. The subsequent spike in stops in mid-2018 occurred in multiple police districts but appears particularly strong in districts 11 and 7.



In [13]:
by_district = group_month(isrs, 'isr_count', column=['DISTRICT'])
by_district['DISTRICT'] = by_district['DISTRICT'].astype(int).astype(str)

alt.Chart(by_district).mark_rect().encode(
    alt.X('yearmonth(month_year):O', title='Month'),
    alt.Y('DISTRICT', title=['Police District'], axis=alt.Axis(grid=False),
          sort=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
                '13', '14', '15', '16', '17', '18', '19', '20', '21', '22',
                '23', '24', '25', '31', '41', '51']),
    alt.Color('isr_count', title=['Total ISRs per Month'])
).properties(
title={
  "text": ["Monthly ISRs by Police District 2016-2018"], 
  "subtitle": ["Total ISRs per month vary across police districts and over time.",
               "Source: Chicago Police Department Investigatory Stop Reports*"]
}
)

#Mapping ISRs by District

In the city of Chicago, district 11 is located on the west side and primarily encompasses West Garfield Park, a neighborhood notorious for high rates of homicide. District 7 primarily encompasses Englewood on the south side. The populations in both districts are predominantly African American. Below, I map ISRs in the year 2018 by police district to provide geographical context.

Note: I have every intention of mapping the specific location of stops but I only just got access to a Midway account at RCC for geocoding.

##Grab district boundaries and clean ISR data

In [14]:
districts_gdf = gpd.read_file("https://data.cityofchicago.org/api/geospatial/fthy-xz3r?method=export&format=GeoJSON")

In [15]:
isrs_2018 = isrs.loc[isrs['year'] == 2018]
isrs_district_2018 = isrs_2018.groupby(
    ['DISTRICT']).size().reset_index().rename(
        columns={0: 'isrs', 'DISTRICT': 'dist_num'})

In [16]:
isrs_district_2018.dist_num = isrs_district_2018['dist_num'].astype(int)
districts_gdf.dist_num = districts_gdf['dist_num'].astype(int)

In [17]:
districts_gdf.drop_duplicates('dist_num', inplace=True)
isrs_district_2018.drop_duplicates('dist_num', inplace=True)

In [18]:
districts_gdf_isrs = districts_gdf.merge(
    isrs_district_2018, how='left', on='dist_num', validate="one_to_one")

In [19]:
districts_geo_isrs = alt.Data(
    values=json.loads(districts_gdf_isrs.to_json())['features'])

In [20]:
district_centroids = districts_gdf[['dist_label', 'dist_num', 'geometry']]
district_centroids['centroid'] = districts_gdf.geometry.centroid
district_centroids['longitude'] = district_centroids.centroid.x
district_centroids['latitude'] = district_centroids.centroid.y
district_centroids = district_centroids.drop(columns='centroid')


  

  This is separate from the ipykernel package so we can avoid doing imports until

  after removing the cwd from sys.path.


In [21]:
district_centroids = district_centroids.loc[district_centroids.dist_num != 31]

##Visual

In [22]:
background = alt.Chart(districts_geo_isrs).mark_geoshape(
    stroke='white').properties(
    width=500,
    height=500
).encode(
    color=alt.Color('properties.isrs:Q', title="Total ISRs")
)

labels = alt.Chart(district_centroids).mark_text().encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    text='dist_label',
    size=alt.value(8),
    opacity=alt.value(0.6)
).properties(
title={
  "text": ["2018 ISRs by Police District"], 
  "subtitle": ["Relative to geographic area, ISRs were highly concentrated in districts 11 and 7.",
               "Source: Chicago Police Department Investigatory Stop Reports*"]
}
)

background + labels

#Stops by Race
Racially discriminatory policing is a pernicious issue in police departments across the country and Chicago is no exception. In the stack area chart below, I used color and area to visualize how the stops have varied by race over time. CPD has consistently stopped Black/African American individuals at the highest rate and by a wide margin, followed by Hispanic.

Note: Black and White Hispanic are combined into "Hispanic". "Other, Unknown" includes Asian/Pacific Islanders and American Indian/Alaska Natives.


In [23]:
race_codes = {'BLK': 'BLACK/AFR AM',
              'WHI': 'WHITE',
              'API': 'OTHER, UNKNOWN',
              'WBH': 'HISPANIC',
              'WWH': 'HISPANIC',
              'WHT': 'WHITE',
              'I': 'OTHER, UNKNOWN',
              'U': 'OTHER, UNKNOWN'}
isrs['RACE'] = isrs.apply(lambda x: race_codes.get(x['RACE_CODE_CD']), axis=1)
by_race = group_month(isrs, 'isr_count', column=['RACE'])

In [24]:
alt.Chart(by_race, width=500).mark_area().encode(
    alt.X("month_year", title='Month'),
    alt.Y('isr_count', title='Total ISRs'),
    alt.Color('RACE', title='Race')
).properties(
title={
  "text": ["Monthly ISRs by Subject Race 2016-2018"], 
  "subtitle": ["Black/African American subjects have consistently experienced \
the highest rate of stops by CPD.",
"Source: Chicago Police Department Investigatory Stop Reports*"]
}
)

#Pat Downs and Searches
The ISR data provided by the city of Chicago includes an indicator for whether or not the stop included a pat down and whether or not those pat downs included a search. Below, I explore how pat downs and searches have varied over time. We see that pat downs and searches have varied in conjunction with ISRs overall over time. However, most of the rise in ISRs in 2018 were driven by ISRs that did not involve pat downs or searches.

In [25]:
def patdown_search(row):
  if row['PAT_DOWN_I'] == "Y" and row['SEARCH_I'] == "N":
    return "Pat Down Without Search"
  elif row['SEARCH_I'] == "Y":
    return "Pat Down With Search"
  else:
    return "No Pat Down or Search"

isrs['patdown_search'] = isrs.apply(patdown_search, axis=1)

In [26]:
monthly_patdown_search = group_month(isrs, "ISR count", column=["patdown_search"])

In [27]:
monthly_patdown_search_race = group_month(isrs, "ISR count", column=["patdown_search", "RACE"])

In [28]:
alt.Chart(monthly_patdown_search, width=600).mark_area(
    opacity=0.3,
    interpolate='step'
).encode(
    alt.X('month_year', title="Month"),
    alt.Y('ISR count', stack=None, title="ISRs"),
    alt.Color('patdown_search', title="ISR Type",
              scale=alt.Scale(
            domain=['Pat Down Without Search', 'Pat Down With Search', 'No Pat Down or Search'],
            range=['#008695', '#CF1C90', '#4b4b8f']))
).properties(
title={
  "text": ["Monthly Pat Downs, Searches, and other ISRs 2016-2018"], 
  "subtitle": ["Monthly ISRs distinguished by stops involving pat downs \
without searches, searches, and stops without either pat downs or searches.",
"Source: Chicago Police Department Investigatory Stop Reports*"]
}
)

##Pat Downs and Searches by Race
Given findings from the previous two visualizations, one might wonder if the relative likelihood of being patted down or searched after a stop differs by race. I have found that from 2016-2018, the chances of a Black/African American or Hispanic subjects being patted down or searched after a stop has hovered at close to 5%. However, for white subjects, the chances of being patted down or searched after a stop has not exceeded 4% since the early months of 2016.

In [29]:
pct_by_race = monthly_patdown_search_race.merge(by_race, on=['RACE', 'month_year'])
pct_by_race['Percent of stops'] = pct_by_race['ISR count'] / pct_by_race['isr_count']

In [30]:
alt.Chart(pct_by_race, width=150).mark_area(interpolate='step').encode(
    alt.X('yearmonth(month_year)', title="Month"),
    alt.Y('Percent of stops', title="% ISRs"),
    alt.Color('patdown_search', title="ISR Type",
              scale=alt.Scale(
            domain=['Pat Down Without Search', 'Pat Down With Search', 'No Pat Down or Search'],
            range=['#008695', '#CF1C90', '#4b4b8f'])),
    alt.Facet('RACE')
).properties(
title={
  "text": ["Monthly Pat Downs, Searches, and other ISRs 2016-2018"], 
  "subtitle": ["Monthly ISRs distinguished by stops involving pat downs \
without searches, searches, and stops without either pat downs or searches.",
"Source: Chicago Police Department Investigatory Stop Reports*"]
}
)

#Use of Body Cameras
Many policy advocates have touted the use of body cameras as a mechanism to prevent police brutality. Below, I explore the volume of ISRs that have been recorded by a body camera. The use of body cameras has noticeable increased over the past couple years. The rate of adoption has varied by district but increased across all of them. 

The adoption of body cameras has not resulted in a reduction of the volume of stops overall and stops have continued to disproportionately target people of color, as seen in previous charts.

In [31]:
isrs_district = isrs.groupby(['DISTRICT']).size().reset_index()

In [32]:
isrs_district_camera = isrs.groupby(
    ['DISTRICT', 'BODY_CAMERA_I', 'year']).size().reset_index().rename(
        columns={0: 'isrs'}
    )

In [33]:
isrs_district_camera['DISTRICT'] = isrs_district_camera['DISTRICT'].astype(int).astype(str)
alt.Chart(isrs_district_camera).mark_bar().encode(
    column='year',
    x='isrs',
    y=alt.Y('DISTRICT', sort=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
                '13', '14', '15', '16', '17', '18', '19', '20', '21', '22',
                '23', '24', '25', '31', '41', '51']),
    color=alt.Color('BODY_CAMERA_I', scale=alt.Scale(
            domain=['Y', 'N'],
            range=['#80BA5A', '#E73F74']),
            title="Recorded by Body Camera")
).properties(
title={
  "text": ["Total ISRs by District and use of Body Cameras 2016-2018"], 
  "subtitle": ["Across every police district, the use of body cameras during stops has increased in both volume and proportion.",
               "Source: Chicago Police Department Investigatory Stop Reports*"]
}
)

##Mapping Use of Body Cameras

###Combine with districts

In [34]:
isrs_district_camera['DISTRICT'] = isrs_district_camera['DISTRICT'].astype(int)
camera_gdf = isrs_district_camera.merge(districts_gdf, left_on='DISTRICT', right_on='dist_num')

In [35]:
total_annual = isrs_district_camera.groupby(['year', 'DISTRICT']).sum().reset_index()
camera_pct = isrs_district_camera.merge(total_annual, on=['DISTRICT', 'year'])
camera_pct = camera_pct.loc[camera_pct['BODY_CAMERA_I'] == "Y"]
camera_pct['Percent'] = camera_pct.apply(
    lambda x:x['isrs_x'] / x['isrs_y'], axis=1)
camera_pct.rename(columns={'DISTRICT': 'dist_num'}, inplace=True)
camera_gdf = districts_gdf.merge(camera_pct, on=['dist_num'])
camera_gdf = camera_gdf[['dist_label', 'year', 'Percent', 'geometry']]

In [36]:
camera_alt = alt.Data(
    values=json.loads(
        camera_gdf.loc[camera_gdf['year'] == 2018].to_json())['features'])

###Visual

In [37]:
background = alt.Chart(camera_alt).mark_geoshape(stroke='white').properties(
    width=500,
    height=500).encode(
    color=alt.Color('properties.Percent:Q', title="Percent with camera",
                    scale=alt.Scale(scheme="pinkyellowgreen"))
)

labels = alt.Chart(district_centroids).mark_text().encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    text='dist_label',
    size=alt.value(8),
    opacity=alt.value(0.6)
).properties(
title={
  "text": ["2018 Percent ISRs Recorded by Body Camera"], 
  "subtitle": ["The 11th and 9th police districts lag behind other districts for the percentage of ISRs recorded by a body camera.",
               "Source: Chicago Police Department Investigatory Stop Reports*"]
}
)

background + labels


#Grab and clean crime data


The next topic I will explore is the relationship between ISRs and crime. To do so, I take data from the [city of Chicago's data portal](https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-present/ijzp-q8t2).

In [38]:
def get_socrata_data(client, dataset_id, page_size=5000, filter=None):
    '''
    Iterate over a dataset's pages using the Socrata API and return the data.
    This can take a while for large datasets.
    '''
    page_num = 0
    more_pages = True
    api_data = []
    while more_pages:
      try:
        next_page = client.get(
            dataset_id,
            limit=page_size,
            offset=page_size * page_num,
            where=filter
        )
        api_data = api_data + next_page
        if len(next_page) < page_size:
            more_pages = False
        page_num += 1
      except:
        time.sleep(10)
    return api_data

In [39]:
client = Socrata("data.cityofchicago.org", None)
crime_data = get_socrata_data(client, "ijzp-q8t2", filter="year >= 2016 AND year <= 2018")



In [40]:
crime_df = pd.DataFrame(crime_data)

In [41]:
crime_df['date'] = pd.to_datetime(crime_df['date'])
crime_df['month_year'] = crime_df['date'].dt.to_period('M')
crime_df['month_year'] = \
  crime_df.apply(lambda x: x['month_year'].to_timestamp(), axis=1)


#Time Trends in ISRs and Crime by District
Below, I repeat the monthly line chart that I created for ISRs above, but I add in a new line demonstrating crimes and distinguish the two using color. There is some correlation between the volume of ISRs and crimes over time. However, throughout the year, it seems ISRs begin pre-emptively increase around December/January whereas crime does not begin its uptick until later in the year in February/March.

##Combine crime and ISR data

In [42]:
monthly_crimes = group_month(crime_df, 'crime_count')

In [43]:
by_month['month_year'] = pd.to_datetime(by_month['month_year'])
monthly_crimes['value'] = "Crimes"
by_month['value'] = "ISRs"
monthly_counts = pd.concat([monthly_crimes.rename({"crime_count": "count"}, axis=1), 
          by_month.rename({"isr_count": "count"}, axis=1)])

##Visual

In [44]:
alt.Chart(monthly_counts, width=500).mark_line().encode(
    alt.X('month_year'),
    alt.Y('count'),
    alt.Color('value', scale=alt.Scale(
            domain=['Crimes', 'ISRs'],
            range=['#f97b72', "#29656c"]))
).properties(
title={
  "text": ["Monthly ISRs and Crimes 2016-2018"], 
  "subtitle": ["Crimes and ISRs both experience seasonal fluctuations.",
"Source: Chicago Police Department Investigatory Stop Reports*, \
Chicago Data Portal-Crimes^"]
}
)

#Crimes and ISRs by district
Below, I replicate the line chart above but this time I facet each chart by district. In this way, I use a text encoding to denote which chart represents each district. Here, we see some disparities and variation over time and by district with regards to the relationship between crimes and ISRs. For example, in district 11, ISRs suddenly drop in 2017 without a corresponding drop in crime. In district 1, ISRs suddenly increase in 2018 without a corresponding increas in crime. Districts 17 and 18 appear to have relatively similar levels of ISRs over time despite district 18 having consistently higher levels of crime.

##Combine crime and ISR data by district

In [45]:
district_crimes = group_month(crime_df, 'crime_count', column=['district'])

In [46]:
district_crimes.district = district_crimes.district.astype(int)
by_district['district'] = by_district.DISTRICT.astype(int)
by_district.month_year = pd.to_datetime(by_district['month_year'])
district_merged = by_district.merge(district_crimes, on=['month_year', 'district'])

In [47]:
district_merged['crime_isr_ratio'] = \
  round(district_merged['crime_count'] / district_merged['isr_count'], 1)
district_merged['district'] = district_merged['district'].astype(str)

In [48]:
by_district.drop(columns='DISTRICT', inplace=True)

In [49]:
district_crimes['value'] = "Crimes"
by_district['value'] = "ISRs"
district_counts = pd.concat([district_crimes.rename({"crime_count": "count"}, axis=1), 
          by_district.rename({"isr_count": "count"}, axis=1)])

##Visual

In [50]:
alt.Chart(district_counts).mark_line().encode(
    x=alt.X('month_year', title="Month"),
    y=alt.Y(
        'count',
        axis=alt.Axis(format='~s'),
        title="ISR Count"
    ),
    color=alt.Color('value', scale=alt.Scale(
            domain=['Crimes', 'ISRs'],
            range=['#f97b72', "#29656c"])),
    facet=alt.Facet('district', columns=5),
).properties(
    width=90,
    height=80,
    title={"text": ["Monthly ISRs and Crimes by District 2016-2018"],
"subtitle": ["There are visible inconsistencies in the relationship between \
ISRs and crime across districts and over time.",
"Source: Chicago Police Department Investigatory Stop Reports*, \
Chicago Data Portal-Crimes^"]
}
)

#Mapping Crimes and ISRs
Below, I map police districts to provide visual context for where there is a discrepancy between crimes and ISRs. In 2018, crimes were highly concentrated in the loop and the Near North Side neighborhoods which roughly corresponds to police districts 1 and 18. We see in the map below that all lakefront districts near downtown had above average crime rates but below average ISRs compared to the rest of the city.

In [55]:
district_counts['year'] = district_counts.apply(
    lambda x: x['month_year'].year, axis=1)

In [57]:
crimes_isrs_year = district_counts.groupby(['year', 'district', 'value']).sum().reset_index()
annual_averages = crimes_isrs_year.groupby(['year', 'value']).mean().reset_index().drop(columns='district')
crimes_isrs_year = crimes_isrs_year.merge(annual_averages, on=['year', 'value'], suffixes=('_district', '_city'))
crimes_isrs_year['Above average'] = crimes_isrs_year.apply(
    lambda x: True if x['count_district'] > x['count_city'] else False, axis=1)

In [58]:
average_status = crimes_isrs_year.set_index(['year', 'district', 'value']).unstack('value')['Above average'].reset_index()

def over_status(row):
  if row['Crimes'] and row['ISRs']:
    return "Both above"
  elif row['Crimes']:
    return "Crimes above"
  elif row['ISRs']:
    return "ISRs above"
  else:
    return "Both below"

average_status['Overall'] = average_status.apply(lambda x: over_status(x), axis=1)

In [59]:
avg_status_gdf = districts_gdf_isrs.merge(average_status, left_on='dist_num', right_on='district')

In [60]:
def map_one_year(gdf, year, subtitle):
  gdf_year = avg_status_gdf.loc[avg_status_gdf['year'] == year]
  geo_data = alt.Data(
      values=json.loads(gdf_year.to_json())['features'])
  background = alt.Chart(geo_data).mark_geoshape(
      stroke='white').properties(
      width=500,
      height=500
  ).encode(
      color=alt.Color('properties.Overall:O', scale=alt.Scale(
          domain=("Both above", "Both below", "Crimes above", "ISRs above"),
          range=("#7d7f80", "#bca09f", "#f7bebb", "#466367")),
          title="Crimes and ISRs")
  )
  labels = alt.Chart(district_centroids).mark_text().encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    text='dist_label',
    size=alt.value(8),
    opacity=alt.value(0.6)
  ).properties(
  title={
    "text": [str(year) + " Crimes and ISRs Compared to Citywide Average by Police District"], 
    "subtitle": subtitle
  }
  )
  return background + labels

In [61]:
map_one_year(avg_status_gdf, 2018, 
             ["Police districts along the lakefront near downtown had above average crimes compared to ISRs.",
               "Source: Chicago Police Department Investigatory Stop Reports*, Chicago Data Portal-Crimes^"])

#Data Sources
*Chicago Police Department's Investigatory Stop Reports data can be found at https://home.chicagopolice.org/statistics-data/isr-data/ and https://home.chicagopolice.org/isr-data-2018/ <br>
^The crimes dataset on Chicago's Data Portal can be found at https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-present/ijzp-q8t2/data