In [424]:
import altair as alt
import pandas as pd
import numpy as np

In [425]:
budgets = pd.read_csv("../data/budgets.csv")
budgets["year"] = budgets["year"].astype(str)

In [426]:
def budget_by_year_area(df):
    df["department2"] = np.where(df["dept_num"].isin([57, 81, 84, 91, 41, 58, 59]), 
                                 df["department"], "Other")
    df = df.groupby(["year", "department2"], as_index = False)["ordinance"].sum()
    chart = alt.Chart(df, title = "City of Chicago Budget 2019-2023"
            ).mark_area().encode(
        x = alt.X("year").title("Year").axis(labelAngle = 0),
        y = alt.Y("ordinance").title("Amount ($)"),
        color = alt.Color("department2").title("Department").scale(scheme = "dark2")
    ).properties(height=500, 
                 width=500
    ).configure_legend(labelLimit= 0,
                        padding = 10,
                        cornerRadius = 5,
                        labelFontSize = 12,
                        symbolSize = 200,
                        titleFontSize = 14,
                        fillColor = "#FFF5DB"
    )

    return chart

budget_by_year_area(budgets)

This area chart gives an overview of the city budget.  It shows the total budget for the city of Chicago from 2019-2023, broken into different departments.  Since there are so many departments, the graph began getting crowded so I chose a few departments that I think are particularly relevant to residents' day-to-day lives.  

In [427]:
def budget_by_year_dept(df):
    df = df[df["dept_num"].isin([57, 81, 84, 91, 41, 58, 59])]
    df = df.groupby(["year", "department"], as_index = False)["ordinance"].sum()
    
    chart = alt.Chart(df, title = "Selected Department Budgets 2019-2023"
            ).mark_line(point=True, size = 3).encode(
        x = alt.X("year").title("Year").axis(labelAngle = 0),
        y = alt.Y("ordinance").title("Amount ($)"),
        color = alt.Color("department").title("Department").scale(scheme = "dark2")
    ).properties(width = 500, height = 400
    ).configure_legend(labelLimit= 0,
                       padding = 10,
                       cornerRadius = 5,
                       labelFontSize = 12,
                       symbolSize = 200,
                       titleFontSize = 14,
                       fillColor = "#FFF5DB"
    )

    return chart

budget_by_year_dept(budgets)

This line graph further focuses on the budgets of the aforementioned departments to get a better idea of the change in amount of money allocated to each of them over time.  It also allows the viewer to compare the department budgets to each other more easily.  For example, we can see that the CPD budget is much higher than the other departments and CDOT and CDPH budgets have grown since 2020.   

In [428]:
requests_311 = pd.read_csv("../data/311_requests.csv")
requests_311["by_year_created_date"] = requests_311["by_year_created_date"].str.extract(r"(\d{4})")
requests_311["OWNER_DEPARTMENT"] = requests_311["OWNER_DEPARTMENT"].str.replace(r"(.*(?=-)- )", "", regex = True)
requests_311 = requests_311.rename(columns = {"SR_NUMBER": "num_requests",
                                               "by_year_created_date": "year",
                                               "OWNER_DEPARTMENT": "department",
                                               "SR_TYPE": "type"})

In [441]:
def stacked_bar_311(df):
    df["department2"] = np.where(df["department"].isin(["Department of Transportation",
       'Animal Care and Control', 'Streets and Sanitation', 
       'Health', 'Department of Water Management', '311 City Services', 
       'Buildings','Aviation']), df["department"], "Other")
    df = df.groupby(["year", "department2"], as_index = False)["num_requests"].sum()

    chart = alt.Chart(df, title = "311 Service Requests by Department").mark_bar().encode(
        x=alt.X("year").title("Year").axis(labelAngle = 0),
        y = alt.Y("num_requests").stack("normalize").title("Percent of Requests"),
        color = alt.Color("department2").title("Department").scale(scheme = "dark2")
    ).properties(width = 400, height = 600
    ).configure_legend(labelLimit= 0,
                        padding = 10,
                        cornerRadius = 5,
                        labelFontSize = 12,
                        symbolSize = 200,
                        titleFontSize = 14,
                        fillColor = "#FFF5DB"
    )
    return chart

stacked_bar_311(requests_311)

This stacked bar chart shows what department handles most 311 requests in the hopes of understanding which city departments deal with the most day-to-day non-emergency issues.  Because there are many departments, I chose to show those that had the most requests and put the rest in the "Other" category.  We can see that the 311 City Services and Department of Streets and Sanitation get the most requests by far.  The 311 City Services requests are all requests for information.

In [430]:
dept_dict = {"Department of Transportation": "Chicago Department of Transportation",
    'Animal Care and Control': 'Chicago Animal Care and Control', 
    'Streets and Sanitation': 'Department of Streets and Sanitation', 
    'Health': 'Chicago Department of Public Health', 
    'Department of Water Management': 'Department of Water Management', 
    '311 City Services': "Office of Emergency Management and Communications", 
    'Buildings': 'Department of Buildings', 'Aviation': 'Chicago Department of Aviation'}

def bar_311_facet(df):
    df = df[df["department"].isin(["Department of Transportation",
       'Animal Care and Control', 'Streets and Sanitation', 
       'Health', 'Department of Water Management', '311 City Services', 
       'Buildings','Aviation'])]
    df["department"] = df["department"].map(dept_dict)

    df = df.groupby(["year", "department"], as_index = False)["num_requests"].sum()
    budgets_311_subset =  budgets[budgets["department"].isin([
        "Chicago Department of Transportation",
       'Chicago Animal Care and Control', 'Department of Streets and Sanitation', 
       'Chicago Department of Public Health', 'Department of Water Management', 
       "Office of Emergency Management and Communications", 
       'Department of Buildings','Chicago Department of Aviation'])]
    budgets_311_subset = budgets_311_subset.groupby(["year", "department"], as_index = False)["ordinance"].sum()
    df = pd.merge(df, budgets_311_subset, how = "left", on = ["year", "department"])

    chart = alt.Chart(df, title = "311 Requests by Department Budget").mark_bar().encode(
        x = alt.X("num_requests").title("# 311 Requests"),
        y = alt.Y("department").title("Department"),
        color = alt.Color("ordinance").title("Budget ($)").scale(scheme = "viridis"),
        facet = alt.Facet("year").title("Year")
    ).properties(width = 100, height = 200
    ).configure_legend(labelLimit= 0,
                        padding = 10,
                        cornerRadius = 5,
                        labelFontSize = 12,
                        symbolSize = 200,
                        titleFontSize = 14,
                        fillColor = "#FFF5DB"
    )
    return chart

bar_311_facet(requests_311)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["department"] = df["department"].map(dept_dict)


In this faceted bar chart I wanted to get an understanding of how the distribution of 311 requests changed alongside the departmental budgets.  The Office of Emergency Management and Communications houses 311 City Services which is why their bar is so large.  I was hoping to see if there was any relationship between budgetary changes and changes in number of requests.  Interestingly, CDOT's budget has increases over the past five years while their number of requests have decreased.

In [431]:
crashes = pd.read_csv("../data/traffic_crashes.csv")
crashes["by_year_crash_date"] = crashes["by_year_crash_date"].str.extract(r"(\d{4})")
crashes = crashes.rename(columns = {"CRASH_RECORD_ID": "num_crashes",
                                    "TRAFFIC_CONTROL_DEVICE": "traffic_control",
                                    "DEVICE_CONDITION": "device_condition",
                                     "ROAD_DEFECT": "road_defect",
                                     "by_year_crash_date": "year"})

In [432]:
def concat_chart_crashes_cdot(df):
    df = df.groupby(["year"], as_index = False)["num_crashes"].sum()
    budgets_cdot =  budgets[budgets["department"] == "Chicago Department of Transportation"]
    budgets_cdot = budgets_cdot.groupby(["year"], as_index = False)["ordinance"].sum()
    df = df.join(budgets_cdot.set_index("year"), on = "year")

    upper = alt.Chart(df, title = "Traffic Crashes 2019-2023").mark_bar(color = "#66A61E").encode(
        x = alt.X("year").title("Year").axis(labelAngle = 0),
        y = alt.Y("num_crashes").title("# Crashes"),
    ).properties(width = 600, height = 400
    )

    lower = alt.Chart(df, title = "CDOT Budget 2019-2023").mark_line(
        color = "#66A61E").encode(
        x = alt.X("year").title("Year").axis(labelAngle = 0),
        y = alt.Y("ordinance").title("Amount ($)"),
    ).properties(width = 600, height = 100
    )

    chart = alt.vconcat(upper, lower)

    return chart

concat_chart_crashes_cdot(crashes)

This concatenated bar and line chart shows the number of traffic crashes each year alongside the change in CDOT's budget.  I wanted to see if there was any correlation between these two variables.  It seems that traffic crashes stay very constant over the years, save 2020 which is presumeably a result of the pandemic.  However, CDOT's budget continues to increase by a rather large amount.

In [433]:
crimes = pd.read_csv("../data/crimes.csv")
crimes = crimes.rename(columns = {"ID": "num_crimes",
                                  "Primary Type": "prim_type",
                                  "Location Description": "location",
                                  "Arrest": "arrest",
                                  "Domestic": "domestic",
                                  "Community Area": "community_area",
                                  "Year": "year"})
crimes["year"] = crimes["year"].astype(str)
budgets_cpd =  budgets[budgets["department"] == "Chicago Police Department"]
budgets_cpd = budgets_cpd.groupby(["year"], as_index = False)["ordinance"].sum()

In [434]:
def crime_cpd_budget_bubble(df):
    arrests = df[df["arrest"] == True]
    arrests = arrests.groupby(["year"], as_index = False)["num_crimes"].sum()
    arrests = arrests.rename(columns = {"num_crimes": "num_arrests"})
    df = df.groupby(["year"], as_index = False)["num_crimes"].sum()

    df = df.join(arrests.set_index("year"), on = "year")
    df = df.join(budgets_cpd.set_index("year"), on = "year")

    chart = alt.Chart(df, title = "Chicago Crime Statistics and CPD Budget 2019-2023").mark_circle(fillOpacity = 0.8).encode(
        x = alt.X("num_arrests").title("# Crimes Reported Ending in Arrest"),
        y = alt.Y("num_crimes").title("# Crimes Reported").scale(zero=False, padding=1),
        color = alt.Color("year").title("Year").scale(scheme = "dark2"),
        size = alt.Size("ordinance").title("Budget ($)").scale(domain = [1600000000, 1700000000])
    ).configure_legend(labelLimit= 0,
                        padding = 10,
                        cornerRadius = 5,
                        labelFontSize = 12,
                        symbolSize = 200,
                        titleFontSize = 14,
                        fillColor = "#FFF5DB"
    )
    return chart

crime_cpd_budget_bubble(crimes)

Here I wanted to be able to show some crime statistics alongside the police budget to determine if there were any patterns.  This bubbleplot shows the number of crimes reported and the number of crimes that ended in arrest, while the size of the bubble indicates the police budget for that year.  It's interesting to note that the years with the highest budget and lowest budget both had around the same number of crimes.  

In [435]:
police_sentiment = pd.read_csv("../data/police_sentiment_scores.csv")
police_sentiment["by_year_start_date"] = police_sentiment["by_year_start_date"].str.extract(r"(\d{4})")
police_sentiment = police_sentiment.rename(columns = {"by_year_start_date": "year",
                                                      "TRUST": "Trust",
                                                      "SAFETY": "Safety"})

In [436]:
def scatter_matrix_cpd(df):
    crimes_df = crimes.groupby(["year"], as_index = False)["num_crimes"].sum()
    safety = df.groupby(["year"], as_index = False)["Safety"].mean()
    df = df.groupby(["year"], as_index = False)["Trust"].mean()
    df = df.join(safety.set_index("year"), on = "year")
    df = df.join(budgets_cpd.set_index("year"), on = "year")
    df = df.join(crimes_df.set_index("year"), on = "year")

    chart = alt.Chart(df, title = "CPD Sentiment").mark_circle(size = 100, fillOpacity = 0.8).encode(
        x = alt.X(alt.repeat("column"), type='quantitative').scale(zero=False, padding=1),
        y = alt.Y(alt.repeat("row"), type='quantitative').scale(zero=False, padding=1),
        color = alt.Color("year").title("Year").scale(scheme = "dark2")
    ).properties(width = 200, height = 100
    ).repeat(
        row=["Safety", "Trust", "ordinance"],
        column=["ordinance", "Trust", "Safety"]
    ).configure_legend(labelLimit= 0,
                        padding = 10,
                        cornerRadius = 5,
                        labelFontSize = 12,
                        titleFontSize = 14,
                        fillColor = "#FFF5DB"
    ).resolve_axis()

    return chart

scatter_matrix_cpd(police_sentiment)

This scatterplot matrix plots the CPD's budget, and two metrics of resident police sentiment over 5 years.  The police sentiment survey asked residents questions about how safe they felt in their neighborhoods and how much they believed the police listened to and respected residents.  These safety and trust scores were out of 10, a 55 would indicate a 5.5/10 score.  I wanted to know if police budget changes and police sentiment changed in tandem or in any unexpected ways.  

In [437]:
library_visitors = pd.read_csv("../data/library_visitors.csv")
library_visitors["year"] = library_visitors["year"].astype(str)

In [438]:
def heatmap_library_visitors(df):
    df = df[df["month"] != "YTD"]
    df = df.groupby(["year", "month"], as_index = False)["num_visitors"].sum()

    chart = alt.Chart(df, title = "Chicago Public Library Visits 2019-2023").mark_rect().encode(
        x = alt.X("year").title("Year").axis(labelAngle = 0),
        y = alt.Y("month", sort = ['JANUARY', 'FEBRUARY', 'MARCH', 'APRIL', 'MAY', 
                                   'JUNE', 'JULY', 'AUGUST', 'SEPTEMBER', 'OCTOBER', 
                                   'NOVEMBER', 'DECEMBER']).title("Month"),
        color = alt.Color("num_visitors").title("# Visitors").scale(scheme = "viridis")
    ).properties(width = 500, height = 400
    ).configure_legend(labelLimit= 0,
                        padding = 10,
                        cornerRadius = 5,
                        labelFontSize = 12,
                        titleFontSize = 14,
                        fillColor = "#FFF5DB"
    )

    return chart

heatmap_library_visitors(library_visitors)

This heatmap shows the number of visitors to Chicago Public Libraries by month from 2019-2023.  I wanted to get an overview of library usage and how it has changed overtime before looking into the relationship with budget.  We can clearly see that visitation was much higher before the pandemic, and it has been slow to recover since.  

In [439]:
budgets_cpl =  budgets[budgets["department"] == "Chicago Public Library"]
budgets_cpl = budgets_cpl.groupby(["year"], as_index = False)["ordinance"].sum()


In [440]:
def concat_chart_library_budget(df):
    df = df.groupby(["year"], as_index = False)["num_visitors"].sum()
    df = df.join(budgets_cpl.set_index("year"), on = "year")

    upper = alt.Chart(df, title = "Chicago Public Library Visitors 2019-2023").mark_bar(color = "#7570B3").encode(
        x = alt.X("year").title("Year").axis(labelAngle = 0),
        y = alt.Y("num_visitors").title("# Visitors"),
    ).properties(width = 600, height = 400
    )

    lower = alt.Chart(df, title = "Chicago Public Library Budget").mark_line(
        color = "#7570B3").encode(
        x = alt.X("year").title("Year").axis(labelAngle = 0),
        y = alt.Y("ordinance").title("Amount ($)"),
    ).properties(width = 600, height = 100
    )

    chart = alt.vconcat(upper, lower)

    return chart

concat_chart_library_budget(library_visitors)

Similarly to the CDOT chart, I wanted to get an overview of how CPL's budget might correlate with a metric of library usage.  We can see that the # of visitors dropped sharply after the pandemic and has slowly been growing since.  The library's budget did not decrease following the pandemic, it increased from 2019-2022 but interestingly experienced a decrease in the 2023 budget.  