In [1]:
import pandas as pd
from datawrapper import Datawrapper

In [2]:
# Load the arrests extract; ArrestDateTime is parsed into datetimes on read
# so the .dt accessor can be used on it in later cells.
df = pd.read_csv("./arrests.csv", parse_dates=["ArrestDateTime"])

In [3]:
df.head()

Unnamed: 0,RowID,X,Y,IncidentNumber,ArrestNumber,Age,Gender,Race,ArrestDateTime,ArrestLocation,...,ChargeDescription,District,Post,Neighborhood,Latitude,Longitude,GeoLocation,Shape,Year,year
0,29,1401347.0,608148.870493,22L09338,23000037.0,39.0,M,B,2022-12-31 23:50:00,4000 OAKFORD ST,...,HAND GUN VIOLATION,Northwest,621.0,West Arlington,39.3361,-76.6853,"(39.3361,-76.6853)",,2022,2022
1,30,1401347.0,608148.870493,22L09338,23000039.0,50.0,M,B,2022-12-31 23:50:00,4000 OAKFORD ST,...,HAND GUN VIOLATION,Northwest,621.0,West Arlington,39.3361,-76.6853,"(39.3361,-76.6853)",,2022,2022
2,31,,,,23000010.0,27.0,M,B,2022-12-31 23:40:00,,...,FAILURE TO APPEAR,,,,,,"(,)",,2022,2022
3,32,1417636.0,595206.835862,22L09343,23000050.0,42.0,M,B,2022-12-31 23:15:00,500 DOLPHIN ST,...,HAND GUN VIOLATION,Central,123.0,Upton,39.3004,-76.6279,"(39.3004,-76.6279)",,2022,2022
4,33,,,22L09312,22157183.0,43.0,F,W,2022-12-31 21:00:00,1500 BECKLOW AVE,...,STOLEN AUTO,,,,,,"(,)",,2022,2022


In [4]:
# Derive the calendar year of each arrest; the per-year tallies below group on it.
df["year"] = df["ArrestDateTime"].dt.year

In [5]:
# Tally arrests per year, put the years in ascending order, then turn the
# index back into a regular column so the frame can be handed to a chart.
total_by_year = (
    df["year"]
    .value_counts()
    .sort_index()
    .reset_index()
)

In [6]:
total_by_year

Unnamed: 0,year,count
0,2010,45224
1,2011,43364
2,2012,42333
3,2013,39542
4,2014,37078
5,2015,25732
6,2016,23089
7,2017,21989
8,2018,20543
9,2019,19407


In [7]:
# Datawrapper client reused by every chart call in the rest of the notebook.
# NOTE(review): no access token is passed here — presumably it is picked up
# from the environment; confirm before running on a fresh machine.
dw = Datawrapper()

In [8]:
# Create the citywide column chart from the per-year totals. The response is
# kept because the next cell reads the new chart's id from it.
chart_config = dw.create_chart(title="Baltimore Arrests", chart_type="column-chart", data=total_by_year)

In [9]:
# Keep the new chart's id; every later publish/update/display call takes it.
chart_id = chart_config["id"]

In [10]:
%%capture
# Publish the chart; %%capture keeps the call's output out of the notebook.
dw.publish_chart(chart_id)

In [11]:
dw.display_chart(chart_id)

In [12]:
%%capture
# Attach source attribution and a byline to the chart created above.
dw.update_description(
    chart_id,
    source_name="OpenBaltimore",
    source_url="https://data.baltimorecity.gov/datasets/baltimore::bpd-arrests/about",
    byline="Ben Welsh",
)

In [13]:
%%capture
# Republish after the description edit — presumably required for the change
# to show up in the published chart; confirm against the Datawrapper docs.
dw.publish_chart(chart_id)

In [14]:
dw.display_chart(chart_id)

In [15]:
# Styling overrides for the chart: the base color is IRE's accent color.
metadata = {"visualize": {"base-color": "#bf7836"}}

In [16]:
%%capture
# Apply the styling dict built in the previous cell to the existing chart.
dw.update_chart(
    chart_id,
    metadata=metadata
)

In [17]:
%%capture
# Republish after the styling edit — presumably required for the new color
# to show up in the published chart; confirm against the Datawrapper docs.
dw.publish_chart(chart_id)

In [18]:
dw.display_chart(chart_id)

In [19]:
df.District.value_counts()

District
Western      29842
Central      28501
Eastern      28114
Southern     25780
Northeast    24180
Southeast    23710
Northwest    22044
Southwest    21822
Northern     13087
Name: count, dtype: int64

In [20]:
def create_chart(district: str, base_color: str = "#113421"):
    """Create, describe, publish and display a yearly arrests chart for one district.

    Relies on two notebook-level names defined in earlier cells: ``df`` (the
    arrests frame, which must have ``District`` and ``year`` columns) and
    ``dw`` (the Datawrapper client).

    Args:
        district: District name exactly as it appears in ``df.District``,
            for example ``"Western"``.
        base_color: Hex color sent to Datawrapper as the chart's
            ``base-color``. Defaults to the dark green the district charts
            have always used.

    Returns:
        Whatever ``dw.display_chart`` returns for the new chart, so callers
        can collect the results and render them later.

    Raises:
        ValueError: If no rows in ``df`` match ``district``.
    """
    district_df = df[df.District == district]
    if district_df.empty:
        # Fail before touching the API instead of publishing an empty chart
        # for a misspelled or unknown district.
        raise ValueError(f"No arrests found for district {district!r}")

    # Arrests per year for this district, in chronological order.
    district_by_year = district_df.year.value_counts().sort_index().reset_index()

    chart_config = dw.create_chart(
        title=f"Arrests in Baltimore's {district} District",
        chart_type="column-chart",
        data=district_by_year,
        metadata={
            "visualize": {
                "base-color": base_color
            }
        }
    )
    chart_id = chart_config["id"]

    dw.update_description(
        chart_id,
        source_name="OpenBaltimore",
        source_url="https://data.baltimorecity.gov/datasets/baltimore::bpd-arrests/about",
        byline="Ben Welsh",
    )

    dw.publish_chart(chart_id)
    return dw.display_chart(chart_id)

In [21]:
create_chart("Western")

In [22]:
# Build one published chart per police district. Rows with no district
# recorded are dropped first so NaN never reaches create_chart.
chart_list = []
for district in df["District"].dropna().unique():
    print(f"Creating chart for the {district} District")
    c = create_chart(district)
    # Keep each displayable so all of them can be rendered together later.
    chart_list.append(c)

Creating chart for the Northwest District
Creating chart for the Central District
Creating chart for the Western District
Creating chart for the Eastern District
Creating chart for the Northeast District
Creating chart for the Southern District
Creating chart for the Southeast District
Creating chart for the Northern District
Creating chart for the Southwest District


In [23]:
from IPython.display import display

In [24]:
display(*chart_list)

In [25]:
from datetime import timedelta

In [26]:
df.ArrestDateTime.max()

Timestamp('2023-12-30 22:54:00')

In [27]:
# Most recent arrest timestamp in the data; used as the end of the "last week" window.
max_date = df["ArrestDateTime"].max()

In [28]:
# Start of the trailing seven-day window. It is measured back from the latest
# arrest in the data (max_date), not from today's date.
seven_days_ago = max_date - timedelta(days=7)

In [29]:
seven_days_ago

Timestamp('2023-12-23 22:54:00')

In [30]:
# Keep only arrests that fall inside the trailing seven-day window (inclusive of its start).
last_week_df = df.loc[df["ArrestDateTime"] >= seven_days_ago]

In [31]:
last_week_df.ChargeDescription.value_counts()

ChargeDescription
FAILURE TO APPEAR        34
ASSAULT-SECOND DEGREE    30
CONTROLLED SUBSTANCE     20
HAND GUN VIOLATION       14
ASSAULT-FIRST DEGREE      8
AGGRAVATED ASSAULT        6
COMMON ASSAULT            6
ATT 1ST DEG. MURDER       6
STOLEN AUTO               5
BURGLARY                  4
Name: count, dtype: int64

In [32]:
# Count charges within the last-week slice, flatten the counts into a frame,
# and keep the first ten rows for the chart.
top_charges_df = (
    last_week_df["ChargeDescription"]
    .value_counts()
    .reset_index()
    .head(10)
)

In [49]:
# Bar chart (`d3-bars`) of the charge counts from the trailing seven-day slice.
# Source attribution and byline are supplied up front through the "describe"
# metadata block, so no separate update_description call is made here.
chart_config = dw.create_chart(
    # Plain string: the title has no placeholders, so no f-prefix is needed.
    title="Top 10 arrest charges in Baltimore last week",
    chart_type="d3-bars",
    data=top_charges_df,
    metadata={
        "visualize": {
            "base-color": "#113421",
            "thick": True,
        },
        "describe": {
            "source-name": "OpenBaltimore",
            "source-url": "https://data.baltimorecity.gov/datasets/baltimore::bpd-arrests/about",
            "byline": "Ben Welsh"
        }
    }
)

In [50]:
# Rebind chart_id to the top-charges chart; the publish/display cells below act on it.
chart_id = chart_config["id"]

In [51]:
%%capture
# Publish the top-charges chart; %%capture keeps the call's output out of the notebook.
dw.publish_chart(chart_id)

In [52]:
dw.display_chart(chart_id)