# Data Visualization 3: Crime Type Breakdown

### Type: Bar Chart

### Tool: Altair

### Interactivity: Pan/zoom and hover tooltips (a year/district dropdown is not implemented in the code below)

### Customization: Color per year

### Description: Shows the top 10 crime types for each year, with bars colored by year.

In [1]:
import pandas as pd
import altair as alt

# Source CSV files, keyed by the year label of each file
paths = {
    "2019": "2019.csv",
    "2020": "2020.csv",
    "2021": "2021.csv",
    "2022": "crime_reports_2022.csv",
    "2023": "2023_to_present.csv"
}

# The only columns this notebook uses; shared by the read and the NaN filter
columns = ['OFFENSE_DESCRIPTION', 'YEAR', 'DISTRICT']

# Parse just the needed columns (usecols) instead of loading every column and
# discarding most of them; the trailing [columns] keeps a fixed column order
# regardless of the order in each file.
dfs = [
    pd.read_csv(path, usecols=columns, low_memory=False)[columns]
    for path in paths.values()
]

# Stack the per-file frames into one table with a fresh 0..n-1 index
crime_types_df = pd.concat(dfs, ignore_index=True)

# Drop rows missing any of the required values
crime_types_df.dropna(subset=columns, inplace=True)

# Count incidents for every (year, offense type) pair; the result has one row
# per pair with the number of matching records in 'Count'.
year_offense_groups = crime_types_df.groupby(['YEAR', 'OFFENSE_DESCRIPTION'])
offense_summary = year_offense_groups.size().reset_index(name='Count')

# Keep top 10 crime types per year.
# Sort by year, then by descending count, and take the first 10 rows of each
# year. This avoids groupby(...).apply(...), which is deprecated when the
# applied function operates on the grouping column and would otherwise risk
# losing the YEAR column that the chart relies on.
top_offenses = (
    offense_summary
    .sort_values(['YEAR', 'Count'], ascending=[True, False])
    .groupby('YEAR')
    .head(10)
    .reset_index(drop=True)
)

# Build interactive bar chart from the per-year top-10 table.

# Encoding channels, named individually for readability
count_axis = alt.X('Count:Q', title='Number of Crimes')
# sort='-x' orders the crime types by descending x value
offense_axis = alt.Y('OFFENSE_DESCRIPTION:N', sort='-x', title='Crime Type')
year_color = alt.Color('YEAR:N', title='Year')
hover_fields = ['OFFENSE_DESCRIPTION', 'YEAR', 'Count']

bars = alt.Chart(top_offenses).mark_bar().encode(
    x=count_axis,
    y=offense_axis,
    color=year_color,
    tooltip=hover_fields,
)

# Fix the canvas size and title, then enable Altair's interactive mode
chart = bars.properties(
    width=700,
    height=400,
    title='Top 10 Crime Types per Year in Boston',
).interactive()

chart.display()

# [stray cell-output residue, not code; appears to echo the groupby.apply line above]  .apply(lambda df: df.nlargest(10, 'Count'))


In [2]:
# Export the chart built above to an HTML file in the working directory
output_html = 'yearly_crime_types.html'
chart.save(output_html)