# Data Visualization 4: Hourly Crime Distribution

### Type: Line Chart

### Tool: Altair

### Interactivity: Pan/zoom and hover tooltips (a slider to choose the weekday is not implemented yet)

### Customization: Color per day of the week

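### Description: Shows the number of reported crimes for each hour of the day, with one line per day of the week, aggregated over all loaded datasets (2019 through 2023-to-present).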

In [1]:
import pandas as pd
import altair as alt

# Load datasets: one CSV per reporting period, keyed by a label for that period.
# Only the file paths are used below; the keys are for the reader.
paths = {
    "2019": "2019.csv",
    "2020": "2020.csv",
    "2021": "2021.csv",
    "2022": "crime_reports_2022.csv",
    "2023": "2023_to_present.csv"
}

# Load only the columns this chart needs. Passing them as `usecols` keeps
# pandas from parsing every other column in each file; the trailing
# `[needed_columns]` fixes the column order regardless of the file's layout.
needed_columns = ['YEAR', 'HOUR', 'DAY_OF_WEEK']
dfs = [
    pd.read_csv(path, usecols=needed_columns, low_memory=False)[needed_columns]
    for path in paths.values()
]
hourly_df = pd.concat(dfs, ignore_index=True)

# Coerce to numeric BEFORE dropping missing values: errors='coerce' turns
# unparseable entries into NaN, and those rows must be removed as well.
hourly_df['HOUR'] = pd.to_numeric(hourly_df['HOUR'], errors='coerce')
hourly_df['YEAR'] = pd.to_numeric(hourly_df['YEAR'], errors='coerce')

# Drop rows that cannot be placed on the chart (no hour or no weekday).
# YEAR is not used for grouping, so a missing YEAR does not discard a row.
hourly_df.dropna(subset=['HOUR', 'DAY_OF_WEEK'], inplace=True)

# With the NaNs gone, HOUR can be stored as an integer so the grouping keys
# are 0, 1, 2, ... rather than floats.
# NOTE(review): assumes HOUR holds whole hours; a fractional value would be
# truncated here - confirm against the data dictionary.
hourly_df['HOUR'] = hourly_df['HOUR'].astype(int)

# Calendar order for the weekdays (Monday first); a plain sort on the names
# would be alphabetical instead.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekday_dtype = pd.CategoricalDtype(categories=day_order, ordered=True)

# Count the incidents recorded for every (weekday, hour) combination.
incidents_per_slot = hourly_df.groupby(['DAY_OF_WEEK', 'HOUR']).size()
hourly_summary = incidents_per_slot.reset_index(name='Crime_Count')

# Convert the weekday column to an ordered categorical so that sorting follows
# the calendar order above; names not listed in day_order become NaN.
hourly_summary['DAY_OF_WEEK'] = hourly_summary['DAY_OF_WEEK'].astype(weekday_dtype)
hourly_summary = hourly_summary.sort_values(by=['DAY_OF_WEEK', 'HOUR'])

# Altair line plot: one line per weekday, crime count against hour of day.

# The colour channel is declared nominal (':N'), and a nominal legend is
# sorted alphabetically by default (Friday, Monday, ...). Reuse the category
# order already attached to the DAY_OF_WEEK column so the legend follows the
# same Monday-to-Sunday order as the data.
weekday_sort = hourly_summary['DAY_OF_WEEK'].cat.categories.tolist()

chart = alt.Chart(hourly_summary).mark_line().encode(
    # Hours are treated as ordered categories, so every hour gets its own tick.
    x=alt.X('HOUR:O', title='Hour of Day'),
    y=alt.Y('Crime_Count:Q', title='Number of Crimes'),
    color=alt.Color('DAY_OF_WEEK:N', title='Day of Week', sort=weekday_sort),
    tooltip=['DAY_OF_WEEK', 'HOUR', 'Crime_Count']
).properties(
    title='Hourly Crime Distribution by Day of Week',
    width=700,
    height=400
).interactive()  # enables pan/zoom on the rendered chart

chart.display()