# Data Visualization 1: Crime Trends Over Time

### Type: Line Chart

### Tool: Altair

### Interactivity: Pan/zoom and hover tooltips (the planned year/month dropdown filter is not implemented in the code below)

### Customization: Color-coded lines by year

### Description: Shows how the number of reported crimes fluctuates by month and by year; useful for highlighting seasonal patterns and year-over-year drops.

In [2]:
import pandas as pd
import altair as alt

# Load all datasets.
# Maps a label for each reporting period to its CSV file; only the file
# paths (the dict values) are used when reading.
paths = {
    "2019": "2019.csv",
    "2020": "2020.csv",
    "2021": "2021.csv",
    "2022": "crime_reports_2022.csv",
    "2023": "2023_to_present.csv"
}

# Only these three columns are kept from each file. Passing them as
# `usecols` lets pandas skip every other column while parsing, instead of
# loading the full file and discarding most of it afterwards.
required_columns = ['OCCURRED_ON_DATE', 'YEAR', 'MONTH']

# `usecols` returns columns in file order, so re-select with the list to
# guarantee the same column order for every file. A file missing one of the
# columns makes `read_csv` raise ValueError.
dfs = [
    pd.read_csv(path, usecols=required_columns, low_memory=False)[required_columns]
    for path in paths.values()
]

# Combine all datasets into one frame with a fresh 0..n-1 index
crime_all_years = pd.concat(dfs, ignore_index=True)

# Parse and clean data. With errors='coerce', values that cannot be parsed
# become NaT/NaN instead of raising.
# NOTE(review): OCCURRED_ON_DATE is not used by the aggregation below; it is
# still parsed here in case later cells rely on it — confirm before removing.
crime_all_years['OCCURRED_ON_DATE'] = pd.to_datetime(crime_all_years['OCCURRED_ON_DATE'], errors='coerce')
crime_all_years['YEAR'] = pd.to_numeric(crime_all_years['YEAR'], errors='coerce')
crime_all_years['MONTH'] = pd.to_numeric(crime_all_years['MONTH'], errors='coerce')

# Group by year and month to count incidents. groupby drops rows whose
# YEAR or MONTH is NaN (its default dropna=True), so rows that failed to
# parse are excluded from the counts.
crime_monthly = (
    crime_all_years
    .groupby(['YEAR', 'MONTH'])
    .size()
    .reset_index(name='Crime_Count')
    .sort_values(['YEAR', 'MONTH'])
)

# A single coerced NaN turns the whole source column into float64, which
# would leave the group keys as 2019.0 / 1.0. No NaN keys remain after the
# groupby, so casting back to integers is safe and keeps the keys clean.
crime_monthly = crime_monthly.astype({'YEAR': 'int64', 'MONTH': 'int64'})

# Encoding channels: month on an ordinal x axis, incident count on a
# quantitative y axis, and one colored line per year (nominal).
month_axis = alt.X('MONTH:O', title='Month')
count_axis = alt.Y('Crime_Count:Q', title='Crime Count')
year_color = alt.Color('YEAR:N', title='Year')

# Line chart with a point marker at every (month, count) value
line_marks = alt.Chart(crime_monthly).mark_line(point=True)
encoded = line_marks.encode(
    x=month_axis,
    y=count_axis,
    color=year_color,
    tooltip=['YEAR', 'MONTH', 'Crime_Count'],
)

# NOTE(review): the title hard-codes 2019–2023 while the last input file is
# named '2023_to_present.csv' — confirm the range matches the data.
sized = encoded.properties(
    title='Monthly Crime Trends in Boston (2019–2023)',
    width=700,
    height=400,
)

# Make the chart's scales interactive (pan/zoom)
chart = sized.interactive()

# Render the chart inline
chart.display()

# Write a standalone HTML copy of the chart and confirm on stdout
chart.save('monthly_crime_trends.html')
print("Chart saved as 'monthly_crime_trends.html'")

Chart saved as 'monthly_crime_trends.html'
