In [1]:
# 1. Import libraries and load cleaned data
import pandas as pd
import plotly.express as px
from pathlib import Path

# Define processed data directory
PROC_DIR = Path('../data/processed')

# Load cleaned datasets
# Ensure 'date' columns are parsed as datetime

df_crime = pd.read_csv(PROC_DIR / 'crime_clean.csv', parse_dates=['date'])
df_weather = pd.read_csv(PROC_DIR / 'weather_clean.csv', parse_dates=['date'])

In [2]:
## 2. Prepare time dimension columns

# %%
# Extract year, month, weekday names, and define seasons
df_crime['year'] = df_crime['date'].dt.year
df_crime['month'] = df_crime['date'].dt.month
df_crime['weekday'] = df_crime['date'].dt.day_name()

# Map month to season

def get_season(month):
    if month in [12, 1, 2]:
        return 'Winter'
    elif month in [3, 4, 5]:
        return 'Spring'
    elif month in [6, 7, 8]:
        return 'Summer'
    else:
        return 'Fall'

# Apply season mapping

df_crime['season'] = df_crime['month'].apply(get_season)


AttributeError: Can only use .dt accessor with datetimelike values

In [None]:
# 1. Import libraries and load cleaned data
import pandas as pd
import plotly.express as px
from pathlib import Path

# Define processed data directory
PROC_DIR = Path('../data/processed')

# Load cleaned datasets
# Ensure 'date' columns are parsed as datetime

df_crime = pd.read_csv(PROC_DIR / 'crime_clean.csv', parse_dates=['date'])
df_weather = pd.read_csv(PROC_DIR / 'weather_clean.csv', parse_dates=['date'])

In [None]:
# 1. Import libraries and load cleaned data
import pandas as pd
import plotly.express as px
from pathlib import Path

# Define processed data directory
PROC_DIR = Path('../data/processed')

# Load cleaned datasets
# Ensure 'date' columns are parsed as datetime

df_crime = pd.read_csv(PROC_DIR / 'crime_clean.csv', parse_dates=['date'])
df_weather = pd.read_csv(PROC_DIR / 'weather_clean.csv', parse_dates=['date'])

In [None]:
# 1. Import libraries and load cleaned data
import pandas as pd
import plotly.express as px
from pathlib import Path

# Define processed data directory
PROC_DIR = Path('../data/processed')

# Load cleaned datasets
# Ensure 'date' columns are parsed as datetime

df_crime = pd.read_csv(PROC_DIR / 'crime_clean.csv', parse_dates=['date'])
df_weather = pd.read_csv(PROC_DIR / 'weather_clean.csv', parse_dates=['date'])

In [None]:
# 1. Import libraries and load cleaned data
import pandas as pd
import plotly.express as px
from pathlib import Path

# Define processed data directory
PROC_DIR = Path('../data/processed')

# Load cleaned datasets
# Ensure 'date' columns are parsed as datetime

df_crime = pd.read_csv(PROC_DIR / 'crime_clean.csv', parse_dates=['date'])
df_weather = pd.read_csv(PROC_DIR / 'weather_clean.csv', parse_dates=['date'])

In [None]:
## 3. Daily Crime Count Time Series

# %%
# Aggregate daily crime counts
daily_counts = (
    df_crime.groupby('date')
           .size()
           .reset_index(name='crime_count')
)

# Plot interactive line chart of daily crime counts
fig = px.line(
    daily_counts,
    x='date',
    y='crime_count',
    title='Daily Crime Count Over Time',
    labels={'crime_count': 'Number of Crimes', 'date': 'Date'}
)
fig.show()

In [None]:
## 4. Monthly Crime Trend

# %%
# Aggregate by year and month
df_monthly = (
    df_crime.groupby(['year', 'month'])
           .size()
           .reset_index(name='crime_count')
)
# Create a datetime for the first day of each month for plotting
df_monthly['month_start'] = pd.to_datetime(
    df_monthly[['year', 'month']].assign(day=1)
)

# Plot interactive bar chart for monthly counts
fig = px.bar(
    df_monthly,
    x='month_start',
    y='crime_count',
    title='Monthly Crime Count',
    labels={'crime_count': 'Number of Crimes', 'month_start': 'Month'}
)
fig.show()

In [None]:
## 5. Crime Count by Weekday

# %%
# Order weekdays
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Count crimes by weekday
df_weekday = (
    df_crime['weekday']
            .value_counts()
            .reindex(weekday_order)
            .reset_index()
)
df_weekday.columns = ['weekday', 'crime_count']

# Plot bar chart for weekdays
fig = px.bar(
    df_weekday,
    x='weekday',
    y='crime_count',
    title='Crime Count by Weekday',
    labels={'crime_count': 'Number of Crimes'}
)
fig.show()

In [None]:
## 6. Crime Distribution by Season

# %%
# Count crimes per season
df_season = (
    df_crime['season']
            .value_counts()
            .reset_index()
)
df_season.columns = ['season', 'crime_count']

# Plot pie chart for seasonal distribution
fig = px.pie(
    df_season,
    names='season',
    values='crime_count',
    title='Crime Distribution by Season'
)
fig.show()


In [None]:
## 7. Compare Daily Crime Counts with Average Temperature

# %%
# Calculate daily average temperature
df_temp_daily = (
    df_weather.groupby('date')
              .agg(temp_avg=('temp_max', 'mean'))
              .reset_index()
)

# Merge crime counts with temperature data

df_combined = pd.merge(
    daily_counts,
    df_temp_daily,
    on='date',
    how='inner'
)

# Plot line chart with two metrics
y = ['crime_count', 'temp_avg']
fig = px.line(
    df_combined,
    x='date',
    y=y,
    labels={'value': 'Value', 'variable': 'Metric', 'date': 'Date'},
    title='Daily Crime Count vs. Average Temperature'
)
fig.show()
