In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

In [None]:
# Load the crawled crime records from the CSV file (path is relative to the
# notebook's working directory) and preview the first rows.
crime_data = pd.read_csv("./datasets/crawled_data.csv")
crime_data.head()

In [None]:
# Print a concise summary of the frame, including the dtype of each column.
crime_data.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max); with default
# arguments pandas reports these for the numeric columns of a mixed-dtype frame.
crime_data.describe()

In [None]:
# Parse the "Date" strings once, then derive calendar "Year" and "Month"
# columns from the parsed timestamps so later cells can aggregate by period.
parsed_dates = pd.to_datetime(crime_data["Date"], format='%m/%d/%Y %I:%M:%S %p')
crime_data = crime_data.assign(
    Date=parsed_dates,
    Year=parsed_dates.dt.year,
    Month=parsed_dates.dt.month,
)

# NOTE: dropping the "Location" column is currently disabled.

# Primary crime types excluded from the rest of the analysis.
typeToDrop = {
    'NON-CRIMINAL',
    'OTHER OFFENSE',
    'OTHER NARCOTIC VIOLATION',
    'OBSCENITY',
    'PUBLIC INDECENCY',
    'CONCEALED CARRY LICENSE VIOLATION',
    'LIQUOR LAW VIOLATION',
    'GAMBLING',
    'RITUALISM',
}

# Keep only the rows whose primary type is not in the exclusion set.
is_excluded_type = crime_data['Primary Type'].isin(typeToDrop)
crime_data = crime_data.loc[~is_excluded_type]

# Show the resulting column names as the cell output.
crime_data.columns



In [None]:
# --- OVERVIEW OF THE CRIME ---

# Number of records per primary crime type.
crime_types = crime_data.groupby('Primary Type', as_index=False).size()

# Per-description record counts for ASSAULT and MOTOR VEHICLE THEFT;
# both frames are plotted by the cells that follow this one.
assault_types = (
    crime_data.loc[crime_data['Primary Type'] == 'ASSAULT']
    .groupby('Description', as_index=False)
    .size()
)
auto_theft_types = (
    crime_data.loc[crime_data['Primary Type'] == 'MOTOR VEHICLE THEFT']
    .groupby('Description', as_index=False)
    .size()
)

# Order the bars from the most to the least frequent crime type.
crime_types_ranked = crime_types.sort_values(by='size', ascending=False)

fig, ax = plt.subplots(figsize=(30, 10))
sns.barplot(x='Primary Type', y='size', data=crime_types_ranked, ax=ax)
ax.set_title('Crime Types Overview', fontsize='xx-large')
ax.set_xlabel('Primary Type', fontsize='x-large')
ax.set_ylabel('Crime Count', fontsize='x-large')
# Vertical tick labels so the crime-type names do not overlap.
ax.tick_params(axis='x', labelrotation=90)

plt.show()


In [None]:
# --- OVERVIEW OF MOTOR VEHICLE THEFT ---

# Bar chart of MOTOR VEHICLE THEFT record counts per Description, most
# frequent first. `auto_theft_types` is computed in the crime-overview cell,
# which must have been run before this one.
auto_theft_ranked = auto_theft_types.sort_values(by='size', ascending=False)

# Use a dedicated figure so the chart never lands on axes left over from
# another cell when the code runs outside the inline notebook backend.
fig, ax = plt.subplots()
sns.barplot(x='Description', y='size', data=auto_theft_ranked, ax=ax)
ax.set_title('Description Distribution in MOTOR VEHICLE THEFT', fontsize='xx-large')
ax.set_xlabel('Description', fontsize='x-large')
ax.set_ylabel('Count', fontsize='x-large')
# Vertical tick labels so the descriptions do not overlap.
ax.tick_params(axis='x', labelrotation=90)

# Render explicitly: this also keeps the tick-label repr out of the cell
# output, matching the other plotting cells that end with plt.show().
plt.show()

In [None]:
# --- OVERVIEW OF ASSAULT ---

# Bar chart of ASSAULT record counts per Description, most frequent first.
# `assault_types` is computed in the crime-overview cell, which must have
# been run before this one.
assault_ranked = assault_types.sort_values(by='size', ascending=False)

# Use a dedicated figure so the bars are not drawn on top of a figure that a
# previous cell left open (possible outside the inline notebook backend).
fig, ax = plt.subplots()
sns.barplot(x='Description', y='size', data=assault_ranked, ax=ax)
ax.set_title('Description Distribution in ASSAULT', fontsize='xx-large')
ax.set_xlabel('Description', fontsize='x-large')
ax.set_ylabel('Count', fontsize='x-large')
# Vertical tick labels so the descriptions do not overlap.
ax.tick_params(axis='x', labelrotation=90)

# Render explicitly: this also keeps the tick-label repr out of the cell
# output, matching the other plotting cells that end with plt.show().
plt.show()

In [None]:
# --- PLOT BY YEAR ---

# One row per year holding the number of crime records in that year.
yearly_crimes = (
    crime_data
    .groupby('Year')
    .size()
    .reset_index(name='Total Crimes')
)

fig, ax = plt.subplots(figsize=(15, 6))
sns.barplot(data=yearly_crimes, x='Year', y='Total Crimes', palette='Blues_d', ax=ax)

ax.set_title('Total Crimes per Year in Chicago', fontsize=20)
ax.set_xlabel('Year', fontsize=14)
ax.set_ylabel('Number of Crimes', fontsize=14)
# Tilt the year labels for readability.
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# --- PLOTTING MONTHLY CRIME FROM 2020 TO 2024 ---

# Month number -> English month name, used to build readable labels.
month_dict = {
    1: "January",
    2: "February",
    3: "March",
    4: "April",
    5: "May",
    6: "June",
    7: "July",
    8: "August",
    9: "September",
    10: "October",
    11: "November",
    12: "December"
}

# Count the records per (Year, Month). groupby sorts its keys, so the rows come
# out in chronological order. The whole pipeline is a single chain that builds
# a new frame, which avoids mutating a filtered slice in place (the source of
# SettingWithCopyWarning).
# NOTE(review): only an upper bound (Year < 2025) is applied; the "2020 to 2024"
# wording in the title assumes the dataset starts in 2020 -- confirm.
crime_distribution = (
    crime_data
    .groupby(by=["Year", "Month"], dropna=False, as_index=False)
    .size()
    .rename(columns={"size": "Crime Count"})
    # Rows with a missing Year compare False here and are dropped as well.
    .loc[lambda counts: counts["Year"] < 2025]
    .assign(
        # Cast to int first so a float Year column renders as "2020", not "2020.0".
        Year=lambda counts: counts["Year"].astype(int).astype(str),
        Month=lambda counts: counts["Month"].map(month_dict),
        # Uses the Month/Year values assigned just above (names and strings).
        MonthAndYear=lambda counts: counts["Month"] + ", " + counts["Year"],
    )
)

# Least-squares straight line through the monthly counts.
x = np.arange(len(crime_distribution))  # Numeric position of each month (0 to length-1)
coefficients = np.polyfit(x, crime_distribution["Crime Count"], deg=1)  # Slope and intercept
trend_line = np.polyval(coefficients, x)  # Fitted value at every month position

# Plot the time series together with its linear trend.
plt.figure(figsize=(20, 10))
plt.grid(True)
plt.plot(
    crime_distribution["MonthAndYear"],
    crime_distribution["Crime Count"],
    color="red",
    label="Monthly Crime Count",
)
plt.plot(
    crime_distribution["MonthAndYear"],
    trend_line,
    color="blue",
    label="Linear Trend",
    linewidth=2,
)
plt.xlabel("Month Stream")
plt.ylabel("Total crime count per month")
# One tick per month; the string x values supply the tick labels.
plt.xticks(np.arange(len(crime_distribution)), rotation=90)
plt.title("Monthly Crime Trends from 2020 to 2024")
# Without a legend the series labels above would never be displayed.
plt.legend()
plt.show()


In [None]:
# --- PLOTTING THE DATA IN A HEATMAP ---

# Year x Month grid of crime counts. `crime_distribution` and `month_dict`
# come from the monthly-trend cell, which must have been run before this one.
table = crime_distribution.pivot_table(index="Year", columns="Month", values="Crime Count")

# pivot_table sorts the month-name columns alphabetically (April, August, ...).
# Restore calendar order using month_dict, whose entries are listed January
# through December; months absent from the data are skipped.
calendar_order = [name for name in month_dict.values() if name in table.columns]
table = table[calendar_order]

plt.figure(figsize=(14, 6))
sns.heatmap(table, annot=True, fmt=".0f", cmap="YlOrRd", linewidths=0.5)
plt.title("Monthly Crime Trends from 2020 to 2024")
plt.xlabel("Month")
plt.ylabel("Year")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()