In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

In [None]:
# Read the crawled crime records from the local datasets folder.
csv_path = "./datasets/crawled_data.csv"
crime_data = pd.read_csv(csv_path)

# Show the first rows as a quick sanity check of the load.
crime_data.head()

In [None]:
# Print a per-column summary: dtype and non-null count for every column.
crime_data.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); by default
# pandas limits these to the numeric columns.
crime_data.describe()

In [None]:
# Primary crime types that are excluded from the rest of the analysis.
typeToDrop = {
    'CONCEALED CARRY LICENSE VIOLATION',
    'GAMBLING',
    'LIQUOR LAW VIOLATION',
    'NON-CRIMINAL',
    'OBSCENITY',
    'OTHER NARCOTIC VIOLATION',
    'OTHER OFFENSE',
    'PUBLIC INDECENCY',
    'RITUALISM',
}

# 1. Parse the "Date" strings (month/day/year, 12-hour clock with AM/PM)
#    into datetimes.
# 2. Derive calendar "Year" and "Month" columns so records can be grouped
#    by period.
# 3. Keep only the rows whose primary type is not in `typeToDrop`.
crime_data = (
    crime_data
    .assign(
        Date=lambda frame: pd.to_datetime(frame["Date"], format='%m/%d/%Y %I:%M:%S %p'),
        Year=lambda frame: frame["Date"].dt.year,
        Month=lambda frame: frame["Date"].dt.month,
    )
    .loc[lambda frame: ~frame['Primary Type'].isin(typeToDrop)]
)

# Display the resulting column names.
crime_data.columns



In [None]:
# --- OVERVIEW OF THE CRIME ---


def _count_descriptions(primary_type):
    """Return one row per 'Description' with its record count ('size')
    for the records of the given primary crime type."""
    of_this_type = crime_data[crime_data['Primary Type'] == primary_type]
    return of_this_type.groupby('Description', as_index=False).size()


# Record count per primary crime type.
crime_types = crime_data.groupby('Primary Type', as_index=False).size()

# Per-description breakdowns; these are plotted by the following cells.
assault_types = _count_descriptions('ASSAULT')
auto_theft_types = _count_descriptions('MOTOR VEHICLE THEFT')

# Bar chart of all primary types, most frequent first.
ranked_types = crime_types.sort_values(by='size', ascending=False)

fig, ax = plt.subplots(figsize=(30, 10))
sns.barplot(data=ranked_types, x='Primary Type', y='size', ax=ax)
ax.set_title('Crime Types Overview', fontsize='xx-large')
ax.set_xlabel('Primary Type', fontsize='x-large')
ax.set_ylabel('Crime Count', fontsize='x-large')
# Type names are long, so print them vertically.
ax.tick_params(axis='x', labelrotation=90)

plt.show()


In [None]:
# --- OVERVIEW OF MOTOR VEHICLE THEFT ---

# `auto_theft_types` holds the per-description record counts for
# MOTOR VEHICLE THEFT and is computed in the crime-overview cell.
ranked_auto_theft = auto_theft_types.sort_values(by='size', ascending=False)

# Create the figure explicitly so the chart never lands on whatever figure
# pyplot currently considers active.
fig, ax = plt.subplots()
sns.barplot(data=ranked_auto_theft, x='Description', y='size', ax=ax)
ax.set_title('Description Distribution in MOTOR VEHICLE THEFT', fontsize='xx-large')
ax.set_xlabel('Description', fontsize='x-large')
ax.set_ylabel('Count', fontsize='x-large')
# Descriptions are long, so print them vertically.
ax.tick_params(axis='x', labelrotation=90)

# Render explicitly, like the other plotting cells; this also keeps the
# return value of the last call from being echoed as cell output.
plt.show()

In [None]:
# --- OVERVIEW OF ASSAULT ---

# `assault_types` holds the per-description record counts for ASSAULT and
# is computed in the crime-overview cell.
ranked_assault = assault_types.sort_values(by='size', ascending=False)

# Create the figure explicitly so the chart never lands on whatever figure
# pyplot currently considers active.
fig, ax = plt.subplots()
sns.barplot(data=ranked_assault, x='Description', y='size', ax=ax)
ax.set_title('Description Distribution in ASSAULT', fontsize='xx-large')
ax.set_xlabel('Description', fontsize='x-large')
ax.set_ylabel('Count', fontsize='x-large')
# Descriptions are long, so print them vertically.
ax.tick_params(axis='x', labelrotation=90)

# Render explicitly, like the other plotting cells; this also keeps the
# return value of the last call from being echoed as cell output.
plt.show()

In [None]:
# --- PLOT BY YEAR ---

# Count the records in each year; the count column is named so it can be
# used directly as the y variable below.
yearly_crimes = (
    crime_data
    .groupby('Year')
    .size()
    .reset_index(name='Total Crimes')
)

fig, ax = plt.subplots(figsize=(15, 6))
# NOTE(review): newer seaborn releases may warn when `palette` is passed
# without `hue` -- confirm against the installed seaborn version.
sns.barplot(data=yearly_crimes, x='Year', y='Total Crimes', palette='Blues_d', ax=ax)

ax.set_title('Total Crimes per Year in Chicago', fontsize=20)
ax.set_xlabel('Year', fontsize=14)
ax.set_ylabel('Number of Crimes', fontsize=14)
ax.tick_params(axis='x', labelrotation=45)
plt.show()