# Crime Data Analysis

**Description**
This dataset, `Crimes_-_2001_to_Present`, consists of over 7 million crime records with 22 columns, including case details, crime type, location, and arrest status.

# Loading Data

In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import kagglehub

# Fetch the latest version of the dataset through kagglehub and report
# the local directory it was downloaded to.
dataset_handle = "utkarshx27/crimes-2001-to-present"
path = kagglehub.dataset_download(dataset_handle)

print(f"Path to dataset files: {path}")

In [None]:
import os

# Build the CSV location from the directory returned by kagglehub (`path`,
# set in the download cell) rather than a user-specific absolute path, so
# the notebook runs on any machine and OS.
csv_file = os.path.join(path, "Crimes_-_2001_to_Present.csv")
dfo = pd.read_csv(csv_file)

# Preview the first five rows of the raw data.
dfo.head(5)

In [None]:
# Work on an independent (deep) copy so the raw frame `dfo` stays untouched.
df = dfo.copy(deep=True)

# Cleaning Data

In [None]:
# Print a concise summary of the working DataFrame.
df.info()

In [None]:
# Dimensions of the working frame as (rows, columns).
df.shape

In [None]:
# Column labels of the working frame.
df.columns

In [None]:
# Replace the existing index with a fresh 0..n-1 range, discarding the old one.
df = df.reset_index(drop=True)

In [None]:
# Show the first five rows of the working frame.
df.head(n=5)

In [None]:
# Number of missing values in each column.
missing_mask = df.isna()
missing_mask.sum()

In [None]:
# Inspect the 'Year' column without rebinding `df`: assigning the column
# back to `df` would turn the frame into a single Series and break every
# later cell that needs the other columns.
df['Year']

In [None]:
# Discard rows that lack a case number or a district.
required_columns = ['Case Number', 'District']
df = df.dropna(subset=required_columns)

In [None]:
# Replace missing location descriptions with the placeholder "Unknown".
location_description = df['Location Description']
df['Location Description'] = location_description.fillna(value="Unknown")

In [None]:
# Remove the ward, community-area and coordinate columns from the frame.
unused_columns = [
    'Ward',
    'Community Area',
    'X Coordinate',
    'Y Coordinate',
    'Longitude',
    'Latitude',
]
df = df.drop(columns=unused_columns)

In [None]:
# Preview the first five rows after the column clean-up.
df.head(n=5)

In [None]:
def _to_snake_case(label):
    """Trim a column label, lowercase it, and turn spaces into underscores."""
    return label.strip().lower().replace(" ", "_")


# Normalise every column name (e.g. 'Primary Type' -> 'primary_type').
df = df.rename(columns=_to_snake_case)

In [None]:
# Re-check the number of missing values per column after cleaning.
remaining_missing = df.isna()
remaining_missing.sum()

# Analysing Data

In [None]:
# Summary statistics for every column, not only the numeric ones.
df.describe(include = "all")

In [None]:
# Tally how many records fall under each primary crime type.
primary_types = df['primary_type']
crime_counts = primary_types.value_counts()
print(crime_counts)

In [None]:
# Crime counts by type for records dated after 2013.
# The filter is applied to the `year` column of the records; comparing the
# per-type counts themselves with 2013 would select crime types by
# frequency, not by time period.
recent_records = df['year'] > 2013
crime_in_10_years = df.loc[recent_records, 'primary_type'].value_counts()
crime_in_10_years

In [43]:
# Parse the timestamp strings with an explicit format so pandas does not
# have to guess the layout for each of the millions of rows (the
# format-less call was slow enough to be interrupted).
# NOTE(review): assumes timestamps look like "09/05/2015 01:30:00 PM";
# confirm against df['date'].head() — with errors='coerce', values that do
# not match the format silently become NaT.
df['date'] = pd.to_datetime(
    df['date'],
    format='%m/%d/%Y %I:%M:%S %p',
    errors='coerce',
)
# Derive the year from the parsed timestamp (NaN where parsing failed).
df['year'] = df['date'].dt.year

KeyboardInterrupt: 

In [None]:
# Count records by arrest status. Column names were normalised to
# snake_case during cleaning, so the column is 'arrest', not 'Arrest'.
arrests = df['arrest'].value_counts()

In [None]:
# Arrest / non-arrest counts for records dated after 2013.
# Filter the records on the `year` column; comparing the arrest counts
# themselves with 2013 does not restrict the time period.
recent_records = df['year'] > 2013
arrests_in_10years = df.loc[recent_records, 'arrest'].value_counts()
arrests_in_10years

In [None]:
# Number of records per year. The column is 'year' after the snake_case
# rename in the cleaning section; 'Year' no longer exists.
crimes_per_year = df.groupby('year').size()
print(crimes_per_year)

In [None]:
# Number of records per location description. The column is
# 'location_description' after the snake_case rename in the cleaning
# section; 'Location Description' no longer exists.
crimes_by_location = df['location_description'].value_counts()
print(crimes_by_location)

# Visualization

In [None]:
# Bar chart of the first ten entries of `crime_counts` (top 10 crimes
# over the years).
top_ten = crime_counts.head(10)
plt.figure(figsize=(12, 8))
top_ten.plot.bar(color='red')
plt.title("Top 10 Crimes")
plt.xlabel("Crime Type")
plt.ylabel("Count")
plt.show()

In [None]:
# Line chart of the yearly record counts, with a marker at every year.
plt.figure(figsize=(12, 8))
crimes_per_year.plot.line(marker='o')
plt.title("Crimes per Year")
plt.xlabel("Year")
plt.ylabel("Number of Crimes")
plt.show()

In [None]:
# Pie chart of arrest vs. non-arrest share. The column is 'arrest' after
# the snake_case rename in the cleaning section; 'Arrest' no longer exists.
plt.figure(figsize=(6, 6))
df['arrest'].value_counts().plot(kind='pie', autopct='%1.1f%%', startangle=90)
plt.title("Arrest vs Non-Arrest")
plt.show()

In [None]:
# Year x crime-type table of record counts (0 where a type has no records
# in a year), drawn as a heatmap. The grouping keys use the snake_case
# column names produced in the cleaning section.
crime_heatmap = df.groupby(['year', 'primary_type']).size().unstack(fill_value=0)
plt.figure(figsize=(12, 8))
sns.heatmap(crime_heatmap, cbar=False)
plt.title("Crimes by Year and Type")
plt.show()

In [44]:
# Persist the cleaned frame to disk, leaving the index out of the file.
output_file = "cleaned_crime_data.csv"
df.to_csv(path_or_buf=output_file, index=False)

In [None]:
# Intentionally left without an export call: writing to an empty path ('')
# fails because there is no file to open, and the cleaned data is already
# saved to "cleaned_crime_data.csv" by the preceding cell.