# Chicago Crime Analysis 

### Import packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the session.
pd.options.mode.chained_assignment = None 

KeyboardInterrupt: 

### Reading Crimes - 2001 to Present

In [None]:
# Load the crime records from a CSV file given by a relative path.
df = pd.read_csv("Crimes_-_2001_to_Present.csv")

In [None]:
# Show the column labels of the loaded frame.
df.columns

In [None]:
# Preview the first rows (DataFrame.head defaults to five).
df.head()

### Filtering attributes which can be used for classification

In [None]:
# Keep only the attributes used in the rest of the analysis.
keep_columns = [
    'Date',
    'Primary Type',
    'Location Description',
    'Arrest',
    'Domestic',
    'Community Area',
    'Year',
]
df = df.loc[:, keep_columns]

In [None]:
# Display the reduced frame as the cell output.
df

### Removing entries with missing values

In [None]:
# Report the per-column count of missing values.
print(" \nTotal number of missing values of each column: \n\n", df.isnull().sum())

In [None]:
# Drop every row that contains at least one missing value.
df = df.dropna()

In [None]:
# Report the per-column count of missing values again to confirm the cleanup.
print(" \nTotal number of missing values of each column: \n\n", df.isnull().sum())

In [None]:
# Display the frame as the cell output.
df

### Reading Area names from CommAreas.csv

In [None]:
# Load the community-area lookup table, keeping only the area number and name.
area_columns = ['AREA_NUMBE', 'COMMUNITY']
areas = pd.read_csv('CommAreas.csv').loc[:, area_columns]

In [None]:
# Temporarily lift the row-display limit so the whole lookup table is printed.
with pd.option_context('display.max_rows', None):
    print(areas)

### Merging both datasets

In [None]:
# Inner-join the crimes with the area lookup on the area number, then discard
# the numeric 'Community Area' key that came from the crimes table.
chi_crime = (
    df.merge(areas, how='inner', left_on='Community Area', right_on='AREA_NUMBE')
    .drop(columns=['Community Area'])
)

In [None]:
# Give the two columns that came from the area lookup readable names.
# rename() keeps the existing column order and leaves the other six columns
# untouched, so no re-assignment or follow-up drop is needed.
chi_crime = chi_crime.rename(
    columns={'AREA_NUMBE': 'Area Number', 'COMMUNITY': 'Community Area'}
)
chi_crime.head()

### Converting Date to DateTime for further analysis

In [None]:
# Parse the 'Date' strings with an explicit format: month/day/year followed by
# a 12-hour clock time and an AM/PM marker.
chi_crime['DateTime'] = pd.to_datetime(chi_crime['Date'], format='%m/%d/%Y %I:%M:%S %p')
# Display the parsed column as the cell output.
chi_crime['DateTime']

### Extracting Hour, Day and Month from DateTime

In [None]:
# Hour of day taken from the parsed timestamp via the .dt.hour accessor.
chi_crime['Hour'] = chi_crime['DateTime'].dt.hour

In [None]:
# Day of month formatted with '%d', so the column holds strings, not integers.
chi_crime['Day'] = chi_crime['DateTime'].dt.strftime('%d')

In [None]:
# Month formatted with '%m', so the column holds strings, not integers.
chi_crime['Month'] = chi_crime['DateTime'].dt.strftime('%m')

In [None]:
# The helper timestamp column is no longer needed once Hour/Day/Month exist.
chi_crime = chi_crime.drop(columns=['DateTime'])
chi_crime

In [None]:
# Count the records per crime type (value_counts sorts by descending count).
Num_crimes_type = chi_crime['Primary Type'].value_counts()
# Turn the counts into a two-column table: the crime type and its count.
# NOTE(review): the name `type` shadows the builtin; it is kept because later
# cells read this table under that name.
type = Num_crimes_type.rename_axis("Primary Type").reset_index(name='values')

In [None]:
# Print the 'Primary Type' column of the counts table.
print(type['Primary Type'])

In [None]:
# Encode the two categorical columns as integer codes. factorize() returns
# (codes, uniques); only the codes are kept. Indexing with [0] instead of
# unpacking into `_` avoids overwriting IPython's last-output variable.
chi_crime['Type Code'] = pd.factorize(chi_crime['Primary Type'])[0]
chi_crime['Location Code'] = pd.factorize(chi_crime['Location Description'])[0]
chi_crime.head()

### Pie Plot showing Top 10 crimes in Chicago

In [None]:
# Pie chart of the first ten rows of the crime-type counts table.
top_ten_types = type.head(10)
fig = px.pie(
    top_ten_types,
    values='values',
    names='Primary Type',
    title='Top 10 Crime Types',
    color_discrete_sequence=px.colors.sequential.RdBu,
)
fig.show()

In [None]:
# Count the records per community area and expose the counts both as a raw
# array (`values`) and as a two-column table (`categories`).
Number_crimes = chi_crime['Community Area'].value_counts()
values = Number_crimes.values
categories = Number_crimes.rename_axis("Community Area").reset_index(name='values')

### Treemap showing Crime density by Area

In [None]:
# Treemap with one tile per community area, sized by its record count.
fig = px.treemap(
    categories,
    path=['Community Area'],
    values=categories['values'],
    height=900,
    title='Crime in Chicago by Community',
    color_discrete_sequence=px.colors.sequential.RdBu,
)
# Show the area label and its count inside each tile.
fig.update_traces(textinfo='label+text+value')
fig.show()

### Histogram showing Chicago Crime by Year

In [None]:
# Histogram of record counts per year, one colour per year.
fig = px.histogram(chi_crime, x='Year', color='Year')
fig.update_layout(
    title_text='Chicago Crime by Year',
    # The x axis plots the 'Year' column, so label it accordingly.
    xaxis_title_text='Year',
    yaxis_title_text='Count',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [None]:
# Count records by arrest flag and build a small table for plotting.
num_arrest = chi_crime['Arrest'].value_counts()
print(num_arrest.index)
arrest = pd.DataFrame({"Arrested": num_arrest.index, "values": num_arrest.values})
# Swap the boolean flags for readable labels.
arrest = arrest.replace({'Arrested': {True: 'Yes', False: 'No'}})
print(arrest)

In [None]:
# Count records by domestic flag and build a small table for plotting.
num_domestic = chi_crime['Domestic'].value_counts()
print(num_domestic.index)
domestic = pd.DataFrame({"Domestic": num_domestic.index, "values": num_domestic.values})
# Swap the boolean flags for readable labels.
domestic = domestic.replace({'Domestic': {True: 'Yes', False: 'No'}})
print(domestic)

In [None]:
# Create a one-row, two-column figure whose cells are both declared as pie subplots.
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "pie"}, {"type": "pie"}]])

### Pie Plot showing some more information about each crime

In [None]:
# Build one donut chart per flag, then place them side by side in the figure.
arrest_pie = go.Pie(
    labels=arrest['Arrested'],
    values=arrest['values'],
    hole=.5,
    name="Arrested",
    title='Arrested',
)
domestic_pie = go.Pie(
    labels=domestic['Domestic'],
    values=domestic['values'],
    hole=.5,
    name='Domestic',
    title='Domestic Related',
)
fig.add_trace(arrest_pie, row=1, col=1)
fig.add_trace(domestic_pie, row=1, col=2)
fig.update_layout(title_text='Chicago Crime over the years')
fig.show()

### Creating Correlation Matrix

In [None]:
# Assemble an all-numeric frame for the correlation matrix.
# Take an explicit copy so the column assignments below act on an independent
# frame rather than on a slice of chi_crime.
chi_crime_correlation = chi_crime[['Type Code', 'Location Code']].copy()
# factorize() returns (codes, uniques); codes are numbered in order of first
# appearance. Indexing with [0] avoids overwriting IPython's `_` variable.
chi_crime_correlation['Arrest'] = pd.factorize(chi_crime['Arrest'])[0]
chi_crime_correlation['Domestic'] = pd.factorize(chi_crime['Domestic'])[0]
numeric_source_columns = ['Area Number', 'Year', 'Month', 'Day', 'Hour']
chi_crime_correlation[numeric_source_columns] = chi_crime[numeric_source_columns]
# 'Month' and 'Day' were produced with strftime and are therefore strings;
# cast them to integers so corr() treats them as numeric on every pandas
# version instead of dropping or coercing them.
chi_crime_correlation = chi_crime_correlation.astype({'Month': int, 'Day': int})
chi_crime_correlation.head()

In [None]:
# Pairwise correlation between the columns (DataFrame.corr defaults to Pearson).
correlation_matrix = chi_crime_correlation.corr()

print(correlation_matrix)

In [None]:
import seaborn as sns

# Render the correlation matrix as an annotated heatmap on a 10x8 inch canvas.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    ax=heatmap_ax,
)
heatmap_ax.set_title("Correlation Matrix Heatmap")
heatmap_fig.tight_layout()
plt.show()