# InsightCrime: Transparent Crime Prediction
This project applies explainable AI to crime prediction: we build a transparent, interpretable model that not only makes accurate predictions but also explains its decisions. We use techniques such as LIME (Local Interpretable Model-agnostic Explanations) or SHAP (SHapley Additive exPlanations) to generate human-understandable explanations for individual predictions.

In [158]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [159]:
# Load the crime records export into a DataFrame. The path is relative, so the
# CSV must be present in the notebook's working directory.
df = pd.read_csv('Crimes_-_2024_20240313.csv')

In [160]:
# Show the (rows, columns) size of the loaded frame.
df.shape

(40887, 22)

In [161]:
# List the column labels available in the loaded data.
df.columns

Index(['ID', 'Case Number', 'Date', 'Block', 'IUCR', 'Primary Type',
       'Description', 'Location Description', 'Arrest', 'Domestic', 'Beat',
       'District', 'Ward', 'Community Area', 'FBI Code', 'X Coordinate',
       'Y Coordinate', 'Year', 'Updated On', 'Latitude', 'Longitude',
       'Location'],
      dtype='object')

In [162]:
# Preview the first three rows of the loaded data.
df.head(3)

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,13391079,JH179019,03/05/2024 12:00:00 AM,011XX W 103RD PL,820,THEFT,$500 AND UNDER,RESIDENCE,False,False,...,21,73.0,06,1170554.0,1836171.0,2024,03/12/2024 03:41:23 PM,41.705931,-87.651054,POINT (-87.651054456 41.705931335)
1,13391463,JH179316,03/05/2024 12:00:00 AM,067XX S PERRY AVE,890,THEFT,FROM BUILDING,RESIDENCE,False,False,...,6,69.0,06,1176558.0,1860286.0,2024,03/12/2024 03:41:23 PM,41.771973,-87.628345,POINT (-87.628344899 41.771973284)
2,13390265,JH177773,03/05/2024 12:00:00 AM,021XX S MICHIGAN AVE,2820,OTHER OFFENSE,TELEPHONE THREAT,APARTMENT,False,True,...,3,33.0,08A,1177540.0,1890161.0,2024,03/12/2024 03:41:23 PM,41.853931,-87.623841,POINT (-87.623841071 41.853930752)


In [163]:
# Remove identifier, timestamp and geographic columns that are not used as
# model inputs. `df` is rebound instead of mutated with inplace=True, and
# errors='ignore' skips labels that are already gone, so re-running this cell
# no longer raises KeyError.
df = df.drop(
    columns=[
        'ID', 'Date', 'Case Number', 'X Coordinate', 'Y Coordinate',
        'Updated On', 'Location', 'Latitude', 'Longitude', 'Block',
        'Community Area',
    ],
    errors='ignore',
)

In [164]:
# Preview the first three rows again to see which columns remain after the drop.
df.head(3)

Unnamed: 0,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,FBI Code,Year
0,820,THEFT,$500 AND UNDER,RESIDENCE,False,False,2232,22,21,06,2024
1,890,THEFT,FROM BUILDING,RESIDENCE,False,False,722,7,6,06,2024
2,2820,OTHER OFFENSE,TELEPHONE THREAT,APARTMENT,False,True,132,1,3,08A,2024


In [165]:
pip install pandas numpy scikit-learn shap

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [166]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import shap

In [167]:
# Feature matrix: the two flag columns used as predictors.
# NOTE(review): these are the model's only inputs, and both look boolean in the
# preview, so at most four distinct input rows exist — confirm this is intended.
X = df.loc[:, ['Arrest', 'Domestic']]

# Target vector: the crime category to predict.
y = df.loc[:, 'Primary Type']

In [168]:
# One-hot encode any categorical feature columns.
# NOTE(review): if both features are bool dtype, get_dummies leaves them
# unchanged — confirm whether this step is still needed.
X = pd.get_dummies(X)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [169]:
# Peek at the first five target labels.
y.head(5)

0                         THEFT
1                         THEFT
2                 OTHER OFFENSE
3                         THEFT
4    OFFENSE INVOLVING CHILDREN
Name: Primary Type, dtype: object

In [170]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [171]:
# Baseline model: a single decision tree on the training split.
# random_state is fixed (matching the seed used for the train/test split)
# because scikit-learn permutes features at every split, so an unseeded tree
# is not guaranteed to be identical from run to run.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

In [172]:
# Predict a crime category for every held-out test row with the fitted tree.
y_pred = clf.predict(X_test)

In [173]:
# Share of test rows whose predicted category matches the true one
# (decision tree).
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.31413548544876496


In [174]:
from sklearn.ensemble import RandomForestClassifier

# Second model: a random forest on the same training split.
# The forest bootstraps rows and samples features at random, so random_state
# is fixed (matching the seed used for the train/test split) to make the
# fitted model and its score reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [177]:
# Predict a crime category for every held-out test row with the fitted forest.
y_predictions = model.predict(X_test)


In [178]:
from sklearn.metrics import accuracy_score

# Share of test rows whose predicted category matches the true one
# (random forest). This rebinds `accuracy`, which previously held the
# decision-tree score.
accuracy = accuracy_score(y_true=y_test, y_pred=y_predictions)
print("Accuracy:", accuracy)


Accuracy: 0.31413548544876496
