In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
import pandas as pd
import numpy as np

first_df=pd.read_csv('Arrests_cleaned.csv')

In [6]:
import requests, json
r = requests.get('https://data.cityofchicago.org/resource/crimes.json?$limit=600000')
data = r.json()
sec_df = pd.DataFrame(data)
sec_df = sec_df[['case_number','ward','district']]

In [7]:
merged_df = pd.merge(first_df, sec_df, on="case_number")
print(len(merged_df))
merged_df.head(1)

68416


Unnamed: 0,id,case_number,date_month,date_day,date_year,time,meridiem,race,charge_1_statute,charge_1_description,...,charge_4_statute,charge_4_description,charge_4_type,charge_4_class,CHARGES STATUTE,CHARGES DESCRIPTION,CHARGES TYPE,CHARGES CLASS,ward,district
0,30039812,JE183770,3,30,2021,12:10:00,AM,WHITE HISPANIC,720 ILCS 5.0/12-3.2-A-1,DOMESTIC BATTERY - BODILY HARM,...,,,,,720 ILCS 5.0/12-3.2-A-1 | | |,DOMESTIC BATTERY - BODILY HARM | | |,M | | |,A | | |,14,8


In [8]:
races = merged_df.loc[:, "race"].unique().tolist()
race_dummies = pd.get_dummies(merged_df, columns=["race"])
races = ["race_"+r for r in races]
race_dummies = race_dummies[races]
race_dummies

Unnamed: 0,race_WHITE HISPANIC,race_BLACK,race_BLACK HISPANIC,race_WHITE,race_ASIAN / PACIFIC ISLANDER,race_AMER INDIAN / ALASKAN NATIVE,race_UNKNOWN / REFUSED
0,1,0,0,0,0,0,0
1,0,1,0,0,0,0,0
2,0,1,0,0,0,0,0
3,0,1,0,0,0,0,0
4,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...
68411,0,0,1,0,0,0,0
68412,0,1,0,0,0,0,0
68413,0,1,0,0,0,0,0
68414,1,0,0,0,0,0,0


In [9]:

merged_df = merged_df[['ward','district','date_day','date_year','charge_1_type']]

merged_df = pd.concat([merged_df, race_dummies], axis=1).dropna()

In [10]:
merged_df

Unnamed: 0,ward,district,date_day,date_year,charge_1_type,race_WHITE HISPANIC,race_BLACK,race_BLACK HISPANIC,race_WHITE,race_ASIAN / PACIFIC ISLANDER,race_AMER INDIAN / ALASKAN NATIVE,race_UNKNOWN / REFUSED
0,14,008,30,2021,M,1,0,0,0,0,0,0
1,22,010,23,2021,M,0,1,0,0,0,0,0
2,7,004,16,2021,F,0,1,0,0,0,0,0
3,24,011,16,2021,F,0,1,0,0,0,0,0
4,13,008,16,2021,M,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
68411,22,010,7,2022,F,0,0,1,0,0,0,0
68412,34,022,7,2022,M,0,1,0,0,0,0,0
68413,9,005,7,2022,F,0,1,0,0,0,0,0
68414,7,003,7,2022,F,1,0,0,0,0,0,0


In [11]:
from sklearn.model_selection import train_test_split

x = merged_df[['race_WHITE HISPANIC','race_BLACK','race_BLACK HISPANIC','race_WHITE','race_ASIAN / PACIFIC ISLANDER','race_AMER INDIAN / ALASKAN NATIVE', \
    'race_UNKNOWN / REFUSED','ward','district','date_day','date_year']]
y = merged_df[['charge_1_type']]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)

In [12]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline

from statistics import mean 

from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

In [13]:
net_classifier = MLPClassifier()
net_classifier.fit(x_train,y_train['charge_1_type'].tolist())
classifier_preds = net_classifier.predict(x_test)
score = net_classifier.score(x_test,y_test)
print(score)

0.5097208010524777


In [14]:
scalers = [MinMaxScaler(), StandardScaler(), PCA()]
scores = []
for scaling in scalers:
    pipe = make_pipeline(scaling, net_classifier)
    pipe.fit(x_train,y_train['charge_1_type'].tolist())
    scores.append(pipe.score(x_test,y_test))

print("MinMaxScore =",scores[0])
print("StandardScaler =",scores[1])
print("PCA =",scores[2])

MinMaxScore = 0.5730887297178775
StandardScaler = 0.5770355211226429
PCA = 0.579959070311358


In [None]:
parameters = {
    # 'solver': ['lbfgs', 'sgd', 'adam'],
    # 'activation' : ['identity', 'logistic', 'tanh', 'relu'],
    # 'alpha' : [0.001, 0.01,0.0001,0.002,0.003],
    'learning_rate' : ['constant', 'invscaling', 'adaptive'],
    'learning_rate_init' : [0.001,0.002,0.0011,0.0009],
    'random_state':[0,100,200,300,400],
}

search = GridSearchCV(net_classifier,parameters)
search.fit(x_train,y_train['charge_1_type'].tolist())

best = search.best_estimator_
best_score = best.score(x_test,y_test)
print(best_score)

In [None]:
scores = []
for scaling in scalers:
    pipe = make_pipeline(scaling, best)
    pipe.fit(x_train,y_train['charge_1_type'].tolist())
    scores.append(pipe.score(x_test,y_test))

print("MinMaxScore =",scores[0])
print("StandardScaler =",scores[1])
print("PCA =",scores[2])