In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, f1_score, recall_score
import joblib

########## Accuracy 0.9250810165444312 ##########
########## Train Accuracy 0.9283848753291333 ##########


# Load Data
dataSet = pd.read_csv("Crime_processed1.csv")

totalFeatures = dataSet.drop(columns=['Arrest'])
targetFeature = dataSet['Arrest']

testSize = 0.2
randomState = 42
xTrain, xTest, yTrain, yTest = train_test_split(totalFeatures, targetFeature, test_size = testSize, random_state = randomState)

# Choose K Neighbours
k = 15
knn = KNeighborsClassifier(n_neighbors = k)

# Train The Model
knn.fit(xTrain, yTrain)

# Test The Model
yPredicted = knn.predict(xTest)


accuracy = accuracy_score(yTest, yPredicted)
print("Accuracy", accuracy)

train_acc = accuracy_score(yTrain, knn.predict(xTrain))
print("Train Acc", train_acc)

cv_scores = cross_val_score(knn, totalFeatures, targetFeature, cv=5, scoring='accuracy')

print("Cross Validation Scores:", cv_scores)
print("Average Cross Validation Scores:", cv_scores.mean())

cm = confusion_matrix(yTest, yPredicted)

precision = precision_score(yTest, yPredicted, average='macro')

recall = recall_score(yTest, yPredicted, average='macro')

f1 = f1_score(yTest, yPredicted, average='macro')

print("Confusion Matrix:")
print(cm)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
joblib.dump(knn, 'knn_model.pkl')

Accuracy 0.9250810165444312
Train Acc 0.9283848753291333
Cross Validation Scores: [0.92599778 0.92940901 0.92870544 0.92687035 0.92013304]
Average Cross Validation Scores: 0.926223123841847
Confusion Matrix:
[[41107   303]
 [ 3211  2283]]
Precision: 0.9051884979355593
Recall: 0.7041135784492173
F1 Score: 0.7620544508258216


['knn_model.pkl']

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, f1_score, recall_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

########## Accuracy 0.9250810165444312 ##########
########## Train Accuracy 0.9283848753291333 ##########


# Load Data
dataSet =pd.read_csv('Crime_processed_no_outliers.csv')

totalFeatures = dataSet[['IUCR', 'Primary Type', 'Longitude', 'Latitude', 'Day', 'Hour']]
targetFeature = dataSet['Arrest']

categorical_cols = ['IUCR', 'Primary Type', 'Day']
numerical_cols = ['Longitude', 'Latitude', 'Hour']
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_cols),
    ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_cols)
])


xTrain, xTest, yTrain, yTest = train_test_split(totalFeatures, targetFeature, test_size = 0.2, random_state = 42)

# Choose K Neighbours

knn =Pipeline([
    ('preprocessor', preprocessor),
    ('knn', KNeighborsClassifier(n_neighbors = 15))
])
# Train The Model
knn.fit(xTrain, yTrain)

yPredicted = knn.predict(xTest)
yPredicted_train = knn.predict(xTrain)
print("Train Predicted", yPredicted_train)
train_accuracy = accuracy_score(yTrain, yPredicted_train)
print("Train Accuracy", train_accuracy)
accuracy = accuracy_score(yTest, yPredicted)
print("Accuracy", accuracy)

# cv_scores = cross_val_score(knn, totalFeatures, targetFeature, cv=5, scoring='accuracy')

# print("Cross Validation Scores:", cv_scores)
# print("Average Cross Validation Scores:", cv_scores.mean())

# cm = confusion_matrix(yTest, yPredicted)

# precision = precision_score(yTest, yPredicted, average='macro')

# recall = recall_score(yTest, yPredicted, average='macro')

# f1 = f1_score(yTest, yPredicted, average='macro')

# print("Confusion Matrix:")
# print(cm)
# print("Precision:", precision)
# print("Recall:", recall)
# print("F1 Score:", f1)
joblib.dump(knn, 'knn_model.pkl')



Train Predicted [1 0 0 ... 0 0 0]
Accuracy 0.9288606504415249


['knn_model.pkl']