# Neighborhood prediction
In this section, the neighborhood of a given listing is predicted using three classifiers: k-nearest neighbors (KNN), Gaussian Naive Bayes, and a random forest.

## Import packages and read in data

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import math
import seaborn as sns
# Widen pandas' display limits so wide/long frames are not truncated when shown.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)

In [3]:
# Load the listings table from ../data relative to the current working directory.
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept here because other cells may reference it.
dir = str(Path().resolve())
listings_csv = Path(dir, "..", "data", "listings.csv")
df = pd.read_csv(listings_csv)

## Prepare data

In [4]:
# Split the listings frame into features (X) and the encoded target (y).
X = df.drop('neighbourhood_cleansed', axis=1)

# Keep the fitted encoder so integer predictions can be mapped back to
# neighbourhood names with label_encoder.inverse_transform(...).
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['neighbourhood_cleansed'])

# One-hot encode the non-numeric feature columns.
# NOTE(review): this expands every object/category column in X; confirm that
# listings.csv contains no free-text or ID columns at this point.
X = pd.get_dummies(X)

# Hold out a test set. A fixed random_state makes the split (and therefore all
# downstream metrics) reproducible under Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Train models

In [52]:
# Fit the three classifiers on the training split.
print("Training KNN...")
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# The progress message previously said "decision tree", but the model fitted
# here is Gaussian Naive Bayes.
print("Training Naive Bayes...")
nb = GaussianNB().fit(X_train, y_train)

print("Training random forest...")
# Number of trees is tied to the square root of the feature count.
# NOTE(review): sqrt(n_features) is the usual heuristic for max_features, not
# for n_estimators — confirm this choice is intentional.
rf = RandomForestClassifier(
    n_estimators=int(math.sqrt(X.shape[1])),
    max_depth=10,
    random_state=42,  # seed the forest so results are reproducible across runs
).fit(X_train, y_train)
print("Done")

Training KNN...
Training decision tree...
Training random forest...
Done


In [53]:
# Predict test-set labels with each fitted model, in the order KNN, Naive Bayes,
# random forest.
print("Generating predictions...")
knn_pred, nb_pred, rf_pred = [
    fitted_model.predict(X_test) for fitted_model in (knn, nb, rf)
]
print("Done")

Generating predictions...
Done


## View results

In [55]:
# display performance metrics
def report_metrics(title, y_true, y_pred):
    """Print accuracy, macro-averaged precision and macro-averaged recall.

    Parameters
    ----------
    title : str
        Heading line printed before the metrics (e.g. "KNN Results:").
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model being reported.
    """
    print(title)
    print("accuracy: " + str(accuracy_score(y_true, y_pred)))
    # Macro averaging weights every class equally regardless of its support.
    # NOTE(review): the recorded output shows a precision warning, presumably
    # because some classes are never predicted — confirm on re-run.
    print("precision: " + str(precision_score(y_true, y_pred, average="macro")))
    print("recall: " + str(recall_score(y_true, y_pred, average="macro")))


# ROC AUC was previously present only as commented-out code and is omitted.
# NOTE(review): for this multiclass target it would presumably need
# predict_proba scores rather than hard labels — confirm before re-adding.
report_metrics("KNN Results:", y_test, knn_pred)
print()
report_metrics("Naive Bayes Results:", y_test, nb_pred)
print()
report_metrics("Random Forest Results:", y_test, rf_pred)

KNN Results:
accuracy: 0.777760025563189
precision: 0.7721725205515614
recall: 0.7237850559072051

Naive Bayes Results:
accuracy: 0.11199872184054961
precision: 0.05855255442334741
recall: 0.07022498947156558

Random Forest Results:
accuracy: 0.9129253874420834
precision: 0.8929981744782387
recall: 0.7782661572565764


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
