# Classification

##### Import all modules and packages

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing, svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, classification_report,f1_score, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

##### Load data

In [2]:
# Read the listings table from the CSV file (path is relative to the working directory).
sgData = pd.read_csv(filepath_or_buffer="listings.csv")

##### Replace NaN (missing) values in numeric features with zero

In [3]:
# Keep only the numeric columns and replace every missing value with 0.
sgDataToZero = sgData.select_dtypes(include=[np.number]).fillna(0)

# Sanity check: prints False when no NaN is left in the numeric frame.
print(sgDataToZero.isna().values.any())

False


##### Find unique entries in "neighbourhood_group"

In [4]:
# Show the distinct values found in the region column.
sgData["neighbourhood_group"].unique()

array(['North Region', 'Central Region', 'East Region', 'West Region',
       'North-East Region'], dtype=object)

##### Encode each entry as a number

In [5]:
# Map the five region names to integer class ids. LabelEncoder sorts its
# classes, so the ids follow the alphabetical order of the label names.
encode = preprocessing.LabelEncoder().fit([
    'North Region',
    'Central Region',
    'East Region',
    'West Region',
    'North-East Region',
])

# Transform the column directly instead of writing into the array returned by
# `.values`: that array can be read-only (pandas Copy-on-Write), in which case
# the in-place assignment fails. The result is still a one-column integer
# DataFrame whose single column is labelled 0.
sgDataY = pd.DataFrame(encode.transform(sgData["neighbourhood_group"])).astype(int)

# Features used by the classifiers: the two coordinate columns only.
newSgData = sgDataToZero[["latitude", "longitude"]]

##### Combine sgDataY into newSgData dataframe

In [6]:
# Attach the encoded target as a column next to the coordinate features.
# sgDataY holds a single column, which is passed explicitly; rows are matched
# on the index.
newSgData = newSgData.assign(neighbourhood_group=sgDataY.iloc[:, 0])

##### Split train and test data

In [7]:
# Hold out 25% of the rows for testing. The fixed random_state makes the split
# identical on every run, so the scores computed from it can be reproduced.
train, test = train_test_split(newSgData, test_size=0.25, random_state=42)

### A.) Using KNN

##### Training data with KNeighborsClassifier

In [8]:
# Build a 3-nearest-neighbours classifier, then fit it on the training
# coordinates with the encoded region as the target.
KNN = KNeighborsClassifier(n_neighbors=3)
KNN = KNN.fit(
    X=train[["latitude", "longitude"]],
    y=train["neighbourhood_group"],
)

##### Test new data

In [9]:
# Classify one new location. The point is wrapped in a DataFrame carrying the
# same column names the model was fitted with; passing a bare list makes
# scikit-learn emit a feature-name warning (hidden here only by the global
# warnings filter).
newPoint = pd.DataFrame([[1.44, 103.8]], columns=["latitude", "longitude"])
newClassification = KNN.predict(newPoint)
print(newClassification)

[2]


##### Check KNN model

In [10]:
# Score the fitted KNN model on the held-out rows.
classification = KNN.predict(test[["latitude", "longitude"]])
trueLabels = test["neighbourhood_group"]

accuracy = accuracy_score(trueLabels, classification)
# NOTE(review): MAE and MSE treat the encoded class ids as numbers. The ids
# come from a label encoding of region names, so these two values depend on
# the encoding order - confirm they are really wanted.
MAE = mean_absolute_error(trueLabels, classification)
MSE = mean_squared_error(trueLabels, classification)

for title, score in ((" ACC : ", accuracy), (" MAE : ", MAE), (" MSE : ", MSE)):
    print(title, score)

 ACC :  0.9979767324228629
 MAE :  0.004552352048558422
 MSE :  0.01163378856853819


##### Find best K

In [11]:
# Fit one KNN model for each k in 1 .. Ks-1 and record its accuracy on the
# test split; accuracy[k - 1] holds the score for k neighbours.
# NOTE(review): k is chosen using the test split itself, so the reported best
# accuracy is an optimistic estimate - consider cross-validating on `train`.
Ks = 10
accuracy = np.zeros(Ks - 1)
for n in range(1, Ks):
    # Use loop-local names so the fitted `KNN` model and the `classification`
    # predictions produced by the earlier cells are not silently overwritten.
    candidateModel = KNeighborsClassifier(n_neighbors=n).fit(
        train[["latitude", "longitude"]], train["neighbourhood_group"]
    )
    candidatePrediction = candidateModel.predict(test[["latitude", "longitude"]])
    accuracy[n - 1] = accuracy_score(test["neighbourhood_group"], candidatePrediction)

# argmax returns the first best index; +1 converts the 0-based index back to k.
print("Best  ACC : %.2f" % accuracy.max(), ", with k = ", accuracy.argmax() + 1)

Best  ACC : 1.00 , with k =  6


### B.) Using Decision Tree

##### Training data with DecisionTreeClassifier

In [12]:
# Fit a Gini decision tree limited to depth 10 on the training coordinates.
# random_state is fixed because the tree permutes features at each split, so
# ties between equally good splits would otherwise be broken differently from
# run to run.
DST = DecisionTreeClassifier(criterion="gini", max_depth=10, random_state=42).fit(
    train[["latitude", "longitude"]], train["neighbourhood_group"]
)

##### Test new data

In [13]:
# Classify one new location with the decision tree. The point is wrapped in a
# DataFrame carrying the same column names the model was fitted with; passing
# a bare list makes scikit-learn emit a feature-name warning (hidden here only
# by the global warnings filter).
newPoint = pd.DataFrame([[1.44, 103.8]], columns=["latitude", "longitude"])
newClassification = DST.predict(newPoint)
print(newClassification)

[2]


##### Check Decision Tree model

In [14]:
# Score the fitted decision tree on the held-out rows.
classification = DST.predict(test[["latitude", "longitude"]])
trueLabels = test["neighbourhood_group"]

accuracy = accuracy_score(trueLabels, classification)
# NOTE(review): MAE and MSE treat the encoded class ids as numbers. The ids
# come from a label encoding of region names, so these two values depend on
# the encoding order - confirm they are really wanted.
MAE = mean_absolute_error(trueLabels, classification)
MSE = mean_squared_error(trueLabels, classification)
# F1 averaged over the classes, weighted by each class's support.
F1 = f1_score(trueLabels, classification, average='weighted')
# Confusion matrix over the five class ids; computed here but not displayed.
cnf_matrix = confusion_matrix(trueLabels, classification, labels=[0, 1, 2, 3, 4])

for title, score in (
    (" ACC : ", accuracy),
    (" MAE : ", MAE),
    (" MSE : ", MSE),
    (" F1  : ", F1),
):
    print(title, score)

# Per-class precision, recall, F1 and support.
print(classification_report(trueLabels, classification))

 ACC :  0.9964592817400101
 MAE :  0.008598887202832575
 MSE :  0.02680829539706626
 F1  :  0.9964549670299
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1552
           1       0.99      0.99      0.99       145
           2       1.00      1.00      1.00        47
           3       1.00      0.98      0.99        89
           4       0.99      1.00      0.99       144

    accuracy                           1.00      1977
   macro avg       1.00      0.99      0.99      1977
weighted avg       1.00      1.00      1.00      1977

