## k-Nearest Neighbour

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss,accuracy_score

In [2]:
movers_df=pd.read_csv('RidingMowers.csv')

In [3]:
lbl=LabelEncoder()

In [4]:
movers_df['Response']=lbl.fit_transform(movers_df['Response'])

In [5]:
X=movers_df.drop('Response',axis=1)
y=movers_df['Response']

In [6]:
X_train,X_test,y_train,y_test=train_test_split(X,y,train_size=0.7,random_state=23,stratify=y)

Accuracy score and the log loss value differs if you use train_size (train_size=0.7, accuracy=0.87, log_loss=4.58)
instead of test_size ((test_size=0.3, accuracy=0.85, log_loss=5.33)

In [7]:
knn=KNeighborsClassifier(n_neighbors=1)

In [8]:
knn.fit(X_train,y_train)

In [9]:
y_pred=knn.predict(X_test)

In [10]:
print(accuracy_score(y_test,y_pred))

0.8727272727272727


In [11]:
y_pred_prob=knn.predict_proba(X_test)[:,1]

In [12]:
print(log_loss(y_test,y_pred_prob))

4.58737406770582


# Applying grid search

In [13]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV

In [14]:
kfold=StratifiedKFold(n_splits=5,shuffle=True,random_state=23)
knn=KNeighborsClassifier()

In [15]:
params={'n_neighbors':np.arange(1,11)}

In [16]:
gcv=GridSearchCV(knn,param_grid=params,cv=kfold,scoring='neg_log_loss')

In [17]:
gcv.fit(X,y)

In [18]:
gcv.best_params_

{'n_neighbors': 8}

In [19]:
gcv.best_score_

-1.1403137133213659

## k-NN for Breast_cancer dataset

In [20]:
lbl=LabelEncoder()

In [21]:
cancer_df=pd.read_csv('BreastCancer.csv')

In [22]:
cancer_df['Class']=lbl.fit_transform(cancer_df['Class'])

In [23]:
X=cancer_df.drop('Class',axis=1)

In [24]:
y=cancer_df['Class']

In [25]:
X_train,X_test,y_train,y_test=train_test_split(X,y,train_size=0.7,random_state=23,stratify=y)

In [26]:
kfold=StratifiedKFold(n_splits=5,shuffle=True,random_state=23)

In [27]:
lr=KNeighborsClassifier()

In [28]:
params={'n_neighbors':np.arange(1,11,1)}

GridSearchCV - goes through combination of the paramaters through the parame

In [29]:
gcv=GridSearchCV(lr,param_grid=params,scoring='neg_log_loss')

In [30]:
gcv.fit(X_train,y_train)

In [31]:
gcv.best_params_

{'n_neighbors': 10}

In [32]:
gcv.best_score_

-0.6976317524474397

# K NN for image segmentation dataser

In [33]:
lbl=LabelEncoder()

In [34]:
image=pd.read_csv('Image_Segmention.csv')

In [35]:
image

Unnamed: 0,Class,region.centroid.col,region.centroid.row,region.pixel.count,short.line.density.5,short.line.density.2,vedge.mean,vegde.sd,hedge.mean,hedge.sd,intensity.mean,rawred.mean,rawblue.mean,rawgreen.mean,exred.mean,exblue.mean,exgreen.mean,value.mean,saturation.mean,hue-mean
0,BRICKFACE,188,133,9,0.000000,0.0,0.333333,0.266667,0.500000,0.077778,6.666666,8.333334,7.777778,3.888889,5.000000,3.333333,-8.333333,8.444445,0.538580,-0.924817
1,BRICKFACE,105,139,9,0.000000,0.0,0.277778,0.107407,0.833333,0.522222,6.111111,7.555555,7.222222,3.555556,4.333334,3.333333,-7.666666,7.555555,0.532628,-0.965946
2,BRICKFACE,34,137,9,0.000000,0.0,0.500000,0.166667,1.111111,0.474074,5.851852,7.777778,6.444445,3.333333,5.777778,1.777778,-7.555555,7.777778,0.573633,-0.744272
3,BRICKFACE,39,111,9,0.000000,0.0,0.722222,0.374074,0.888889,0.429629,6.037037,7.000000,7.666666,3.444444,2.888889,4.888889,-7.777778,7.888889,0.562919,-1.175773
4,BRICKFACE,16,128,9,0.000000,0.0,0.500000,0.077778,0.666667,0.311111,5.555555,6.888889,6.666666,3.111111,4.000000,3.333333,-7.333334,7.111111,0.561508,-0.985811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
204,GRASS,36,243,9,0.111111,0.0,1.888889,1.851851,2.000000,0.711110,13.333333,9.888889,12.111111,18.000000,-10.333333,-3.666667,14.000000,18.000000,0.452229,2.368311
205,GRASS,186,218,9,0.000000,0.0,1.166667,0.744444,1.166667,0.655555,13.703704,10.666667,12.666667,17.777779,-9.111111,-3.111111,12.222222,17.777779,0.401347,2.382684
206,GRASS,197,236,9,0.000000,0.0,2.444444,6.829628,3.333333,7.599998,16.074074,13.111111,16.666668,18.444445,-8.888889,1.777778,7.111111,18.555555,0.292729,2.789800
207,GRASS,208,240,9,0.111111,0.0,1.055556,0.862963,2.444444,5.007407,14.148149,10.888889,13.000000,18.555555,-9.777778,-3.444444,13.222222,18.555555,0.421621,2.392487


In [36]:
image['Class']=lbl.fit_transform(image['Class'])

In [37]:
X=image.drop('Class',axis=1)
y=image['Class']

In [38]:
X_train,X_test,y_train,y_test=train_test_split(X,y,train_size=0.7,random_state=23,stratify=y)

In [39]:
kfold=StratifiedKFold(n_splits=5,shuffle=True,random_state=23)

In [40]:
lr=KNeighborsClassifier()

# Grid search

In [47]:
params={'n_neighbors':np.arange(1,11,1)}

In [48]:
gcv=GridSearchCV(lr,param_grid=params,scoring='neg_log_loss')

In [49]:
gcv.fit(X_train,y_train)

In [50]:
gcv.best_params_

{'n_neighbors': 9}

In [51]:
gcv.best_score_

-0.8647564686328915