<a href="https://colab.research.google.com/github/cisimon7/Machine-Learning-with-plotly/blob/main/KNN_Model_with_Plotly.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# K-Nearest Neighbors

In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score

In [2]:
# Load the `tips` sample dataset bundled with Plotly Express.
df = px.data.tips()

# Bill amounts arranged as a single-column 2-D array (one row per record).
X = df['total_bill'].to_numpy().reshape(-1, 1)

# 100 evenly spaced bill values covering the observed min..max range.
x_range = np.linspace(start=X.min(), stop=X.max(), num=100)

In [3]:
# Preview the first five rows of the dataset.
df.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


### <b>KNN Regressor</b>

In [4]:
# Model #1: 10 neighbours, weights='distance'.
# `fit` returns the estimator itself, so construction and fitting are chained.
knn_dist = KNeighborsRegressor(n_neighbors=10, weights='distance').fit(X, df['tip'])

# Predict over the evaluation grid, presented as a single-column 2-D array.
y_dist = knn_dist.predict(x_range[:, np.newaxis])

In [5]:
# Model #2: 10 neighbours, weights='uniform'.
# `fit` returns the estimator itself, so construction and fitting are chained.
knn_uni = KNeighborsRegressor(n_neighbors=10, weights='uniform').fit(X, df['tip'])

# Predict over the evaluation grid, presented as a single-column 2-D array.
y_uni = knn_uni.predict(x_range[:, np.newaxis])

In [6]:
# Raw observations, coloured by the `sex` column.
fig = px.scatter(
    df,
    x='total_bill',
    y='tip',
    color='sex',
    opacity=0.65,
    title="KNN Regressor",
)

# Overlay both fitted curves (uniform first, then distance) in one call.
fig.add_traces([
    go.Scatter(x=x_range, y=y_uni, name='Weights: Uniform'),
    go.Scatter(x=x_range, y=y_dist, name='Weights: Distance'),
])
fig.show()

### <b>KNN Classifier</b>

In [7]:
# Features: bill and tip; target: sex. 20% of the rows are held out for
# testing, and random_state pins the shuffle so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df[['total_bill', 'tip']],
    df['sex'],
    test_size=0.2,
    random_state=123,
)

In [8]:
# Show the first five rows of the full dataset again (same preview as earlier).
df.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [9]:
# Classifier with default hyper-parameters, fitted on the training split.
# `fit` returns the estimator itself, so construction and fitting are chained.
knn = KNeighborsClassifier().fit(x_train, y_train)

# Predicted labels for the held-out rows.
y_pred = knn.predict(x_test)

# Test-set accuracy, rounded to four decimals (shown later in the plot title).
acc = np.round(accuracy_score(y_true=y_test, y_pred=y_pred), 4)

In [10]:
# Plot the training points coloured by their true label, then overlay the
# test points coloured by the classifier's prediction.

# Convert each split to an array once instead of once per axis.
train_xy = np.asarray(x_train)
test_xy = np.asarray(x_test)

# Numeric encoding of the predictions for the colour scale: 1 = 'Male', 0 = otherwise.
predicted_male = np.where(y_pred == 'Male', 1, 0)

fig = px.scatter(
    x=train_xy[:, 0],
    y=train_xy[:, 1],
    color=y_train,
    labels={'x': 'Total Bill', 'y': 'Tip'},
    # Plotly text supports '<br>' for line breaks; a literal '\n' is not
    # rendered as a new line, so the accuracy would run into the title.
    title=f'KNN Classifier<br>accuracy={acc}')
fig.add_scatter(
    x=test_xy[:, 0],
    y=test_xy[:, 1],
    name='KNN Prediction', mode='markers',
    hovertext=[f'sex prediction: {sex}' for sex in y_pred],
    marker=dict(
        color=predicted_male,
        # A scalar applies the same size to every marker.
        size=15,
        # Pin the colour range so 0 -> green and 1 -> black even when every
        # prediction falls in the same class.
        cmin=0, cmax=1,
        colorscale=['green', 'black']),
    showlegend=False)
fig.show()

### <b>Choosing K values</b>

In [11]:
# Mean 7-fold cross-validated accuracy on the training split for K = 1..19.
Ks = list(range(1, 20))
cv_scores = []
for K in Ks:
    # Use a dedicated name for the candidate model so the fitted `knn`
    # classifier from the earlier cell is not replaced by an unfitted one.
    knn_k = KNeighborsClassifier(n_neighbors=K)
    scores = cross_val_score(knn_k, x_train, y_train,
                             cv=7, scoring='accuracy')
    avg_score = np.mean(scores)
    cv_scores.append(avg_score)

# Accuracy as a function of K; the title lets the figure stand on its own.
fig = px.line(x=Ks, y=cv_scores, labels={'x':'K value', 'y':'Accuracy'},
              title='Cross-validated accuracy vs. K')
fig.update_layout(width=700)
fig.show()