In [47]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score,accuracy_score,recall_score,f1_score,confusion_matrix,roc_auc_score

In [48]:
# Load scikit-learn's bundled breast-cancer dataset and pull out the
# feature matrix (X) and the class labels (Y).
data = load_breast_cancer()
X, Y = data.data, data.target

In [49]:
standard = StandardScaler()
X = standard.fit_transform(X)

In [50]:
# train test split 
X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size=0.25,random_state=42)

In [51]:
# Helper that computes the evaluation metrics for one model's predictions

def evaluate(y_true,y_pred,y_prob):
    """Compute a set of classification metrics for one model.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    y_prob : array-like
        Scores forwarded to ``roc_auc_score`` (the callers in this
        notebook pass column 1 of ``predict_proba``).

    Returns
    -------
    dict
        Keys, in order: "accuracy", "precision score", "f1 score",
        "specificity", "recall score", "ROC_AOC", "confusion_matrix".

    Notes
    -----
    The confusion matrix is unpacked into exactly four cells, so the
    function raises ``ValueError`` if the matrix is not 2x2.
    """
    matrix = confusion_matrix(y_true, y_pred)

    # Flattened 2x2 layout is (tn, fp, fn, tp); only the first two cells
    # are needed for specificity, but all four are unpacked on purpose so
    # a non-binary matrix fails loudly.
    true_neg, false_pos, _false_neg, _true_pos = matrix.ravel()
    negatives = true_neg + false_pos
    spec = true_neg / negatives

    report = dict()
    report["accuracy"] = accuracy_score(y_true, y_pred)
    report["precision score"] = precision_score(y_true, y_pred)
    report["f1 score"] = f1_score(y_true, y_pred)
    report["specificity"] = spec
    report["recall score"] = recall_score(y_true, y_pred)
    report["ROC_AOC"] = roc_auc_score(y_true, y_prob)
    report["confusion_matrix"] = matrix
    return report

In [52]:
# Accumulator for per-model metric dicts; the model cells below append to
# it, so re-run this cell first when re-running them to avoid duplicates.
result = list()

In [53]:
# Standard (uniform-weight) k-NN, evaluated for several values of k.
# Each fitted model contributes one row of metrics to `result`.

for k in (3, 5, 7, 9):
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    pred = knn.predict(X_test)
    # Column 1 of predict_proba is used as the score for the ROC metric.
    prob = knn.predict_proba(X_test)[:, 1]
    metrics = dict(evaluate(y_test, pred, prob), Model=f"kNN (k={k})")
    result.append(metrics)

In [54]:
# Distance-weighted k-NN: same k=5 neighbourhood, but neighbours are
# weighted via weights='distance' instead of uniformly.
dw_knn = KNeighborsClassifier(weights='distance', n_neighbors=5)
dw_knn.fit(X_train, y_train)

pred = dw_knn.predict(X_test)
prob = dw_knn.predict_proba(X_test)[:, 1]  # score column used for ROC

metrics = evaluate(y_test, pred, prob)
metrics.update(Model="Distance Weighted kNN")
result.append(metrics)

In [55]:
# Locally Weighted Averaging (approximated with a distance-weighted radius-neighbors classifier)
from sklearn.neighbors import RadiusNeighborsClassifier

In [56]:
# Radius-based, distance-weighted classifier standing in for locally
# weighted averaging. outlier_label=1 is the label given to test points
# that have no training neighbour within the radius.
lwa = RadiusNeighborsClassifier(outlier_label=1, weights='distance', radius=5.0)
lwa.fit(X_train, y_train)

pred = lwa.predict(X_test)
prob = lwa.predict_proba(X_test)[:, 1]  # score column used for ROC

metrics = evaluate(y_test, pred, prob)
metrics.update({"Model": "Locally Weighted Averaging"})
result.append(metrics)

In [57]:

# Build one table from the accumulated metric dicts (one row per model)
# and print it as plain text.
df = pd.DataFrame(data=result)
print(df)


   accuracy  precision score  f1 score  specificity  recall score   ROC_AOC  \
0  0.958042         0.966292  0.966292     0.944444      0.966292  0.986371   
1  0.958042         0.966292  0.966292     0.944444      0.966292  0.985643   
2  0.958042         0.966292  0.966292     0.944444      0.966292  0.984707   
3  0.972028         0.977528  0.977528     0.962963      0.977528  0.993966   
4  0.958042         0.966292  0.966292     0.944444      0.966292  0.985539   
5  0.923077         0.890000  0.941799     0.796296      1.000000  0.929671   

      confusion_matrix                       Model  
0   [[51, 3], [3, 86]]                   kNN (k=3)  
1   [[51, 3], [3, 86]]                   kNN (k=5)  
2   [[51, 3], [3, 86]]                   kNN (k=7)  
3   [[52, 2], [2, 87]]                   kNN (k=9)  
4   [[51, 3], [3, 86]]       Distance Weighted kNN  
5  [[43, 11], [0, 89]]  Locally Weighted Averaging  
