## Example: K Nearest Neighbors

In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scratch.algos.classification.knn import KNearestNeighbors
from scratch.utils.evaluation import *

ModuleNotFoundError: No module named 'matplotlib'

In [2]:
# Column labels applied to the CSV; with header=0 the file's own header row is
# consumed and these names are used in its place.
column_names = [
    "id", "diag",
    "radius", "texture", "perimeter", "area", "smoothness", "compactness",
    "concavity", "concave_points", "symmetry", "fractal_dimension",
]

# Load the data, shuffle the rows, drop rows with missing values, discard the
# id column, and recode diag from "M"/"B" to 1/0.
cancer = pd.read_csv("./data/BreastCancer.csv", header=0, names=column_names)
cancer = cancer.sample(frac=1)
cancer = cancer.dropna(axis=0)
cancer = cancer.drop(columns=["id"])
cancer = cancer.replace({"diag": {"M": 1, "B": 0}})

# Number of rows held out for validation: 30% of the cleaned data, rounded down.
nvalid = int(np.floor(0.3 * cancer.shape[0]))


def _features_and_target(frame):
    """Return (feature matrix, diag vector) for *frame* as NumPy arrays."""
    features = frame.drop(columns=["diag"]).to_numpy()
    target = frame["diag"].values
    return features, target


# Training split: everything except the last nvalid rows.
dftrain = cancer.head(n=-nvalid)
Xtrain, ytrain = _features_and_target(dftrain)

# Validation split: the last nvalid rows.
dfvalid = cancer.tail(n=nvalid)
Xvalid, yvalid = _features_and_target(dfvalid)


NameError: name 'pd' is not defined

In [None]:
# Report how the two diag labels are distributed in the full data set and in
# each split, as raw counts and as fractions rounded to two decimals.
list_targets = [
    ("Total", cancer["diag"].values),
    ("Train", ytrain),
    ("Valid", yvalid),
]
print("Label order:   [0 1]")
print("Target: Counts, Percentages:")

row_template = "\t{0}: {1}, {2}"
for split_name, targets in list_targets:
    # np.unique returns the sorted distinct labels and their counts; only the
    # counts are needed here.
    _, counts = np.unique(targets, return_counts=True)
    fractions = np.around(counts / len(targets), 2)
    print(row_template.format(split_name, counts, fractions))

In [None]:
# Build a KNearestNeighbors model configured with num_neighbors=5, fit it on
# the training split, then predict labels for the validation features.
m = KNearestNeighbors(num_neighbors=5)
m.fit(Xtrain=Xtrain, ytrain=ytrain)
# ypred is compared against yvalid in the confusion-matrix cells below.
ypred = m.predict(Xvalid=Xvalid)

In [None]:
# Confusion matrix of validation labels (y) against predictions (yhat),
# requested with as_percentage=False.
# NOTE(review): a later cell indexes this with .iloc, so it is presumably a
# pandas DataFrame of raw counts -- confirm against build_confusion_matrix.
c_count = build_confusion_matrix(y=yvalid, yhat=ypred, as_percentage=False)
print(c_count)

In [None]:
# Same confusion matrix, this time requested with as_percentage=True.
# NOTE(review): the normalisation used (overall, per row, or per column) is
# not visible here -- confirm against build_confusion_matrix.
c_percent = build_confusion_matrix(y=yvalid, yhat=ypred, as_percentage=True)
print(c_percent)

In [None]:
# Unpack the four cells of the count confusion matrix.
# NOTE(review): this indexing treats [0, 0] as true negatives, [1, 1] as true
# positives, [0, 1] as false positives and [1, 0] as false negatives --
# confirm against the row/column layout build_confusion_matrix returns.
tn = c_count.iloc[0, 0]
tp = c_count.iloc[1, 1]
fp = c_count.iloc[0, 1]
fn = c_count.iloc[1, 0]

# Metric names, in the same order as the values in `score`.
metric = [
    "Accuracy", "F1", "Sensitivity", "Specificity",
    "False Negative Rate", "False Positive Rate"
]

score = [
    (tp + tn) / (tp + fp + fn + tn), tp / (tp + 0.5 * (fp + fn)),
    tp / (tp + fn), tn / (tn + fp), fn / (fn + tp), fp / (fp + tn)
]
score = [round(s, 3) for s in score]

N = len(metric)

# One evenly spaced angle per metric around the circle. np.pi is used so the
# cell does not rely on a bare `pi` being brought in by a star import.
angles = [n / float(N) * 2 * np.pi for n in range(N)]

# A radar outline has to end where it starts, so repeat the first point.
# The repetition lives in separate lists so that `score` and `angles` keep
# exactly one entry per metric for any later use (e.g. printing).
closed_angles = angles + angles[:1]
closed_score = score + score[:1]

plt.polar(closed_angles, closed_score)
# Annotate each vertex with its value, nudged slightly towards the centre.
for a, s in zip(angles, score):
    plt.text(a, s - 0.01, s)
plt.xticks(angles, metric)
plt.yticks(list(np.linspace(0.2, 1, 5)), color="grey", size=15)
plt.ylim(0, 1)
plt.show()

In [None]:
# Show the metric names and the score list on consecutive lines.
print(metric, score, sep="\n")