### import library

In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

### Load DataSet

In [2]:
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

### Display Features and Labels

In [3]:
X

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [4]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [5]:
breast_cancer.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [6]:
breast_cancer.target_names

array(['malignant', 'benign'], dtype='<U9')

### Split Data into Training and Testing Sets

In [7]:
X_train,X_test,y_train,y_test = train_test_split(X,y)
X_test

array([[1.230e+01, 1.590e+01, 7.883e+01, ..., 4.815e-02, 2.482e-01,
        6.306e-02],
       [2.064e+01, 1.735e+01, 1.348e+02, ..., 2.112e-01, 2.689e-01,
        7.055e-02],
       [1.344e+01, 2.158e+01, 8.618e+01, ..., 1.112e-01, 2.994e-01,
        7.146e-02],
       ...,
       [1.316e+01, 2.054e+01, 8.406e+01, ..., 4.195e-02, 2.687e-01,
        7.429e-02],
       [9.423e+00, 2.788e+01, 5.926e+01, ..., 0.000e+00, 2.475e-01,
        6.969e-02],
       [1.276e+01, 1.337e+01, 8.229e+01, ..., 8.411e-02, 2.564e-01,
        8.253e-02]])

### Train K-Nearest Neighbors (KNN) Classifier

In [8]:
knn = KNeighborsClassifier(n_neighbors=5)

### Predict on Test Set

In [9]:
knn.fit(X_train,y_train)
knn_predict = knn.predict(X_test)
knn_predict

array([1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [10]:
y_test

array([1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [11]:
import pandas as pd
data = pd.DataFrame([y_test,knn_predict])
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,133,134,135,136,137,138,139,140,141,142
0,1,0,0,1,1,1,1,0,1,1,...,1,1,1,1,1,1,1,1,1,1
1,1,0,0,1,1,1,1,0,1,1,...,1,1,1,1,1,1,1,1,1,1


### Accuracy of the Model

In [12]:

acc = accuracy_score(knn_predict,y_test)
print(f"The accuracy is {acc*100}")


The accuracy is 92.3076923076923


### Scale Data

In [13]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_test, X_train

(array([[1.230e+01, 1.590e+01, 7.883e+01, ..., 4.815e-02, 2.482e-01,
         6.306e-02],
        [2.064e+01, 1.735e+01, 1.348e+02, ..., 2.112e-01, 2.689e-01,
         7.055e-02],
        [1.344e+01, 2.158e+01, 8.618e+01, ..., 1.112e-01, 2.994e-01,
         7.146e-02],
        ...,
        [1.316e+01, 2.054e+01, 8.406e+01, ..., 4.195e-02, 2.687e-01,
         7.429e-02],
        [9.423e+00, 2.788e+01, 5.926e+01, ..., 0.000e+00, 2.475e-01,
         6.969e-02],
        [1.276e+01, 1.337e+01, 8.229e+01, ..., 8.411e-02, 2.564e-01,
         8.253e-02]]),
 array([[1.152e+01, 1.493e+01, 7.387e+01, ..., 9.608e-02, 2.664e-01,
         7.809e-02],
        [1.461e+01, 1.569e+01, 9.268e+01, ..., 5.813e-02, 2.530e-01,
         5.695e-02],
        [1.701e+01, 2.026e+01, 1.097e+02, ..., 1.096e-01, 3.275e-01,
         6.469e-02],
        ...,
        [1.447e+01, 2.499e+01, 9.581e+01, ..., 1.205e-01, 3.187e-01,
         1.023e-01],
        [1.225e+01, 2.244e+01, 7.818e+01, ..., 6.335e-02, 3.100e-01,
   

### Train and Predict with Scaled Data

In [14]:
knn.fit(X_train_scaled,y_train)

In [15]:
y_pred_scaled = knn.predict(X_test_scaled)

In [16]:
data1 = pd.DataFrame([y_pred_scaled,y_test])
data1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,133,134,135,136,137,138,139,140,141,142
0,1,0,1,1,1,1,1,0,1,1,...,1,1,1,1,1,1,1,1,1,1
1,1,0,0,1,1,1,1,0,1,1,...,1,1,1,1,1,1,1,1,1,1


In [17]:
acc = accuracy_score(y_pred_scaled,y_test)

In [18]:
acc

0.9790209790209791

### Confusion Matrix

In [19]:
con=confusion_matrix(y_pred_scaled,y_test)

In [20]:
con

array([[42,  0],
       [ 3, 98]], dtype=int64)

In [21]:
con.view()

array([[42,  0],
       [ 3, 98]], dtype=int64)

In [22]:
import numpy as np

In [23]:
a = np.diag(con)
a

array([42, 98], dtype=int64)

### Per-Class Accuracy

In [24]:
per_class_acc = np.diag(con)/np.sum(con,axis=1)

In [25]:
per_class_acc

array([1.        , 0.97029703])

In [26]:
acc = (100+78.57+100)/3

In [27]:
acc

92.85666666666667

In [30]:

cm = pd.DataFrame(con,index=breast_cancer.target_names,columns=breast_cancer.target_names)
cm

Unnamed: 0,malignant,benign
malignant,42,0
benign,3,98


### Final Accuracy

In [32]:
per_class_accuracy = pd.DataFrame([breast_cancer.target_names,per_class_acc*100])

In [33]:
per_class_accuracy

Unnamed: 0,0,1
0,malignant,benign
1,100.0,97.029703


##### The KNN model exhibits perfect accuracy in predicting malignant tumors and very high accuracy for benign tumors, with only a 2.97% error rate for benign cases. This indicates the model is highly effective at distinguishing between malignant and benign breast cancer cases.