In [39]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

In [40]:
# Read the dataset from the CSV file and preview its first five rows.
df = pd.read_csv("../datasets/wbc_csv.csv")
df.head(5)

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [41]:
# Store the diagnosis labels as a pandas categorical column
# (a default CategoricalDtype is the same dtype as the 'category' alias).
df['diagnosis'] = df['diagnosis'].astype(pd.CategoricalDtype())

In [42]:
# Seed NumPy's global random generator so the shuffle below is repeatable.
np.random.seed(123)

# Draw every row in random order (frac=1 keeps all rows) ...
df = df.sample(frac=1)
# ... then discard the old index so rows are numbered 0..len-1 in the new order.
df = df.reset_index(drop=True)

In [43]:
def n(b):
    """Min-max scale ``b`` so its smallest value maps to 0 and its largest to 1.

    Parameters
    ----------
    b : pandas.Series or similar
        Numeric values exposing ``.min()`` / ``.max()`` and element-wise
        arithmetic (e.g. one DataFrame column when used with ``DataFrame.apply``).

    Returns
    -------
    Same kind of object as ``b`` holding ``(b - min) / (max - min)``.
    When every value is identical (max == min) the result is all 0.0 rather
    than the NaN that a 0/0 division would produce.
    """
    lowest = b.min()
    spread = b.max() - lowest
    # A constant column has zero spread; dividing would yield NaN for every
    # element, so return zeros (as floats, matching the normal return dtype).
    # The ndim guard limits this shortcut to scalar spreads, so inputs whose
    # min/max are not scalars keep the plain formula.
    if np.ndim(spread) == 0 and spread == 0:
        return (b - lowest).astype(float)
    return (b - lowest) / spread

In [44]:
# Take the columns at positions 2..31 (the 3rd through 32nd columns) and
# rescale each one independently with n().
# NOTE(review): each column's min/max is computed over all rows, including the
# rows later used as the test split - confirm this is intended.
df1 = df.iloc[:, slice(2, 32)]

fnor = df1.apply(n, axis=0)

In [45]:
# Preview the first five rows of the rescaled features.
fnor.head(5)

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,0.202045,0.171458,0.190657,0.10456,0.274713,0.077296,0.002281,0.014617,0.360101,0.228517,...,0.171825,0.267591,0.157727,0.075575,0.300007,0.068545,0.004407,0.057285,0.246403,0.125541
1,0.130673,0.201556,0.122383,0.0621,0.340706,0.084381,0.025843,0.06993,0.515657,0.277169,...,0.10032,0.236141,0.088052,0.041781,0.498778,0.064431,0.032292,0.177285,0.251528,0.175193
2,0.49974,0.324992,0.492779,0.342778,0.334477,0.308018,0.242737,0.372167,0.225253,0.104254,...,0.444326,0.368337,0.443697,0.258995,0.441986,0.305333,0.280192,0.666323,0.268677,0.154991
3,0.285342,0.423064,0.264114,0.162418,0.089194,0.0,0.003737,0.009205,0.169192,0.050126,...,0.215937,0.453092,0.188107,0.104109,0.066565,0.006821,0.006371,0.031818,0.143899,0.022235
4,0.543282,0.297937,0.534241,0.395122,0.416268,0.263542,0.312793,0.437127,0.541414,0.216091,...,0.533618,0.417377,0.506948,0.348457,0.453213,0.176199,0.252157,0.564261,0.41987,0.201692


In [46]:
# Data partitioning: rows 0-468 form the training set and rows 469-568 the
# test set. Features come from the rescaled frame; labels come from the
# column at position 1 of df.
train, test = fnor.iloc[:469], fnor.iloc[469:569]

train_label, test_label = df.iloc[:469, 1], df.iloc[469:569, 1]


In [47]:
# Build a 3-nearest-neighbours classifier and fit it on the training
# features/labels in one step (fit() returns the fitted estimator itself).
knn = KNeighborsClassifier(n_neighbors=3).fit(train, train_label)

# Predicted labels for the held-out test rows.
p = knn.predict(test)

In [48]:
# Confusion matrix: rows are the actual labels, columns the predicted labels.
# The class order is taken from the fitted classifier and passed explicitly,
# so the matrix and the DataFrame row/column names always use the same labels
# in the same order - even if a class happens to be absent from the test
# labels or from the predictions.
class_order = list(knn.classes_)
t = confusion_matrix(test_label, p, labels=class_order)

conf_matrix = pd.DataFrame(t,
                           index=[f'Actual_{cls}' for cls in class_order],
                           columns=[f'Predicted_{cls}' for cls in class_order])

print(conf_matrix)


          Predicted_B  Predicted_M
Actual_B           58            1
Actual_M            4           37


In [49]:
# Unpack the 2x2 confusion matrix (rows = actual, columns = predicted).
# Index 0 (label 'B' in the printed table) is treated as the negative class
# and index 1 (label 'M') as the positive class.
TN = t[0, 0]  # actual negative, predicted negative
TP = t[1, 1]  # actual positive, predicted positive
FN = t[1, 0]  # actual positive, predicted negative
FP = t[0, 1]  # actual negative, predicted positive


In [50]:
# Accuracy: share of all test cases that were classified correctly.
accuracy = (TP + TN) / (TP + TN + FP + FN)
print("Accuracy:", accuracy)


Accuracy: 0.95


In [51]:
# Sensitivity (recall): share of actual positives that were predicted positive.
sensitivity = TP / (FN + TP)
print("Sensitivity:", sensitivity)


Sensitivity: 0.9024390243902439


In [52]:
# Specificity: share of actual negatives that were predicted negative.
specificity = TN / (FP + TN)
print("Specificity:", specificity)

Specificity: 0.9830508474576272


In [53]:
# Precision: share of positive predictions that were actually positive.
precision = TP / (FP + TP)
print("Precision:", precision)

Precision: 0.9736842105263158
