In [1]:
# Importing necessary libraries
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Step 1: Fetch the Breast Cancer dataset and separate the feature matrix
# from the target labels
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Step 2: Hold out 30% of the samples for testing and train on the other 70%;
# the fixed random_state keeps the shuffle (and therefore the split) repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [3]:
# Preview the first five entries of every split: features then labels for the
# training set, followed by features then labels for the testing set.
# Each heading is printed on its own line, immediately followed by the slice.
previews = (
    ("First 5 rows of training data (X_train):", X_train),
    ("\nFirst 5 rows of training labels (y_train):", y_train),
    ("\nFirst 5 rows of testing data (X_test):", X_test),
    ("\nFirst 5 rows of testing labels (y_test):", y_test),
)
for heading, values in previews:
    print(heading)
    print(values[:5])

First 5 rows of training data (X_train):
[[1.374e+01 1.791e+01 8.812e+01 5.850e+02 7.944e-02 6.376e-02 2.881e-02
  1.329e-02 1.473e-01 5.580e-02 2.500e-01 7.574e-01 1.573e+00 2.147e+01
  2.838e-03 1.592e-02 1.780e-02 5.828e-03 1.329e-02 1.976e-03 1.534e+01
  2.246e+01 9.719e+01 7.259e+02 9.711e-02 1.824e-01 1.564e-01 6.019e-02
  2.350e-01 7.014e-02]
 [1.337e+01 1.639e+01 8.610e+01 5.535e+02 7.115e-02 7.325e-02 8.092e-02
  2.800e-02 1.422e-01 5.823e-02 1.639e-01 1.140e+00 1.223e+00 1.466e+01
  5.919e-03 3.270e-02 4.957e-02 1.038e-02 1.208e-02 4.076e-03 1.426e+01
  2.275e+01 9.199e+01 6.321e+02 1.025e-01 2.531e-01 3.308e-01 8.978e-02
  2.048e-01 7.628e-02]
 [1.469e+01 1.398e+01 9.822e+01 6.561e+02 1.031e-01 1.836e-01 1.450e-01
  6.300e-02 2.086e-01 7.406e-02 5.462e-01 1.511e+00 4.795e+00 4.945e+01
  9.976e-03 5.244e-02 5.278e-02 1.580e-02 2.653e-02 5.444e-03 1.646e+01
  1.834e+01 1.141e+02 8.092e+02 1.312e-01 3.635e-01 3.219e-01 1.108e-01
  2.827e-01 9.208e-02]
 [1.291e+01 1.633e+01 8.25

In [4]:
# Steps 3-4: Build a KNN classifier that votes over 7 neighbours and train it
# on the training split (fit returns the estimator itself, so the calls chain)
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)

# Step 5: Classify the held-out test samples
y_pred = knn.predict(X_test)

# Step 6: Compare the predictions with the true test labels; the score is a
# fraction, so it is multiplied by 100 to print it as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy with K: {accuracy*100:.2f}")

Accuracy with K: 96.49


In [5]:
# Step 7: Try different K values (1 through 20) and observe how the test
# accuracy changes.
#
# The sweep uses its own names (k_model, k_pred, k_accuracy) so that it does
# not overwrite `knn`, `y_pred` and `accuracy` produced by the single-model
# cell; re-running cells in any order therefore keeps those results intact.
# Every score is also stored in `k_accuracies` (K -> accuracy as a fraction)
# so later cells can compare or plot them without re-training.
#
# NOTE(review): the scores below are measured on the test split. Picking the
# "best" K from them gives an optimistic estimate; consider selecting K with
# cross-validation on the training split only.
k_accuracies = {}
for k in range(1, 21):
    k_model = KNeighborsClassifier(n_neighbors=k)
    k_model.fit(X_train, y_train)
    k_pred = k_model.predict(X_test)
    k_accuracy = accuracy_score(y_test, k_pred)
    k_accuracies[k] = k_accuracy
    print(f"Accuracy for K={k}: {k_accuracy*100:.2f}")

Accuracy for K=1: 93.57
Accuracy for K=2: 92.98
Accuracy for K=3: 94.15
Accuracy for K=4: 94.74
Accuracy for K=5: 95.91
Accuracy for K=6: 95.91
Accuracy for K=7: 96.49
Accuracy for K=8: 96.49
Accuracy for K=9: 97.08
Accuracy for K=10: 98.25
Accuracy for K=11: 97.66
Accuracy for K=12: 97.66
Accuracy for K=13: 96.49
Accuracy for K=14: 96.49
Accuracy for K=15: 96.49
Accuracy for K=16: 96.49
Accuracy for K=17: 96.49
Accuracy for K=18: 96.49
Accuracy for K=19: 95.91
Accuracy for K=20: 96.49
