In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# --- Data -----------------------------------------------------------------
# Fetch scikit-learn's bundled breast cancer dataset and pull out the
# feature matrix (X) and the label vector (y).
data = load_breast_cancer()
X, y = data.data, data.target

# Reserve 30% of the samples for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# --- Model ----------------------------------------------------------------
# 5-nearest-neighbours classifier, constructed and fitted in one expression
# (fit() hands back the estimator itself).
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Hard label predictions feed the threshold-based metrics; the second
# column of predict_proba is the score for label 1 and feeds ROC AUC.
y_pred = knn.predict(X_test)
y_score = knn.predict_proba(X_test)[:, 1]

# --- Metrics --------------------------------------------------------------
# NOTE(review): precision, recall and F1 are computed with the scorers'
# default positive label. Which diagnosis label 1 stands for is not visible
# here -- confirm against data.target_names before interpreting them.
conf_matrix = confusion_matrix(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_score)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# --- Report ---------------------------------------------------------------
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"ROC AUC Score: {roc_auc}")
print(f"Confusion Matrix:\n{conf_matrix}")

Accuracy: 0.9590643274853801
Precision: 0.9469026548672567
Recall: 0.9907407407407407
F1 Score: 0.9683257918552035
ROC AUC Score: 0.995296884185773
Confusion Matrix:
[[ 57   6]
 [  1 107]]


**Accuracy**: It's the ratio of correctly predicted observations to the total number of observations.
> Accuracy = (True Positives + True Negatives) / (True Positives + False Positives + True Negatives + False Negatives)

**Precision**: Precision measures how trustworthy the model's positive predictions are, i.e., out of all the instances predicted as positive, how many were actually positive.
> Precision = True Positives / (True Positives + False Positives)

**Recall (Sensitivity)**: Recall calculates how many of the actual positive cases were caught by the model.
> Recall = True Positives / (True Positives + False Negatives)

**F1 Score**: It's the harmonic mean of Precision and Recall. It balances the two, and is high only when both precision and recall are high.
> F1 Score = 2*(Recall * Precision) / (Recall + Precision)
