In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import warnings
warnings.filterwarnings("ignore")


In [2]:
# Pull the bundled breast-cancer data into pandas containers so the feature
# columns carry their names.
data = load_breast_cancer()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply that
# same transform to both splits so nothing from the test rows leaks into it.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [3]:
# Baseline line-up, keyed by the label used in the printed reports. Every
# estimator keeps its default hyperparameters; only the forest is given a seed.
models = dict(
    [
        ("Logistic Regression", LogisticRegression()),
        ("Random Forest", RandomForestClassifier(random_state=42)),
        ("SVM", SVC()),
        ("KNN", KNeighborsClassifier()),
    ]
)

def evaluate_model(name, model, X_test, y_test, pos_label=1):
    """Print classification metrics for an already-fitted model.

    Parameters
    ----------
    name : str
        Label shown in the report banner.
    model : object
        Fitted estimator; only its ``predict`` method is called.
    X_test : array-like
        Features handed to ``model.predict``.
    y_test : array-like
        True labels the predictions are scored against.
    pos_label : int or str, default=1
        Class treated as "positive" by precision, recall and F1. The default
        of 1 matches what those metrics use when nothing is passed, so
        existing four-argument calls print exactly what they did before.
        Accuracy and the confusion matrix do not depend on it.
        NOTE(review): which real-world class label 1 stands for depends on the
        dataset's encoding -- confirm before reading recall as "disease recall".

    Returns
    -------
    None
        The report is written to stdout only; nothing is returned, so a cell
        ending in this call shows no ``Out[]`` value.
    """
    # Predict once and reuse the result for every metric below.
    y_pred = model.predict(X_test)

    print(f"\n--- {name} ---")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred, pos_label=pos_label))
    print("Recall:", recall_score(y_test, y_pred, pos_label=pos_label))
    print("F1 Score:", f1_score(y_test, y_pred, pos_label=pos_label))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Fit each baseline on the training split, then print its test-set report.
for name in models:
    model = models[name]
    model.fit(X_train, y_train)
    evaluate_model(name, model, X_test, y_test)



--- Logistic Regression ---
Accuracy: 0.9736842105263158
Precision: 0.9722222222222222
Recall: 0.9859154929577465
F1 Score: 0.9790209790209791
Confusion Matrix:
 [[41  2]
 [ 1 70]]

--- Random Forest ---
Accuracy: 0.9649122807017544
Precision: 0.958904109589041
Recall: 0.9859154929577465
F1 Score: 0.9722222222222222
Confusion Matrix:
 [[40  3]
 [ 1 70]]

--- SVM ---
Accuracy: 0.9824561403508771
Precision: 0.9726027397260274
Recall: 1.0
F1 Score: 0.9861111111111112
Confusion Matrix:
 [[41  2]
 [ 0 71]]

--- KNN ---
Accuracy: 0.9473684210526315
Precision: 0.9577464788732394
Recall: 0.9577464788732394
F1 Score: 0.9577464788732394
Confusion Matrix:
 [[40  3]
 [ 3 68]]


In [4]:
# Candidate random-forest settings; the search below samples 10 combinations
# from this space rather than trying all of them.
param_dist = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    max_features=['sqrt', 'log2'],
)

# Randomized search scored by F1 with 5-fold cross-validation. Both the forest
# and the sampler are seeded, and all available cores are used.
random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    scoring='f1',
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Show the winning configuration, then score the refit best model on the test split.
print("\nBest Random Forest Params:", random_search.best_params_)
evaluate_model(
    "Random Forest (Tuned)",
    random_search.best_estimator_,
    X_test,
    y_test,
)



Best Random Forest Params: {'n_estimators': 50, 'min_samples_split': 5, 'max_features': 'log2', 'max_depth': None}

--- Random Forest (Tuned) ---
Accuracy: 0.9736842105263158
Precision: 0.9722222222222222
Recall: 0.9859154929577465
F1 Score: 0.9790209790209791
Confusion Matrix:
 [[41  2]
 [ 1 70]]


In [5]:
# SVM search space, split per kernel. `gamma` is a kernel coefficient that the
# linear kernel does not use, so crossing it with kernel='linear' only refits
# identical models (6 of the 18 original candidates were duplicates). Giving
# the linear kernel its own gamma-free sub-grid removes that wasted work.
# The rbf sub-grid comes first so rbf candidates keep their original relative
# order. Note that `best_params_` has no 'gamma' key if a linear model wins.
param_grid = [
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': ['scale', 'auto']},
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
]

# Exhaustive search over the grid above, scored by F1 with 5-fold
# cross-validation on the training split, using all available cores.
grid_search = GridSearchCV(SVC(), param_grid=param_grid, cv=5, scoring='f1', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Show the winning configuration, then score the refit best model on the test split.
print("\nBest SVM Params:", grid_search.best_params_)
evaluate_model("SVM (Tuned)", grid_search.best_estimator_, X_test, y_test)



Best SVM Params: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}

--- SVM (Tuned) ---
Accuracy: 0.9824561403508771
Precision: 0.9726027397260274
Recall: 1.0
F1 Score: 0.9861111111111112
Confusion Matrix:
 [[41  2]
 [ 0 71]]


In [None]:
# Closing report: the two untuned baselines next to the tuned forest and SVM,
# all scored on the same test split.
print("\n\n==== Final Model Comparison ====")
final_models = dict(
    [
        ("Logistic Regression", models["Logistic Regression"]),
        ("Random Forest (Tuned)", random_search.best_estimator_),
        ("SVM (Tuned)", grid_search.best_estimator_),
        ("KNN", models["KNN"]),
    ]
)

for name in final_models:
    model = final_models[name]
    evaluate_model(name, model, X_test, y_test)
