In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

models = {
    "Logistic Regression": LogisticRegression(),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier()
}

results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1-Score": f1_score(y_test, y_pred)
    })

results_df = pd.DataFrame(results)
results_df.sort_values(by="F1-Score", ascending=False)


Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
1,SVM,0.982456,0.972603,1.0,0.986111
0,Logistic Regression,0.973684,0.972222,0.985915,0.979021
2,Random Forest,0.964912,0.958904,0.985915,0.972222


GridSearchCV on SVM

In [3]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

grid_svm = GridSearchCV(SVC(), param_grid, scoring='f1', cv=5)
grid_svm.fit(X_train, y_train)

print("Best parameters from GridSearchCV:", grid_svm.best_params_)
best_svm = grid_svm.best_estimator_

y_pred = best_svm.predict(X_test)

print("F1-score (GridSearch SVM):", f1_score(y_test, y_pred))


Best parameters from GridSearchCV: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
F1-score (GridSearch SVM): 0.9861111111111112


RandomizedSearchCV on Random Forest

In [4]:
from sklearn.model_selection import RandomizedSearchCV

param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [4, 6, 8, None],
    'max_features': ['auto', 'sqrt'],
    'min_samples_split': [2, 5, 10]
}

random_rf = RandomizedSearchCV(RandomForestClassifier(), param_distributions=param_dist, 
                               n_iter=10, scoring='f1', cv=5, random_state=42)
random_rf.fit(X_train, y_train)

print("Best parameters from RandomizedSearchCV:", random_rf.best_params_)
best_rf = random_rf.best_estimator_

y_pred = best_rf.predict(X_test)

print("F1-score (RandomSearch RF):", f1_score(y_test, y_pred))


25 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\harsh\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\harsh\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "C:\Users\harsh\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\harsh\AppData\Local\Programs\Python\Python312\Lib\

Best parameters from RandomizedSearchCV: {'n_estimators': 300, 'min_samples_split': 5, 'max_features': 'sqrt', 'max_depth': 8}
F1-score (RandomSearch RF): 0.9722222222222222


Final model comparison

In [5]:
final_models = {
    "Best SVM (GridSearch)": best_svm,
    "Best RF (RandomSearch)": best_rf
}

final_results = []

for name, model in final_models.items():
    y_pred = model.predict(X_test)
    final_results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1-Score": f1_score(y_test, y_pred)
    })

final_df = pd.DataFrame(final_results)
final_df.sort_values(by="F1-Score", ascending=False)


Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Best SVM (GridSearch),0.982456,0.972603,1.0,0.986111
1,Best RF (RandomSearch),0.964912,0.958904,0.985915,0.972222
