In [27]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib

In [28]:
df = load_breast_cancer()

## 2.Split into train-test sets and keep the same split for all models.


In [29]:
x_train,x_test,y_train,y_test = train_test_split(df.data,df.target,test_size=0.2,random_state=42)

## 3.Train multiple models such as Logistic Regression, Decision Tree, Random Forest, SVM.


In [30]:
# Dictionary to store your models
models = {
    "Logistic Regression":LogisticRegression(max_iter=1000),
    "Random":RandomForestClassifier(),
    "Decision":DecisionTreeClassifier(),
    "svc":SVC()
}

In [31]:
result = []

In [32]:
#loop to train and evaluate each
for name, model in models.items():
    model.fit(x_train,y_train)
    # 4. Predict results
    y_train_pred = model.predict(x_train)
    prediction = model.predict(x_test)
    # 5. Evaluate metrics (Testing)
    acc = accuracy_score(y_test,prediction)
    prec = precision_score(y_test,prediction,average="weighted")
    rec = recall_score(y_test,prediction,average="weighted")
    f1 = f1_score(y_test,prediction,average = "weighted")
    # 8. Compare Train vs Test for Generalization
    train_acc = accuracy_score(y_train, y_train_pred) 
    #6 store in list
    result.append({
        "Model":name,
        "Accuracy":acc,
        "Precision":prec,
        "Recall":rec,
        "F1-Score":f1,
        "Train_Accuracy":train_acc
    })
    

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [33]:
import pandas as pd

In [34]:
# 6. Create Pandas comparison table
comparison_df = pd.DataFrame(result).set_index('Model')
print(comparison_df)

                     Accuracy  Precision    Recall  F1-Score  Train_Accuracy
Model                                                                       
Logistic Regression  0.956140   0.956905  0.956140  0.955801        0.958242
Random               0.964912   0.965205  0.964912  0.964738        1.000000
Decision             0.929825   0.929825  0.929825  0.929825        1.000000
svc                  0.947368   0.951470  0.947368  0.946462        0.914286


## 9. Select best model (example: based on F1-Score)

In [35]:
best_model_name = comparison_df['F1-Score'].idxmax()
best_model = models[best_model_name]
joblib.dump(best_model, 'best_model.pkl')
print(f"Best model '{best_model_name}' saved as 'best_model.pkl'")

Best model 'Random' saved as 'best_model.pkl'
