In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    confusion_matrix,
    precision_recall_curve,
    classification_report,
    ConfusionMatrixDisplay,
    PrecisionRecallDisplay
)

# 1. Load Dataset (Binary classification - Breast Cancer)
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# 2. Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Initialize models
lr = LogisticRegression(max_iter=500)
rf = RandomForestClassifier()

# 4. Train models
lr.fit(X_train, y_train)
rf.fit(X_train, y_train)

# 5. Predict probabilities
lr_probs = lr.predict_proba(X_test)[:, 1]
rf_probs = rf.predict_proba(X_test)[:, 1]

# 6. Predictions
lr_preds = lr.predict(X_test)
rf_preds = rf.predict(X_test)

# 7. Create results directory
os.makedirs("results", exist_ok=True)

# 8. Confusion Matrix - Logistic Regression
cm_lr = confusion_matrix(y_test, lr_preds)
disp_lr = ConfusionMatrixDisplay(cm_lr, display_labels=data.target_names)
disp_lr.plot()
plt.title("Confusion Matrix - Logistic Regression")
plt.savefig("results/confusion_matrix_lr.png")
plt.close()

# 9. Confusion Matrix - Random Forest
cm_rf = confusion_matrix(y_test, rf_preds)
disp_rf = ConfusionMatrixDisplay(cm_rf, display_labels=data.target_names)
disp_rf.plot()
plt.title("Confusion Matrix - Random Forest")
plt.savefig("results/confusion_matrix_rf.png")
plt.close()

# 10. Precision-Recall Curve
precision_lr, recall_lr, _ = precision_recall_curve(y_test, lr_probs)
precision_rf, recall_rf, _ = precision_recall_curve(y_test, rf_probs)

plt.plot(recall_lr, precision_lr, label="Logistic Regression")
plt.plot(recall_rf, precision_rf, label="Random Forest")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve")
plt.legend()
plt.savefig("results/precision_recall_curve.png")
plt.close()

# 11. Model Comparison - Print classification reports
print("Logistic Regression:\n", classification_report(y_test, lr_preds))
print("Random Forest:\n", classification_report(y_test, rf_preds))

# 12. Save reports to text file
with open("results/model_comparison.txt", "w") as f:
    f.write("Logistic Regression:\n")
    f.write(classification_report(y_test, lr_preds))
    f.write("\nRandom Forest:\n")
    f.write(classification_report(y_test, rf_preds))

print("✅ All results saved in the 'results/' directory.")


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression:
               precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Random Forest:
               precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

✅ All results saved in the 'results/' directory.
