In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                             f1_score, roc_auc_score, confusion_matrix, 
                             classification_report, roc_curve)
from ydata_profiling import ProfileReport
df = pd.read_csv('data.csv')
profile = ProfileReport(
    df,
    title="Breast Cancer Data Profiling "
)
profile.to_file("Breast_Cancer_report.html")

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


  0%|          | 0/33 [00:00<?, ?it/s][A
 39%|███▉      | 13/33 [00:00<00:00, 126.31it/s][A
100%|██████████| 33/33 [00:00<00:00, 114.02it/s][A


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

In [12]:
df.columns

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')

In [13]:
df.drop(['Unnamed: 32','id'],axis=1,inplace=True)

In [14]:
df.isnull().sum()

diagnosis                  0
radius_mean                0
texture_mean               0
perimeter_mean             0
area_mean                  0
smoothness_mean            0
compactness_mean           0
concavity_mean             0
concave points_mean        0
symmetry_mean              0
fractal_dimension_mean     0
radius_se                  0
texture_se                 0
perimeter_se               0
area_se                    0
smoothness_se              0
compactness_se             0
concavity_se               0
concave points_se          0
symmetry_se                0
fractal_dimension_se       0
radius_worst               0
texture_worst              0
perimeter_worst            0
area_worst                 0
smoothness_worst           0
compactness_worst          0
concavity_worst            0
concave points_worst       0
symmetry_worst             0
fractal_dimension_worst    0
dtype: int64

In [15]:
 #EDA
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

diag_counts = df['diagnosis'].value_counts()
#df_encoded = df.copy()
df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})
correlations = df.corr()['diagnosis'].abs().sort_values(ascending=False)

# Plot 1: Diagnosis Distribution
axes[0, 0].pie(diag_counts.values, labels=diag_counts.index, autopct='%1.1f%%', startangle=90)
axes[0, 0].set_title('Diagnosis Distribution', fontsize=14, fontweight='bold')

# Plot 2: Top 10 Correlations
top_corr = correlations[1:11]
axes[0, 1].barh(range(len(top_corr)), top_corr.values)
axes[0, 1].set_yticks(range(len(top_corr)))
axes[0, 1].set_yticklabels(top_corr.index)
axes[0, 1].set_xlabel('Correlation with Diagnosis')
axes[0, 1].set_title('Top 10 Features by Correlation', fontsize=14, fontweight='bold')
axes[0, 1].invert_yaxis()

# Plot 3: Feature Groups Comparison
feature_groups = {
    'mean': [col for col in df.columns if '_mean' in col],
    'se': [col for col in df.columns if '_se' in col],
    'worst': [col for col in df.columns if '_worst' in col]
}
group_means = [df[feature_groups[g]].mean().mean() for g in ['mean', 'se', 'worst']]
axes[1, 0].bar(['Mean', 'SE', 'Worst'], group_means, color=['skyblue', 'lightgreen', 'salmon'])
axes[1, 0].set_ylabel('Average Value')
axes[1, 0].set_title('Feature Groups Average', fontsize=14, fontweight='bold')

# Plot 4: Box plot for key features
key_features = ['radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean']
df_melted = df[key_features + ['diagnosis']].melt(id_vars='diagnosis', var_name='Feature', value_name='Value')
sns.boxplot(data=df_melted, x='Feature', y='Value', hue='diagnosis', ax=axes[1, 1])
axes[1, 1].set_title('Key Features Distribution by Diagnosis', fontsize=14, fontweight='bold')
axes[1, 1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.savefig('eda_analysis.png', dpi=300, bbox_inches='tight')
print("\n✓ EDA visualizations saved as 'eda_analysis.png'")

# Correlation heatmap for top features
plt.figure(figsize=(12, 10))
top_features = correlations[1:16].index.tolist() + ['diagnosis']
sns.heatmap(df[top_features].corr(), annot=True, fmt='.2f', cmap='coolwarm', center=0)
plt.title('Correlation Heatmap - Top 15 Features', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('correlation_heatmap.png', dpi=300, bbox_inches='tight')
print("✓ Correlation heatmap saved as 'correlation_heatmap.png'")



✓ EDA visualizations saved as 'eda_analysis.png'
✓ Correlation heatmap saved as 'correlation_heatmap.png'


In [16]:
#featuer engneering
#df_engineered = df.copy()

# 1. Ratio Features

df['area_radius_ratio'] = df['area_mean'] / (df['radius_mean'] + 1e-5)
df['perimeter_radius_ratio'] = df['perimeter_mean'] / (df['radius_mean'] + 1e-5)
df['area_perimeter_ratio'] = df['area_mean'] / (df['perimeter_mean'] + 1e-5)

# 2. Worst to Mean Ratios

mean_features = [col for col in df.columns if '_mean' in col]
for feature in mean_features:
    feature_name = feature.replace('_mean', '')
    worst_col = f'{feature_name}_worst'
    if worst_col in df.columns:
        df[f'{feature_name}_worst_mean_ratio'] = df[worst_col] / (df[feature] + 1e-5)

# 3. SE to Mean Ratios (Coefficient of Variation)

for feature in mean_features:
    feature_name = feature.replace('_mean', '')
    se_col = f'{feature_name}_se'
    if se_col in df.columns:
        df[f'{feature_name}_cv'] = df[se_col] / (df[feature] + 1e-5)

# 4. Aggregate Features by Groups

df['mean_group_avg'] = df[[col for col in df.columns if '_mean' in col]].mean(axis=1)
df['se_group_avg'] = df[[col for col in df.columns if '_se' in col]].mean(axis=1)
df['worst_group_avg'] = df[[col for col in df.columns if '_worst' in col]].mean(axis=1)

df['mean_group_std'] = df[[col for col in df.columns if '_mean' in col]].std(axis=1)
df['worst_group_std'] = df[[col for col in df.columns if '_worst' in col]].std(axis=1)

# 5. Interaction Features

df['concavity_compactness'] = df['concavity_mean'] * df['compactness_mean']
df['radius_texture_interaction'] = df['radius_mean'] * df['texture_mean']
df['area_smoothness_interaction'] = df['area_mean'] * df['smoothness_mean']
df['perimeter_concavity_interaction'] = df['perimeter_mean'] * df['concavity_mean']

# 6. Polynomial Features (for highly correlated features)

df['radius_mean_squared'] = df['radius_mean'] ** 2
df['area_mean_squared'] = df['area_mean'] ** 2
df['concavity_mean_squared'] = df['concavity_mean'] ** 2
df['concave_points_mean_squared'] = df['concave points_mean'] ** 2

# 7. Domain-Specific Features

# Tumor irregularity score
df['tumor_irregularity'] = (df['concavity_mean'] + df['concave points_mean'] + 
                                        df['compactness_mean']) / 3

# Size score
df['size_score'] = (df['radius_mean'] + df['perimeter_mean'] + df['area_mean']) / 3

# Texture complexity
df['texture_complexity'] = df['texture_mean'] * df['fractal_dimension_mean']

# Overall worst features score
worst_features = [col for col in df.columns if '_worst' in col]
df['worst_features_score'] = df[worst_features].mean(axis=1)

# 8. Log Transformations (for skewed features)

skewed_features = ['area_mean', 'area_se', 'area_worst', 'perimeter_mean', 'radius_mean']
for feature in skewed_features:
    if feature in df.columns:
        df[f'{feature}_log'] = np.log1p(df[feature])


In [17]:
df.columns

Index(['diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'area_radius_ratio',
       'perimeter_radius_ratio', 'area_perimeter_ratio',
       'radius_worst_mean_ratio', 'texture_worst_mean_ratio',
       'perimeter_worst_mean_ratio', 'area_worst_mean_ratio',
       'smoothness_worst_mean_ratio', 'compactness_worst_mean_ratio',
       'concavity_worst_mean_ratio', 'concave points_worst_mean_ratio',
       'symmetry_worst_mean_ratio',

In [18]:
#Modeling
X = df.drop('diagnosis', axis=1)
y = df['diagnosis']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
models_config = {
    'Model_1_Default': {
        'penalty': 'l2',
        'C': 1.0,
        'solver': 'lbfgs',
        'max_iter': 1000
    },
    'Model_2_L1_Regularization': {
        'penalty': 'l1',
        'C': 1.0,
        'solver': 'liblinear',
        'max_iter': 1000
    },
    'Model_3_Strong_L2_Regularization': {
        'penalty': 'l2',
        'C': 0.01,
        'solver': 'lbfgs',
        'max_iter': 1000
    },
    'Model_4_Weak_L2_Regularization': {
        'penalty': 'l2',
        'C': 10.0,
        'solver': 'lbfgs',
        'max_iter': 1000
    },
    'Model_5_ElasticNet': {
        'penalty': 'elasticnet',
        'C': 1.0,
        'solver': 'saga',
        'l1_ratio': 0.5,
        'max_iter': 2000
    },
    'Model_6_No_Regularization': {
        'penalty': None,
        'C': 1.0,
        'solver': 'lbfgs',
        'max_iter': 1000
    },
    'Model_7_Newton_Solver': {
        'penalty': 'l2',
        'C': 1.0,
        'solver': 'newton-cg',
        'max_iter': 1000
    },
    'Model_8_SAG_Solver': {
        'penalty': 'l2',
        'C': 1.0,
        'solver': 'sag',
        'max_iter': 1000
    },
    'Model_9_Balanced_Class_Weight': {
        'penalty': 'l2',
        'C': 1.0,
        'solver': 'lbfgs',
        'class_weight': 'balanced',
        'max_iter': 1000
    },
    'Model_10_L1_Strong_Reg': {
        'penalty': 'l1',
        'C': 0.1,
        'solver': 'liblinear',
        'max_iter': 1000
    }
}
results = []
trained_models = {}
for model_name, params in models_config.items():
    print(f"Training {model_name}...")
    model = LogisticRegression(random_state=42, **params)
    model.fit(X_train_scaled, y_train)
    y_pred_train = model.predict(X_train_scaled)
    y_pred_test = model.predict(X_test_scaled)
    y_pred_proba = model.predict_proba(X_test_scaled)[:, 1]
    cv_scores = cross_val_score(model, X_train_scaled, y_train, 
                                cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
                                scoring='accuracy')
    results.append({
        'Model': model_name,
        'Train_Accuracy': accuracy_score(y_train, y_pred_train),
        'Test_Accuracy': accuracy_score(y_test, y_pred_test),
        'Precision': precision_score(y_test, y_pred_test),
        'Recall': recall_score(y_test, y_pred_test),
        'F1_Score': f1_score(y_test, y_pred_test),
        'ROC_AUC': roc_auc_score(y_test, y_pred_proba),
        'CV_Mean': cv_scores.mean(),
        'CV_Std': cv_scores.std(),
        'Params': str(params)
    })
    
    trained_models[model_name] = {
        'model': model,
        'y_pred': y_pred_test,
        'y_pred_proba': y_pred_proba
    }
    
    print(f"   ✓ Test Accuracy: {accuracy_score(y_test, y_pred_test):.4f}")



Training Model_1_Default...
   ✓ Test Accuracy: 0.9737
Training Model_2_L1_Regularization...
   ✓ Test Accuracy: 0.9737
Training Model_3_Strong_L2_Regularization...
   ✓ Test Accuracy: 0.9561
Training Model_4_Weak_L2_Regularization...
   ✓ Test Accuracy: 0.9474
Training Model_5_ElasticNet...
   ✓ Test Accuracy: 0.9737
Training Model_6_No_Regularization...
   ✓ Test Accuracy: 0.9211
Training Model_7_Newton_Solver...
   ✓ Test Accuracy: 0.9737
Training Model_8_SAG_Solver...
   ✓ Test Accuracy: 0.9737
Training Model_9_Balanced_Class_Weight...
   ✓ Test Accuracy: 0.9825
Training Model_10_L1_Strong_Reg...
   ✓ Test Accuracy: 0.9649


In [19]:
results_df = pd.DataFrame(results)
results_df = results_df.sort_values('Test_Accuracy', ascending=False)

print("\n" + "="*80)
print("MODELS COMPARISON - SORTED BY TEST ACCURACY")
print("="*80)
print(results_df[['Model', 'Test_Accuracy', 'Precision', 'Recall', 'F1_Score', 'ROC_AUC']].to_string(index=False))

# 5. Find Best Model
best_model_name = results_df.iloc[0]['Model']
best_model_info = trained_models[best_model_name]
best_model = best_model_info['model']

print("\n" + "="*80)
print(f"BEST MODEL: {best_model_name}")
print("="*80)
print("\nDetailed Performance Metrics:")
print(classification_report(y_test, best_model_info['y_pred'], 
                          target_names=['Benign (0)', 'Malignant (1)']))



MODELS COMPARISON - SORTED BY TEST ACCURACY
                           Model  Test_Accuracy  Precision   Recall  F1_Score  ROC_AUC
   Model_9_Balanced_Class_Weight       0.982456   0.976190 0.976190  0.976190 0.995701
                 Model_1_Default       0.973684   0.975610 0.952381  0.963855 0.997024
       Model_2_L1_Regularization       0.973684   0.975610 0.952381  0.963855 0.997024
              Model_5_ElasticNet       0.973684   0.975610 0.952381  0.963855 0.996693
           Model_7_Newton_Solver       0.973684   0.975610 0.952381  0.963855 0.997024
              Model_8_SAG_Solver       0.973684   0.975610 0.952381  0.963855 0.997024
          Model_10_L1_Strong_Reg       0.964912   0.975000 0.928571  0.951220 0.997685
Model_3_Strong_L2_Regularization       0.956140   1.000000 0.880952  0.936709 0.999339
  Model_4_Weak_L2_Regularization       0.947368   0.928571 0.928571  0.928571 0.990741
       Model_6_No_Regularization       0.921053   0.923077 0.857143  0.888889 0.96792

In [20]:
# Confusion Matrix
cm = confusion_matrix(y_test, best_model_info['y_pred'])
print("\nConfusion Matrix:")
print(f"   True Negatives:  {cm[0, 0]}")
print(f"   False Positives: {cm[0, 1]}")
print(f"   False Negatives: {cm[1, 0]}")
print(f"   True Positives:  {cm[1, 1]}")





Confusion Matrix:
   True Negatives:  71
   False Positives: 1
   False Negatives: 1
   True Positives:  41


In [21]:
# GridSearchCV for Fine-tuning Best Model Type
print("\n" + "="*80)
print("GRID SEARCH - FINE-TUNING BEST PERFORMING CONFIGURATION")
print("="*80)
best_penalty = models_config[best_model_name].get('penalty', 'l2')

if best_penalty == 'elasticnet':
    param_grid = {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9],
        'solver': ['saga'],
        'penalty': ['elasticnet'],
        'max_iter': [2000]
    }
elif best_penalty == 'l1':
    param_grid = {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'solver': ['liblinear', 'saga'],
        'penalty': ['l1'],
        'max_iter': [1000, 2000]
    }
else:
    param_grid = {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'solver': ['lbfgs', 'newton-cg', 'sag'],
        'penalty': ['l2'],
        'max_iter': [1000, 2000]
    }

print(f"\nSearching through {len(param_grid['C']) * len(param_grid['solver'])} combinations...")

grid_search = GridSearchCV(
    LogisticRegression(random_state=42),
    param_grid,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    scoring='accuracy',
    n_jobs=-1,
    verbose=0
)

grid_search.fit(X_train_scaled, y_train)

print(f"\n✓ Best Parameters: {grid_search.best_params_}")
print(f"✓ Best CV Score: {grid_search.best_score_:.4f}")



GRID SEARCH - FINE-TUNING BEST PERFORMING CONFIGURATION

Searching through 18 combinations...

✓ Best Parameters: {'C': 1, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'lbfgs'}
✓ Best CV Score: 0.9714


In [22]:
# Evaluate grid search best model
best_grid_model = grid_search.best_estimator_
y_pred_grid = best_grid_model.predict(X_test_scaled)
y_pred_proba_grid = best_grid_model.predict_proba(X_test_scaled)[:, 1]

print("\nGrid Search Best Model Test Performance:")
print(f"   Accuracy:  {accuracy_score(y_test, y_pred_grid):.4f}")
print(f"   Precision: {precision_score(y_test, y_pred_grid):.4f}")
print(f"   Recall:    {recall_score(y_test, y_pred_grid):.4f}")
print(f"   F1-Score:  {f1_score(y_test, y_pred_grid):.4f}")
print(f"   ROC-AUC:   {roc_auc_score(y_test, y_pred_proba_grid):.4f}")



Grid Search Best Model Test Performance:
   Accuracy:  0.9737
   Precision: 0.9756
   Recall:    0.9524
   F1-Score:  0.9639
   ROC-AUC:   0.9970


In [23]:
# Visualizations
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# Plot 1: Model Comparison - Test Accuracy
axes[0, 0].barh(results_df['Model'], results_df['Test_Accuracy'], color='skyblue')
axes[0, 0].set_xlabel('Test Accuracy')
axes[0, 0].set_title('Model Comparison - Test Accuracy', fontweight='bold')
axes[0, 0].axvline(x=results_df['Test_Accuracy'].max(), color='red', linestyle='--', linewidth=1)

# Plot 2: Precision vs Recall
axes[0, 1].scatter(results_df['Recall'], results_df['Precision'], s=100, alpha=0.6)
for i, model in enumerate(results_df['Model']):
    axes[0, 1].annotate(f"M{i+1}", (results_df['Recall'].iloc[i], results_df['Precision'].iloc[i]))
axes[0, 1].set_xlabel('Recall')
axes[0, 1].set_ylabel('Precision')
axes[0, 1].set_title('Precision vs Recall Trade-off', fontweight='bold')
axes[0, 1].grid(True, alpha=0.3)

# Plot 3: F1-Score Comparison
axes[0, 2].bar(range(len(results_df)), results_df['F1_Score'], color='lightgreen')
axes[0, 2].set_xticks(range(len(results_df)))
axes[0, 2].set_xticklabels([f"M{i+1}" for i in range(len(results_df))], rotation=45)
axes[0, 2].set_ylabel('F1-Score')
axes[0, 2].set_title('F1-Score Comparison', fontweight='bold')

# Plot 4: ROC Curves for Top 3 Models
for i, model_name in enumerate(results_df['Model'].head(3)):
    fpr, tpr, _ = roc_curve(y_test, trained_models[model_name]['y_pred_proba'])
    auc = results_df[results_df['Model'] == model_name]['ROC_AUC'].values[0]
    axes[1, 0].plot(fpr, tpr, label=f'{model_name} (AUC={auc:.3f})')
axes[1, 0].plot([0, 1], [0, 1], 'k--', label='Random')
axes[1, 0].set_xlabel('False Positive Rate')
axes[1, 0].set_ylabel('True Positive Rate')
axes[1, 0].set_title('ROC Curves - Top 3 Models', fontweight='bold')
axes[1, 0].legend(fontsize=8)
axes[1, 0].grid(True, alpha=0.3)

# Plot 5: Confusion Matrix - Best Model
cm = confusion_matrix(y_test, best_model_info['y_pred'])
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[1, 1])
axes[1, 1].set_xlabel('Predicted')
axes[1, 1].set_ylabel('Actual')
axes[1, 1].set_title(f'Confusion Matrix - {best_model_name}', fontweight='bold')

# Plot 6: Train vs Test Accuracy
x_pos = np.arange(len(results_df))
width = 0.35
axes[1, 2].bar(x_pos - width/2, results_df['Train_Accuracy'], width, label='Train', alpha=0.8)
axes[1, 2].bar(x_pos + width/2, results_df['Test_Accuracy'], width, label='Test', alpha=0.8)
axes[1, 2].set_xticks(x_pos)
axes[1, 2].set_xticklabels([f"M{i+1}" for i in range(len(results_df))], rotation=45)
axes[1, 2].set_ylabel('Accuracy')
axes[1, 2].set_title('Train vs Test Accuracy (Overfitting Check)', fontweight='bold')
axes[1, 2].legend()
axes[1, 2].grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('logistic_regression_comparison.png', dpi=300, bbox_inches='tight')
print("\n✓ Visualizations saved as 'logistic_regression_comparison.png'")


✓ Visualizations saved as 'logistic_regression_comparison.png'


In [24]:
# Feature Importance (Top 20)
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': np.abs(best_grid_model.coef_[0])
}).sort_values('Coefficient', ascending=False)

print(feature_importance.head(20).to_string(index=False))

# Plot feature importance
plt.figure(figsize=(10, 8))
top_features = feature_importance.head(20)
plt.barh(range(len(top_features)), top_features['Coefficient'])
plt.yticks(range(len(top_features)), top_features['Feature'])
plt.xlabel('Absolute Coefficient Value')
plt.title('Top 20 Most Important Features', fontweight='bold', fontsize=14)
plt.gca().invert_yaxis()
plt.tight_layout()
plt.savefig('feature_importance.png', dpi=300, bbox_inches='tight')
print("\n✓ Feature importance plot saved as 'feature_importance.png'")



                        Feature  Coefficient
                        area_cv     0.906403
              concave points_se     0.813186
                  texture_worst     0.788503
            concave points_mean     0.730461
       texture_worst_mean_ratio     0.707673
                   texture_mean     0.682469
                      radius_cv     0.615000
                concavity_worst     0.608330
           concave points_worst     0.599559
     concavity_worst_mean_ratio     0.595926
     radius_texture_interaction     0.590201
          area_worst_mean_ratio     0.586283
    concave_points_mean_squared     0.544223
           fractal_dimension_se     0.538753
                 compactness_se     0.533036
perimeter_concavity_interaction     0.517150
                 symmetry_worst     0.515961
                 concavity_mean     0.504223
               compactness_mean     0.498842
                      radius_se     0.417437

✓ Feature importance plot saved as 'feature_importance

In [25]:
#  Save Results
results_df.to_csv('model_comparison_results.csv', index=False)
print("\n✓ Results saved as 'model_comparison_results.csv'")


✓ Results saved as 'model_comparison_results.csv'


In [26]:
import pickle
with open('best_logistic_model.pkl', 'wb') as f:
    pickle.dump(best_grid_model, f)
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)
print("✓ Best model saved as 'best_logistic_model.pkl'")
print("✓ Scaler saved as 'scaler.pkl'")

✓ Best model saved as 'best_logistic_model.pkl'
✓ Scaler saved as 'scaler.pkl'
