# Chapter 04 — Logistic Regression: When the Answer is Yes or No

> **Book**: Machine Learning For Dentists: From Torque To Tensors

---

## Learning Objectives

By the end of this codelab, you will be able to:

1. **Explain** how logistic regression transforms a linear score into a probability
2. **Train** a logistic regression model using scikit-learn
3. **Interpret** weights as odds ratios with clinical meaning
4. **Evaluate** classification models using accuracy, precision, recall, F1, and ROC-AUC
5. **Choose** appropriate decision thresholds based on clinical costs

---


## Phase 1: Setup and Data Loading

Let's set up our environment with the Periospot brand colors and load the implant success dataset.


In [None]:
# Core imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Scikit-learn imports
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve, confusion_matrix, classification_report,
    precision_recall_curve, average_precision_score
)

# Seed NumPy's global RNG so any random draws in this notebook are repeatable
np.random.seed(42)

# Periospot brand palette: colour name -> hex code, reused by the plotting cells
PERIOSPOT_COLORS = {
    'periospot_blue': '#15365a',
    'mystic_blue': '#003049',
    'periospot_red': '#6c1410',
    'crimson_blaze': '#a92a2a',
    'vanilla_cream': '#f7f0da',
    'black': '#000000',
    'white': '#ffffff'
}

# Apply brand styling globally so every figure picks it up without extra arguments
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'font.size': 12,
    'axes.titlesize': 16,
    'axes.labelsize': 12,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'figure.facecolor': 'white',
    'axes.facecolor': 'white',
    'axes.edgecolor': PERIOSPOT_COLORS['periospot_blue'],
    'axes.labelcolor': PERIOSPOT_COLORS['mystic_blue'],
    'xtick.color': PERIOSPOT_COLORS['mystic_blue'],
    'ytick.color': PERIOSPOT_COLORS['mystic_blue'],
    'text.color': PERIOSPOT_COLORS['black']
})

# Figures are saved under ./figures; create it up front (no error if it exists)
Path('figures').mkdir(exist_ok=True)

# Status messages: the emoji were mis-encoded (mojibake) and are restored here
print("✅ Setup complete!")
print(f"\n📊 Periospot brand colors loaded: {list(PERIOSPOT_COLORS)}")


In [None]:
# Load the implant success dataset (path is relative to the working directory)
df = pd.read_csv('data/implant_success_data_training.csv')

# Report the size and schema; emoji restored from mis-encoded (mojibake) text
print(f"📊 Dataset loaded: {df.shape[0]} implant cases, {df.shape[1]} columns")
print("\n🎯 Target variable: 'success' (1 = success, 0 = failure)")
print(f"\n📋 Columns: {list(df.columns)}")

# Last expression of the cell: the notebook renders the first rows as a table
df.head()


## Phase 2: Exploratory Data Analysis

Before building our model, let's understand the data — especially the **class distribution** (balance between success and failure cases).


In [None]:
# Basic statistics for the numeric columns, rounded to two decimals for display
# (heading emoji restored from mis-encoded text)
print("📊 Dataset Summary Statistics:\n")
df.describe().round(2)


In [None]:
# Class distribution - critical for classification problems!
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Count cases per outcome. Reindexing pins the order to [failure, success] and
# yields 0 (instead of raising KeyError) if one class is absent from the data.
class_counts = df['success'].value_counts().reindex([0, 1], fill_value=0)
n_failure, n_success = int(class_counts[0]), int(class_counts[1])
outcome_counts = [n_failure, n_success]

# Count plot: red for failures, blue for successes (same order as outcome_counts)
colors = [PERIOSPOT_COLORS['crimson_blaze'], PERIOSPOT_COLORS['periospot_blue']]
bars = axes[0].bar(['Failure (0)', 'Success (1)'],
                   outcome_counts,
                   color=colors, edgecolor='white', linewidth=2)
axes[0].set_ylabel('Count')
axes[0].set_title('Class Distribution: Success vs Failure')

# Add count labels just above each bar
for bar, count in zip(bars, outcome_counts):
    axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 5,
                 f'{count}', ha='center', fontsize=14, fontweight='bold')

# Pie chart of the same counts, shown as percentages
axes[1].pie(outcome_counts,
            labels=['Failure', 'Success'],
            colors=colors,
            autopct='%1.1f%%',
            explode=(0.02, 0.02),
            startangle=90,
            textprops={'fontsize': 12})
axes[1].set_title('Class Proportions')

plt.tight_layout()
plt.savefig('figures/01_class_distribution.png', dpi=150, bbox_inches='tight')
plt.show()

# Text summary of the balance (emoji restored from mis-encoded text)
print("\n📊 Class Balance:")
print(f"   Failures: {n_failure} ({n_failure/len(df)*100:.1f}%)")
print(f"   Successes: {n_success} ({n_success/len(df)*100:.1f}%)")


In [None]:
# Overlaid histograms of each numeric feature, split by implant outcome
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# (column name, axis label) pairs - one subplot each, filled row by row
features_to_plot = [
    ('insertion_torque_ncm', 'Insertion Torque (Ncm)'),
    ('isq_placement', 'ISQ at Placement'),
    ('hounsfield_units', 'Bone Density (HU)'),
    ('age', 'Patient Age'),
    ('implant_length_mm', 'Implant Length (mm)'),
    ('implant_diameter_mm', 'Implant Diameter (mm)')
]

# (value of 'success', legend name, bar colour); failures are drawn first
outcome_styles = (
    (0, 'Failure', PERIOSPOT_COLORS['crimson_blaze']),
    (1, 'Success', PERIOSPOT_COLORS['periospot_blue']),
)

for ax, (feature, label) in zip(axes.ravel(), features_to_plot):
    for outcome, outcome_name, hist_color in outcome_styles:
        # Values of this feature for the cases with the given outcome
        values = df.loc[df['success'] == outcome, feature]
        ax.hist(values, bins=20, alpha=0.6, color=hist_color,
                label=f'{outcome_name} (n={len(values)})', edgecolor='white')

    ax.set(xlabel=label, ylabel='Count', title=f'{label} by Outcome')
    ax.legend(loc='upper right', fontsize=9)

plt.tight_layout()
plt.savefig('figures/01b_feature_distributions.png', dpi=150, bbox_inches='tight')
plt.show()


## Phase 3: Understanding the Sigmoid Function

The **sigmoid function** is the heart of logistic regression. It transforms any real number into a probability between 0 and 1.

$$\sigma(z) = \frac{1}{1 + e^{-z}}$$


In [None]:
# Visualize the sigmoid function
def sigmoid(z):
    """Map a real-valued score (scalar or array) to a probability in [0, 1].

    Computes 1 / (1 + exp(-z)) in a numerically stable form: only
    exp(-|z|) is ever evaluated, so very negative scores cannot overflow
    (the naive formula evaluates exp(+large) and emits a RuntimeWarning).

    Args:
        z: A number or array-like of linear scores.

    Returns:
        A NumPy float for scalar input, otherwise an array shaped like ``z``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # always in [0, 1], so it cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) - algebraically identical
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # unwrap 0-d arrays so scalar input yields a scalar

# Create z values from -10 to 10 and their sigmoid probabilities
z = np.linspace(-10, 10, 200)
p = sigmoid(z)

fig, ax = plt.subplots(figsize=(12, 6))

# Main sigmoid curve (legend text restored from mis-encoded characters)
ax.plot(z, p, color=PERIOSPOT_COLORS['periospot_blue'], linewidth=3, label='σ(z) = 1/(1+e⁻ᶻ)')

# Reference lines: p = 0.5 is the default decision boundary, reached at z = 0
ax.axhline(0.5, color=PERIOSPOT_COLORS['crimson_blaze'], linestyle='--', alpha=0.7, label='p = 0.5 (decision boundary)')
ax.axvline(0, color=PERIOSPOT_COLORS['mystic_blue'], linestyle=':', alpha=0.7, label='z = 0')

# Shade the area under the curve by predicted class
ax.fill_between(z, 0, p, where=(z < 0), alpha=0.2, color=PERIOSPOT_COLORS['crimson_blaze'], label='Predict Failure (p < 0.5)')
ax.fill_between(z, 0, p, where=(z >= 0), alpha=0.2, color=PERIOSPOT_COLORS['periospot_blue'], label='Predict Success (p ≥ 0.5)')

# Mark key points with their (z, probability) coordinates
key_z = [-5, -2, 0, 2, 5]
for z_val in key_z:
    p_val = sigmoid(z_val)
    ax.plot(z_val, p_val, 'o', color=PERIOSPOT_COLORS['mystic_blue'], markersize=10)
    ax.annotate(f'({z_val}, {p_val:.2f})', (z_val, p_val),
                textcoords="offset points", xytext=(0, 15), ha='center', fontsize=9)

ax.set_xlabel('z (linear score = w·x + b)', fontsize=12)
ax.set_ylabel('σ(z) = Probability', fontsize=12)
ax.set_title('The Sigmoid Function: Transforming Scores to Probabilities', fontsize=14, fontweight='bold')
ax.legend(loc='upper left', fontsize=10)
ax.set_xlim(-10, 10)
ax.set_ylim(-0.05, 1.05)  # small margin so the curve's limits at 0 and 1 stay visible
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('figures/02_sigmoid_function.png', dpi=150, bbox_inches='tight')
plt.show()

print("\n📝 Key Properties of the Sigmoid:")
print("   • Output is always between 0 and 1")
print("   • When z = 0, σ(z) = 0.5 (maximum uncertainty)")
print("   • Very negative z → probability close to 0")
print("   • Very positive z → probability close to 1")


## Phase 4: Data Preparation

Before training, we need to:
1. Select features and target
2. Split into train/test sets
3. Scale features (important for logistic regression!)


In [None]:
# Select features for the model; this list also fixes the column order that the
# scaler and the model see later in the notebook
feature_columns = [
    'insertion_torque_ncm',
    'isq_placement',
    'hounsfield_units',
    'age',
    'smoking_status',
    'diabetes_status',
    'implant_length_mm',
    'implant_diameter_mm'
]

# X: feature matrix, y: binary target (1 = success, 0 = failure)
X = df[feature_columns]
y = df['success']

# Shape report (emoji restored from mis-encoded text)
print(f"📊 Features shape: {X.shape}")
print(f"🎯 Target shape: {y.shape}")
print(f"\n📋 Features: {feature_columns}")


In [None]:
# Train-test split (80% train, 20% test). stratify=y keeps the success/failure
# ratio the same in both parts; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Size and balance report (emoji restored from mis-encoded text)
print(f"📊 Training set: {X_train.shape[0]} samples")
print(f"📊 Test set: {X_test.shape[0]} samples")
print("\n✅ Stratified split ensures class balance is preserved:")
print(f"   Train success rate: {y_train.mean():.1%}")
print(f"   Test success rate: {y_test.mean():.1%}")


In [None]:
# Scale features. The scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert back to DataFrame for interpretability (the scaler returns plain arrays)
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=feature_columns, index=X_train.index)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=feature_columns, index=X_test.index)

# Sanity check output (symbols restored from mis-encoded text)
print("✅ Features scaled using StandardScaler")
print("\n📊 Scaled feature statistics (should be mean≈0, std≈1):")
print(X_train_scaled_df.describe().round(3).loc[['mean', 'std']])


## Phase 5: Training the Logistic Regression Model

Now we train the model using scikit-learn's `LogisticRegression`.


In [None]:
# Train logistic regression model on the standardized training features
model = LogisticRegression(
    penalty='l2',           # L2 regularization (prevents overfitting)
    C=1.0,                  # Regularization strength (inverse)
    solver='lbfgs',         # Optimization algorithm
    max_iter=1000,          # Maximum iterations
    random_state=42
)

model.fit(X_train_scaled, y_train)

print("✅ Model trained successfully!")

# n_iter_ holds the iterations the solver actually ran. Reaching max_iter means
# it was cut off, so only claim convergence when it stopped earlier.
n_iterations = int(np.max(model.n_iter_))
if n_iterations < model.max_iter:
    print(f"\n📊 Model converged in optimization ({n_iterations} iterations)")
else:
    print(f"\n⚠️ Solver reached max_iter={model.max_iter} and may not have converged; "
          "consider increasing max_iter")


In [None]:
# Extract and display learned weights.
# coef_ has one row for this binary problem; row 0 holds one weight per feature.
weights = model.coef_[0]
intercept = model.intercept_[0]

# NOTE: the model was fitted on StandardScaler output, so each weight (and its
# odds ratio exp(weight)) describes a one-standard-deviation increase in the
# feature, not a one-unit increase in its raw clinical units.
print("📊 Learned Model Parameters:\n")
print(f"{'Feature':<25} {'Weight':>10} {'Odds Ratio':>12}")
print("-" * 50)
for feature, weight in zip(feature_columns, weights):
    odds_ratio = np.exp(weight)
    print(f"{feature:<25} {weight:>10.4f} {odds_ratio:>12.4f}")
print("-" * 50)
print(f"{'Intercept (b)':<25} {intercept:>10.4f}")


In [None]:
# Horizontal bar chart of odds ratios, smallest at the bottom
fig, ax = plt.subplots(figsize=(12, 7))

# exp(weight) turns each log-odds weight into an odds ratio
odds_ratios = np.exp(weights)
sorted_idx = np.argsort(odds_ratios)
sorted_ratios = odds_ratios[sorted_idx]
sorted_names = [feature_columns[i] for i in sorted_idx]

# One bar per feature; red when the ratio is below 1, blue otherwise
y_pos = np.arange(len(feature_columns))
colors = [
    PERIOSPOT_COLORS['crimson_blaze'] if ratio < 1 else PERIOSPOT_COLORS['periospot_blue']
    for ratio in sorted_ratios
]

bars = ax.barh(y_pos, sorted_ratios, color=colors, edgecolor='white', linewidth=2)
ax.set_yticks(y_pos)
ax.set_yticklabels(sorted_names)
ax.axvline(1.0, color=PERIOSPOT_COLORS['mystic_blue'], linestyle='--', linewidth=2, label='OR = 1 (no effect)')

# Write the numeric value next to the end of each bar
for bar, ratio in zip(bars, sorted_ratios):
    width = bar.get_width()
    if width > 1:
        x_pos = width + 0.02
    else:
        x_pos = width - 0.1
    y_center = bar.get_y() + bar.get_height()/2
    ax.text(x_pos, y_center, f'{ratio:.3f}',
            va='center', fontsize=10, fontweight='bold')

ax.set_xlabel('Odds Ratio', fontsize=12)
ax.set_title('Feature Importance: Odds Ratios\n(OR > 1 increases success odds, OR < 1 decreases)',
             fontsize=14, fontweight='bold')
ax.legend(loc='upper right')

# Colour key, placed in axes coordinates at the top-left corner
legend_box = dict(boxstyle='round', facecolor=PERIOSPOT_COLORS['vanilla_cream'], alpha=0.8)
ax.text(0.02, 0.98, 'Red = Decreases success odds\nBlue = Increases success odds',
        transform=ax.transAxes, fontsize=10, verticalalignment='top',
        bbox=legend_box)

plt.tight_layout()
plt.savefig('figures/03_odds_ratios.png', dpi=150, bbox_inches='tight')
plt.show()


## Phase 6: Model Evaluation

For classification, we use different metrics than regression:
- **Accuracy**: % of correct predictions
- **Precision**: Of predicted successes, how many were actual successes?
- **Recall (Sensitivity)**: Of actual successes, how many did we catch?
- **F1 Score**: Harmonic mean of precision and recall
- **ROC-AUC**: Area under the ROC curve (model's discrimination ability)


In [None]:
# Hard class predictions (0/1) for both splits
y_pred_train = model.predict(X_train_scaled)
y_pred_test = model.predict(X_test_scaled)

# Predicted probabilities (for ROC curve); column 1 is P(success)
y_prob_train = model.predict_proba(X_train_scaled)[:, 1]
y_prob_test = model.predict_proba(X_test_scaled)[:, 1]

# Preview of the first test cases (emoji restored from mis-encoded text)
print("✅ Predictions made!")
print("\n📊 Sample predictions (first 10 test cases):")
sample_results = pd.DataFrame({
    'Actual': y_test.values[:10],
    'Predicted': y_pred_test[:10],
    'Probability': y_prob_test[:10].round(3)
})
print(sample_results.to_string(index=False))


In [None]:
# Calculate all metrics for both splits.
# The first four are computed from hard 0/1 predictions; ROC-AUC is computed
# from the predicted probabilities instead.
label_metrics = [
    ('Accuracy', accuracy_score),
    ('Precision', precision_score),
    ('Recall', recall_score),
    ('F1 Score', f1_score),
]
split_inputs = {
    'Training': (y_train, y_pred_train, y_prob_train),
    'Test': (y_test, y_pred_test, y_prob_test),
}

metrics = {'Metric': [metric_name for metric_name, _ in label_metrics] + ['ROC-AUC']}
for split_name, (y_true, y_hat, y_score) in split_inputs.items():
    metrics[split_name] = (
        [metric_fn(y_true, y_hat) for _, metric_fn in label_metrics]
        + [roc_auc_score(y_true, y_score)]
    )

# DataFrame.round only touches the numeric Training/Test columns
metrics_df = pd.DataFrame(metrics).round(4)

# Heading emoji restored from mis-encoded text
print("📊 Model Performance Metrics:\n")
print(metrics_df.to_string(index=False))


In [None]:
# ROC curve (left panel) and precision-recall curve (right panel)
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# ROC points and AUC for both splits, from the predicted probabilities
fpr_train, tpr_train, _ = roc_curve(y_train, y_prob_train)
fpr_test, tpr_test, _ = roc_curve(y_test, y_prob_test)

auc_train = roc_auc_score(y_train, y_prob_train)
auc_test = roc_auc_score(y_test, y_prob_test)

axes[0].plot(fpr_train, tpr_train, color=PERIOSPOT_COLORS['mystic_blue'],
             linewidth=2, label=f'Training (AUC = {auc_train:.3f})')
axes[0].plot(fpr_test, tpr_test, color=PERIOSPOT_COLORS['crimson_blaze'],
             linewidth=2, label=f'Test (AUC = {auc_test:.3f})')
# The diagonal is what a classifier with no discrimination would trace
axes[0].plot([0, 1], [0, 1], 'k--', linewidth=1, label='Random Classifier')

axes[0].fill_between(fpr_test, 0, tpr_test, alpha=0.2, color=PERIOSPOT_COLORS['crimson_blaze'])
axes[0].set_xlabel('False Positive Rate (1 - Specificity)')
axes[0].set_ylabel('True Positive Rate (Sensitivity)')
axes[0].set_title('ROC Curve: Model Discrimination', fontweight='bold')
axes[0].legend(loc='lower right')
axes[0].grid(True, alpha=0.3)

# Precision-Recall Curve (test split only)
precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_prob_test)
ap = average_precision_score(y_test, y_prob_test)

axes[1].plot(recall_curve, precision_curve, color=PERIOSPOT_COLORS['periospot_blue'],
             linewidth=2, label=f'PR Curve (AP = {ap:.3f})')
# Baseline: precision obtained by labelling every case a success (= prevalence)
axes[1].axhline(y_test.mean(), color=PERIOSPOT_COLORS['crimson_blaze'],
                linestyle='--', label=f'Baseline (prevalence = {y_test.mean():.2f})')
axes[1].fill_between(recall_curve, 0, precision_curve, alpha=0.2, color=PERIOSPOT_COLORS['periospot_blue'])
axes[1].set_xlabel('Recall (Sensitivity)')
axes[1].set_ylabel('Precision')
axes[1].set_title('Precision-Recall Curve', fontweight='bold')
axes[1].legend(loc='lower left')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('figures/04_roc_curve.png', dpi=150, bbox_inches='tight')
plt.show()

# Rule-of-thumb reading guide (symbols restored from mis-encoded text)
print("\n📊 AUC Interpretation:")
print("   • AUC = 0.5: Random guessing")
print("   • AUC = 0.7-0.8: Acceptable discrimination")
print("   • AUC = 0.8-0.9: Excellent discrimination")
print("   • AUC > 0.9: Outstanding discrimination")
print(f"\n   Our model: AUC = {auc_test:.3f}")


## Phase 7: Threshold Analysis

By default, we classify as "success" when P(success) > 0.5. But this threshold can be adjusted based on clinical costs!


In [None]:
# Analyze different thresholds: 0.10, 0.15, ..., 0.90 (17 values).
# linspace + round gives an exact, fixed-length grid; np.arange with a float
# step can gain or lose the endpoint and yields values like 0.15000000000000002.
thresholds = np.round(np.linspace(0.1, 0.9, 17), 2)

threshold_metrics = []
for thresh in thresholds:
    # Predict success whenever P(success) reaches the threshold
    y_pred_thresh = (y_prob_test >= thresh).astype(int)
    threshold_metrics.append({
        'Threshold': thresh,
        'Accuracy': accuracy_score(y_test, y_pred_thresh),
        # zero_division=0: report 0 instead of warning when a class is never predicted
        'Precision': precision_score(y_test, y_pred_thresh, zero_division=0),
        'Recall': recall_score(y_test, y_pred_thresh, zero_division=0),
        'F1': f1_score(y_test, y_pred_thresh, zero_division=0)
    })

threshold_df = pd.DataFrame(threshold_metrics)

# Plot threshold analysis
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Metrics vs Threshold
axes[0].plot(threshold_df['Threshold'], threshold_df['Accuracy'],
             label='Accuracy', linewidth=2, color=PERIOSPOT_COLORS['periospot_blue'])
axes[0].plot(threshold_df['Threshold'], threshold_df['Precision'],
             label='Precision', linewidth=2, color=PERIOSPOT_COLORS['crimson_blaze'])
axes[0].plot(threshold_df['Threshold'], threshold_df['Recall'],
             label='Recall', linewidth=2, color=PERIOSPOT_COLORS['mystic_blue'])
axes[0].plot(threshold_df['Threshold'], threshold_df['F1'],
             label='F1 Score', linewidth=2, linestyle='--', color='black')

axes[0].axvline(0.5, color='gray', linestyle=':', alpha=0.7, label='Default threshold (0.5)')
axes[0].set_xlabel('Classification Threshold')
axes[0].set_ylabel('Score')
axes[0].set_title('Metrics vs. Threshold', fontweight='bold')
axes[0].legend(loc='lower center')
axes[0].grid(True, alpha=0.3)
axes[0].set_xlim(0.1, 0.9)

# Precision-Recall Tradeoff: one point per threshold
axes[1].plot(threshold_df['Recall'], threshold_df['Precision'],
             'o-', color=PERIOSPOT_COLORS['periospot_blue'], linewidth=2, markersize=6)

# Mark specific thresholds; isclose avoids relying on exact float equality
for thresh in [0.3, 0.5, 0.7]:
    row = threshold_df.loc[np.isclose(threshold_df['Threshold'], thresh)].iloc[0]
    color = 'green' if thresh == 0.5 else PERIOSPOT_COLORS['crimson_blaze']
    axes[1].annotate(f't={thresh}', (row['Recall'], row['Precision']),
                     textcoords="offset points", xytext=(10, 5), fontsize=10, color=color)

axes[1].set_xlabel('Recall (Sensitivity)')
axes[1].set_ylabel('Precision')
axes[1].set_title('Precision-Recall Tradeoff at Different Thresholds', fontweight='bold')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('figures/05_threshold_analysis.png', dpi=150, bbox_inches='tight')
plt.show()


In [None]:
# Side-by-side confusion matrices for three decision thresholds (test split)
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

thresholds_to_show = [0.3, 0.5, 0.7]
titles = ['Conservative (t=0.3)\nFavor catching successes',
          'Default (t=0.5)\nBalanced',
          'Strict (t=0.7)\nFavor confidence']

# Heatmap options shared by all three panels
heatmap_options = {
    'annot': True,
    'fmt': 'd',
    'cmap': 'Blues',
    'xticklabels': ['Pred Failure', 'Pred Success'],
    'yticklabels': ['Actual Failure', 'Actual Success'],
    'annot_kws': {'size': 14},
}

for panel, (thresh, title) in zip(axes, zip(thresholds_to_show, titles)):
    # Predict success when P(success) is at or above this threshold
    y_pred_thresh = (y_prob_test >= thresh).astype(int)
    cm = confusion_matrix(y_test, y_pred_thresh)

    sns.heatmap(cm, ax=panel, **heatmap_options)
    panel.set_title(title, fontweight='bold')

plt.tight_layout()
plt.savefig('figures/06_confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()


## Phase 8: Making a Prediction (Dr. Marco's Patient)

Let's use our trained model to predict the success probability for Dr. Marco's patient from the chapter scenario.


In [None]:
# Dr. Marco's patient data (one row, same columns the model was trained on)
marcos_patient = pd.DataFrame([{
    'insertion_torque_ncm': 32,
    'isq_placement': 68,
    'hounsfield_units': 650,  # Reasonable bone density
    'age': 45,
    'smoking_status': 0,      # Non-smoker
    'diabetes_status': 0,     # No diabetes
    'implant_length_mm': 10,
    'implant_diameter_mm': 4.0
}])

# Pin the column order to the training order so the scaler and model always
# receive features in the positions they were fitted on.
marcos_patient = marcos_patient[feature_columns]

# Scale the features with the scaler fitted on the training data
marcos_patient_scaled = scaler.transform(marcos_patient)

# Make prediction: column 1 of predict_proba is P(success)
prob_success = model.predict_proba(marcos_patient_scaled)[0, 1]
prediction = 'Success' if prob_success >= 0.5 else 'Failure'

# Report (emoji and arrows restored from mis-encoded text)
print("🦷 Dr. Marco's Patient Prediction\n")
print("Patient Features:")
for col in marcos_patient.columns:
    print(f"   {col}: {marcos_patient[col].values[0]}")

print("\n📊 Model Prediction:")
print(f"   Probability of Success: {prob_success:.1%}")
print(f"   Classification (at 0.5 threshold): {prediction}")

# Three bands: >= 0.7 confident, 0.5-0.7 borderline, below 0.5 lower confidence
print("\n💡 Clinical Interpretation:")
if prob_success >= 0.7:
    print("   → High confidence in success. Immediate loading reasonable.")
elif prob_success >= 0.5:
    print("   → Borderline case. Consider patient factors and preferences.")
else:
    print("   → Lower confidence. Consider waiting or additional assessment.")


## Phase 9: Summary and Key Takeaways

Let's summarize what we learned in this codelab.


In [None]:
# Final summary (emoji, bullets and arrows restored from mis-encoded text)
print("="*60)
print("📚 LOGISTIC REGRESSION CODELAB SUMMARY")
print("="*60)

print("\n📊 Dataset:")
print(f"   • {len(df)} implant cases")
print(f"   • Success rate: {df['success'].mean():.1%}")
print(f"   • {len(feature_columns)} features used")

print("\n🎯 Model Performance (Test Set):")
print(f"   • Accuracy: {accuracy_score(y_test, y_pred_test):.1%}")
print(f"   • ROC-AUC: {roc_auc_score(y_test, y_prob_test):.3f}")
print(f"   • F1 Score: {f1_score(y_test, y_pred_test):.3f}")

print("\n🔑 Top 3 Predictors (by |weight|):")
# Rank feature positions by absolute weight, largest first. Sorting indices
# directly avoids looking each feature name up again with list.index().
ranked_idx = sorted(range(len(feature_columns)), key=lambda i: abs(weights[i]), reverse=True)
for idx in ranked_idx[:3]:
    # A positive weight raises the odds of success, a negative one lowers them
    direction = "↑ success" if weights[idx] > 0 else "↓ success"
    print(f"   • {feature_columns[idx]}: OR = {np.exp(weights[idx]):.3f} ({direction})")

print("\n💡 Key Learnings:")
print("   1. Sigmoid transforms linear scores into probabilities")
print("   2. Weights → Odds Ratios for clinical interpretation")
print("   3. Threshold choice depends on clinical costs")
print("   4. ROC-AUC measures discrimination ability")
print("   5. Class imbalance requires careful evaluation")

print("\n" + "="*60)


In [None]:
# List the PNG files written to ./figures, in alphabetical order
# (emoji and bullet restored from mis-encoded text)
import os

print("📁 Generated Figures:")
for f in sorted(os.listdir('figures')):
    if f.endswith('.png'):
        print(f"   • figures/{f}")


---

## 🧘 Reflection Log

### What did you learn in this session?
- How logistic regression transforms linear scores into probabilities using the sigmoid function
- How to interpret model weights as odds ratios for clinical meaning
- The importance of choosing appropriate thresholds based on clinical costs
- How to evaluate classification models using multiple metrics (accuracy, precision, recall, F1, ROC-AUC)

### How will this improve Periospot AI?
- Provides a foundation for binary classification tasks (success/failure, disease/healthy)
- Enables probabilistic predictions that inform clinical decision-making
- Demonstrates interpretable ML that clinicians can trust and explain

---

*Questions or feedback? Open an issue on the book's GitHub repository or reach out on Twitter @cisco_research*
