# ALS Clinical Trial Success Prediction - Example Usage

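This notebook demonstrates how to use the ALS trial success prediction model end to end: loading the trial data, training the model, inspecting feature importance, scoring a new trial and a test split, reviewing recommendations, and saving the trained model. It expects `comprehensive_merged_trial_data.csv` in the working directory and the `als_trial_success_model` module to be importable.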

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split

from als_trial_success_model import ALSTrialSuccessPredictor

## 1. Load and Train Model

In [None]:
# Build the predictor; random_state=42 is handed to the constructor
# (presumably to seed any randomness in training — confirm in the model module).
predictor = ALSTrialSuccessPredictor(random_state=42)

# Read the merged trial table and name the column used as the label.
data_file = 'comprehensive_merged_trial_data.csv'
target_col = 'reached_phase_3_plus'
df = pd.read_csv(data_file)

# prepare_features returns the feature matrix X and the target y,
# which the later cells reuse for training and evaluation.
X, y = predictor.prepare_features(df, target_col)

print(f"Feature matrix shape: {X.shape}")
print(f"Target distribution: {y.value_counts().to_dict()}")

In [None]:
# Fit the predictor on the prepared data; train() returns a mapping of
# evaluation metrics that is indexed by name below.
metrics = predictor.train(X, y)

# Emit the whole report in one call; sep="\n" puts each entry on its own line,
# and the leading "\n" in the threshold entry keeps the blank separator line.
print(
    "Model Performance:",
    f"ROC-AUC: {metrics['roc_auc']:.3f}",
    f"PR-AUC: {metrics['pr_auc']:.3f}",
    f"\nOptimal Threshold: {predictor.optimal_threshold:.3f}",
    f"Recall: {metrics['optimal_recall']:.3f}",
    f"Precision: {metrics['optimal_precision']:.3f}",
    sep="\n",
)

## 2. Feature Importance Analysis

In [None]:
# Ask the predictor for its 15 highest-ranked features; the result is indexed
# below by its 'Feature' and 'Importance' columns.
importance_df = predictor.get_feature_importance(top_n=15)

# Horizontal bar chart drawn through the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(importance_df['Feature'], importance_df['Importance'], color='skyblue')
ax.set_xlabel('Importance Score')
ax.set_title('Top 15 Most Important Features for ALS Trial Success')
# barh draws the first row at the bottom; flip the axis so it appears on top.
ax.invert_yaxis()
fig.tight_layout()
plt.show()

## 3. Make Predictions on New Trials

In [None]:
# Example: describe a hypothetical trial with the attributes we know about.
new_trial = pd.DataFrame({
    'Target Accrual': [150],
    'Enrollment Duration (Mos.)': [18],
    'Treatment Duration (Mos.)': [12],
    'endpoint_survival': [1],
    'endpoint_efficacy': [3],
    'endpoint_safety': [2],
    'sponsor_trial_count': [15],
    'sponsor_is_major_pharma': [1],
    'biomarker_total_uses': [2],
    'population_description_length': [800],
    'supporting_url_count': [12]
})

# The reindex below keeps only the model's features, so report any supplied
# column that is not one of them (e.g. a misspelled feature name) instead of
# dropping it without a trace.
model_features = list(predictor.feature_names)
known_features = set(model_features)
unknown_cols = [col for col in new_trial.columns if col not in known_features]
if unknown_cols:
    print(f"Warning: ignoring columns not used by the model: {unknown_cols}")

# Align to the model's feature list in a single step: features we did not
# specify are filled with 0, and the columns end up in the same order as
# predictor.feature_names. Adding columns one at a time would leave the
# supplied columns first (wrong order) and fragment the frame.
new_trial = new_trial.reindex(columns=model_features, fill_value=0)

# Score the trial: probability of the positive class, plus the class label
# obtained with the predictor's tuned threshold.
probability = predictor.predict_proba(new_trial)[0, 1]
prediction = predictor.predict(new_trial, use_optimal_threshold=True)[0]

print(f"Success Probability: {probability:.1%}")
print(f"Prediction: {'Success' if prediction else 'Failure'}")

## 4. Batch Predictions

In [None]:
# Make predictions on a stratified 80/20 split of the prepared data.
# (train_test_split is imported with the other dependencies at the top.)
# NOTE(review): the predictor was fitted with predictor.train(X, y) on the
# full X, y. Unless train() holds out this same split internally, these rows
# were seen during training and the scores below are optimistic — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Positive-class probability and thresholded label for every test row.
test_proba = predictor.predict_proba(X_test)[:, 1]
test_pred = predictor.predict(X_test, use_optimal_threshold=True)

# Combine by position, keeping y_test's index. Converting the model outputs to
# plain arrays prevents pandas from aligning on index labels, which would
# introduce NaN rows if predict() returned a Series with a different index.
results = pd.DataFrame(
    {
        'actual': y_test,
        'probability': np.asarray(test_proba),
        'prediction': np.asarray(test_pred),
    },
    index=y_test.index,
)

# Show some examples
print("Sample Predictions:")
print(results.head(10))

## 5. Analyze Prediction Distribution

In [None]:
# Two panels: predicted-probability histograms by outcome, and a confusion matrix.
fig, (ax_hist, ax_cm) = plt.subplots(1, 2, figsize=(12, 5))

# Use one set of 30 bin edges spanning [0, 1] for both groups. With bins=30
# passed separately, each histogram picks its own range and bar width, so the
# overlaid counts are not directly comparable.
prob_bins = np.linspace(0, 1, 31)
failed_proba = results.loc[results['actual'] == 0, 'probability']
success_proba = results.loc[results['actual'] == 1, 'probability']

ax_hist.hist(failed_proba, bins=prob_bins, alpha=0.7, label='Failed Trials', color='red')
ax_hist.hist(success_proba, bins=prob_bins, alpha=0.7, label='Successful Trials', color='green')
ax_hist.axvline(predictor.optimal_threshold, color='black', linestyle='--',
                label=f'Threshold ({predictor.optimal_threshold:.3f})')
ax_hist.set_xlabel('Predicted Probability')
ax_hist.set_ylabel('Count')
ax_hist.set_title('Distribution of Predicted Probabilities')
ax_hist.legend()

# Cast both columns to int so boolean and 0/1 labels are tabulated alike, then
# force a full 2x2 table: a class that never occurs (e.g. the model predicts
# only one label) still gets a row/column of zeros instead of disappearing.
class_labels = [0, 1]
confusion_matrix = pd.crosstab(
    results['actual'].astype(int), results['prediction'].astype(int),
    rownames=['Actual'], colnames=['Predicted']
).reindex(
    index=pd.Index(class_labels, name='Actual'),
    columns=pd.Index(class_labels, name='Predicted'),
    fill_value=0,
)
sns.heatmap(confusion_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_cm)
ax_cm.set_title('Confusion Matrix')

fig.tight_layout()
plt.show()

## 6. Get Recommendations

In [None]:
# Header, followed by a 60-character rule
print("Recommendations for Maximizing ALS Trial Success:")
print("=" * 60)

# Print each recommendation from the predictor, preceded by a blank line
recommendations = predictor.get_recommendations()
for rec in recommendations:
    print(f"\n{rec}")

## 7. Save Model for Future Use

In [None]:
# Write the trained predictor to disk under a named path
model_path = 'als_trial_success_model.pkl'
predictor.save_model(model_path)

# To restore it in a later session:
# new_predictor = ALSTrialSuccessPredictor()
# new_predictor.load_model('als_trial_success_model.pkl')
# NOTE(review): the .pkl extension suggests a pickle file — if so, only load
# model files that come from a trusted source.