In [None]:
# Migraine Prediction ML Project - Complete Implementation
# =====================================================

# Import all necessary libraries

from sklearn.feature_selection import RFE, SelectKBest, chi2, f_classif
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Set random seed for reproducibility (legacy global NumPy RNG).
# NOTE(review): `np`, `plt` and `sns` are not imported by the visible import cell
# above — confirm they are imported before this cell on a fresh kernel.
np.random.seed(42)

# Configure plotting.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*';
# older releases only know the un-suffixed name, so pick whichever is installed.
_SEABORN_STYLE = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_SEABORN_STYLE)
sns.set_palette("husl")

In [None]:

# =============================================================================
# Step 2: Exploratory Data Analysis (EDA)
# Stage banner: two blank lines, the step title, then a 50-dash rule.
print("\n\n🔍 Step 2: Exploratory Data Analysis", "-" * 50, sep="\n")

# Create visualization function
def create_eda_plots():







# =============================================================================

# Stage banner for feature engineering: title followed by a 50-dash rule.
print("\n\n🔧 Step 3: Feature Engineering", "-" * 50, sep="\n")

def create_engineered_features(df):
    """Create new features based on domain knowledge.

    Works on a copy, so the caller's frame is not modified.

    NOTE(review): the visible cell only contains the `recent_exercise` feature
    and never defined `df_eng`; other engineered features may have been lost
    from this cell — confirm against the original notebook.

    Args:
        df: DataFrame with at least `user_id` and `exercise_minutes` columns.
            Rows are presumably one per user per day and already in
            chronological order within each user — the 3-row window below
            counts rows, not calendar days.

    Returns:
        A new DataFrame with the added column `recent_exercise` (1 if the user
        logged any exercise in the current or two preceding rows, else 0).
    """
    df_eng = df.copy()

    # Exercise consistency (had exercise in last 3 days).
    # `transform` keeps the result in the frame's own row order, so the values
    # stay attached to the right rows even when the frame is not sorted by user.
    exercise_last_3 = df_eng.groupby('user_id')['exercise_minutes'].transform(
        lambda minutes: minutes.rolling(window=3, min_periods=1).sum()
    )
    df_eng['recent_exercise'] = (exercise_last_3 > 0).astype(int)

    return df_eng

# Run the feature-engineering step on the loaded frame.
df_engineered = create_engineered_features(df)

# Report the column counts before and after engineering.
print("✅ Feature engineering completed!")
print("Original features: {}".format(df.shape[1]))
print("Engineered features: {}".format(df_engineered.shape[1]))



#

# =============================================================================
# PHASE 2: DATA PREPROCESSING
# =============================================================================

# Step 4 banner: title followed by a 50-dash rule.
print("\n\n🧹 Step 4: Data Cleaning and Validation", "-" * 50, sep="\n")




# Step 5 banner.
print("\n\n🔄 Step 5: Train-Test Split Strategy", "-" * 50, sep="\n")





# =============================================================================
# NOTE(review): the heading text for this phase is missing in the cell.
# =============================================================================

# Step 6 banner.
print("\n\n🤖 Step 6: Baseline Models", "-" * 50, sep="\n")




# Step 7 banner.
print("\n\n🚀 Step 7: Advanced Models", "-" * 50, sep="\n")


# Merge the baseline and advanced result records into one comparison table.
# NOTE(review): `baseline_results` and `advanced_results` are not defined in the
# visible part of this cell — confirm they are produced by Steps 6 and 7.
all_results = baseline_results + advanced_results
all_results_df = pd.DataFrame(all_results)

print("\n📊 All Models Comparison:")
print(all_results_df.round(3))

# Pick the winner by validation F1 (the row label of the highest 'Val_F1').
best_model_idx = all_results_df['Val_F1'].idxmax()
best_model_name = all_results_df.loc[best_model_idx, 'Model']
print(
    "\n🏆 Best Model: {} (F1: {:.3f})".format(
        best_model_name,
        all_results_df.loc[best_model_idx, 'Val_F1'],
    )
)

# =============================================================================
# PHASE 4: MODEL OPTIMIZATION
# =============================================================================

# Step 9: Hyperparameter Tuning — stage banner (title, then a 50-dash rule).
print("\n\n⚙️ Step 9: Hyperparameter Tuning", "-" * 50, sep="\n")

# Hyperparameter tuning for the best performing models
def tune_random_forest(X_train, y_train, X_val, y_val):
    """Grid-search Random Forest hyperparameters on the training data.

    Runs an exhaustive ``GridSearchCV`` (3-fold CV, F1 scoring, all cores) over
    tree count, depth and split/leaf size limits.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        X_val: Accepted but not used by this function.
        y_val: Accepted but not used by this function.

    Returns:
        tuple: ``(best_estimator_, best_params_)`` of the fitted search.
    """
    search_space = dict(
        n_estimators=[50, 100, 200],
        max_depth=[5, 10, 15, None],
        min_samples_split=[2, 5, 10],
        min_samples_leaf=[1, 2, 4],
    )

    searcher = GridSearchCV(
        RandomForestClassifier(random_state=42),
        search_space,
        cv=3,
        scoring='f1',
        n_jobs=-1,
    )
    searcher.fit(X_train, y_train)

    return searcher.best_estimator_, searcher.best_params_

def tune_xgboost(X_train, y_train, X_val, y_val):
    """Grid-search gradient-boosting hyperparameters on the training data.

    Despite the function name, the estimator being tuned is
    ``GradientBoostingClassifier``; no ``xgboost`` class is used here.
    The search is an exhaustive ``GridSearchCV`` (3-fold CV, F1 scoring,
    all cores).

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        X_val: Accepted but not used by this function.
        y_val: Accepted but not used by this function.

    Returns:
        tuple: ``(best_estimator_, best_params_)`` of the fitted search.
    """
    search_space = dict(
        n_estimators=[50, 100, 200],
        max_depth=[3, 5, 7],
        learning_rate=[0.01, 0.1, 0.2],
        subsample=[0.8, 0.9, 1.0],
    )

    searcher = GridSearchCV(
        GradientBoostingClassifier(random_state=42),
        search_space,
        cv=3,
        scoring='f1',
        n_jobs=-1,
    )
    searcher.fit(X_train, y_train)

    return searcher.best_estimator_, searcher.best_params_

# Tune top performing models
print("🔧 Tuning Random Forest...")
best_rf, best_rf_params = tune_random_forest(X_train, y_train, X_val, y_val)
print(f"✅ Best RF params: {best_rf_params}")

print("\n🔧 Tuning XGBoost...")
best_xgb, best_xgb_params = tune_xgboost(X_train, y_train, X_val, y_val)
print(f"✅ Best XGB params: {best_xgb_params}")

# Evaluate tuned models
tuned_results = []

for name, model in [('Tuned_RF', best_rf), ('Tuned_XGB', best_xgb)]:
    results, _ = evaluate_model(model, X_train, y_train,

In [None]:
# ============================================================================
# MIGRAINE DISEASE PROGRESSION MODEL
# Time Series and Sequence-Based Approach for Migraine Prediction
# ============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, f1_score
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings('ignore')

# For advanced time series modeling
try:
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, Flatten
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.callbacks import EarlyStopping
    TENSORFLOW_AVAILABLE = True
except ImportError:
    print("⚠️ TensorFlow not available. Using traditional ML models.")
    TENSORFLOW_AVAILABLE = False

# Seed NumPy's global RNG, and TensorFlow's as well when it imported successfully.
np.random.seed(42)
if TENSORFLOW_AVAILABLE:
    import tensorflow as tf
    tf.random.set_seed(42)

# Title banner framed by 60-character rules.
print(
    "🧠 MIGRAINE DISEASE PROGRESSION MODEL",
    "=" * 60,
    "Treating migraine prediction as a temporal disease progression problem",
    "=" * 60,
    sep="\n",
)

# ============================================================================
# PHASE 1: TEMPORAL FEATURE ENGINEERING FOR DISEASE PROGRESSION
# ============================================================================

def _days_since_and_until(days, migraine_flags, sentinel=999):
    """Distance from each row to the previous and next migraine day of one user.

    Args:
        days: 1-D array of `day` values for a single user, sorted ascending.
        migraine_flags: 1-D array aligned with `days`; rows equal to 1 are
            migraine days.
        sentinel: Value used where no earlier (or later) migraine exists.

    Returns:
        tuple: ``(days_since, days_until)`` arrays aligned with `days`.
    """
    days = np.asarray(days)
    migraine_days = days[np.asarray(migraine_flags) == 1]

    if migraine_days.size == 0:
        no_migraine = np.full(days.shape, sentinel)
        return no_migraine, no_migraine.copy()

    # Count of migraine days strictly before each row -> index of the previous one.
    n_before = np.searchsorted(migraine_days, days, side='left')
    previous_day = migraine_days[np.maximum(n_before - 1, 0)]
    days_since = np.where(n_before > 0, days - previous_day, sentinel)

    # Count of migraine days up to and including each row -> index of the next one.
    n_up_to = np.searchsorted(migraine_days, days, side='right')
    next_day = migraine_days[np.minimum(n_up_to, migraine_days.size - 1)]
    days_until = np.where(n_up_to < migraine_days.size, next_day - days, sentinel)

    return days_since, days_until


def create_disease_progression_features(df):
    """
    Create features that capture migraine disease progression patterns.

    The input is copied, sorted by ``user_id`` then ``day`` and re-indexed
    0..n-1. All rolling windows and shifts are computed per user and count
    rows, not calendar days (assumes one row per user per day and a numeric
    ``day`` column — TODO confirm against the dataset).

    Required columns: user_id, day, has_migraine, stress_level, sleep_hours,
    hydration_glasses, exercise_minutes, is_weekend, day_of_week.
    Optional column: migraine_severity (adds ``avg_severity_{7,14}d``).

    Look-ahead columns: ``days_until_next_migraine``, ``migraine_in_{1,2,3}d``
    and ``migraine_cluster_3d`` (centred window) are derived from FUTURE values
    of ``has_migraine``. They are useful as labels or for analysis, but feeding
    them to a model that predicts a later day's ``has_migraine`` leaks the
    target.

    Sentinel: ``days_since_last_migraine`` / ``days_until_next_migraine`` are
    999 when no earlier / later migraine exists for the user, including users
    with no migraine at all.

    ``sleep_variability_7d`` is 0.0 where the rolling standard deviation is
    undefined (a user's first row), so the column is NaN-free for complete
    ``sleep_hours`` data. ``lifestyle_stability`` is still computed from the
    raw standard deviation, so those rows do not count as "stable sleep".

    Args:
        df: Input DataFrame (not modified).

    Returns:
        A new DataFrame, sorted by user and day, with the progression
        features appended.
    """
    print("🔬 Creating Disease Progression Features...")

    no_migraine_sentinel = 999

    # sort_values returns a new frame, so the caller's data is left untouched.
    df_prog = df.sort_values(['user_id', 'day']).reset_index(drop=True)

    def rolling_by_user(column, window, stat, center=False):
        """Per-user rolling `stat` of `column`, indexed like df_prog.

        Dropping the group level of the result's MultiIndex lets pandas align
        the values by row label instead of relying on positional order.
        """
        windows = df_prog.groupby('user_id')[column].rolling(
            window=window, min_periods=1, center=center
        )
        return getattr(windows, stat)().reset_index(level=0, drop=True)

    # ========================================
    # MIGRAINE CYCLE ANALYSIS
    # ========================================
    print("📊 Analyzing migraine cycles...")

    day_values = df_prog['day'].to_numpy()
    migraine_flags = df_prog['has_migraine'].to_numpy()
    distance_dtype = np.result_type(day_values.dtype, np.int64)

    # Start from the sentinel so users without any migraine are handled too.
    days_since = np.full(len(df_prog), no_migraine_sentinel, dtype=distance_dtype)
    days_until = days_since.copy()

    for positions in df_prog.groupby('user_id', sort=False).indices.values():
        positions = np.sort(positions)  # keep each user's rows in day order
        since, until = _days_since_and_until(
            day_values[positions], migraine_flags[positions], no_migraine_sentinel
        )
        days_since[positions] = since
        days_until[positions] = until

    df_prog['days_since_last_migraine'] = days_since
    df_prog['days_until_next_migraine'] = days_until

    # ========================================
    # PRODROME PHASE DETECTION (Pre-migraine symptoms)
    # ========================================
    print("🔍 Detecting prodrome phase patterns...")

    # "Migraine N rows ahead" flags; the per-user shift never reads across users.
    for lookback in [1, 2, 3]:
        df_prog[f'migraine_in_{lookback}d'] = (
            df_prog.groupby('user_id')['has_migraine']
            .shift(-lookback)
            .fillna(0)
            .astype(int)
        )

    # Prodrome risk score (elevated stress + poor sleep + days since last migraine
    # + low hydration); each condition contributes one point.
    df_prog['prodrome_risk_score'] = (
        (df_prog['stress_level'] > df_prog['stress_level'].quantile(0.7)).astype(int) +
        (df_prog['sleep_hours'] < 6.5).astype(int) +
        (df_prog['days_since_last_migraine'].between(3, 10)).astype(int) +
        (df_prog['hydration_glasses'] < df_prog['hydration_glasses'].quantile(0.3)).astype(int)
    )

    # ========================================
    # MIGRAINE FREQUENCY AND INTENSITY PATTERNS
    # ========================================
    print("📈 Analyzing frequency and intensity patterns...")

    # Rolling migraine frequency (disease burden)
    for window in [7, 14, 30]:
        df_prog[f'migraine_frequency_{window}d'] = rolling_by_user('has_migraine', window, 'sum')

    # Migraine intensity if available
    if 'migraine_severity' in df_prog.columns:
        for window in [7, 14]:
            df_prog[f'avg_severity_{window}d'] = rolling_by_user('migraine_severity', window, 'mean')

    # ========================================
    # TRIGGER ACCUMULATION MODEL
    # ========================================
    print("⚡ Creating trigger accumulation features...")

    # Cumulative stress exposure
    df_prog['stress_burden_3d'] = rolling_by_user('stress_level', 3, 'sum')
    df_prog['stress_burden_7d'] = rolling_by_user('stress_level', 7, 'sum')

    # Sleep debt accumulation: hours short of a 7.5 h reference, never negative.
    df_prog['sleep_debt'] = np.maximum(0, 7.5 - df_prog['sleep_hours'])
    df_prog['cumulative_sleep_debt_3d'] = rolling_by_user('sleep_debt', 3, 'sum')
    df_prog['cumulative_sleep_debt_7d'] = rolling_by_user('sleep_debt', 7, 'sum')

    # Dehydration risk accumulation (fewer than 6 glasses counts as a risk day)
    df_prog['dehydration_risk'] = (df_prog['hydration_glasses'] < 6).astype(int)
    df_prog['dehydration_days_3d'] = rolling_by_user('dehydration_risk', 3, 'sum')

    # ========================================
    # RECOVERY AND RESILIENCE FACTORS
    # ========================================
    print("💪 Adding recovery and resilience factors...")

    # Exercise consistency (protective factor): number of rows with any exercise
    # in the last 7. Summing a 0/1 indicator avoids a Python callback per window.
    exercised = (df_prog['exercise_minutes'] > 0).astype(int)
    df_prog['exercise_consistency_7d'] = (
        exercised.groupby(df_prog['user_id'])
        .rolling(window=7, min_periods=1)
        .sum()
        .reset_index(level=0, drop=True)
    )

    # Lifestyle stability score. The std of a single observation is NaN; the
    # stored column replaces that with 0.0, while the stability flag below
    # uses the raw value (NaN < 1 is False).
    sleep_std_7d = rolling_by_user('sleep_hours', 7, 'std')
    df_prog['sleep_variability_7d'] = sleep_std_7d.fillna(0.0)

    df_prog['lifestyle_stability'] = (
        (sleep_std_7d < 1).astype(int) +
        (df_prog['exercise_consistency_7d'] >= 3).astype(int)
    )

    # ========================================
    # TEMPORAL PROGRESSION FEATURES
    # ========================================
    print("⏰ Creating temporal progression features...")

    # Migraine episode clustering (multiple migraines close together).
    # The window is centred, so this flag also looks one row into the future.
    df_prog['migraine_cluster_3d'] = (
        rolling_by_user('has_migraine', 3, 'sum', center=True) >= 2
    ).astype(int)

    # Time-based vulnerability windows
    df_prog['weekend_vulnerability'] = (
        (df_prog['is_weekend'] == 1) &
        (df_prog['days_since_last_migraine'] > 3) &
        (df_prog['stress_burden_7d'] > df_prog['stress_burden_7d'].quantile(0.6))
    ).astype(int)

    # Workweek stress accumulation (day_of_week 0-4 treated as the workweek)
    df_prog['workweek_stress_buildup'] = (
        (df_prog['day_of_week'].isin([0, 1, 2, 3, 4])) &
        (df_prog['stress_burden_3d'] > df_prog['stress_burden_3d'].quantile(0.7))
    ).astype(int)

    print(f"✅ Disease progression features created. Total features: {df_prog.shape[1]}")

    return df_prog

# ============================================================================
# SEQUENCE MODELING FOR DISEASE PROGRESSION
# ============================================================================

def create_sequences_for_user(user_data, sequence_length=7, target_col='has_migraine'):
    """
    Create sliding-window sequences for time series modeling of one user.

    Window ``i`` holds the feature rows ``i .. i + sequence_length - 1`` and
    its target is ``target_col`` on row ``i + sequence_length`` (the row right
    after the window).

    Identifier/label columns (``user_id``, ``day``, ``has_migraine``,
    ``migraine_severity``) are never used as features. Columns that are
    computed from future ``has_migraine`` values are excluded as well —
    ``days_until_next_migraine``, ``migraine_cluster_3d`` and every
    ``migraine_in_*`` column — because on the last row of a window they encode
    the very label being predicted (target leakage).

    Args:
        user_data: DataFrame holding a single user's rows in chronological order.
        sequence_length: Number of consecutive rows per input window.
        target_col: Column to read the label from.

    Returns:
        tuple: ``(X, y)`` where ``X`` has shape
        ``(n_sequences, sequence_length, n_features)`` and ``y`` has shape
        ``(n_sequences,)``. Both have length 0 when the user has no more than
        ``sequence_length`` rows.
    """
    non_feature_cols = {'user_id', 'day', 'has_migraine', 'migraine_severity'}
    look_ahead_cols = {'days_until_next_migraine', 'migraine_cluster_3d'}
    look_ahead_prefix = 'migraine_in_'

    feature_cols = [
        col for col in user_data.columns
        if col not in non_feature_cols
        and col not in look_ahead_cols
        and not str(col).startswith(look_ahead_prefix)
    ]

    # Convert once instead of slicing the DataFrame for every window.
    feature_matrix = user_data[feature_cols].to_numpy()
    targets = user_data[target_col].to_numpy()

    n_sequences = max(len(user_data) - sequence_length, 0)
    if n_sequences == 0:
        empty_X = np.empty((0, sequence_length, len(feature_cols)), dtype=feature_matrix.dtype)
        return empty_X, targets[:0]

    X_sequences = np.stack(
        [feature_matrix[start:start + sequence_length] for start in range(n_sequences)]
    )
    # The label of window `start` sits on row `start + sequence_length`.
    y_sequences = targets[sequence_length:sequence_length + n_sequences]

    return X_sequences, y_sequences

def prepare_sequence_data(df_prog, sequence_length=7):
    """
    Build sliding-window sequences for every user and stack them together.

    Users are visited in order of first appearance in ``df_prog``; each user's
    rows are sorted by ``day`` before windowing. Users with no more than
    ``sequence_length`` rows contribute nothing.

    Args:
        df_prog: Feature frame containing ``user_id`` and ``day`` columns.
        sequence_length: Number of rows per input window.

    Returns:
        tuple: ``(X, y, user_ids)`` — stacked inputs, targets and the owning
        user id of every sequence — or ``(None, None, None)`` when no user
        produced a sequence.
    """
    print(f"📊 Preparing sequence data with {sequence_length}-day lookback...")

    per_user_inputs, per_user_targets, owners = [], [], []

    for uid in df_prog['user_id'].unique():
        history = df_prog.loc[df_prog['user_id'] == uid].sort_values('day')

        # Too short to yield even one window plus its following target row.
        if len(history) <= sequence_length:
            continue

        windows, labels = create_sequences_for_user(history, sequence_length)
        if len(windows) == 0:
            continue

        per_user_inputs.append(windows)
        per_user_targets.append(labels)
        owners.extend([uid] * len(windows))

    if not per_user_inputs:
        print("❌ No sequences could be created")
        return None, None, None

    X_combined = np.vstack(per_user_inputs)
    y_combined = np.hstack(per_user_targets)
    user_ids_combined = np.array(owners)

    print(f"✅ Sequence data prepared: {X_combined.shape[0]} sequences, {X_combined.shape[1]} timesteps, {X_combined.shape[2]} features")
    return X_combined, y_combined, user_ids_combined

# ============================================================================
# ADVANCED TIME SERIES MODELS
# ============================================================================

class MigraineProgressionPredictor:
    """Binary migraine classifier over fixed-length feature sequences.

    ``model_type`` selects the estimator:

    * ``'lstm'`` / ``'cnn_lstm'`` — Keras networks, used only when TensorFlow
      was importable (``TENSORFLOW_AVAILABLE``).
    * ``'rf'``, ``'gb'``, ``'mlp'`` — scikit-learn models trained on the
      flattened sequence.
    * anything else, or a Keras type without TensorFlow — logistic regression
      on the flattened sequence.

    Inputs are 3-D arrays shaped ``(n_sequences, timesteps, n_features)``.
    Every path standardises its input with ``self.scaler``: the scikit-learn
    path scales each (timestep, feature) column of the flattened array, the
    Keras path scales each feature across all timesteps so the network is not
    fed raw values on very different scales.
    """

    # Model types trained with Keras when TensorFlow is available.
    _KERAS_MODEL_TYPES = ('lstm', 'cnn_lstm')

    def __init__(self, model_type='lstm'):
        self.model_type = model_type
        self.model = None                # set by fit()
        self.scaler = StandardScaler()   # fitted inside fit()
        self.history = None              # Keras History object, Keras paths only

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _uses_keras(self):
        """True when this instance trains/predicts through a Keras model."""
        return self.model_type in self._KERAS_MODEL_TYPES and TENSORFLOW_AVAILABLE

    def _require_fitted(self):
        """Fail with a clear message instead of an AttributeError on ``None``."""
        if self.model is None:
            raise RuntimeError("Model has not been trained yet; call fit() first.")

    @staticmethod
    def _compile_binary_classifier(model):
        """Compile `model` for binary classification with accuracy/precision/recall."""
        # Metric objects are used because the string aliases 'precision' and
        # 'recall' are not resolvable on every tf.keras release.
        from tensorflow.keras.metrics import Precision, Recall

        model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='binary_crossentropy',
            metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
        )
        return model

    def _scale_sequences(self, X, fit=False):
        """Standardise each feature of a 3-D batch, keeping its shape."""
        n_sequences, n_steps, n_features = X.shape
        rows = X.reshape(-1, n_features)
        scaled = self.scaler.fit_transform(rows) if fit else self.scaler.transform(rows)
        return scaled.reshape(n_sequences, n_steps, n_features)

    def _scale_flattened(self, X, fit=False):
        """Flatten each sequence to one row and standardise every column."""
        X_flat = X.reshape(X.shape[0], -1)
        return self.scaler.fit_transform(X_flat) if fit else self.scaler.transform(X_flat)

    def _fit_keras(self, X_train, y_train, X_val, y_val, epochs):
        """Shared training loop for the LSTM and CNN-LSTM networks."""
        build = self.build_lstm_model if self.model_type == 'lstm' else self.build_cnn_lstm_model
        self.model = build((X_train.shape[1], X_train.shape[2]))

        X_train_scaled = self._scale_sequences(X_train, fit=True)

        has_validation = X_val is not None and len(X_val) > 0
        validation_data = (self._scale_sequences(X_val), y_val) if has_validation else None

        # Without validation data there is no 'val_loss' to watch, so early
        # stopping has to follow the training loss instead.
        callbacks = [EarlyStopping(
            monitor='val_loss' if has_validation else 'loss',
            patience=10,
            restore_best_weights=True
        )]

        self.history = self.model.fit(
            X_train_scaled, y_train,
            epochs=epochs,
            batch_size=32,
            validation_data=validation_data,
            callbacks=callbacks,
            verbose=1
        )

    def _fit_sklearn(self, X_train, y_train):
        """Train the scikit-learn model selected by ``model_type``."""
        X_train_scaled = self._scale_flattened(X_train, fit=True)

        if self.model_type == 'rf':
            self.model = RandomForestClassifier(n_estimators=200, random_state=42)
        elif self.model_type == 'gb':
            self.model = GradientBoostingClassifier(n_estimators=200, random_state=42)
        elif self.model_type == 'mlp':
            self.model = MLPClassifier(
                hidden_layer_sizes=(100, 50, 25),
                random_state=42,
                max_iter=500
            )
        else:
            self.model = LogisticRegression(random_state=42, max_iter=1000)

        self.model.fit(X_train_scaled, y_train)

    # ------------------------------------------------------------------
    # Model builders
    # ------------------------------------------------------------------
    def build_lstm_model(self, input_shape):
        """Build LSTM model for migraine progression.

        Args:
            input_shape: ``(timesteps, n_features)`` of one sequence.

        Returns:
            A compiled Keras ``Sequential`` model with a sigmoid output.
        """
        model = Sequential([
            LSTM(64, return_sequences=True, input_shape=input_shape),
            Dropout(0.3),
            LSTM(32, return_sequences=False),
            Dropout(0.3),
            Dense(16, activation='relu'),
            Dropout(0.2),
            Dense(1, activation='sigmoid')
        ])

        return self._compile_binary_classifier(model)

    def build_cnn_lstm_model(self, input_shape):
        """Build CNN-LSTM hybrid model.

        The two unpadded kernel-3 convolutions followed by pooling shrink the
        time axis, so very short sequences leave nothing for the LSTM layers.

        Args:
            input_shape: ``(timesteps, n_features)`` of one sequence.

        Returns:
            A compiled Keras ``Sequential`` model with a sigmoid output.
        """
        model = Sequential([
            Conv1D(32, 3, activation='relu', input_shape=input_shape),
            Conv1D(32, 3, activation='relu'),
            MaxPooling1D(2),
            LSTM(50, return_sequences=True),
            Dropout(0.3),
            LSTM(25),
            Dropout(0.3),
            Dense(16, activation='relu'),
            Dense(1, activation='sigmoid')
        ])

        return self._compile_binary_classifier(model)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def fit(self, X_train, y_train, X_val=None, y_val=None, epochs=50):
        """Train the model.

        Args:
            X_train: Array shaped ``(n_sequences, timesteps, n_features)``.
            y_train: Binary labels, one per training sequence.
            X_val: Optional validation inputs (used by the Keras paths only).
            y_val: Optional validation labels (used by the Keras paths only).
            epochs: Maximum number of epochs for the Keras paths.
        """
        if self._uses_keras():
            self._fit_keras(X_train, y_train, X_val, y_val, epochs)
        else:
            # Fallback to traditional ML with flattened sequences
            self._fit_sklearn(X_train, y_train)

    def predict(self, X):
        """Make hard 0/1 predictions (probability threshold 0.5 for Keras models).

        Raises:
            RuntimeError: If called before ``fit``.
        """
        self._require_fitted()
        if self._uses_keras():
            probabilities = self.model.predict(self._scale_sequences(X))
            return (probabilities > 0.5).astype(int).flatten()
        return self.model.predict(self._scale_flattened(X))

    def predict_proba(self, X):
        """Predict the probability of the positive class as a 1-D array.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        self._require_fitted()
        if self._uses_keras():
            return self.model.predict(self._scale_sequences(X)).flatten()

        class_probabilities = self.model.predict_proba(self._scale_flattened(X))
        if class_probabilities.shape[1] == 1:
            # Training data contained a single class, so there is no second
            # column; report certainty for that class instead of failing.
            only_class = self.model.classes_[0]
            return np.full(len(class_probabilities), 1.0 if only_class == 1 else 0.0)
        return class_probabilities[:, 1]

# ============================================================================
# TEMPORAL VALIDATION FOR DISEASE PROGRESSION
# ============================================================================

def temporal_train_test_split(X, y, user_ids, test_size=0.2):
    """
    Split sequences into train and test sets by holding out whole users.

    Despite the name, the split is per user, not per time period: the last
    ``test_size`` fraction of users (in sorted id order) forms the test set,
    so no user appears on both sides.

    Small cohorts are handled explicitly:

    * With two or more users and ``test_size > 0`` at least one user is held
      out. A naive ``users[-0:]`` slice would otherwise select every user and
      leave the training set empty.
    * With a single user the split falls back to holding out the trailing
      ``test_size`` fraction of that user's rows (assumes rows are already in
      chronological order — TODO confirm at the call site).
    * ``test_size <= 0`` yields an empty test set.

    Args:
        X: Array of sequences, indexable by a boolean mask along axis 0.
        y: Array of targets aligned with ``X``.
        user_ids: Array giving the owning user of each sequence.
        test_size: Fraction of users (or rows, for one user) to hold out.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.
    """
    user_ids = np.asarray(user_ids)
    unique_users = np.unique(user_ids)
    n_users = len(unique_users)
    n_rows = len(user_ids)

    test_mask = np.zeros(n_rows, dtype=bool)

    if test_size > 0 and n_users >= 2:
        n_test_users = max(1, int(n_users * test_size))
        test_users = unique_users[max(n_users - n_test_users, 0):]
        test_mask = np.isin(user_ids, test_users)
    elif test_size > 0 and n_rows >= 2:
        n_test_rows = min(max(1, int(n_rows * test_size)), n_rows)
        test_mask[n_rows - n_test_rows:] = True

    train_mask = ~test_mask

    return X[train_mask], X[test_mask], y[train_mask], y[test_mask]

# ============================================================================
# MAIN EXECUTION PIPELINE
# ============================================================================

def run_migraine_progression_analysis(df):
    """
    Main function to run complete migraine progression analysis.

    Pipeline: build progression features, turn them into 7-row sequences,
    hold out test users, train every model type, then report on the model with
    the best test F1.

    A model that raises during training or evaluation is reported and skipped
    rather than aborting the run.

    Args:
        df: Raw per-user, per-day DataFrame accepted by
            ``create_disease_progression_features``.

    Returns:
        dict with keys ``'best_model'`` (result record of the winner),
        ``'all_results'`` (records per model name), ``'test_data'``
        (``(X_test, y_test)``) and ``'progression_features'``; or ``None``
        when no sequences could be built or every model failed.
    """
    print("\n🚀 Starting Migraine Disease Progression Analysis...")

    # Step 1: Create disease progression features
    df_prog = create_disease_progression_features(df)

    # Step 2: Prepare sequence data
    sequence_length = 7
    X_seq, y_seq, user_ids = prepare_sequence_data(df_prog, sequence_length)

    if X_seq is None:
        print("❌ Could not create sequence data. Check your dataset.")
        return None

    # Step 3: Temporal train-test split
    X_train, X_test, y_train, y_test = temporal_train_test_split(
        X_seq, y_seq, user_ids, test_size=0.2
    )

    print(f"\n📊 Data Split Summary:")
    print(f"Training sequences: {len(X_train)}")
    print(f"Testing sequences: {len(X_test)}")
    print(f"Training migraine rate: {y_train.mean():.3f}")
    print(f"Testing migraine rate: {y_test.mean():.3f}")

    # Step 4: Train multiple models
    models = {
        'LSTM': MigraineProgressionPredictor('lstm'),
        'CNN-LSTM': MigraineProgressionPredictor('cnn_lstm'),
        'Random Forest': MigraineProgressionPredictor('rf'),
        'Gradient Boosting': MigraineProgressionPredictor('gb'),
        'MLP': MigraineProgressionPredictor('mlp')
    }

    # The validation slice is the same for every model, so carve it out once:
    # first 80% of the training sequences for fitting, the rest for validation.
    val_split = int(0.8 * len(X_train))
    X_train_sub, y_train_sub = X_train[:val_split], y_train[:val_split]
    X_val, y_val = X_train[val_split:], y_train[val_split:]

    results = {}

    for name, model in models.items():
        print(f"\n🔄 Training {name}...")

        try:
            # Train model
            model.fit(X_train_sub, y_train_sub, X_val, y_val, epochs=30)

            # Make predictions
            y_pred = model.predict(X_test)
            y_pred_proba = model.predict_proba(X_test)

            # Calculate metrics
            f1 = f1_score(y_test, y_pred)
            auc = roc_auc_score(y_test, y_pred_proba)

            results[name] = {
                'f1_score': f1,
                'roc_auc': auc,
                'model': model,
                'predictions': y_pred,
                'probabilities': y_pred_proba
            }

            print(f"✅ {name} - F1: {f1:.3f}, AUC: {auc:.3f}")

        except Exception as e:
            # Deliberate best-effort: one failing model must not stop the others.
            print(f"❌ {name} failed: {str(e)}")

    # Nothing to rank if every model failed; max() on an empty dict would raise.
    if not results:
        print("❌ No model could be trained and evaluated. Check the errors above.")
        return None

    # Step 5: Select best model and detailed analysis
    best_model_name = max(results, key=lambda model_name: results[model_name]['f1_score'])
    best_model = results[best_model_name]

    print(f"\n🏆 BEST MODEL: {best_model_name}")
    print(f"F1-Score: {best_model['f1_score']:.3f}")
    print(f"ROC-AUC: {best_model['roc_auc']:.3f}")

    # Detailed evaluation
    print(f"\n📋 Detailed Classification Report:")
    print(classification_report(y_test, best_model['predictions']))

    # Confusion Matrix. Fixing the labels keeps the matrix 2x2 even when only
    # one class occurs in the test targets and predictions.
    cm = confusion_matrix(y_test, best_model['predictions'], labels=[0, 1])
    print(f"\n📊 Confusion Matrix:")
    print(f"                Predicted")
    print(f"              No    Yes")
    print(f"Actual No   {cm[0,0]:4d}  {cm[0,1]:4d}")
    print(f"       Yes  {cm[1,0]:4d}  {cm[1,1]:4d}")

    # Progression insights
    print(f"\n💡 DISEASE PROGRESSION INSIGHTS:")

    # High-risk sequences analysis
    high_risk_threshold = 0.7
    high_risk_mask = best_model['probabilities'] > high_risk_threshold

    print(f"High-risk predictions (>{high_risk_threshold}): {high_risk_mask.sum()}")
    if high_risk_mask.any():
        high_risk_accuracy = (
            y_test[high_risk_mask] == best_model['predictions'][high_risk_mask]
        ).mean()
        print(f"Accuracy on high-risk predictions: {high_risk_accuracy:.3f}")
        print(f"Actual migraine rate in high-risk sequences: {y_test[high_risk_mask].mean():.3f}")
    else:
        # Averaging over an empty selection would only yield NaN and a warning.
        print("No sequence exceeded the high-risk threshold; high-risk metrics skipped.")

    # Early warning capability
    early_warning_days = [1, 2, 3]
    print(f"\n⚡ Early Warning Analysis:")
    for days in early_warning_days:
        # Placeholder message only: nothing here measures multi-day lead time.
        print(f"Model can potentially provide {days}-day advance warning")

    print(f"\n🎯 CLINICAL RECOMMENDATIONS:")
    print("1. Monitor patients during high-risk sequences (probability > 0.7)")
    print("2. Implement preventive interventions when progression patterns detected")
    print("3. Focus on sleep debt accumulation and stress burden patterns")
    print("4. Use 7-day lookback window for optimal prediction accuracy")
    print("5. Consider personalized intervention thresholds based on individual patterns")

    return {
        'best_model': best_model,
        'all_results': results,
        'test_data': (X_test, y_test),
        'progression_features': df_prog
    }

# ============================================================================
# USAGE EXAMPLE
# ============================================================================

# Usage instructions, printed one line at a time when the cell runs.
_USAGE_LINES = (
    "\n" + "="*60,
    "📚 USAGE INSTRUCTIONS",
    "="*60,
    "To use this migraine progression model:",
    "1. Load your dataset with columns: user_id, day, sleep_hours, stress_level,",
    "   screen_time_hours, hydration_glasses, exercise_minutes, day_of_week,",
    "   is_weekend, month, has_migraine, migraine_severity",
    "2. Call: results = run_migraine_progression_analysis(df)",
    "3. The model will treat migraine prediction as a temporal disease progression",
    "4. Results include early warning capabilities and progression insights",
    "\nExample:",
    "# results = run_migraine_progression_analysis(your_dataframe)",
    "# best_model = results['best_model']",
    "# progression_features = results['progression_features']",
    "\n✅ Migraine Disease Progression Model Ready!",
    "This approach focuses on temporal patterns and disease progression rather than single-day predictions.",
)
print(*_USAGE_LINES, sep="\n")