# In [None]:
import random
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shap
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
warnings.filterwarnings('ignore')

# Set all random seeds for reproducibility
def set_random_seeds(seed=42):
    """Seed the Python, NumPy and PyTorch random generators with ``seed``.

    Also switches cuDNN to deterministic mode and turns off its benchmark
    auto-tuner.
    """
    # Python-level and NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch: default generator, then the current and all CUDA devices
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN flags
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed every random generator before any model or loader is created
set_random_seeds(42)

# Global matplotlib defaults used by all figures below
plt.rcParams.update({'font.size': 12, 'figure.figsize': (12, 8)})

# Run on CPU with a fixed number of intra-op threads
device = torch.device('cpu')
torch.set_num_threads(4)
print(f"Using device: {device}")
print(f"CPU threads: {torch.get_num_threads()}")

class MLPDataset(Dataset):
    """In-memory dataset pairing float32 feature rows with int64 labels."""

    def __init__(self, X, y, seed=42):
        # Note: constructing a dataset re-seeds the global torch RNG.
        torch.manual_seed(seed)
        self.X, self.y = torch.FloatTensor(X), torch.LongTensor(y)

    def __len__(self):
        """Number of samples (length of the feature tensor)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

class MLPModel(nn.Module):
    """Feed-forward classifier with two output logits.

    Each entry of ``hidden_sizes`` contributes a Linear -> ReLU -> Dropout
    triple; a final Linear layer maps the last width to 2 logits.
    """

    def __init__(self, input_size, hidden_sizes, dropout_rate=0.1):
        super().__init__()

        # Consecutive pairs of widths give each hidden layer's (in, out) sizes.
        widths = [input_size, *hidden_sizes]
        modules = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout_rate)]

        # Binary classification head
        modules.append(nn.Linear(widths[-1], 2))

        self.network = nn.Sequential(*modules)

    def forward(self, x):
        """Return the raw (un-normalised) logits for ``x``."""
        return self.network(x)

class EarlyStopping:
    """Stop-training signal based on a monitored validation loss.

    An instance is called once per epoch with the current validation loss and
    the model. It remembers the lowest loss seen (an improvement must beat the
    previous best by more than ``min_delta``) and a snapshot of the model
    weights at that point. After ``patience`` consecutive calls without an
    improvement the call returns ``True`` and, when ``restore_best_weights``
    is set, loads the snapshot back into the model.
    """

    def __init__(self, patience=15, min_delta=0.001, restore_best_weights=True):
        self.patience = patience
        self.min_delta = min_delta
        self.restore_best_weights = restore_best_weights
        self.best_loss = None      # lowest validation loss seen so far
        self.counter = 0           # consecutive calls without improvement
        self.best_weights = None   # snapshot taken at ``best_loss``

    def __call__(self, val_loss, model):
        """Record ``val_loss``; return ``True`` when training should stop."""
        if self.best_loss is None:
            # First call: whatever we see is the best so far.
            self.best_loss = val_loss
            self.save_checkpoint(model)
        elif val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
            self.save_checkpoint(model)
        else:
            self.counter += 1

        if self.counter >= self.patience:
            if self.restore_best_weights:
                model.load_state_dict(self.best_weights)
            return True
        return False

    def save_checkpoint(self, model):
        """Store an independent copy of the model's current weights.

        ``state_dict()`` returns tensors that share storage with the live
        parameters, so a shallow ``dict.copy()`` would be silently overwritten
        by later optimizer steps. Each tensor is cloned instead.
        """
        self.best_weights = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

def train_model(model, train_loader, val_loader, epochs=500, lr=0.01, patience=15, seed=42):
    """
    Train model with early stopping monitored on separate validation set.

    Re-seeds the global torch and NumPy generators with ``seed``, then
    optimises ``model`` in place with Adam (``weight_decay=1e-5``) on a
    cross-entropy loss.

    Args:
        model: network to train; modified in place.
        train_loader: iterable of ``(features, labels)`` training batches.
        val_loader: iterable of ``(features, labels)`` batches used only to
            compute the loss that drives early stopping.
        epochs: maximum number of epochs.
        lr: Adam learning rate.
        patience: passed to ``EarlyStopping``.
        seed: value used to re-seed torch and NumPy before training.

    Returns:
        ``(train_losses, val_losses, epochs_run)``: the per-epoch mean batch
        losses and the number of epochs actually executed (0 when
        ``epochs <= 0``).

    Raises:
        ZeroDivisionError: if an epoch runs and either loader has length 0.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    early_stopping = EarlyStopping(patience=patience)

    train_losses = []
    val_losses = []
    # Tracked explicitly so the return value is defined even when the loop
    # body never runs (the loop variable would be unbound in that case).
    epochs_run = 0

    for epoch in range(epochs):
        epochs_run = epoch + 1

        # Training phase
        model.train()
        train_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Validation phase - early stopping monitored here
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()

        # Mean loss per batch
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if epoch % 50 == 0:
            print(f'    Epoch {epoch}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

        # Early stopping based on validation loss. A stop signal is only
        # honoured after epoch 10; before that training simply continues
        # (EarlyStopping may already have reloaded its best weights).
        if early_stopping(val_loss, model):
            if epoch > 10:
                print(f'    Early stopping at epoch {epoch}')
                break

    return train_losses, val_losses, epochs_run

def predict_model(model, data_loader):
    """Run ``model`` over ``data_loader`` in eval mode without gradients.

    Returns:
        ``(predictions, probabilities)`` as NumPy arrays: the arg-max class
        per sample and the softmax probability of class 1 per sample.
    """
    model.eval()
    label_chunks = []
    prob_chunks = []

    with torch.no_grad():
        for features, _ in data_loader:
            logits = model(features.to(device))
            class1_prob = torch.softmax(logits, dim=1)[:, 1]
            labels = torch.argmax(logits, dim=1)

            label_chunks.append(labels.cpu().numpy())
            prob_chunks.append(class1_prob.cpu().numpy())

    # An empty loader yields two empty arrays.
    if not label_chunks:
        return np.array([]), np.array([])

    return np.concatenate(label_chunks), np.concatenate(prob_chunks)

def generate_shap_analysis(model, X_test, feature_names, model_name):
    """
    Explain the class-1 probability of ``model`` with SHAP's KernelExplainer.

    Up to 100 evenly spaced rows of ``X_test`` serve as background data. SHAP
    values are computed for all of ``X_test`` in batches of 50, a summary plot
    is shown (a bar chart of mean |SHAP| is the fallback if that plot raises),
    and a ranked feature-importance table is printed.

    Returns:
        The stacked SHAP values for every row of ``X_test``.
    """
    print(f"\nGenerating SHAP analysis for {model_name}...")

    def model_predict_class1(X):
        # Probability of class 1 for a NumPy batch
        model.eval()
        with torch.no_grad():
            logits = model(torch.FloatTensor(X).to(device))
            return torch.softmax(logits, dim=1).cpu().numpy()[:, 1]

    n_samples = len(X_test)
    background_size = min(100, n_samples)
    picked_rows = np.linspace(0, n_samples - 1, background_size, dtype=int)
    background_data = X_test[picked_rows]

    print(f"Using {background_size} background samples for SHAP explainer...")
    explainer = shap.KernelExplainer(model_predict_class1, background_data)

    print(f"Computing SHAP values for test set ({n_samples} samples)...")

    batch_size = 50
    n_batches = (n_samples - 1) // batch_size + 1
    shap_chunks = []

    for batch_no, start in enumerate(range(0, n_samples, batch_size), 1):
        print(f"Processing batch {batch_no}/{n_batches}")
        # Slicing past the end is clipped, so the last batch may be shorter.
        shap_chunks.append(explainer.shap_values(X_test[start:start + batch_size]))

    shap_values = np.vstack(shap_chunks)
    feature_names_array = np.array(feature_names)

    # SHAP Summary Plot
    try:
        plt.figure(figsize=(12, 8))
        shap.summary_plot(shap_values, X_test, feature_names=feature_names_array, show=False)
        plt.title(f'SHAP Summary Plot - {model_name}', fontsize=14, fontweight='bold', pad=20)
        plt.tight_layout()
        plt.show()
    except Exception as e:
        print(f"Summary plot failed: {e}")

        # Fallback: horizontal bars of mean |SHAP|, smallest at the bottom
        plt.figure(figsize=(12, 8))
        importance = np.abs(shap_values).mean(0)
        order = np.argsort(importance)
        positions = range(len(importance))

        plt.barh(positions, importance[order])
        plt.yticks(positions, feature_names_array[order])
        plt.xlabel('Mean |SHAP value| (Feature Importance)')
        plt.title(f'Feature Importance - {model_name}')
        plt.tight_layout()
        plt.show()

    # Feature importance ranking
    importance_df = pd.DataFrame({
        'Feature': feature_names_array,
        'Mean_SHAP': shap_values.mean(axis=0),
        'Mean_Abs_SHAP': np.abs(shap_values).mean(axis=0)
    }).sort_values('Mean_Abs_SHAP', ascending=False)

    print(f"\nFeature Importance Ranking for {model_name}:")
    print("=" * 75)
    print(f"{'Rank':<4} {'Feature':<25} {'Mean SHAP':<12} {'|Mean SHAP|':<12} {'Direction':<12}")
    print("-" * 75)

    for rank, row in enumerate(importance_df.itertuples(index=False), 1):
        if row.Mean_SHAP > 0:
            arrow, label = "↑", "Positive"
        else:
            arrow, label = "↓", "Negative"

        print(f"{rank:2d}.  {row.Feature:<25} {row.Mean_SHAP:>+10.6f} {row.Mean_Abs_SHAP:>10.6f}   {arrow} {label}")

    return shap_values

def _split_by_company(df):
    """Split each company's rows chronologically into 70% / 15% / remainder."""
    train_parts, val_parts, test_parts = [], [], []

    for company in df['Company'].unique():
        company_data = df[df['Company'] == company].copy()
        company_data = company_data.sort_values('Date')

        n_company = len(company_data)
        train_size = int(0.7 * n_company)
        val_size = int(0.15 * n_company)

        train_parts.append(company_data.iloc[:train_size])
        val_parts.append(company_data.iloc[train_size:train_size + val_size])
        test_parts.append(company_data.iloc[train_size + val_size:])

    return (pd.concat(train_parts, ignore_index=True),
            pd.concat(val_parts, ignore_index=True),
            pd.concat(test_parts, ignore_index=True))


def _preprocess_splits(train_df, val_df, test_df, target_col, continuous_features, binary_features):
    """Drop incomplete rows and standardise features using training statistics."""
    required = continuous_features + binary_features + [target_col]
    frames = [frame.dropna(subset=required) for frame in (train_df, val_df, test_df)]

    def scale(columns):
        # Fit on the training frame only, then apply to validation and test.
        scaler = StandardScaler()
        fitted = scaler.fit_transform(frames[0][columns])
        return [fitted] + [scaler.transform(frame[columns]) for frame in frames[1:]]

    # Standardize continuous features
    blocks = scale(continuous_features)

    # Standardize binary features (separate scaler) and append them as columns
    if len(binary_features) > 0:
        blocks = [np.hstack(pair) for pair in zip(blocks, scale(binary_features))]

    X_train, X_val, X_test = blocks
    y_train, y_val, y_test = (frame[target_col] for frame in frames)
    return X_train, y_train, X_val, y_val, X_test, y_test


def _evaluate_and_plot(y_true, y_pred, y_prob, model_name, title_suffix):
    """Compute classification metrics, show the two figures, return the metrics."""
    cm = confusion_matrix(y_true, y_pred)
    f1_macro = f1_score(y_true, y_pred, average='macro')
    accuracy = accuracy_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_prob)
    precision_per_class = precision_score(y_true, y_pred, average=None)
    recall_per_class = recall_score(y_true, y_pred, average=None)
    f1_per_class = f1_score(y_true, y_pred, average=None)

    # Confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Predicted 0', 'Predicted 1'],
                yticklabels=['Actual 0', 'Actual 1'])
    plt.title(f'{model_name} MLP {title_suffix} - Confusion Matrix')
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.show()

    # Performance metrics table
    plt.figure(figsize=(10, 8))
    plt.axis('tight')
    plt.axis('off')

    table_data = [
        ['Metric', 'Value'],
        ['Macro F1 Score', f'{f1_macro:.4f}'],
        ['Overall Accuracy', f'{accuracy:.4f}'],
        ['AUC', f'{auc:.4f}'],
        ['', ''],
        ['Class 1 Precision', f'{precision_per_class[1]:.4f}'],
        ['Class 1 Recall', f'{recall_per_class[1]:.4f}'],
        ['Class 1 F1', f'{f1_per_class[1]:.4f}'],
        ['', ''],
        ['Class 0 Precision', f'{precision_per_class[0]:.4f}'],
        ['Class 0 Recall', f'{recall_per_class[0]:.4f}'],
        ['Class 0 F1', f'{f1_per_class[0]:.4f}']
    ]

    table = plt.table(cellText=table_data[1:], colLabels=table_data[0],
                      cellLoc='center', loc='center', colWidths=[0.4, 0.3])
    table.auto_set_font_size(False)
    table.set_fontsize(12)
    table.scale(1.5, 2)

    # Table row 0 is the header, so table row i lines up with table_data[i].
    for i, row in enumerate(table_data):
        for j in range(len(table_data[0])):
            cell = table[(i, j)]
            if i == 0:
                cell.set_facecolor('#4CAF50')
                cell.set_text_props(weight='bold', color='white')
            elif len(row) > 0 and row[0] == '':
                # Spacer rows are rendered blank.
                cell.set_facecolor('#ffffff')
                cell.set_text_props(color='white')
            else:
                cell.set_facecolor('#f0f0f0')

    plt.title(f'{model_name} MLP {title_suffix} - Performance Metrics', fontsize=16, fontweight='bold', pad=20)
    plt.show()

    return {'confusion_matrix': cm, 'f1_macro': f1_macro, 'accuracy': accuracy, 'auc': auc,
            'class_0_precision': precision_per_class[0], 'class_0_recall': recall_per_class[0], 'class_0_f1': f1_per_class[0],
            'class_1_precision': precision_per_class[1], 'class_1_recall': recall_per_class[1], 'class_1_f1': f1_per_class[1]}


def _fit_mlp(X_fit, y_fit, X_monitor, y_monitor, config, shuffle_seed, model_seed):
    """Build loaders and a fresh MLP for ``config``, train it, return (model, val_losses)."""
    fit_dataset = MLPDataset(X_fit, y_fit, seed=42)
    monitor_dataset = MLPDataset(X_monitor, y_monitor, seed=42)

    # Dedicated generator so the shuffle order depends only on shuffle_seed.
    shuffle_rng = torch.Generator()
    shuffle_rng.manual_seed(shuffle_seed)

    fit_loader = DataLoader(fit_dataset, batch_size=32, shuffle=True, generator=shuffle_rng)
    monitor_loader = DataLoader(monitor_dataset, batch_size=32, shuffle=False)

    # Seed immediately before construction so weight init is reproducible.
    torch.manual_seed(model_seed)
    model = MLPModel(X_fit.shape[1], config['hidden_sizes'], config['dropout']).to(device)

    _, val_losses, _ = train_model(
        model, fit_loader, monitor_loader, lr=config['lr'], patience=15, seed=model_seed
    )
    return model, val_losses


def _select_and_train(X_train, y_train, X_val, y_val, configs, tscv):
    """Score every config by mean fold validation loss; return the best retrained model.

    Whenever a config beats the best mean score so far, a model with that
    config is retrained on the full training set with early stopping on the
    separate validation set. Returns ``(best_model, best_config, best_score)``.
    """
    n_folds = tscv.get_n_splits()
    best_score = float('inf')
    best_config = None
    best_model = None

    for i, config in enumerate(configs):
        print(f"\nConfiguration {i+1}/{len(configs)}: {config}")

        cv_scores = []
        for fold, (train_idx, val_idx) in enumerate(tscv.split(X_train)):
            print(f"  Fold {fold + 1}/{n_folds}")

            # Early stopping is monitored on this fold's held-out slice.
            _, fold_losses = _fit_mlp(
                X_train[train_idx], y_train.iloc[train_idx].values,
                X_train[val_idx], y_train.iloc[val_idx].values,
                config, shuffle_seed=42 + fold, model_seed=42 + i * 10 + fold,
            )

            # NOTE(review): this is the loss of the last epoch run, not the
            # lowest loss seen during the fold - confirm that is intended.
            fold_val_loss = fold_losses[-1]
            cv_scores.append(fold_val_loss)
            print(f"    Fold {fold + 1} validation loss: {fold_val_loss:.4f}")

        mean_cv_score = np.mean(cv_scores)
        std_cv_score = np.std(cv_scores)
        print(f"  Average CV Score: {mean_cv_score:.4f} ± {std_cv_score:.4f}")

        if mean_cv_score < best_score:
            best_score = mean_cv_score
            best_config = config

            print("  New best config! Retraining on full training set...")
            best_model, _ = _fit_mlp(
                X_train, y_train.values, X_val, y_val.values,
                config, shuffle_seed=42, model_seed=42 + i,
            )

    if best_model is None:
        # Happens when there are no configs or every mean CV score is NaN.
        raise RuntimeError("No configuration produced a finite CV score; no model was trained")

    return best_model, best_config, best_score


def _run_horizon(splits, target_col, horizon, banner, continuous_features, binary_features,
                 configs, tscv, title_suffix):
    """Preprocess, select/train, evaluate and explain the model for one target column."""
    train_data, val_data, test_data = splits
    X_train, y_train, X_val, y_val, X_test, y_test = _preprocess_splits(
        train_data, val_data, test_data, target_col, continuous_features, binary_features)

    print(banner)
    best_model, best_config, best_score = _select_and_train(
        X_train, y_train, X_val, y_val, configs, tscv)

    print(f"\nBest configuration for {horizon} model: {best_config}")
    print(f"Best CV score: {best_score:.4f}")

    # Evaluate on the held-out test split
    test_loader = DataLoader(MLPDataset(X_test, y_test.values), batch_size=32, shuffle=False)
    y_pred, y_prob = predict_model(best_model, test_loader)

    display_name = f"{target_col} Prediction"
    results = _evaluate_and_plot(y_test, y_pred, y_prob, display_name, title_suffix)

    generate_shap_analysis(best_model, X_test, continuous_features + binary_features, display_name)

    return results, best_model, best_config, best_score


def run_mlp_model(df, continuous_features, binary_features, model_name, title_suffix):
    """
    PyTorch MLP model with 5-fold TimeSeriesSplit cross-validation for hyperparameter selection
    Early stopping monitored on separate validation set

    The same pipeline is run twice, once for the '1D_Up' target and once for
    the '20D_Up' target: per-company chronological 70/15/15 split,
    standardisation fitted on the training split, CV over a fixed list of
    configurations, retraining of the best configuration on the full training
    split, then test-set evaluation and SHAP analysis.

    Args:
        df: frame with 'Company', 'Date', the feature columns and both targets.
        continuous_features: list of continuous feature column names.
        binary_features: list of binary feature column names (may be empty).
        model_name: label used in the console banner.
        title_suffix: label appended to figure titles.

    Returns:
        ``(results_1d, results_20d, best_model_1d, best_model_20d)``.

    Raises:
        RuntimeError: if no configuration yields a finite mean CV score.
    """
    # Data splitting by company in chronological order.
    # NOTE(review): the splits are concatenated company by company, so the
    # TimeSeriesSplit folds follow that row order rather than calendar time
    # across companies - confirm this is intended.
    splits = _split_by_company(df)
    train_data, val_data, test_data = splits

    print(f"Training set size: {len(train_data)}")
    print(f"Validation set size: {len(val_data)}")
    print(f"Test set size: {len(test_data)}")

    print("=" * 60)
    print(f"PyTorch MLP {model_name} with 5-Fold TimeSeriesSplit CV")
    print("=" * 60)

    # Hyperparameter configurations
    configs = [
        # Single layer networks
        {'hidden_sizes': [32], 'lr': 0.001, 'dropout': 0.1},
        {'hidden_sizes': [64], 'lr': 0.001, 'dropout': 0.1},
        {'hidden_sizes': [128], 'lr': 0.001, 'dropout': 0.1},

        # Two layer networks
        {'hidden_sizes': [64, 32], 'lr': 0.001, 'dropout': 0.2},
        {'hidden_sizes': [128, 64], 'lr': 0.001, 'dropout': 0.2},
    ]

    # Initialize 5-fold TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=5)

    # 1D Model with 5-fold CV hyperparameter selection
    results_1d, best_model_1d, best_config_1d, best_cv_score_1d = _run_horizon(
        splits, '1D_Up', '1D',
        "Training 1D MLP model with 5-fold TimeSeriesSplit CV...",
        continuous_features, binary_features, configs, tscv, title_suffix)

    # 20D Model with 5-fold CV hyperparameter selection
    results_20d, best_model_20d, best_config_20d, best_cv_score_20d = _run_horizon(
        splits, '20D_Up', '20D',
        "\nTraining 20D MLP model with 5-fold TimeSeriesSplit CV...",
        continuous_features, binary_features, configs, tscv, title_suffix)

    # Model performance summary
    print("\nModel Performance Summary:")
    print(f"1D Model - AUC: {results_1d['auc']:.4f}, F1: {results_1d['f1_macro']:.4f}")
    print(f"20D Model - AUC: {results_20d['auc']:.4f}, F1: {results_20d['f1_macro']:.4f}")
    print(f"1D Best Config: {best_config_1d}")
    print(f"20D Best Config: {best_config_20d}")
    print(f"1D Best CV Score: {best_cv_score_1d:.4f}")
    print(f"20D Best CV Score: {best_cv_score_20d:.4f}")

    return results_1d, results_20d, best_model_1d, best_model_20d

# Load data
df = pd.read_csv('DATA.csv')

# Version 1: Baseline Model
continuous_features_base = [
    '1D_PastChangePct', '5D_PastChangePct', '20D_PastChangePct', 'J',
    'mfi', 'MACD', 'MACD_diff', 'BB_rel_pos', 'Vol_Change',
]
binary_features_base = ['MA5_GT_MA20']

print("\nVERSION 1: Baseline Model (Technical Indicators Only)")
print("-" * 60)

results_baseline = run_mlp_model(
    df, continuous_features_base, binary_features_base, "Baseline", "Baseline")

# Version 2: Interest Rate Model (baseline features plus one extra column)
continuous_features_ir = continuous_features_base + ['Interest_Rate']

print("\nVERSION 2: Interest Rate Model")
print("-" * 60)

results_ir = run_mlp_model(
    df, continuous_features_ir, binary_features_base,
    "with Interest Rate", "with Interest Rate")

# Version 3: Bitcoin Model (baseline features plus one extra column)
continuous_features_btc = continuous_features_base + ['Bitcoin_Close']

print("\nVERSION 3: Bitcoin Model")
print("-" * 60)

results_btc = run_mlp_model(
    df, continuous_features_btc, binary_features_base,
    "with Bitcoin", "with Bitcoin")

# Version 4: Gold Model (baseline features plus one extra column)
continuous_features_gold = continuous_features_base + ['Gold_Close']

print("\nVERSION 4: Gold Model")
print("-" * 60)

results_gold = run_mlp_model(
    df, continuous_features_gold, binary_features_base,
    "with Gold", "with Gold")

# Each result tuple starts with the 1D metrics dict, then the 20D metrics dict.
print("\n" + "=" * 70)
print("ALL MODEL VERSIONS COMPLETED!")
print("=" * 70)
print("Summary of all model performances:")
print(f"Baseline Model     - 1D AUC: {results_baseline[0]['auc']:.4f}, 20D AUC: {results_baseline[1]['auc']:.4f}")
print(f"Interest Rate Model - 1D AUC: {results_ir[0]['auc']:.4f}, 20D AUC: {results_ir[1]['auc']:.4f}")
print(f"Bitcoin Model      - 1D AUC: {results_btc[0]['auc']:.4f}, 20D AUC: {results_btc[1]['auc']:.4f}")
print(f"Gold Model         - 1D AUC: {results_gold[0]['auc']:.4f}, 20D AUC: {results_gold[1]['auc']:.4f}")