Cell 1: Install dependencies

In [None]:
!pip install pandas scikit-learn matplotlib optuna joblib


Cell 2: Load predictions and labels

In [None]:
import pandas as pd

# Directory holding the per-model prediction CSVs and the label CSV.
# Replace with the actual output location; every path below derives from it.
DATA_DIR = '/content'

xgb = pd.read_csv(f'{DATA_DIR}/xgb_preds.csv')
lstm = pd.read_csv(f'{DATA_DIR}/lstm_preds.csv')
catb = pd.read_csv(f'{DATA_DIR}/catboost_preds.csv')
bayes = pd.read_csv(f'{DATA_DIR}/bayes_preds.csv')
labels = pd.read_csv(f'{DATA_DIR}/true_labels.csv')

# Join everything on `date` into one frame with a pred_<model> column per model.
# Assumes each prediction CSV carries a `pred` column and the label CSV a `true`
# column (those are the names the later cells read) -- TODO confirm against the files.
#
# validate='one_to_one' makes pandas raise MergeError when a date is duplicated
# on either side of a join, instead of silently multiplying rows and skewing
# every metric computed downstream.
df = (
    xgb.merge(lstm, on='date', suffixes=('_xgb', '_lstm'), validate='one_to_one')
    .merge(catb, on='date', validate='one_to_one')
    .rename(columns={'pred': 'pred_catboost'})
    .merge(bayes, on='date', validate='one_to_one')
    .rename(columns={'pred': 'pred_bayes'})
    .merge(labels, on='date', validate='one_to_one')
)

# The joins are inner joins: dates missing from any input are dropped. Fail
# here, with a clear message, if nothing survived.
if df.empty:
    raise ValueError("No common dates across prediction files and labels; merged frame is empty")


Cell 3: Score each model

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Turn each model's predictions into hard labels by rounding, then score them
# against the ground-truth column.
y_true = df['true']
results = {}

for name in ('xgb', 'lstm', 'catboost', 'bayes'):
    hard_preds = df[f'pred_{name}'].round()
    results[name] = {
        'Accuracy': accuracy_score(y_true, hard_preds),
        'F1': f1_score(y_true, hard_preds),
    }

# Leaderboard: one row per model, best F1 first (shown as the cell output).
pd.DataFrame(results).T.sort_values(by='F1', ascending=False)


Cell 4: Auto-select best model (highest F1)

In [None]:
# Name of the model with the highest F1; on a tie the first one in `results` wins.
best_model = max(results, key=lambda name: results[name]['F1'])
print(f"🏆 Best Model (by F1): {best_model}")


Cell 5: Save model choice for app routing

In [None]:
# Persist only the winning model's name (no trailing newline) to a text file
# in the current working directory.
output_path = "best_model_selected.txt"
with open(output_path, "w") as handle:
    handle.write(best_model)

print("✅ Best model name saved to best_model_selected.txt")


BONUS: Ensemble weight search via Optuna (optional)


In [None]:
import optuna

def objective(trial):
    """Optuna objective: F1 of a weighted blend of the xgb, lstm and catboost predictions.

    One weight in [0, 1] is sampled per model. The blend is the weighted
    average of the `pred_<model>` columns of the global `df` (weighted sum
    divided by the sum of the weights), thresholded at 0.5 and scored against
    `df['true']`.

    Dividing by the sum of the weights, rather than by the constant 3, keeps
    the blend on the same scale as the individual predictions. Otherwise small
    weights shrink every score below the 0.5 threshold and the search ends up
    tuning the threshold instead of the mix.

    NOTE(review): `pred_bayes` is not part of the blend -- confirm this is
    intentional. The weights are also tuned and scored on the same rows, so the
    returned F1 is an in-sample figure.

    Args:
        trial: Optuna trial used to sample "xgb_weight", "lstm_weight" and
            "catboost_weight".

    Returns:
        F1 score of the blended labels, or 0.0 when every sampled weight is 0
        (no model contributes, so there is no blend to score).
    """
    # Sample in a fixed order so parameter names and ordering stay stable.
    weights = {
        'xgb': trial.suggest_float("xgb_weight", 0, 1),
        'lstm': trial.suggest_float("lstm_weight", 0, 1),
        'catboost': trial.suggest_float("catboost_weight", 0, 1),
    }

    total_weight = sum(weights.values())
    if total_weight == 0:
        # Avoid 0/0 -> NaN predictions; an empty ensemble scores nothing.
        return 0.0

    weighted_sum = sum(df[f'pred_{name}'] * weight for name, weight in weights.items())
    combined = weighted_sum / total_weight
    pred_label = (combined > 0.5).astype(int)

    return f1_score(df['true'], pred_label)

# Seed the sampler so a fresh-kernel re-run reproduces the same trials and the
# same best weights. TPE is Optuna's default sampler; only the seed is new.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=50)

print("🧠 Best F1:", study.best_value)
print("🎛️ Best weights:", study.best_params)
