Cell 1: Install dependencies

In [None]:
!pip install pandas scikit-learn matplotlib joblib


Cell 2: Load predictions from previous models

In [None]:
import pandas as pd

# Every prediction file is expected to carry two columns: 'date' and 'pred'.
xgb = pd.read_csv('/content/xgb_preds.csv')
lstm = pd.read_csv('/content/lstm_preds.csv')
finbert = pd.read_csv('/content/finbert_preds.csv')

# Join the three tables on 'date' in a single chain.
# The first merge disambiguates the clashing 'pred' columns with the
# '_xgb' / '_lstm' suffixes; the FinBERT 'pred' column no longer clashes,
# so it is renamed explicitly afterwards.
df = (
    xgb
    .merge(lstm, on='date', suffixes=('_xgb', '_lstm'))
    .merge(finbert, on='date')
    .rename(columns={'pred': 'pred_finbert'})
)

df.head()


Cell 3: Majority Voting Ensemble

In [None]:
import numpy as np

# Hard voting: round each model's prediction to zero decimals, then keep the
# most frequent value in every row (column 0 of the row-wise mode).
base_preds = df.loc[:, ['pred_xgb', 'pred_lstm', 'pred_finbert']]
df['voted'] = base_preds.round(0).mode(axis=1)[0]

# Optional evaluation once ground-truth labels are available
# (accuracy_score is not imported in this cell; it lives in sklearn.metrics):
# df = df.merge(true_labels_df, on='date')
# accuracy_score(df['true'], df['voted'])


Cell 4: Averaging Ensemble (Soft Voting)

In [None]:
# Soft voting: average the three model outputs row by row ...
df['average'] = df.loc[:, ['pred_xgb', 'pred_lstm', 'pred_finbert']].mean(axis='columns')
# ... and label a row 1 only when that average is strictly above 0.5.
df['average_label'] = df['average'].gt(0.5).astype(int)


Cell 5: Stacking Meta-Model

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Stacking: fit a logistic-regression meta-model that takes the three base
# model predictions as features and the ground-truth label as target, then
# report its performance on a held-out 30% of the rows.

# The target column is not created by the earlier cells, so fail early with
# an actionable message instead of a bare KeyError on the lookup below.
if 'true' not in df.columns:
    raise KeyError(
        "df has no 'true' column: merge your ground-truth labels on 'date' "
        "before fitting the stacker."
    )

X = df[['pred_xgb', 'pred_lstm', 'pred_finbert']]
y = df['true']  # actual ground truth (add your own column here)

# A fixed random_state makes the train/test split, and therefore the fitted
# model and the printed report, reproducible across kernel restarts.
# NOTE(review): rows are keyed by 'date'; if they form a time series, a
# chronological split (date-sorted rows with shuffle=False) may be more
# appropriate than a random one -- confirm with the data owner.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

stacker = LogisticRegression()
stacker.fit(X_train, y_train)

y_pred = stacker.predict(X_test)
print(classification_report(y_test, y_pred))


Cell 6: Save ensemble model

In [None]:
import joblib
# Persist the fitted stacking meta-model to the current working directory.
joblib.dump(value=stacker, filename="ensemble_model.pkl")
print("✅ Saved ensemble model.")
