## 1. Import Libraries

In [25]:
import pandas as pd
import numpy as np

print('Libraries loaded')

Libraries loaded


## 2. Load Individual Model Forecasts

In [26]:
# Read the Random Forest and XGBoost forecast CSVs from the working directory
rf_forecast = pd.read_csv('randomforest_ha15m_forecast.csv')
xgb_forecast = pd.read_csv('xgboost_ha15m_forecast.csv')

# Report the dimensions of both frames, one per line
print(
    f'Random Forest shape: {rf_forecast.shape}',
    f'XGBoost shape: {xgb_forecast.shape}',
    sep='\n',
)

# Blank separator line, then the column names of each frame
print(
    f'\nRF columns: {rf_forecast.columns.tolist()}',
    f'XGB columns: {xgb_forecast.columns.tolist()}',
    sep='\n',
)

Random Forest shape: (95754, 2)
XGBoost shape: (78023, 2)

RF columns: ['Time', 'RF_Prediction']
XGB columns: ['Time', 'XGB_Prediction']


## 3. Merge Forecasts

In [27]:
# Merge strategy: XGBoost is the PRIMARY signal source, Random Forest is the CONFIRMATION.
# Give each frame's prediction column (its second column) a short, consistent name.
rf_temp = rf_forecast.rename(columns={rf_forecast.columns[1]: 'RF_Pred'})
xgb_temp = xgb_forecast.rename(columns={xgb_forecast.columns[1]: 'XGB_Pred'})

print(f'RF forecasts: {len(rf_temp)} rows (confirmation/filter)')
print(f'XGB forecasts: {len(xgb_temp)} rows (primary trigger)')

# LEFT JOIN with XGBoost as the left frame: every XGB row is kept, and RF rows whose
# Time has no matching XGB row are dropped.
ensemble_df = xgb_temp.merge(rf_temp, on='Time', how='left')

# A left join repeats an XGB row once per matching RF row, so duplicate RF timestamps
# would inflate the signal count reported below. Fail loudly instead of double-counting.
assert len(ensemble_df) == len(xgb_temp), (
    f'Merge changed the row count ({len(xgb_temp)} -> {len(ensemble_df)}): '
    "duplicate 'Time' values in the RF forecast"
)

print(f'\nAfter merge:')
print(f'Total XGB opportunity signals: {len(ensemble_df)} rows')
print(f'RF confirmation available: {ensemble_df["RF_Pred"].notna().sum()} times')
print(f'RF confirmation missing: {ensemble_df["RF_Pred"].isna().sum()} times')

# Fill RF gaps from the previous row, then back-fill any gap left at the very start.
# Series.ffill()/bfill() replace fillna(method=...), which pandas has deprecated.
# NOTE(review): the fill follows row order, so it assumes rows are chronological - confirm.
# NOTE(review): the back-fill copies a later RF value into earlier rows (look-ahead) -
# confirm this is acceptable for the downstream use.
ensemble_df['RF_Pred'] = ensemble_df['RF_Pred'].ffill().bfill()

print(f'After forward-fill: RF NaN remaining = {ensemble_df["RF_Pred"].isna().sum()}')
print(f'\nFirst 15 rows:')
print(ensemble_df.head(15))

RF forecasts: 95754 rows (confirmation/filter)
XGB forecasts: 78023 rows (primary trigger)

After merge:
Total XGB opportunity signals: 78023 rows
RF confirmation available: 78023 times
RF confirmation missing: 0 times
After forward-fill: RF NaN remaining = 0

First 15 rows:
                   Time  XGB_Pred  RF_Pred
0   2023-01-01 17:00:00         1        1
1   2023-01-01 23:15:00         1       -1
2   2023-01-02 01:15:00         1        1
3   2023-01-02 01:30:00         1        1
4   2023-01-02 01:45:00         1        1
5   2023-01-02 02:00:00         1        1
6   2023-01-02 02:15:00         1        1
7   2023-01-02 02:30:00         1        1
8   2023-01-02 03:00:00         1        1
9   2023-01-02 03:15:00         1        1
10  2023-01-02 03:30:00         1        1
11  2023-01-02 04:45:00         1        1
12  2023-01-02 05:00:00         1        1
13  2023-01-02 05:15:00         1        1
14  2023-01-02 05:30:00         1        1


  ensemble_df['RF_Pred'] = ensemble_df['RF_Pred'].fillna(method='ffill')
  ensemble_df['RF_Pred'] = ensemble_df['RF_Pred'].fillna(method='bfill')


## 4. Ensemble Voting (XGB Trigger, RF Confirmation)

In [28]:
# Ensemble voting rule (XGBoost is the trigger, Random Forest is the confirmation):
#   XGB=1  and RF=1   -> confirmed BUY  (1)
#   XGB=-1 and RF=-1  -> confirmed SELL (-1)
#   XGB and RF differ -> unconfirmed, emitted as 0

def ensemble_vote(xgb_signal, rf_signal):
    """Return the XGB signal when RF gives the same value, otherwise 0."""
    # Equality is the only test: a matching pair passes the XGB value through unchanged.
    return xgb_signal if xgb_signal == rf_signal else 0

# Vote for every row in one vectorized step rather than one Python call per row via
# DataFrame.apply(axis=1). Same rule as ensemble_vote(): keep the XGB value where RF
# has the same value, otherwise 0.
signals_agree = ensemble_df['XGB_Pred'] == ensemble_df['RF_Pred']
ensemble_df['Ensemble_Vote'] = ensemble_df['XGB_Pred'].where(signals_agree, 0)

print('Ensemble voting complete (XGB primary, RF confirmation)!')
print(f'\nVote distribution:')
print(ensemble_df['Ensemble_Vote'].value_counts())
print(f'\nDetailed breakdown:')
# Split the XGB triggers by direction and by whether RF gave the same value
confirmed_buy = ((ensemble_df['XGB_Pred'] == 1) & (ensemble_df['RF_Pred'] == 1)).sum()
unconfirmed_buy = ((ensemble_df['XGB_Pred'] == 1) & (ensemble_df['RF_Pred'] != 1)).sum()
confirmed_sell = ((ensemble_df['XGB_Pred'] == -1) & (ensemble_df['RF_Pred'] == -1)).sum()
unconfirmed_sell = ((ensemble_df['XGB_Pred'] == -1) & (ensemble_df['RF_Pred'] != -1)).sum()
print(f'  Confirmed BUY (XGB=1, RF=1): {confirmed_buy}')
print(f'  Unconfirmed BUY (XGB=1, RF≠1): {unconfirmed_buy}')
print(f'  Confirmed SELL (XGB=-1, RF=-1): {confirmed_sell}')
print(f'  Unconfirmed SELL (XGB=-1, RF≠-1): {unconfirmed_sell}')

Ensemble voting complete (XGB primary, RF confirmation)!

Vote distribution:
Ensemble_Vote
 1    28756
 0    25435
-1    23832
Name: count, dtype: int64

Detailed breakdown:
  Confirmed BUY (XGB=1, RF=1): 28756
  Unconfirmed BUY (XGB=1, RF≠1): 17950
  Confirmed SELL (XGB=-1, RF=-1): 23832
  Unconfirmed SELL (XGB=-1, RF≠-1): 7485


## 5. Calculate Agreement Metrics

In [29]:
# Agreement flag: does RF give the same value as the XGB decision?
def check_confirmation(xgb_pred, rf_pred):
    """Return 1 when the two predictions are equal, otherwise 0."""
    if xgb_pred == rf_pred:
        return 1
    return 0

# Flag agreement for all rows at once (1 = RF equals XGB, 0 = they differ) instead of
# calling check_confirmation() once per row through DataFrame.apply(axis=1).
ensemble_df['Confirmation'] = (
    ensemble_df['XGB_Pred'] == ensemble_df['RF_Pred']
).astype('int64')

# Confirmation strength: 100 when confirmed, 0 when contradicted
ensemble_df['Confirmation_Strength'] = ensemble_df['Confirmation'] * 100

print('Confirmation metrics calculated (RF confirms XGB triggers)')
print(f'\nConfirmation distribution:')
print(ensemble_df['Confirmation'].value_counts().sort_index())
print(f'Confirmed signals: {ensemble_df["Confirmation"].sum()} / {len(ensemble_df)} ({ensemble_df["Confirmation"].mean()*100:.1f}%)')
print(f'Contradicted signals: {(ensemble_df["Confirmation"] == 0).sum()} / {len(ensemble_df)} ({(ensemble_df["Confirmation"]==0).mean()*100:.1f}%)')

Confirmation metrics calculated (RF confirms XGB triggers)

Confirmation distribution:
Confirmation
0    25435
1    52588
Name: count, dtype: int64
Confirmed signals: 52588 / 78023 (67.4%)
Contradicted signals: 25435 / 78023 (32.6%)


## 6. Summary Statistics

In [30]:
# Tally the ensemble votes by direction
vote_column = ensemble_df['Ensemble_Vote']
bullish_signals = vote_column.eq(1).sum()
bearish_signals = vote_column.eq(-1).sum()
neutral_signals = vote_column.eq(0).sum()

# Count the rows where both models point the same way, per direction
both_buy_count = (ensemble_df['XGB_Pred'].eq(1) & ensemble_df['RF_Pred'].eq(1)).sum()
both_sell_count = (ensemble_df['XGB_Pred'].eq(-1) & ensemble_df['RF_Pred'].eq(-1)).sum()

print(f"Ensemble Results (XGB Primary, RF Confirmation):")
print(f"  Total XGB triggers: {len(ensemble_df)}")
print(f"  Confirmed BUY (XGB=1, RF=1): {both_buy_count}")
print(f"  Confirmed SELL (XGB=-1, RF=-1): {both_sell_count}")
print(f"  Unconfirmed (XGB ≠ RF): {neutral_signals}")

# Share of rows where the Confirmation flag is set, as a percentage
confirmation_flags = ensemble_df['Confirmation']
confirmation_rate = confirmation_flags.mean() * 100
confirmed_count = confirmation_flags.sum()
contradicted_count = confirmation_flags.eq(0).sum()
print(f"\nConfirmation Rate: {confirmation_rate:.1f}%")
print(f"  RF confirms XGB: {confirmed_count} times")
print(f"  RF contradicts XGB: {contradicted_count} times")

Ensemble Results (XGB Primary, RF Confirmation):
  Total XGB triggers: 78023
  Confirmed BUY (XGB=1, RF=1): 28756
  Confirmed SELL (XGB=-1, RF=-1): 23832
  Unconfirmed (XGB ≠ RF): 25435

Confirmation Rate: 67.4%
  RF confirms XGB: 52588 times
  RF contradicts XGB: 25435 times


## 7. Confidence Score

In [31]:
# Confidence score: all-or-nothing, based on whether RF equals the XGB trigger
def calculate_confidence(xgb_signal, rf_signal):
    """
    Return 100.0 when both signals are equal (RF confirms XGB),
    and 0.0 when they differ (RF contradicts XGB).
    """
    return 100.0 if xgb_signal == rf_signal else 0.0

# Score all rows at once (100.0 where RF equals XGB, 0.0 where they differ) instead of
# calling calculate_confidence() once per row through DataFrame.apply(axis=1).
signals_agree = ensemble_df['XGB_Pred'] == ensemble_df['RF_Pred']
ensemble_df['Confidence'] = signals_agree.astype('float64') * 100.0

print('Confidence scores calculated (100% = RF confirms XGB, 0% = RF contradicts)')
print(f"Average confidence: {ensemble_df['Confidence'].mean():.1f}%")

Confidence scores calculated (100% = RF confirms XGB, 0% = RF contradicts)
Average confidence: 67.4%


## 8. Save Ensemble Forecast

In [32]:
# Build the export frame: select the ensemble columns and give them output-facing names
export_names = {
    'Time': 'Time',
    'XGB_Pred': 'XGB_Trigger',
    'RF_Pred': 'RF_Confirm',
    'Ensemble_Vote': 'Signal',
    'Confirmation': 'Confirmed',
    'Confidence': 'Confidence',
}
output_df = ensemble_df[list(export_names)].rename(columns=export_names)

# Write the result next to the notebook, without the row index
output_df.to_csv('ensemble_ha15m_forecast.csv', index=False)

print('Ensemble forecast saved: ensemble_ha15m_forecast.csv')
print(f'Rows: {output_df.shape[0]} (XGB triggers with RF confirmation check)')
print(f'\nFirst 10 rows:')
print(output_df.head(10))

# Directional signals are +1/-1; 0 marks an XGB trigger that RF did not confirm
directional_count = output_df['Signal'].isin([1, -1]).sum()
flat_count = output_df['Signal'].eq(0).sum()
print(f'\nSignal breakdown:')
print(f'  Confirmed signals (±1): {directional_count}')
print(f'  Unconfirmed signals (0): {flat_count}')

Ensemble forecast saved: ensemble_ha15m_forecast.csv
Rows: 78023 (XGB triggers with RF confirmation check)

First 10 rows:
                  Time  XGB_Trigger  RF_Confirm  Signal  Confirmed  Confidence
0  2023-01-01 17:00:00            1           1       1          1       100.0
1  2023-01-01 23:15:00            1          -1       0          0         0.0
2  2023-01-02 01:15:00            1           1       1          1       100.0
3  2023-01-02 01:30:00            1           1       1          1       100.0
4  2023-01-02 01:45:00            1           1       1          1       100.0
5  2023-01-02 02:00:00            1           1       1          1       100.0
6  2023-01-02 02:15:00            1           1       1          1       100.0
7  2023-01-02 02:30:00            1           1       1          1       100.0
8  2023-01-02 03:00:00            1           1       1          1       100.0
9  2023-01-02 03:15:00            1           1       1          1       100.0

Signal 

## 9. Model Agreement Analysis

In [33]:
# How often RF gives the same value as XGB, over all XGB-triggered rows
confirmation_flags = ensemble_df['Confirmation']
total = len(ensemble_df)
rf_confirms = confirmation_flags.eq(1).sum()
rf_contradicts = confirmation_flags.eq(0).sum()

# Percentages of the total row count
confirm_pct = rf_confirms / total * 100
contradict_pct = rf_contradicts / total * 100

print('RF Confirmation Analysis (when XGB is triggered):')
print(f'  RF Confirms: {rf_confirms}/{total} ({confirm_pct:.1f}%)')
print(f'  RF Contradicts: {rf_contradicts}/{total} ({contradict_pct:.1f}%)')
print(f'\nTrade acceptance rates:')
print(f'  Use XGB signal (when RF confirms): {rf_confirms}')
print(f'  Skip signal (when RF contradicts): {rf_contradicts}')

RF Confirmation Analysis (when XGB is triggered):
  RF Confirms: 52588/78023 (67.4%)
  RF Contradicts: 25435/78023 (32.6%)

Trade acceptance rates:
  Use XGB signal (when RF confirms): 52588
  Skip signal (when RF contradicts): 25435


## 10. Final Summary

In [34]:
# Gather every figure first, then print the report in one pass
banner = '=' * 70
both_buy_count = (ensemble_df['XGB_Pred'].eq(1) & ensemble_df['RF_Pred'].eq(1)).sum()
both_sell_count = (ensemble_df['XGB_Pred'].eq(-1) & ensemble_df['RF_Pred'].eq(-1)).sum()
unconfirmed_count = ensemble_df['Ensemble_Vote'].eq(0).sum()

confirmation_flags = ensemble_df['Confirmation']
confirmation_rate = confirmation_flags.mean() * 100
execute_count = confirmation_flags.sum()
skip_mask = confirmation_flags.eq(0)
skip_count = skip_mask.sum()
skip_rate = skip_mask.mean() * 100

confidence_scores = ensemble_df['Confidence']
full_confidence_count = confidence_scores.eq(100).sum()
zero_confidence_count = confidence_scores.eq(0).sum()

print(banner)
print('ENSEMBLE VOTING SUMMARY - XGBoost PRIMARY + Random Forest CONFIRMATION')
print(banner)
print(f"\nXGB Opportunity Signals: {len(ensemble_df)}")
print(f"\nSignal Outcomes:")
print(f"  Confirmed BUY (XGB=1 ✓ RF=1):    {both_buy_count:6d}")
print(f"  Confirmed SELL (XGB=-1 ✓ RF=-1): {both_sell_count:6d}")
print(f"  Unconfirmed (XGB ✗ RF):          {unconfirmed_count:6d}")

print(f"\nRF Confirmation Rate: {confirmation_rate:.1f}%")
print(f"  Signals to EXECUTE: {execute_count} ({confirmation_rate:.1f}%)")
print(f"  Signals to SKIP: {skip_count} ({skip_rate:.1f}%)")

print(f"\nAverage Confidence: {confidence_scores.mean():.1f}%")
print(f"  100% (confirmed): {full_confidence_count}")
print(f"  0% (contradicted): {zero_confidence_count}")

print(f"\nOutput File: ensemble_ha15m_forecast.csv")
print(f"Strategy: Only take XGB signals confirmed by RF")
print(banner)

ENSEMBLE VOTING SUMMARY - XGBoost PRIMARY + Random Forest CONFIRMATION

XGB Opportunity Signals: 78023

Signal Outcomes:
  Confirmed BUY (XGB=1 ✓ RF=1):     28756
  Confirmed SELL (XGB=-1 ✓ RF=-1):  23832
  Unconfirmed (XGB ✗ RF):           25435

RF Confirmation Rate: 67.4%
  Signals to EXECUTE: 52588 (67.4%)
  Signals to SKIP: 25435 (32.6%)

Average Confidence: 67.4%
  100% (confirmed): 52588
  0% (contradicted): 25435

Output File: ensemble_ha15m_forecast.csv
Strategy: Only take XGB signals confirmed by RF
