In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt


In [2]:
file_path = "results/"

ridge_df = pd.read_csv(file_path + "ridge_result_df.csv")
gbm_df = pd.read_csv(file_path + "lightGBM_results_full.csv")
lasso_df = pd.read_csv(file_path + "Lasso Rolling Backtest Results.csv")
trans_df = pd.read_csv(file_path + "transformer_backtest_results_Huber_final.csv")

In [4]:
ridge_df.head()

Unnamed: 0,datetime,symbol,predicted_log_return,actual_log_return,weight_relative,weight_sign
0,2025-04-29 15:59:00,AMAT,-0.000307,-0.014007,0.0,-1.0
1,2025-04-29 15:59:00,AMD,-4.4e-05,-0.006581,0.0,-1.0
2,2025-04-29 15:59:00,AVGO,-0.000359,-0.008008,0.0,-1.0
3,2025-04-29 15:59:00,MU,1.3e-05,-0.019444,1.0,1.0
4,2025-04-29 15:59:00,NVDA,-0.00032,-0.025525,0.0,-1.0


In [5]:
gbm_df.head()

Unnamed: 0,datetime,symbol,predicted_log_return,actual_log_return,weight_relative,weight_sign,pnl_sample_sign,pnl_sample_relative
0,2025-04-29 15:59:00,AMAT,-0.000121,-0.014007,0.0,-1.0,0.014007,-0.0
1,2025-04-29 15:59:00,AMD,-0.000194,-0.006581,0.0,-1.0,0.006581,-0.0
2,2025-04-29 15:59:00,AVGO,-1.9e-05,-0.008008,0.0,-1.0,0.008008,-0.0
3,2025-04-29 15:59:00,MU,0.000106,-0.019444,0.39797,1.0,-0.019444,-0.007738
4,2025-04-29 15:59:00,NVDA,0.00016,-0.025525,0.60203,1.0,-0.025525,-0.015367


In [6]:
lasso_df.head()

Unnamed: 0,datetime,symbol,predicted_log_return,actual_log_return,weight_relative,weight_sign
0,2025-04-29 15:59:00,AMAT,0.001567,-0.014007,0.155424,1.0
1,2025-04-29 15:59:00,AMD,0.004571,-0.006581,0.453262,1.0
2,2025-04-29 15:59:00,AVGO,0.002464,-0.008008,0.244344,1.0
3,2025-04-29 15:59:00,MU,0.00119,-0.019444,0.118016,1.0
4,2025-04-29 15:59:00,NVDA,0.000292,-0.025525,0.028953,1.0


In [7]:
trans_df.head()

Unnamed: 0,datetime,symbol,predicted_return,position,actual_log_return,weight_relative,weight_sign
0,2025-04-29 15:59:00,AMD,1.9e-05,0.019483,-0.006581,0.253978,1.0
1,2025-04-29 15:59:00,MU,1.8e-05,0.018026,-0.019444,0.234991,1.0
2,2025-04-29 15:59:00,NVDA,1.6e-05,0.015607,-0.025525,0.203454,1.0
3,2025-04-29 15:59:00,AVGO,1.4e-05,0.013713,-0.008008,0.178765,1.0
4,2025-04-29 15:59:00,AMAT,1e-05,0.009881,-0.014007,0.128811,1.0


In [10]:
trans_df.rename(columns={'predicted_return': 'predicted_log_return'}, inplace=True)

In [12]:
def prepare_model_df(df, model_name):
    return (
        df[['datetime', 'symbol', 'predicted_log_return', 'actual_log_return', 'weight_relative', 'weight_sign']]
        .assign(model=model_name)
    )

ridge_df_clean = prepare_model_df(ridge_df, 'ridge')
gbm_df_clean   = prepare_model_df(gbm_df, 'lightgbm')
lasso_df_clean = prepare_model_df(lasso_df, 'lasso')
trans_df_clean = prepare_model_df(trans_df, 'transformer')

result_df = pd.concat([ridge_df_clean, gbm_df_clean, lasso_df_clean, trans_df_clean], ignore_index=True)

result_df

Unnamed: 0,datetime,symbol,predicted_log_return,actual_log_return,weight_relative,weight_sign,model
0,2025-04-29 15:59:00,AMAT,-0.000307,-0.014007,0.000000,-1.0,ridge
1,2025-04-29 15:59:00,AMD,-0.000044,-0.006581,0.000000,-1.0,ridge
2,2025-04-29 15:59:00,AVGO,-0.000359,-0.008008,0.000000,-1.0,ridge
3,2025-04-29 15:59:00,MU,0.000013,-0.019444,1.000000,1.0,ridge
4,2025-04-29 15:59:00,NVDA,-0.000320,-0.025525,0.000000,-1.0,ridge
...,...,...,...,...,...,...,...
390441,2025-10-28 15:58:00,MU,0.000031,-0.000135,0.386497,1.0,transformer
390442,2025-10-28 15:58:00,AMD,0.000016,-0.001316,0.203232,1.0,transformer
390443,2025-10-28 15:58:00,NVDA,0.000015,-0.000025,0.185135,1.0,transformer
390444,2025-10-28 15:58:00,AVGO,0.000011,0.000375,0.133862,1.0,transformer


In [19]:
print(result_df['predicted_log_return'].describe())

count    390446.000000
mean         -0.000001
std           0.000517
min          -0.014446
25%          -0.000148
50%           0.000005
75%           0.000149
max           0.017298
Name: predicted_log_return, dtype: float64


In [27]:
# Annualization factor (minute level)
N = 252 * 390

# Annual SR
def sharpe_ratio(pnl, annualize=True):
    mu = pnl.mean()
    sigma = pnl.std(ddof=1)
    if sigma == 0:
        return 0.0
    sr = mu / sigma
    if annualize:
        sr *= np.sqrt(N)
    return sr

# Raw SR
def raw_sharpe(pnl):
    mu = pnl.mean()
    sigma = pnl.std(ddof=1)
    return 0.0 if sigma == 0 else mu / sigma

# pnl
result_df['pnl'] = result_df['predicted_log_return'] * result_df['weight_relative']

# aggregate by model and datetime
pnl_by_model = (
    result_df
    .groupby(['model', 'datetime'])['pnl']
    .sum()
    .reset_index()
)

# Raw & Annualized Sharpe
sharpes_raw = pnl_by_model.groupby('model')['pnl'].apply(raw_sharpe)
sharpes_ann = pnl_by_model.groupby('model')['pnl'].apply(lambda x: sharpe_ratio(x, annualize=True))

print("Raw Sharpe (used for weight calculation:)")
print(sharpes_raw.round(3))
print("\nAnnualized Sharpe:")
print(sharpes_ann.round(3))

Raw Sharpe (used for weight calculation:)
model
lasso          0.917
lightgbm       1.008
ridge          0.791
transformer    2.041
Name: pnl, dtype: float64

Annualized Sharpe:
model
lasso          287.614
lightgbm       316.070
ridge          248.014
transformer    639.723
Name: pnl, dtype: float64


In [31]:
model_weights = sharpes_raw.clip(lower=0)
model_weights = model_weights / model_weights.sum()

model_weights = model_weights.rename("weight")

weights_df = model_weights.rename("weight").reset_index() 
display(weights_df)

Unnamed: 0,model,weight
0,lasso,0.192846
1,lightgbm,0.211926
2,ridge,0.166294
3,transformer,0.428935


In [29]:
ensemble_pred = (
    result_df
    .pivot_table(index=['datetime', 'symbol'], columns='model', values='predicted_log_return')
    .mul(model_weights, axis=1)
    .sum(axis=1)
    .reset_index(name='ensemble_predicted_log_return')
)

ensemble_pred

Unnamed: 0,datetime,symbol,ensemble_predicted_log_return
0,2025-04-29 15:59:00,AMAT,0.000230
1,2025-04-29 15:59:00,AMD,0.000841
2,2025-04-29 15:59:00,AVGO,0.000417
3,2025-04-29 15:59:00,MU,0.000262
4,2025-04-29 15:59:00,NVDA,0.000044
...,...,...,...
99744,2025-10-28 15:58:00,AMAT,0.000185
99745,2025-10-28 15:58:00,AMD,0.000631
99746,2025-10-28 15:58:00,AVGO,0.000516
99747,2025-10-28 15:58:00,MU,0.000683


In [33]:
weights_df.to_csv("results/model_weights.csv", index=False)

ensemble_pred.to_csv("results/ensemble_predictions.csv", index=False)

print("Saved：")
print("1. model_weights.csv")
print("2. ensemble_predictions.csv")


Saved：
1. model_weights.csv
2. ensemble_predictions.csv
