In [1]:
import pandas as pd
import numpy as np

spread_df = pd.read_csv("outputs/spreads.csv", parse_dates=["Date"])
spread_df.set_index("Date", inplace=True)

chrono_df = pd.read_csv("outputs/chrono_dummy.csv", parse_dates=["Date"])
chrono_df.set_index("Date", inplace=True)

bert_df = pd.read_csv("outputs/bert_dummy.csv", parse_dates=["Date"])
bert_df.set_index("Date", inplace=True)

traditional_df = pd.read_csv("outputs/traditional_dummy.csv", parse_dates=["Date"])
traditional_df.set_index("Date", inplace=True)


spread_df = spread_df.merge(chrono_df, how="inner", on=["Date", "Ticker Pair"], validate='one_to_one')
spread_df = spread_df.merge(bert_df, how="inner", on=["Date", "Ticker Pair"], validate='one_to_one')
spread_df = spread_df.merge(traditional_df, how="inner", on=["Date", "Ticker Pair"], validate='one_to_one')

print(spread_df.head(10))

           Ticker Pair    Spread    Return  CHRONOBERT Spread  \
Date                                                            
2019-02-28    AMD-NVDA -1.319545       NaN          -1.195366   
2019-03-31    AMD-NVDA -2.016619 -0.079436          -2.051185   
2019-04-30    AMD-NVDA -1.311970  0.074660          -1.150048   
2019-05-31    AMD-NVDA  1.456129  0.242786           1.836887   
2019-06-30    AMD-NVDA  0.568122 -0.104398           0.509584   
2019-07-31    AMD-NVDA  0.328065 -0.024705           0.269530   
2019-08-31    AMD-NVDA  0.707954  0.039029           1.102757   
2019-09-30    AMD-NVDA -0.474115 -0.117381          -0.282256   
2019-10-31    AMD-NVDA -0.341949  0.015581          -0.459318   
2019-11-30    AMD-NVDA  0.319481  0.074849           0.455121   

           CHRONOBERT Position  BERT Spread BERT Position  Traditional Spread  \
Date                                                                            
2019-02-28                 Buy    -1.245820           Buy

In [2]:
position_cols = ['CHRONOBERT Position', 'BERT Position', 'Traditional Position']

for col in position_cols:
    strat_return_col = f'{col.replace("Position", "Strategy Return")}'

    spread_df[strat_return_col] = np.where(
        spread_df.groupby('Ticker Pair')[col].shift(1)== 'Buy',
        spread_df['Return'],
        np.where(spread_df.groupby('Ticker Pair')[col].shift(1) == 'Sell', -spread_df['Return'], np.nan)
    )


In [23]:
spread_df['CHRONOBERT Cumulative Return'] =spread_df.groupby('Ticker Pair')['CHRONOBERT Strategy Return'].apply(lambda x: (1 + x.fillna(0)).cumprod()).tolist()
spread_df['BERT Cumulative Return'] =spread_df.groupby('Ticker Pair')['BERT Strategy Return'].apply(lambda x: (1 + x.fillna(0)).cumprod()).tolist()
spread_df['Traditional Cumulative Return'] =spread_df.groupby('Ticker Pair')['Traditional Strategy Return'].apply(lambda x: (1 + x.fillna(0)).cumprod()).tolist()

In [27]:
# Select the relevant columns for CHRONOBERT, BERT, and Traditional strategy returns
columns_to_save = [
    'Ticker Pair', 
    'CHRONOBERT Strategy Return', 'BERT Strategy Return', 'Traditional Strategy Return',
    'CHRONOBERT Cumulative Return', 'BERT Cumulative Return', 'Traditional Cumulative Return'
]

# Filter the dataframe to include only these columns
strategy_returns_df = spread_df[columns_to_save]

# Save to a CSV file
strategy_returns_df.to_csv('outputs/portfolios.csv', index=True)

# Optionally, print the dataframe to confirm the result
print(strategy_returns_df.head())


           Ticker Pair  CHRONOBERT Strategy Return  BERT Strategy Return  \
Date                                                                       
2019-02-28    AMD-NVDA                         NaN                   NaN   
2019-03-31    AMD-NVDA                   -0.079436             -0.079436   
2019-04-30    AMD-NVDA                    0.074660              0.074660   
2019-05-31    AMD-NVDA                    0.242786              0.242786   
2019-06-30    AMD-NVDA                    0.104398              0.104398   

            Traditional Strategy Return  CHRONOBERT Cumulative Return  \
Date                                                                    
2019-02-28                          NaN                      1.000000   
2019-03-31                     0.079436                      1.052117   
2019-04-30                     0.074660                      0.970518   
2019-05-31                     0.242786                      1.042730   
2019-06-30                   

In [15]:
from sklearn.metrics import r2_score, mean_squared_error

y_true = spread_df["Spread"]
y_pred = spread_df["CHRONOBERT Spread"]

# R² and MSE
r2 = r2_score(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)

print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")


R²: 0.9402
MSE: 0.0558
