In [9]:
import pandas as pd
import numpy as np

def _load_indexed(csv_path):
    """Read a CSV, parse its "Date" column as datetimes, and use it as the index."""
    return pd.read_csv(csv_path, parse_dates=["Date"]).set_index("Date")


# Base spread/return table plus one table of model output per strategy.
spread_df = _load_indexed("outputs/spreads_testing.csv")
chrono_df = _load_indexed("outputs/chrono_dummy.csv")
bert_df = _load_indexed("outputs/bert_dummy.csv")
traditional_df = _load_indexed("outputs/traditional_dummy.csv")

# Attach each strategy table in turn. The inner join keeps only the
# (Date, Ticker Pair) keys present on both sides, and validate="one_to_one"
# makes pandas raise if either side repeats a key.
for signal_df in (chrono_df, bert_df, traditional_df):
    spread_df = spread_df.merge(
        signal_df,
        how="inner",
        on=["Date", "Ticker Pair"],
        validate="one_to_one",
    )

print(spread_df.head(10))

           Ticker Pair    Spread    Return  CHRONOBERT Spread  \
Date                                                            
2019-01-06   AAPL-MSFT -1.163671       NaN          -1.039492   
2019-01-13   AAPL-MSFT -0.914226  0.018647          -0.948792   
2019-01-20   AAPL-MSFT -1.150394 -0.018017          -0.988472   
2019-01-27   AAPL-MSFT -1.000587  0.011008          -0.619830   
2019-02-03   AAPL-MSFT  0.304717  0.096490           0.246179   
2019-02-10   AAPL-MSFT  0.299832 -0.000369           0.241298   
2019-02-17   AAPL-MSFT -0.024045 -0.024073           0.370758   
2019-02-24   AAPL-MSFT -0.221516 -0.014827          -0.029657   
2019-03-03   AAPL-MSFT -0.255065 -0.002496          -0.372434   
2019-03-10   AAPL-MSFT -0.169683  0.006178          -0.034043   

           CHRONOBERT Position  BERT Spread BERT Position  Traditional Spread  \
Date                                                                            
2019-01-06                 Buy    -1.089946           Buy

In [10]:
position_cols = ['CHRONOBERT Position', 'BERT Position', 'Traditional Position']

for position_col in position_cols:
    # Lag the position by one row within each ticker pair, so every row's
    # return is matched with the position from the preceding row of that pair.
    prior_position = spread_df.groupby('Ticker Pair')[position_col].shift(1)

    # 'Buy' earns the row's return, 'Sell' earns its negative; any other value
    # (including the missing lagged position on each pair's first row) is NaN.
    spread_df[position_col.replace("Position", "Strategy Return")] = np.select(
        [prior_position == 'Buy', prior_position == 'Sell'],
        [spread_df['Return'], -spread_df['Return']],
        default=np.nan,
    )


In [11]:
# Compound each strategy's per-period return into a running growth factor,
# restarting the product for every ticker pair. A missing strategy return is
# treated as 0 (factor 1), so it leaves the running product unchanged.
#
# groupby(...).cumprod() returns one value per input row in the original row
# order, so the result lines up with spread_df however the rows are sorted.
# The previous groupby(...).apply(...).tolist() produced values concatenated
# group by group in sorted key order, which only matched the frame when its
# rows were already contiguous and sorted by "Ticker Pair".
#
# Grouping by a plain array and assigning a plain array keeps the operation
# purely positional; "Date" repeats across pairs, so label alignment is avoided.
pair_keys = spread_df['Ticker Pair'].to_numpy()

for strategy in ('CHRONOBERT', 'BERT', 'Traditional'):
    growth_factor = 1 + spread_df[f'{strategy} Strategy Return'].fillna(0)
    spread_df[f'{strategy} Cumulative Return'] = (
        growth_factor.groupby(pair_keys).cumprod().to_numpy()
    )

In [12]:
# Columns to export: the pair identifier, then the per-period and the
# compounded return of each strategy (CHRONOBERT, BERT, Traditional).
per_period_cols = ['CHRONOBERT Strategy Return', 'BERT Strategy Return', 'Traditional Strategy Return']
compounded_cols = ['CHRONOBERT Cumulative Return', 'BERT Cumulative Return', 'Traditional Cumulative Return']
columns_to_save = ['Ticker Pair', *per_period_cols, *compounded_cols]

# Narrow the merged frame to just those columns.
strategy_returns_df = spread_df.loc[:, columns_to_save]

# Write the result to disk; index=True keeps the "Date" index as a CSV column.
strategy_returns_df.to_csv('outputs/portfolios.csv', index=True)

# Show the first 50 rows as a quick check of what was written.
print(strategy_returns_df.head(50))


           Ticker Pair  CHRONOBERT Strategy Return  BERT Strategy Return  \
Date                                                                       
2019-01-06   AAPL-MSFT                         NaN                   NaN   
2019-01-13   AAPL-MSFT                    0.018647              0.018647   
2019-01-20   AAPL-MSFT                   -0.018017              0.018017   
2019-01-27   AAPL-MSFT                    0.011008              0.011008   
2019-02-03   AAPL-MSFT                    0.096490             -0.096490   
2019-02-10   AAPL-MSFT                    0.000369              0.000369   
2019-02-17   AAPL-MSFT                    0.024073              0.024073   
2019-02-24   AAPL-MSFT                    0.014827             -0.014827   
2019-03-03   AAPL-MSFT                   -0.002496             -0.002496   
2019-03-10   AAPL-MSFT                    0.006178              0.006178   
2019-03-17   AAPL-MSFT                    0.027534              0.027534   
2019-03-24  

In [13]:
from sklearn.metrics import r2_score, mean_squared_error

# Compare the "Spread" column with the "CHRONOBERT Spread" column over every
# row of the merged frame (all ticker pairs pooled together).
y_true, y_pred = spread_df["Spread"], spread_df["CHRONOBERT Spread"]

# Both scorers are called as (y_true, y_pred), so evaluate them in one pass.
r2, mse = (metric(y_true, y_pred) for metric in (r2_score, mean_squared_error))

print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")


R²: 0.9395
MSE: 0.0593
