In [31]:
import pandas as pd
import numpy as np

spread_df = pd.read_csv("outputs/spreads_testing.csv", parse_dates=["Date"])
spread_df.set_index("Date", inplace=True)

#Load Predictions
chrono_df = pd.read_csv("outputs/CHRONOBERT_spreads_weekly.csv", parse_dates=["Date"])
chrono_df.set_index("Date", inplace=True)
bert_df = pd.read_csv("outputs/bert_spread.csv", parse_dates=["Date"])
bert_df.set_index("Date", inplace=True)
bert_df.rename(columns={"Bert Spread": "BERT Spread", "Bert Position": "Bert Position", "Ticker_Pair":"Ticker Pair"}, inplace=True)
traditional_df = pd.read_csv("outputs/Traditional Spreads weekly Return.csv", parse_dates=["Date"])
traditional_df.set_index("Date", inplace=True)
traditional_df.rename(columns={"Ticker Pair": "Ticker Pair", "Traditional_Spread": "Traditional Spread"}, inplace=True)

# Merge them together
spread_df = spread_df.merge(chrono_df, how="inner", on=["Date", "Ticker Pair"], validate='one_to_one')
spread_df = spread_df.merge(bert_df, how="inner", on=["Date", "Ticker Pair"], validate='one_to_one')
spread_df = spread_df.merge(traditional_df, how="inner", on=["Date", "Ticker Pair"], validate='one_to_one')

# Create Positions
spread_df["CHRONOBERT Position"] = np.where(spread_df["CHRONOBERT Spread"] < 0, "Buy", "Sell")
spread_df["BERT Position"] = np.where(spread_df["BERT Spread"] < 0, "Buy", "Sell")
spread_df["Traditional Position"] = np.where(spread_df["Traditional Spread"] < 0, "Buy", "Sell")

position_cols = ['CHRONOBERT Position', 'BERT Position', 'Traditional Position']

# Generate Returns
for col in position_cols:
    strat_return_col = f'{col.replace("Position", "Strategy Return")}'

    spread_df[strat_return_col] = np.where(
        spread_df.groupby('Ticker Pair')[col].shift(1)== 'Buy',
        spread_df['Return'],
        np.where(spread_df.groupby('Ticker Pair')[col].shift(1) == 'Sell', -spread_df['Return'], np.nan)
    )
    
spread_df['CHRONOBERT Cumulative Return'] =spread_df.groupby('Ticker Pair')['CHRONOBERT Strategy Return'].apply(lambda x: (1 + x.fillna(0)).cumprod()).tolist()
spread_df['BERT Cumulative Return'] =spread_df.groupby('Ticker Pair')['BERT Strategy Return'].apply(lambda x: (1 + x.fillna(0)).cumprod()).tolist()
spread_df['Traditional Cumulative Return'] =spread_df.groupby('Ticker Pair')['Traditional Strategy Return'].apply(lambda x: (1 + x.fillna(0)).cumprod()).tolist()

# Select the relevant columns for CHRONOBERT, BERT, and Traditional strategy returns
columns_to_save = [
    'Ticker Pair', 
    'CHRONOBERT Strategy Return', 'BERT Strategy Return', 'Traditional Strategy Return',
    'CHRONOBERT Cumulative Return', 'BERT Cumulative Return', 'Traditional Cumulative Return'
]

# Filter the dataframe to include only these columns
strategy_returns_df = spread_df[columns_to_save]

In [35]:
from sklearn.metrics import r2_score, mean_squared_error

# Get Avg Returns, R^2, and MSE
y_true = spread_df["Spread"]

final_returns_per_pair = strategy_returns_df.groupby("Ticker Pair").tail(1)


avg_returns = {
    "CHRONOBERT": final_returns_per_pair["CHRONOBERT Cumulative Return"].mean(),
    "BERT": final_returns_per_pair["BERT Cumulative Return"].mean(),
    "Traditional": final_returns_per_pair["Traditional Cumulative Return"].mean(),
}

results = {
    "Model": ["CHRONOBERT", "BERT", "Traditional"],
    "MSE": [
        mean_squared_error(y_true, spread_df["CHRONOBERT Spread"]),
        mean_squared_error(y_true, spread_df["BERT Spread"]),
        mean_squared_error(y_true, spread_df["Traditional Spread"]),
    ],
    "R2": [
        r2_score(y_true, spread_df["CHRONOBERT Spread"]),
        r2_score(y_true, spread_df["BERT Spread"]),
        r2_score(y_true, spread_df["Traditional Spread"]),
    ],
    "Return": [
        avg_returns["CHRONOBERT"],
        avg_returns["BERT"],
        avg_returns["Traditional"],
    ]
}

metrics_df = pd.DataFrame(results)

print(metrics_df)



         Model       MSE        R2    Return
0   CHRONOBERT  2.972381 -2.150949  0.950503
1         BERT  0.972601 -0.031031  0.963807
2  Traditional  0.914074  0.031012  1.009966
