In [15]:
import pandas as pd
import numpy as np

# Base frame (Ticker Pair / Spread / Return), indexed by Date; the model
# forecasts below are joined onto it.
spread_df = pd.read_csv("outputs/spreads_testing.csv", parse_dates=["Date"]).set_index("Date")

# One frame per model, each indexed by Date. Column names are normalised so
# every frame exposes the same "Ticker Pair" join key and a "<Model> Spread" column.
chrono_df = pd.read_csv(
    "outputs/CHRONOBERT_spreads_weekly.csv", parse_dates=["Date"]
).set_index("Date")
bert_df = (
    pd.read_csv("outputs/bert_spread.csv", parse_dates=["Date"])
    .set_index("Date")
    .rename(columns={"Bert Spread": "BERT Spread", "Bert Position": "Bert Position", "Ticker_Pair":"Ticker Pair"})
)
traditional_df = (
    pd.read_csv("outputs/Traditional Spreads weekly Return.csv", parse_dates=["Date"])
    .set_index("Date")
    .rename(columns={"Ticker Pair": "Ticker Pair", "Traditional_Spread": "Traditional Spread"})
)

# Inner-join each model frame on (Date, Ticker Pair). validate='one_to_one'
# makes pandas raise if either side has duplicate keys.
for model_frame in (chrono_df, bert_df, traditional_df):
    spread_df = spread_df.merge(
        model_frame, how="inner", on=["Date", "Ticker Pair"], validate='one_to_one'
    )

# A negative model spread is labelled "Buy"; anything else (including NaN,
# since NaN < 0 is False) is labelled "Sell".
for model_label in ("CHRONOBERT", "BERT", "Traditional"):
    spread_df[f"{model_label} Position"] = np.where(
        spread_df[f"{model_label} Spread"] < 0, "Buy", "Sell"
    )

print(spread_df.head(10))

           Ticker Pair    Spread    Return  CHRONOBERT Spread  Unnamed: 0  \
Date                                                                        
2019-01-13   AAPL-MSFT -0.914226  0.018647           0.782743          10   
2019-01-20   AAPL-MSFT -1.150394 -0.018017           0.734276          20   
2019-01-27   AAPL-MSFT -1.000587  0.011008           0.756310          30   
2019-02-03   AAPL-MSFT  0.304717  0.096490           0.791206          40   
2019-02-10   AAPL-MSFT  0.299832 -0.000369           0.989668          50   
2019-02-17   AAPL-MSFT -0.024045 -0.024073           0.761414          60   
2019-02-24   AAPL-MSFT -0.221516 -0.014827           0.798999          70   
2019-03-03   AAPL-MSFT -0.255065 -0.002496           0.684607          80   
2019-03-10   AAPL-MSFT -0.169683  0.006178           0.855320          90   
2019-03-17   AAPL-MSFT  0.183140  0.027534           0.648276         100   

            BERT Spread Bert Position  Traditional Spread CHRONOBERT Positi

In [16]:
position_cols = ['CHRONOBERT Position', 'BERT Position', 'Traditional Position']

# For every model, derive "<Model> Strategy Return" from the position held on
# the previous row of the same Ticker Pair.
for position_col in position_cols:
    return_col = position_col.replace("Position", "Strategy Return")

    # shift(1) inside each pair: the first row of every pair has no prior
    # signal and therefore ends up NaN below.
    prior_signal = spread_df.groupby('Ticker Pair')[position_col].shift(1)
    was_buy = prior_signal == 'Buy'
    was_sell = prior_signal == 'Sell'
    period_return = spread_df['Return']

    # Prior "Buy" keeps the return, prior "Sell" flips its sign, anything else is NaN.
    spread_df[return_col] = np.where(
        was_buy,
        period_return,
        np.where(was_sell, -period_return, np.nan),
    )


In [17]:
# Compound each strategy's per-period returns into a running growth factor
# (starting from 1.0) separately for every Ticker Pair.
#
# The previous version used groupby(...).apply(...).tolist(), which emits values
# in *sorted group order* and then assigns them positionally. That only lines up
# when spread_df is already laid out as contiguous blocks in sorted pair order;
# otherwise the values land on the wrong rows without any error. GroupBy.cumprod
# returns a result in the caller's row order, so the assignment is always aligned.
for strategy_name in ("CHRONOBERT", "BERT", "Traditional"):
    # Missing returns (e.g. the first row of each pair) are treated as 0, i.e. a
    # growth factor of 1 that leaves the running product unchanged.
    growth_factor = 1 + spread_df[f"{strategy_name} Strategy Return"].fillna(0)
    spread_df[f"{strategy_name} Cumulative Return"] = (
        growth_factor.groupby(spread_df["Ticker Pair"]).cumprod()
    )

In [18]:
# Columns exported to the portfolio file: the pair identifier, then the
# per-period strategy returns, then the cumulative returns, for each model.
columns_to_save = [
    'Ticker Pair',
    *[f'{name} Strategy Return' for name in ('CHRONOBERT', 'BERT', 'Traditional')],
    *[f'{name} Cumulative Return' for name in ('CHRONOBERT', 'BERT', 'Traditional')],
]

# Keep only those columns (the Date index is retained).
strategy_returns_df = spread_df.loc[:, columns_to_save]

# Persist with the Date index written as the first CSV column.
strategy_returns_df.to_csv('outputs/portfolios.csv', index=True)

# Show the first 50 rows as a quick sanity check.
print(strategy_returns_df.head(50))


           Ticker Pair  CHRONOBERT Strategy Return  BERT Strategy Return  \
Date                                                                       
2019-01-13   AAPL-MSFT                         NaN                   NaN   
2019-01-20   AAPL-MSFT                    0.018017              0.018017   
2019-01-27   AAPL-MSFT                   -0.011008             -0.011008   
2019-02-03   AAPL-MSFT                   -0.096490             -0.096490   
2019-02-10   AAPL-MSFT                    0.000369              0.000369   
2019-02-17   AAPL-MSFT                    0.024073              0.024073   
2019-02-24   AAPL-MSFT                    0.014827              0.014827   
2019-03-03   AAPL-MSFT                    0.002496              0.002496   
2019-03-10   AAPL-MSFT                   -0.006178             -0.006178   
2019-03-17   AAPL-MSFT                   -0.027534             -0.027534   
2019-03-24   AAPL-MSFT                   -0.016653             -0.016653   
2019-03-31  

In [19]:
from sklearn.metrics import r2_score, mean_squared_error

# Models compared in the summary table, in output row order.
model_names = ["CHRONOBERT", "BERT", "Traditional"]

# Each model's spread column is scored against this column.
y_true = spread_df["Spread"]

# Last row of every Ticker Pair, i.e. the final cumulative value per pair
# (relies on the frame's existing row order within each pair).
final_returns_per_pair = strategy_returns_df.groupby("Ticker Pair").tail(1)

# Mean of the final cumulative value across pairs, per model.
avg_returns = {
    name: final_returns_per_pair[f"{name} Cumulative Return"].mean()
    for name in model_names
}

# One entry per model in every list, aligned with model_names.
results = {
    "Model": list(model_names),
    "MSE": [mean_squared_error(y_true, spread_df[f"{name} Spread"]) for name in model_names],
    "R2": [r2_score(y_true, spread_df[f"{name} Spread"]) for name in model_names],
    "Return": [avg_returns[name] for name in model_names],
}

metrics_df = pd.DataFrame(results)
metrics_df.to_csv("outputs/overall_results.csv", index=False)

