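The purpose of this notebook is to generate dummy data that mimics the output we expect from the model files: for each model (CHRONOBERT, BERT, Traditional) it adds random noise to the observed spreads and writes the resulting spreads and Long/Short positions to a CSV file.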

In [6]:
import pandas as pd
import numpy as np

# Load the observed spreads; the "Date" column is parsed to datetimes on read
input_path = "outputs/spreads.csv"  # Replace with your actual path
data = pd.read_csv(input_path, parse_dates=["Date"])

# Standard deviation of the Gaussian noise added to the observed spread.
# Raise or lower it for more/less variation in the dummy predictions.
NOISE_SCALE = 2

# One fixed seed per simulated model, so every dummy column is reproducible.
# Insertion order determines the order in which the columns are added to `data`.
MODEL_SEEDS = {"CHRONOBERT": 42, "BERT": 120, "Traditional": 1}


def simulate_spread(spread, seed, scale=NOISE_SCALE):
    """Return ``spread`` with zero-mean Gaussian noise added.

    The noise is drawn from a private ``np.random.RandomState(seed)``, which
    produces the same legacy stream as ``np.random.seed(seed)`` followed by
    ``np.random.normal`` but leaves the global NumPy RNG untouched, so other
    cells are not silently reseeded.

    Parameters
    ----------
    spread : pandas.Series
        Observed spread values; one noise sample is drawn per element.
    seed : int
        Seed for the private random generator.
    scale : float, optional
        Standard deviation of the noise (defaults to ``NOISE_SCALE``).

    Returns
    -------
    pandas.Series
        ``spread + noise``, with the same length and index as ``spread``.
    """
    rng = np.random.RandomState(seed)
    return spread + rng.normal(loc=0, scale=scale, size=len(spread))


# Add one "<model> Spread" column per simulated model
for model_name, seed in MODEL_SEEDS.items():
    data[f"{model_name} Spread"] = simulate_spread(data["Spread"], seed)

# Label each model's position from its simulated spread: "Long" where the
# simulated value is strictly below the observed spread, otherwise "Short"
# (ties and NaN comparisons therefore end up as "Short").
# NOTE(review): the earlier comment justified Long as "expect spread to rise";
# confirm that this sign convention is the intended one.
for model_name in ("CHRONOBERT", "BERT", "Traditional"):
    below_observed = data[f"{model_name} Spread"] < data["Spread"]
    data[f"{model_name} Position"] = np.where(below_observed, "Long", "Short")

# Index by date so that "Date" is written as the leading column of every CSV
data = data.set_index("Date")

# Per-model views: the ticker pair plus that model's spread and position
chrono = data[["Ticker Pair", "CHRONOBERT Spread", "CHRONOBERT Position"]]
bert = data[["Ticker Pair", "BERT Spread", "BERT Position"]]
traditional = data[["Ticker Pair", "Traditional Spread", "Traditional Position"]]

# Write each view to its own file, keeping the date index in the output
for frame, output_path in (
    (chrono, "outputs/chrono_dummy.csv"),
    (bert, "outputs/bert_dummy.csv"),
    (traditional, "outputs/traditional_dummy.csv"),
):
    frame.to_csv(output_path, index=True)