In [1]:
import os
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt
from pysr import PySRRegressor

import warnings
# Install a global "ignore" filter so that no warnings are shown by later cells.
# NOTE(review): this also hides deprecation warnings from pandas / PySR —
# consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# Sample window used when trimming the macro factors:
# start_date is inclusive and end_date is exclusive
# (rows are kept when start_date <= index < end_date).
start_date = "2010-01-01"
end_date = "2020-01-01"

In [2]:
# Macro factors taken from the paper "A Comprehensive Look at The Empirical
# Performance of Equity Premium Prediction".
# Source: https://sites.google.com/view/agoyal145
# Source: https://docs.google.com/spreadsheets/d/1g4LOaRj4TvwJr9RIaA_nwrXXWTOy46bP/edit#gid=2070662242

def _read_factor_sheet(sheet, date_format):
    """Load one sheet of the macro-factor workbook, trimmed to the sample window.

    Parameters
    ----------
    sheet : str
        Name of the worksheet to read.
    date_format : str
        ``strftime``-style format used to parse the sheet's first column,
        which becomes the datetime index.

    Returns
    -------
    pandas.DataFrame
        Rows with ``start_date <= index < end_date``, without the first three
        data columns, and without any column that still contains a missing
        value inside that window.
    """
    sheet_frame = pd.read_excel("data_clean/macro_factors.xlsx", sheet_name = sheet, index_col = 0)
    sheet_frame.index = pd.to_datetime(sheet_frame.index, format = date_format)

    # Boolean row mask: start is inclusive, end is exclusive.
    in_window = (sheet_frame.index >= start_date) & (sheet_frame.index < end_date)
    windowed = sheet_frame.iloc[in_window, 3:]
    return windowed.dropna(axis=1)


factors_annual = _read_factor_sheet("Annual", '%Y')
factors_monthly = _read_factor_sheet("Monthly", '%Y%m')

In [3]:
# Read the long-format portfolio weights produced by main_1_get_weight.ipynb

def _load_weights(weight_files, prefix, step):
    """Read weight CSVs and return one wide, down-sampled frame per strategy.

    Parameters
    ----------
    weight_files : list of str
        File names (not paths) inside the ``portfolio_weights`` directory.
    prefix : str
        Leading part of the file name that is stripped, together with the
        ``.csv`` suffix, to obtain the strategy name used as dictionary key.
    step : int
        Row stride applied after pivoting: every ``step``-th date is kept,
        starting with the first one.

    Returns
    -------
    dict of str -> pandas.DataFrame
        Strategy name mapped to a frame indexed by ``Date`` with one column
        per ``Ticker`` holding the ``Weight`` values.
    """
    weights = {}
    for file_name in weight_files:
        strategy_name = file_name.replace(prefix, '').replace('.csv', '')
        long_weights = pd.read_csv(os.path.join('portfolio_weights', file_name))
        long_weights["Date"] = pd.to_datetime(long_weights["Date"])
        wide_weights = long_weights.pivot(index = "Date", columns = "Ticker", values = "Weight")
        weights[strategy_name] = wide_weights[::step]
    return weights


# os.listdir returns entries in arbitrary order; sorting makes the order of the
# name lists and of the dictionaries below reproducible across runs/machines.
file_names = sorted(
    entry for entry in os.listdir('portfolio_weights')
    if os.path.isfile(os.path.join('portfolio_weights', entry))
)
file_names_annual = [entry for entry in file_names if "annual" in entry]
file_names_monthly = [entry for entry in file_names if "month" in entry]

names_annual = [entry.replace('weight_annual_', '').replace('.csv', '') for entry in file_names_annual]
names_monthly = [entry.replace('weight_monthly_', '').replace('.csv', '') for entry in file_names_monthly]

# Strides of 252 and 21 rows — presumably the approximate number of trading
# days per year and per month in the daily weight files; TODO confirm.
weights_annual = _load_weights(file_names_annual, 'weight_annual_', 252)
weights_monthly = _load_weights(file_names_monthly, 'weight_monthly_', 21)

# Symbolic Regression on Annual Weights

In [19]:
# Regressors: the annual macro factors restricted to the sample window.
x = factors_annual
# Targets: the annual weights of the "sae3CNNlstm" strategy (one column per ticker).
y = weights_annual["sae3CNNlstm"]

# Fit configuration, intentionally left commented out: per the original note
# the search involves some randomness, so the notebook loads a previously
# saved model with PySRRegressor.from_file instead of refitting here.
# model = PySRRegressor(
#     niterations=40,  # < Increase me for better results
#     binary_operators=["+", "-", "*", "/"],
#     extra_sympy_mappings={"inv": lambda x: 1 / x},
#     loss="loss(prediction, target) = (prediction - target)^2",
#     progress=False
# )

# model.fit(x, y)

In [46]:
# Restore a previously saved symbolic-regression model from disk rather than
# refitting it.
# NOTE(review): the ".pkl" name suggests a pickle file — only load it from a
# trusted source.
model = PySRRegressor.from_file("pysr_result.pkl")
# Bare expression so the notebook displays the loaded model.
model

Checking if pysr_result.pkl exists...
Loading model from pysr_result.pkl


In [58]:
# Print the selected ("best") equation for every target column of y.
# get_best() does not depend on the loop variable, so it is evaluated once and
# paired with the ticker names instead of being recomputed and indexed on
# every iteration.
best_equations = model.get_best()
for ticker, best_row in zip(y.columns, best_equations):
    best = best_row["equation"]
    print(f"{ticker} = " + best)

AGG = (CRSP_SPvwx - (((svar / (eqis + ntis)) + cay) * -1.865049))
DBC = ((corpr + eqis) - (ltr * (corpr * 4.225352)))
VIX = (((((cay / (-0.47917607 + ik)) + ntis) + ((eqis - lty) - svar)) - AAA) / 0.63616264)
VTI = ((0.022022313 - (svar * ((0.021675795 / (BAA - Rfree)) - BAA))) / AAA)


In [42]:
# Residuals of the PySR model: actual weights minus the model's predictions
# on the factors x.
# NOTE(review): the original note calls the error "pretty small", but the
# displayed output contains residuals as large as about 0.19 in magnitude
# (AGG on 2012-12-27) — confirm this is acceptable for portfolio weights.
y_hat = model.predict(x)
y - y_hat

Checking if pysr_result.pkl exists...
Loading model from pysr_result.pkl


Ticker,AGG,DBC,VIX,VTI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-12-29,-0.011566,0.008247,-0.016714,-0.018699
2011-12-28,0.032672,-0.028976,0.000152,0.00217
2012-12-27,-0.187323,0.159491,-0.00986,-0.06276
2013-12-26,0.034241,0.029307,-0.009217,0.023094
2014-12-26,-0.011823,0.019298,-0.00495,0.071462
2015-12-24,0.040172,-0.06479,-0.002006,0.032159
2016-12-23,0.062844,-0.094228,0.002142,-0.06074
2017-12-26,-0.027377,-0.037739,-0.007485,0.000324
2018-12-24,0.022045,0.022171,0.007524,0.010868
2019-12-23,0.009356,0.001061,0.009294,0.007615
