In [3]:
from config import WINDOW_SIZE
from data.loader import load_price_data, load_carhart_factors
from features.engineering import compute_log_returns
from models.regression import fit_linear_model, evaluate_model
from evaluation.metrics import compute_sharpe
from strategy.value import value_strategy_predictor

In [4]:
import numpy as np
import pandas as pd

In [7]:
# Load data
data_dir = "data/data_full"
price_df = load_price_data(data_dir)
tickers = list(price_df.columns)
prices = price_df[tickers]

In [8]:
# Compute returns
log_ret = compute_log_returns(prices)
ff3, mom, carhart = load_carhart_factors(data_dir, log_ret.index)

In [9]:
# Split train/test
cutoff = "2019-08-31"
train_ret = log_ret[log_ret.index <= cutoff]
test_ret = log_ret[log_ret.index > cutoff]
train_ch = carhart[carhart.index <= cutoff]
test_ch = carhart[carhart.index > cutoff]

In [10]:
# Train model
X_train = np.array(train_ch)[:-1, :]
y_train = np.array(train_ret)[1:, :]
X_test = np.array(test_ch)[:-1, :]
y_test = np.array(test_ret)[1:, :]

In [11]:
model = fit_linear_model(X_train, y_train)
results = evaluate_model(model, X_test, y_test)

In [12]:
print("Test R^2:", results['r2'])
print("Test MSE:", results['mse'])

Test R^2: 0.00015956321115649414
Test MSE: 39.29556652783605


In [13]:
# Strategy simulation
factors = pd.concat([log_ret, carhart], axis=1)
n_titles = int(0.05 * len(tickers))

In [14]:
def linear_predictor(inputs):
    return model.predict(inputs[[-1], :])[0]

In [15]:
values = value_strategy_predictor(factors, prices, tickers, linear_predictor,
                                  horizon=WINDOW_SIZE, frequency=20, n_titles=n_titles,
                                  shift=len(y_train))

ValueError: X has 449 features, but LinearRegression is expecting 5 features as input.

In [None]:
sharpe = compute_sharpe(values)
print("Strategy Sharpe Ratio:", sharpe)