In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error, r2_score
from ticker_download_manager import TickerDownloadManager

## Assemble dataframes of predictors and targets

In [None]:
# Tickers whose returns serve as regression predictors, and the folder their
# downloads are stored under.
predictor_tickers = ["I:SPX", "I:DJI", "I:COMP", "I:NDX"]
tdm_predictors_path = os.path.join("input", "annual_predictors")
tdm_predictors = TickerDownloadManager(
    download_folder_name=tdm_predictors_path,
    tickers=predictor_tickers,
)
# get_latest_tickers returns three values; only the first (the long-format
# frame) is used in this notebook.
# NOTE(review): 252 presumably approximates one year of trading days, and
# use_cache=True presumably reuses earlier downloads - confirm against
# TickerDownloadManager.
long_predictors_df, _, _ = tdm_predictors.get_latest_tickers(
    days_in_past=252,
    use_cache=True,
)

# Tickers to be modeled as regression targets, fetched the same way into
# their own download folder.
target_tickers = ["GILD", "SBUX", "NVDA"]
tdm_targets_path = os.path.join("input", "annual_targets")
tdm_responses = TickerDownloadManager(
    download_folder_name=tdm_targets_path,
    tickers=target_tickers,
)
long_targets_df, _, _ = tdm_responses.get_latest_tickers(
    days_in_past=252,
    use_cache=True,
)

In [None]:
# Display the long-format predictors frame as returned by the download manager.
long_predictors_df

In [None]:
# Display the long-format targets frame as returned by the download manager.
long_targets_df

In [None]:
# Reshape the long predictors frame into a wide table: one row per datetime,
# one column per ticker, values are closing prices.
#
# This is done as a non-mutating chain so the cell is idempotent. The earlier
# in-place set_index removed the "datetime" column from long_predictors_df,
# so re-running the cell raised a KeyError and the frame displayed above no
# longer matched the variable's contents.
wide_predictors_df = (
    long_predictors_df
    .assign(datetime=lambda frame: pd.to_datetime(frame["datetime"]))
    .sort_values(by=["datetime", "ticker"])
    .pivot(index="datetime", columns="ticker", values="close")
)
wide_predictors_df

In [None]:
# Reshape the long targets frame into a wide table: one row per datetime,
# one column per ticker, values are closing prices.
#
# This is done as a non-mutating chain so the cell is idempotent. The earlier
# in-place set_index removed the "datetime" column from long_targets_df, so
# re-running the cell raised a KeyError and the frame displayed above no
# longer matched the variable's contents.
wide_targets_df = (
    long_targets_df
    .assign(datetime=lambda frame: pd.to_datetime(frame["datetime"]))
    .sort_values(by=["datetime", "ticker"])
    .pivot(index="datetime", columns="ticker", values="close")
)
wide_targets_df

## Convert everything to log returns

In [None]:
# Log return per predictor ticker: ln(close_t / close_{t-1}).
# The first row has no previous close to divide by (all NaN), so it is dropped.
predictors_log_returns_df = np.log(
    wide_predictors_df.div(wide_predictors_df.shift(periods=1))
).iloc[1:]
predictors_log_returns_df

In [None]:
# Log return per target ticker: ln(close_t / close_{t-1}).
# The first row has no previous close to divide by (all NaN), so it is dropped.
target_log_returns_df = np.log(
    wide_targets_df.div(wide_targets_df.shift(periods=1))
).iloc[1:]
target_log_returns_df

## Train regressions on every target using all predictors

### Explanatory model: predicting same-day close

In [None]:
# Fit one linear regression per target ticker, explaining the target's log
# return with the same-day log returns of all predictor tickers.
# The first n_train rows (in date order) are used for fitting; the remaining
# rows form the hold-out set the metrics are computed on.
n_train = 220
for target_ticker in target_log_returns_df.columns:
    print(f"********** Modeling {target_ticker} *********")
    np.random.seed(123)
    # Join predictors and this target on the date index (merge defaults to an
    # inner join) and drop any date with a missing value.
    Xy = predictors_log_returns_df.merge(
        target_log_returns_df[target_ticker], left_index=True, right_index=True
    ).dropna()
    X = Xy[predictor_tickers]
    y = Xy[target_ticker]
    # Positional split; .iloc is used for the Series as well so both halves
    # are sliced the same, explicit way.
    X_train, X_test = X.iloc[:n_train, :], X.iloc[n_train:, :]
    y_train, y_test = y.iloc[:n_train], y.iloc[n_train:]
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    rmse = root_mean_squared_error(y_test, y_pred)
    # Express the log-return RMSE as an equivalent simple-return percentage.
    rmse_pct = (np.exp(rmse) - 1) * 100
    r2 = r2_score(y_test, y_pred)
    print(f"RMSE: {rmse:.4f} ({rmse_pct:.4f}%)")
    print(f"R2: {r2:.4f}")
    # Label coefficients with the columns the model was actually fitted on.
    # wide_predictors_df.columns is in pivot (sorted) order, which differs from
    # predictor_tickers, so zipping with it paired coefficients with the wrong
    # tickers.
    print(f"Coeffcients: {list(zip(X.columns, model.coef_))}")
    print(f"Intercept: {model.intercept_:.4f}")
    

### Predicting *next* day's close

First explore the necessary DataFrame shift.

In [None]:
# Toy frame with two identical columns; col1 is then moved up one row so each
# row pairs col0 with the *following* row's col1. The last row has no
# following value, so its col1 becomes NaN.
shift_me = pd.DataFrame(
    {name: [1, 2, 3, 4] for name in ("col0", "col1")}
).assign(col1=lambda frame: frame["col1"].shift(-1))
shift_me

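Then perform the modeling. It isn't a huge code change: the target column is shifted up by one row, so each day's predictor returns are paired with the *next* day's target return.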

In [None]:
# Fit one linear regression per target ticker, predicting the target's
# NEXT-day log return from the current day's predictor log returns.
# The first n_train rows (in date order) are used for fitting; the remaining
# rows form the hold-out set the metrics are computed on.
n_train = 220
for target_ticker in target_log_returns_df.columns:
    print(f"********** Modeling {target_ticker} *********")
    np.random.seed(123)
    # Join predictors and this target on the date index (merge defaults to an
    # inner join).
    Xy = predictors_log_returns_df.merge(
        target_log_returns_df[target_ticker], left_index=True, right_index=True
    )
    # Move the target up one row so row t holds the target return of row t+1.
    # The final row then has no target and is removed by dropna, along with
    # any other row containing a missing value.
    Xy[target_ticker] = Xy[target_ticker].shift(-1)
    Xy = Xy.dropna()
    X = Xy[predictor_tickers]
    y = Xy[target_ticker]
    # Positional split; .iloc is used for the Series as well so both halves
    # are sliced the same, explicit way.
    X_train, X_test = X.iloc[:n_train, :], X.iloc[n_train:, :]
    y_train, y_test = y.iloc[:n_train], y.iloc[n_train:]
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    rmse = root_mean_squared_error(y_test, y_pred)
    # Express the log-return RMSE as an equivalent simple-return percentage.
    rmse_pct = (np.exp(rmse) - 1) * 100
    r2 = r2_score(y_test, y_pred)
    print(f"RMSE: {rmse:.4f} ({rmse_pct:.4f}%)")
    print(f"R2: {r2:.4f}")
    # Label coefficients with the columns the model was actually fitted on.
    # wide_predictors_df.columns is in pivot (sorted) order, which differs from
    # predictor_tickers, so zipping with it paired coefficients with the wrong
    # tickers.
    print(f"Coeffcients: {list(zip(X.columns, model.coef_))}")
    print(f"Intercept: {model.intercept_:.4f}")