In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ticker_download_manager import TickerDownloadManager

## Assemble dataframes of predictors and targets

In [2]:
# Symbols to fetch and the download folder used for each group.
predictor_tickers = ["I:SPX", "I:DJI", "I:COMP", "I:NDX"]
target_tickers = ["GILD", "SBUX", "NVDA"]
tdm_predictors_path = os.path.join("input", "annual_predictors")
tdm_targets_path = os.path.join("input", "annual_targets")

# Predictors: get_latest_tickers returns three values; only the first (the
# long-format frame) is kept here.
# NOTE(review): days_in_past=252 is presumably one year of trading days — confirm.
tdm_predictors = TickerDownloadManager(
    download_folder_name=tdm_predictors_path,
    tickers=predictor_tickers,
)
long_predictors_df, _, _ = tdm_predictors.get_latest_tickers(
    days_in_past=252,
    use_cache=True,
)

# Targets: same request, issued against the target symbols and their own folder.
tdm_responses = TickerDownloadManager(
    download_folder_name=tdm_targets_path,
    tickers=target_tickers,
)
long_targets_df, _, _ = tdm_responses.get_latest_tickers(
    days_in_past=252,
    use_cache=True,
)

In [3]:
# Inspect the long-format predictors frame: one row per (datetime, ticker)
# pair with open/high/low/close columns. In the rows rendered below, volume,
# vwap and transactions are empty for these tickers.
long_predictors_df

Unnamed: 0,datetime,ticker,open,high,low,close,volume,vwap,transactions
0,2024-05-28 23:59:59,I:SPX,5315.910000,5315.910000,5280.890000,5306.040000,,,
1,2024-05-28 23:59:59,I:NDX,18855.150159,18875.197139,18755.944478,18869.440037,,,
2,2024-05-28 23:59:59,I:COMP,16988.314759,17032.655407,16917.511581,17019.880141,,,
3,2024-05-28 23:59:59,I:DJI,39028.990000,39028.990000,38706.140000,38852.860000,,,
4,2024-05-29 23:59:59,I:NDX,18708.073630,18814.815205,18706.624582,18736.754522,,,
...,...,...,...,...,...,...,...,...,...
1004,2025-05-28 23:59:59,I:SPX,5925.540000,5939.920000,5881.880000,5888.550000,,,
1005,2025-05-29 23:59:59,I:DJI,42190.020000,42266.000000,41828.350000,42215.730000,,,
1006,2025-05-29 23:59:59,I:COMP,19389.391805,19389.391805,19091.542474,19175.872041,,,
1007,2025-05-29 23:59:59,I:SPX,5939.960000,5943.130000,5873.800000,5912.170000,,,


In [4]:
# Inspect the long-format targets frame: one row per (datetime, ticker) pair
# with open/high/low/close columns. In the rows rendered below, volume, vwap
# and transactions are populated.
long_targets_df

Unnamed: 0,datetime,ticker,open,high,low,close,volume,vwap,transactions
0,2024-05-28 23:59:59,GILD,65.310,65.5515,63.740,63.940,5593404.0,64.2810,73383
1,2024-05-28 23:59:59,SBUX,78.790,78.8000,77.410,77.480,8833325.0,78.0983,125897
2,2024-05-28 23:59:59,NVDA,110.244,114.9390,109.883,113.901,652354890.0,112.6298,1706145
3,2024-05-29 23:59:59,GILD,63.500,63.9100,63.100,63.500,5609571.0,63.4564,57419
4,2024-05-29 23:59:59,SBUX,77.100,78.3200,76.830,76.880,9268066.0,77.2934,126584
...,...,...,...,...,...,...,...,...,...
751,2025-05-28 23:59:59,GILD,109.115,109.4500,107.230,108.440,4598301.0,108.3785,72433
752,2025-05-28 23:59:59,SBUX,87.430,87.5950,84.540,86.000,12328514.0,86.0036,147622
753,2025-05-29 23:59:59,SBUX,84.900,85.7007,83.699,84.050,10310709.0,84.4479,128697
754,2025-05-29 23:59:59,GILD,108.590,111.2400,108.120,111.110,5596890.0,110.6356,82431


In [5]:
# Reshape the long predictors frame (one row per datetime/ticker pair) into a
# wide frame of closing values: one row per datetime, one column per ticker.
#
# The normalisation step moves "datetime" from a column into the index, so it
# is guarded: when this cell is re-run the column no longer exists, and the
# unguarded version raised KeyError: 'datetime'. The frame is rebuilt with a
# method chain rather than inplace=True, so the object originally returned by
# get_latest_tickers is left untouched and the name is simply rebound.
if "datetime" in long_predictors_df.columns:
    long_predictors_df = (
        long_predictors_df
        .assign(datetime=lambda df: pd.to_datetime(df["datetime"]))
        .sort_values(by=["datetime", "ticker"])
        .set_index("datetime")
    )

# No index argument is passed, so pivot keeps the existing (datetime) index.
wide_predictors_df = long_predictors_df[["ticker", "close"]].pivot(columns="ticker", values="close")
wide_predictors_df

ticker,I:COMP,I:DJI,I:NDX,I:SPX
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-05-28 23:59:59,17019.880141,38852.86,18869.440037,5306.04
2024-05-29 23:59:59,16920.580419,38441.54,18736.754522,5266.95
2024-05-30 23:59:59,16737.079148,38111.48,18538.662594,5235.48
2024-05-31 23:59:59,16735.015277,38686.32,18536.650840,5277.51
2024-06-03 23:59:59,16828.669938,38571.03,18600.974302,5283.40
...,...,...,...,...
2025-05-22 23:59:59,18925.734941,41859.09,21112.471220,5842.01
2025-05-23 23:59:59,18737.207177,41603.07,20915.654860,5802.82
2025-05-27 23:59:59,19199.163247,42343.65,21414.990676,5921.54
2025-05-28 23:59:59,19100.937701,42098.70,21318.168484,5888.55


In [6]:
# Reshape the long targets frame (one row per datetime/ticker pair) into a
# wide frame of closing prices: one row per datetime, one column per ticker.
#
# The normalisation step moves "datetime" from a column into the index, so it
# is guarded: when this cell is re-run the column no longer exists, and the
# unguarded version raised KeyError: 'datetime'. The frame is rebuilt with a
# method chain rather than inplace=True, so the object originally returned by
# get_latest_tickers is left untouched and the name is simply rebound.
if "datetime" in long_targets_df.columns:
    long_targets_df = (
        long_targets_df
        .assign(datetime=lambda df: pd.to_datetime(df["datetime"]))
        .sort_values(by=["datetime", "ticker"])
        .set_index("datetime")
    )

# No index argument is passed, so pivot keeps the existing (datetime) index.
wide_targets_df = long_targets_df[["ticker", "close"]].pivot(columns="ticker", values="close")
wide_targets_df

ticker,GILD,NVDA,SBUX
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-05-28 23:59:59,63.94,113.901,77.48
2024-05-29 23:59:59,63.50,114.825,76.88
2024-05-30 23:59:59,64.08,110.500,78.76
2024-05-31 23:59:59,64.27,109.633,80.22
2024-06-03 23:59:59,63.43,115.000,82.08
...,...,...,...
2025-05-22 23:59:59,106.74,132.830,83.97
2025-05-23 23:59:59,107.37,131.290,84.40
2025-05-27 23:59:59,109.09,135.500,87.01
2025-05-28 23:59:59,108.44,134.810,86.00
