# Now set up RL training

In [107]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime

%matplotlib inline

# from finrl.agents.portfolio_optimization.architectures import EIIE

from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline

from pprint import pprint

# import sys
# sys.path.append("../FinRL-Library")

import itertools

In [108]:
import os
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)

check_and_make_directories([DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR])

In [112]:

# Need to start 5 years before the train start day of the model
TRAIN_START_DATE = '2004-04-01'
# TRAIN_START_DATE = '2022-01-01'

TRAIN_END_DATE = '2020-12-31'
TEST_START_DATE = '2021-01-01'
TEST_END_DATE = '2024-11-14'

from finrl.config_tickers import DOW_30_TICKER

# TODO Drop the DOW stock
value_to_remove = "DOW"
# Create a new list without the specified string
DOW_30_TICKER = [x for x in DOW_30_TICKER if x != value_to_remove]
print(DOW_30_TICKER)

import yfinance as yf
import pandas as pd

tickers = DOW_30_TICKER
tickers.append("^DJI")



# Download Dow Jones data
raw_df = yf.download(" ".join(tickers), start=TRAIN_START_DATE, end=TEST_END_DATE, interval='1mo', group_by = "ticker",)

# Display the downloaded data
print(raw_df)


[                       0%                       ]

['AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS']


[*********************100%***********************]  30 of 30 completed

Ticker                             ^DJI                              \
Price                              Open          High           Low   
Date                                                                  
2004-04-01 00:00:00+00:00  10357.519531  10570.809570  10219.179688   
2004-05-01 00:00:00+00:00  10227.269531  10386.320312   9852.190430   
2004-06-01 00:00:00+00:00  10187.179688  10498.669922  10134.860352   
2004-07-01 00:00:00+00:00  10434.000000  10448.089844   9913.919922   
2004-08-01 00:00:00+00:00  10138.450195  10211.250000   9783.910156   
...                                 ...           ...           ...   
2024-07-01 00:00:00+00:00  39186.199219  41376.000000  39037.941406   
2024-08-01 00:00:00+00:00  40916.960938  41585.210938  38499.269531   
2024-09-01 00:00:00+00:00  41489.671875  42628.320312  39993.070312   
2024-10-01 00:00:00+00:00  42262.968750  43325.089844  41704.628906   
2024-11-01 00:00:00+00:00  41869.820312  44486.699219  41647.300781   

Ticke




In [113]:
market = raw_df[["^DJI"]].dropna().reset_index().set_index("Date")
tickers.remove("^DJI")

In [114]:
import statsmodels.api as sm

start_loop = pd.to_datetime(TRAIN_START_DATE) + pd.DateOffset(years=5)

date_range_month = pd.date_range(start=start_loop, end=TEST_END_DATE, freq='MS')


results_df = pd.DataFrame(columns=['date', 'ticker', 'beta'])

for date in date_range_month:

    # Start and end date for regression
    beta_start = (date - pd.DateOffset(years=5)).tz_localize('UTC')
    beta_end = date.tz_localize('UTC')

    print(beta_start)
    print(beta_end)

    for ticker in DOW_30_TICKER:

        stock_data = raw_df[ticker].dropna().reset_index().set_index("Date")
        
        filtered_stock_data = stock_data[(stock_data.index >= beta_start) & (stock_data.index <= beta_end)]
        filtered_market = market[(market.index >= beta_start) & (market.index <= beta_end)]

        monthly_prices = pd.concat([filtered_stock_data["Adj Close"], filtered_market["^DJI"]["Adj Close"]], axis=1)
        monthly_prices.columns = [ticker, "market"]
        monthly_returns = monthly_prices.pct_change(1).dropna(axis=0)

        X = monthly_returns["market"].values
        y = monthly_returns[ticker].values

        X = sm.add_constant(X)

        model = sm.OLS(y, X)

        results = model.fit()
        new_row = pd.DataFrame({'date': [date], 'ticker': [ticker], 'beta': [results.params[1]]})
        results_df = pd.concat([results_df, new_row], ignore_index=True)

2004-04-01 00:00:00+00:00
2009-04-01 00:00:00+00:00
2004-05-01 00:00:00+00:00
2009-05-01 00:00:00+00:00
2004-06-01 00:00:00+00:00
2009-06-01 00:00:00+00:00
2004-07-01 00:00:00+00:00
2009-07-01 00:00:00+00:00
2004-08-01 00:00:00+00:00
2009-08-01 00:00:00+00:00
2004-09-01 00:00:00+00:00
2009-09-01 00:00:00+00:00
2004-10-01 00:00:00+00:00
2009-10-01 00:00:00+00:00
2004-11-01 00:00:00+00:00
2009-11-01 00:00:00+00:00
2004-12-01 00:00:00+00:00
2009-12-01 00:00:00+00:00
2005-01-01 00:00:00+00:00
2010-01-01 00:00:00+00:00
2005-02-01 00:00:00+00:00
2010-02-01 00:00:00+00:00
2005-03-01 00:00:00+00:00
2010-03-01 00:00:00+00:00
2005-04-01 00:00:00+00:00
2010-04-01 00:00:00+00:00
2005-05-01 00:00:00+00:00
2010-05-01 00:00:00+00:00
2005-06-01 00:00:00+00:00
2010-06-01 00:00:00+00:00
2005-07-01 00:00:00+00:00
2010-07-01 00:00:00+00:00
2005-08-01 00:00:00+00:00
2010-08-01 00:00:00+00:00
2005-09-01 00:00:00+00:00
2010-09-01 00:00:00+00:00
2005-10-01 00:00:00+00:00
2010-10-01 00:00:00+00:00
2005-11-01 0

In [115]:
print(results_df)

           date ticker      beta
0    2009-04-01    AXP  2.173087
1    2009-04-01   AMGN  0.518205
2    2009-04-01   AAPL  1.655085
3    2009-04-01     BA  1.388298
4    2009-04-01    CAT  1.900801
...         ...    ...       ...
5447 2024-11-01     VZ  0.445725
5448 2024-11-01      V  1.033801
5449 2024-11-01    WBA  0.816164
5450 2024-11-01    WMT  0.500176
5451 2024-11-01    DIS  1.381003

[5452 rows x 3 columns]


In [116]:
results_df.to_csv('beta_outputs.csv', index=False)