In [1]:
import time
from datetime import datetime
import pandas as pd
import numpy as np
import itertools
# Just to use YahooDownloader
# import finrl

In [2]:
# query_string="https://query1.finance.yahoo.com/v7/finance/download/TSLA?period1=1606780800&period2=1609372800&interval=1wk&events=history&includeAdjustedClose=true"
#ticker = 'TSLA'
#period1 = int(time.mktime(datetime.datetime(2020,12,1, 23,59).timetuple()))
#period2 = int(time.mktime(datetime.datetime(2020,12,31, 23,59).timetuple()))
#interval='1wk' # 1d, 1m
#interval = '1d'
#query_string=f"https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={period1}&period2={period2}&interval={interval}&events=history&includeAdjustedClose=true"
#df = pd.read_csv(query_string)

## 1. Get universe of stocks. 

We have downloaded a snapshot of the DJI universe. We take the list of tickers from `data/DJI_tickers.csv`.

In [3]:
# Read the saved snapshot of the DJI universe from disk.
tickers_df = pd.read_csv("data/DJI_tickers.csv")
# Keep only the ticker symbols; the download step iterates over this array.
tickers = tickers_df.loc[:, "DOW_30_TICKER"].values
print(tickers)

['AAPL' 'MSFT' 'JPM' 'V' 'RTX' 'PG' 'GS' 'NKE' 'DIS' 'AXP' 'HD' 'INTC'
 'WMT' 'IBM' 'MRK' 'UNH' 'KO' 'CAT' 'TRV' 'JNJ' 'CVX' 'MCD' 'VZ' 'CSCO'
 'XOM' 'BA' 'MMM' 'PFE' 'WBA' 'DD']


## 2. Download historical data

In [4]:
import time
from datetime import datetime

def ticker_downloader(p_start_date_str, p_end_date_str, p_ticker, p_interval = '1d'):
    """Fetch the price history of one ticker from the Yahoo Finance CSV endpoint.

    Parameters
    ----------
    p_start_date_str, p_end_date_str : str
        Range bounds in "%Y.%m.%d" format (e.g. "2020.12.01"). Each bound is
        taken at 23:59 of the given day.
    p_ticker : str
        Ticker symbol, inserted verbatim into the request URL.
    p_interval : str, default '1d'
        Sampling interval passed through as the ``interval`` query parameter.

    Returns
    -------
    pandas.DataFrame or None
        Whatever ``pd.read_csv`` parses from the response, or None when the
        request or the parsing raises (the error is printed, not re-raised).

    Raises
    ------
    ValueError
        If a date string does not match "%Y.%m.%d".
    """
    def _epoch_seconds(date_str):
        # Pin the time of day to 23:59 and convert to whole epoch seconds.
        # time.mktime interprets the struct_time in the machine's local timezone.
        moment = datetime.strptime(" ".join((date_str, "23:59")), "%Y.%m.%d %H:%M")
        return int(time.mktime(moment.timetuple()))

    period1 = _epoch_seconds(p_start_date_str)
    period2 = _epoch_seconds(p_end_date_str)
    url = (
        "https://query1.finance.yahoo.com/v7/finance/download/"
        f"{p_ticker}?period1={period1}&period2={period2}"
        f"&interval={p_interval}&events=history&includeAdjustedClose=true"
    )
    try:
        return pd.read_csv(url)
    except Exception as err:
        # Best effort: report the failure and let the caller decide what to do with None.
        print("Oops!", err.__class__, "occurred.")
        print(f"ticker_downloader: For ticker={p_ticker}, start={p_start_date_str}, end={p_end_date_str}: {err}")
    return None


# Test using TSLA
#ticker_downloader("2020.12.01", "2020.12.31", "TSLA")
# Test using a non-existent ticker
# ticker_downloader("2020.12.01", "2020.12.31", "TSLAXXX")
# Test using a start date earlier than the ticker's available history
# ticker_downloader("1980.12.01", "2020.12.31", "TSLA")


def download_all_tickers(p_start_date_str, p_end_date_str, p_tickers_list, sleep=True):
    """Download daily history for every ticker and return one long, gap-filled frame.

    Each ticker is fetched with ``ticker_downloader`` at the '1d' interval. The
    per-ticker frames are stacked and then expanded to the full (Date x tic)
    grid, so every downloaded ticker has one row for every date seen in any
    ticker; combinations without data carry NaN in the remaining columns.

    Parameters
    ----------
    p_start_date_str, p_end_date_str : str
        Range bounds in "%Y.%m.%d" format (e.g. "2020.12.01").
    p_tickers_list : sequence of str
        Ticker symbols to download.
    sleep : bool, default True
        When True, pause a random 0-4 whole seconds before every request
        except the first one.

    Returns
    -------
    pandas.DataFrame
        Sorted by ["Date", "tic"]; "Date" holds ``datetime.date`` objects.
        Tickers whose download failed are reported on stdout and left out.

    Raises
    ------
    ValueError
        If a date string is not in "%Y.%m.%d" format, or if no ticker at all
        could be downloaded.
    """
    # Round-trip through strptime so a malformed date fails here, before any request is sent.
    p_start_date_str = datetime.strptime(p_start_date_str, "%Y.%m.%d").strftime("%Y.%m.%d")
    p_end_date_str = datetime.strptime(p_end_date_str, "%Y.%m.%d").strftime("%Y.%m.%d")

    downloads = {}
    for position, ticker in enumerate(p_tickers_list):
        # Unsure whether Yahoo throttles rapid requests, so wait a random 0-4 seconds
        # between them. Keyed on position so a repeated first ticker still waits.
        if sleep and position > 0:
            time.sleep(np.random.randint(low=0, high=5))
        print(f"Downloading {ticker}..")
        ticker_frame = ticker_downloader(p_start_date_str, p_end_date_str, ticker, '1d')
        # Tag the rows with their ticker when the download succeeded.
        if ticker_frame is not None:
            ticker_frame["tic"] = ticker
        downloads[ticker] = ticker_frame
    print("Download finished! Organising records now..")

    # Separate failures explicitly instead of relying on pd.concat silently dropping None.
    failed = [str(name) for name, frame in downloads.items() if frame is None]
    frames = [frame for frame in downloads.values() if frame is not None]
    if not frames:
        raise ValueError(
            "download_all_tickers: no data was downloaded; "
            f"failed tickers: [{', '.join(failed)}]"
        )
    if failed:
        print(f"download_all_tickers: no data for {len(failed)} ticker(s): {', '.join(failed)}")

    all_data = pd.concat(frames, ignore_index=True)
    # Convert the "YYYY-MM-DD" strings into datetime.date objects in one vectorised pass.
    all_data["Date"] = pd.to_datetime(all_data["Date"], format="%Y-%m-%d").dt.date

    # Some tickers lack some dates: build every (Date, tic) pair and left-join the
    # downloaded rows onto it so the gaps show up as NaN rows.
    grid = pd.DataFrame(
        list(itertools.product(all_data["Date"].unique(), all_data["tic"].unique())),
        columns=["Date", "tic"],
    )
    all_data = grid.merge(all_data, how="left", on=["Date", "tic"]).sort_values(by=["Date", "tic"])
    print("Done!")
    return all_data
        


In [5]:
# Fetch daily history for the whole ticker universe over the study period.
all_data = download_all_tickers(
    p_start_date_str="1990.01.01",
    p_end_date_str="2021.12.01",
    p_tickers_list=tickers,
)

Downloading AAPL..
Downloading MSFT..
Downloading JPM..
Downloading V..
Downloading RTX..
Downloading PG..
Downloading GS..
Downloading NKE..
Downloading DIS..
Downloading AXP..
Downloading HD..
Downloading INTC..
Downloading WMT..
Downloading IBM..
Downloading MRK..
Downloading UNH..
Downloading KO..
Downloading CAT..
Downloading TRV..
Downloading JNJ..
Downloading CVX..
Downloading MCD..
Downloading VZ..
Downloading CSCO..
Downloading XOM..
Downloading BA..
Downloading MMM..
Downloading PFE..
Downloading WBA..
Downloading DD..
Download finished! Organising records now..
Done!


In [6]:
# Show the combined frame. Rows whose price columns are NaN (e.g. CSCO on 1990-01-02)
# are (Date, tic) pairs added by the gap-filling join for which nothing was downloaded.
all_data

Unnamed: 0,Date,tic,Open,High,Low,Close,Adj Close,Volume
0,1990-01-02,AAPL,0.314732,0.334821,0.312500,0.332589,0.266423,183198400.0
9,1990-01-02,AXP,8.915386,9.011943,8.851015,8.979757,5.152581,4301237.0
25,1990-01-02,BA,19.791668,20.500000,19.666668,20.500000,11.043626,2042400.0
17,1990-01-02,CAT,7.234375,7.359375,7.218750,7.359375,3.404977,2910400.0
23,1990-01-02,CSCO,,,,,,
...,...,...,...,...,...,...,...,...
241263,2021-12-01,V,196.029999,196.809998,190.100006,190.160004,190.160004,13204900.0
241282,2021-12-01,VZ,50.500000,50.919998,49.740002,49.770000,49.770000,26295500.0
241288,2021-12-01,WBA,45.299999,45.700001,43.689999,43.720001,43.720001,6257600.0
241272,2021-12-01,WMT,140.509995,140.720001,137.039993,137.139999,137.139999,12207100.0
