In [60]:
import time
from datetime import datetime
import pandas as pd
import numpy as np
# Just to use YahooDownloader
import finrl

In [2]:
# Hand-written example of the Yahoo Finance CSV download URL: TSLA, weekly bars
# (interval=1wk), with period1/period2 given as Unix epoch seconds.
query_string="https://query1.finance.yahoo.com/v7/finance/download/TSLA?period1=1606780800&period2=1609372800&interval=1wk&events=history&includeAdjustedClose=true"

In [9]:
# Build the Yahoo Finance CSV download URL for one ticker over December 2020.
ticker = 'TSLA'
# `datetime` here is the class brought in by `from datetime import datetime`,
# so it is called directly (`datetime.datetime(...)` raises AttributeError).
# time.mktime interprets these naive timestamps in the machine's local timezone,
# so the epoch values below depend on where the notebook is run.
period1 = int(time.mktime(datetime(2020, 12, 1, 23, 59).timetuple()))
period2 = int(time.mktime(datetime(2020, 12, 31, 23, 59).timetuple()))
interval = '1d'  # other values tried here: '1wk', '1m'
query_string = (
    f"https://query1.finance.yahoo.com/v7/finance/download/{ticker}"
    f"?period1={period1}&period2={period2}&interval={interval}"
    f"&events=history&includeAdjustedClose=true"
)

In [10]:
query_string


'https://query1.finance.yahoo.com/v7/finance/download/TSLA?period1=1606838340&period2=1609430340&interval=1d&events=history&includeAdjustedClose=true'

In [11]:
# Fetch the CSV straight from the URL built above; this is a network request.
df = pd.read_csv(query_string)

In [12]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-12-01,597.590027,597.849976,572.049988,584.76001,584.76001,40103500
1,2020-12-02,556.440002,571.539978,541.210022,568.820007,568.820007,47775700
2,2020-12-03,590.02002,598.969971,582.429993,593.380005,593.380005,42552000
3,2020-12-04,591.01001,599.039978,585.5,599.039978,599.039978,29401300
4,2020-12-07,604.919983,648.789978,603.049988,641.76001,641.76001,56309700
5,2020-12-08,625.51001,651.280029,618.5,649.880005,649.880005,64265000
6,2020-12-09,653.690002,654.320007,588.0,604.47998,604.47998,71291200
7,2020-12-10,574.369995,627.75,566.340027,627.070007,627.070007,67083200
8,2020-12-11,615.01001,624.0,596.799988,609.98999,609.98999,46475000
9,2020-12-14,619.0,642.75,610.200012,639.830017,639.830017,52040600


## 1. Get the universe of stocks

We have downloaded a snapshot of the DJI (Dow Jones Industrial Average) universe. The list of tickers is read from `data/DJI_tickers.csv`.

In [50]:
# Load the saved DJI ticker snapshot (path is relative to the notebook's working directory).
tickers_df = pd.read_csv("data/DJI_tickers.csv")
# The "DOW_30_TICKER" column holds the symbols; .values yields a NumPy object array.
tickers_list = tickers_df["DOW_30_TICKER"].values
# Bare expression so the cell displays the array.
tickers_list

array(['AAPL', 'MSFT', 'JPM', 'V', 'RTX', 'PG', 'GS', 'NKE', 'DIS', 'AXP',
       'HD', 'INTC', 'WMT', 'IBM', 'MRK', 'UNH', 'KO', 'CAT', 'TRV',
       'JNJ', 'CVX', 'MCD', 'VZ', 'CSCO', 'XOM', 'BA', 'MMM', 'PFE',
       'WBA', 'DD'], dtype=object)

## 2. Download historical data

In [78]:
import time
from datetime import datetime

def ticker_downloader(p_start_date_str, p_end_date_str, p_ticker, p_interval = '1d'):
    """Download price history for one ticker from the Yahoo Finance CSV endpoint.

    Parameters
    ----------
    p_start_date_str, p_end_date_str : str
        Range boundaries in ``YYYY.MM.DD`` format. Each is pinned to 23:59 of
        that day and converted to epoch seconds with ``time.mktime`` (which
        uses the machine's local timezone).
    p_ticker : str
        Symbol inserted into the download URL.
    p_interval : str, default '1d'
        Value of the ``interval`` query parameter.

    Returns
    -------
    pandas.DataFrame or None
        The parsed CSV, or ``None`` when the request/parse raises; in that
        case the error is printed rather than propagated.

    Raises
    ------
    ValueError
        If either date string does not match ``%Y.%m.%d``.
    """
    # Pin both boundaries to end-of-day, then parse, then convert to epoch seconds.
    stamped = [day + " " + "23:59" for day in (p_start_date_str, p_end_date_str)]
    parsed = [datetime.strptime(text, "%Y.%m.%d %H:%M") for text in stamped]
    period1, period2 = [int(time.mktime(moment.timetuple())) for moment in parsed]

    url = (
        f"https://query1.finance.yahoo.com/v7/finance/download/{p_ticker}"
        f"?period1={period1}&period2={period2}&interval={p_interval}"
        f"&events=history&includeAdjustedClose=true"
    )

    # Best effort: any failure is reported and signalled with None so that a
    # bulk download can continue with the remaining tickers.
    try:
        return pd.read_csv(url)
    except Exception as err:
        print("Oops!", err.__class__, "occurred.")
        print(f"ticker_downloader: For ticker={p_ticker}, start={p_start_date_str}, end={p_end_date_str}: {err}")
        return None


# Test using TSLA
#ticker_downloader("2020.12.01", "2020.12.31", "TSLA")
# Test using a non-existent ticker
# ticker_downloader("2020.12.01", "2020.12.31", "TSLAXXX")
# Test using a start date for which the ticker has no data
# ticker_downloader("1980.12.01", "2020.12.31", "TSLA")


def download_all_tickers(p_start_date_str, p_end_date_str, p_tickers_list, sleep=True):
    """Download daily history for every ticker in ``p_tickers_list``.

    Parameters
    ----------
    p_start_date_str, p_end_date_str : str
        Range boundaries in ``YYYY.MM.DD`` format.
    p_tickers_list : iterable of str
        Symbols to download, requested in iteration order.
    sleep : bool, default True
        When true, pause a random 0-4 whole seconds before every request
        except the first, to avoid hammering the endpoint.

    Returns
    -------
    dict
        Maps each ticker to the DataFrame returned by ``ticker_downloader``
        (with an added ``"tic"`` column holding the ticker), or to ``None``
        when that download failed.

    Raises
    ------
    ValueError
        If either date string does not match ``%Y.%m.%d``.
    """
    # Round-trip through strptime/strftime: rejects malformed dates up front
    # and normalises the text (e.g. zero-padding) before it is reused.
    p_start_date_str = datetime.strptime(p_start_date_str, "%Y.%m.%d").strftime("%Y.%m.%d")
    p_end_date_str = datetime.strptime(p_end_date_str, "%Y.%m.%d").strftime("%Y.%m.%d")

    data_dict = {}
    # Iterate over the argument, not the notebook-global `tickers_list`, so the
    # function honours what the caller passed and works outside this notebook.
    for position, ticker in enumerate(p_tickers_list):
        # Not sure whether Yahoo blocks very fast requests, so wait a random
        # 0-4 seconds (randint's `high` is exclusive) between downloads.
        if sleep and position > 0:
            time.sleep(np.random.randint(low=0, high=5, size=1)[0])
        print(f"Downloading {ticker}..")
        frame = ticker_downloader(p_start_date_str, p_end_date_str, ticker, '1d')
        # Tag the rows with their ticker only when the download succeeded.
        if frame is not None:
            frame["tic"] = ticker
        data_dict[ticker] = frame
    print("Download finished!")
    return data_dict
        
# Download daily history from 1990.01.01 to 2021.12.01 for the DJI tickers loaded
# earlier; one network request per ticker, so this cell is slow to re-run.
all_data_dict = download_all_tickers("1990.01.01", "2021.12.01", tickers_list)

Downloading AAPL..
Downloading MSFT..
Downloading JPM..
Downloading V..
Downloading RTX..
Downloading PG..
Downloading GS..
Downloading NKE..
Downloading DIS..
Downloading AXP..
Downloading HD..
Downloading INTC..
Downloading WMT..
Downloading IBM..
Downloading MRK..
Downloading UNH..
Downloading KO..
Downloading CAT..
Downloading TRV..
Downloading JNJ..
Downloading CVX..
Downloading MCD..
Downloading VZ..
Downloading CSCO..
Downloading XOM..
Downloading BA..
Downloading MMM..
Downloading PFE..
Downloading WBA..
Downloading DD..


In [79]:
# TODO: join the whole universe with this data.
# For now this cell only displays the ticker -> DataFrame mapping.
all_data_dict


{'AAPL':             Date        Open        High         Low       Close   Adj Close  \
 0     1990-01-02    0.314732    0.334821    0.312500    0.332589    0.266423   
 1     1990-01-03    0.339286    0.339286    0.334821    0.334821    0.268211   
 2     1990-01-04    0.341518    0.345982    0.332589    0.335938    0.269106   
 3     1990-01-05    0.337054    0.341518    0.330357    0.337054    0.270000   
 4     1990-01-08    0.334821    0.339286    0.330357    0.339286    0.271788   
 ...          ...         ...         ...         ...         ...         ...   
 8038  2021-11-24  160.750000  162.139999  159.639999  161.940002  161.940002   
 8039  2021-11-26  159.570007  160.449997  156.360001  156.809998  156.809998   
 8040  2021-11-29  159.369995  161.190002  158.789993  160.240005  160.240005   
 8041  2021-11-30  159.990005  165.520004  159.919998  165.300003  165.300003   
 8042  2021-12-01  167.479996  170.300003  164.529999  164.770004  164.770004   
 
          Volume  

In [68]:
# Date range in the same 'YYYY.MM.DD' format and with the same values as the
# download_all_tickers call.
start_date = "1990.01.01"
end_date = "2021.12.01"

# NOTE(review): scratch expression; it evaluates to False and assigns nothing.
# Looks like leftover experimentation - candidate for removal.
"aa" != "aa"
    #del tmp

False

2

In [14]:
# NOTE(review): this import fails in the recorded run (ImportError below) - the
# installed finrl.config package does not export DOW_30_TICKER at its top level.
# Presumably the constant lives in a submodule; TODO confirm against the
# installed FinRL version, or remove this cell.
from finrl.config  import DOW_30_TICKER  

ImportError: cannot import name 'DOW_30_TICKER' from 'finrl.config' (/Users/jvsingh/anaconda3/envs/rlproject/lib/python3.7/site-packages/finrl/config/__init__.py)

In [12]:
# NOTE(review): this raises AttributeError in the recorded run. `import finrl`
# alone may not load the `finrl.config.config` submodule; an explicit import of
# it is presumably required - TODO confirm, or remove this exploratory cell.
finrl.config.config

AttributeError: module 'finrl.config' has no attribute 'config'