In [254]:
import warnings
warnings.filterwarnings('ignore')

In [255]:
from pathlib import Path
import requests
from io import BytesIO
from zipfile import ZipFile, BadZipFile

import numpy as np
import pandas as pd
import pandas_datareader.data as web
from sklearn.datasets import fetch_openml

pd.set_option('display.expand_frame_repr', False)

import os
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import requests
from random import sample

In [256]:
# Populate the process environment from the local .env file, then read both
# API tokens from it (each is None when its variable is not set).
load_dotenv()
TRADIER_TOKEN, EOD_TOKEN = (
    os.environ.get(name) for name in ('TRADIER_TOKEN', 'EOD_TOKEN')
)

# HDF5 file the later cells write their tables into.
DATA_STORE = Path('assets_v1.h5')

## Get Tickers

In [140]:
def get_tickers():
    """Return ticker codes of US common stocks listed on NYSE ARCA or NASDAQ.

    Downloads the US symbol list from the EOD Historical Data
    ``exchange-symbol-list`` endpoint, authenticating with the module-level
    ``EOD_TOKEN``, and keeps the rows whose ``Type`` is ``'Common Stock'`` and
    whose ``Exchange`` is ``'NYSE ARCA'`` or ``'NASDAQ'``.

    NOTE(review): the previous comment described this as "ETFs on NYSE or
    NASDAQ", which does not match the filter. The filter is left unchanged;
    confirm which of the two was intended.

    Returns
    -------
    list
        Values of the ``Code`` column for the matching rows, in the order the
        API returned them. Empty when the API returns no symbols.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status (e.g. a missing or invalid
        token).
    """
    # `with` releases the connection even when status/JSON handling raises;
    # the timeout stops a stalled request from hanging the notebook forever.
    with requests.get(
        'https://eodhistoricaldata.com/api/exchange-symbol-list/US',
        params={'api_token': EOD_TOKEN, 'fmt': 'json'},
        timeout=30,
    ) as r:
        r.raise_for_status()
        data = r.json()

    df = pd.DataFrame(data)
    if df.empty:
        # No rows means no Type/Exchange columns to filter on.
        return []

    is_common_stock = df['Type'] == 'Common Stock'
    on_wanted_exchange = df['Exchange'].isin(['NYSE ARCA', 'NASDAQ'])

    # Select the codes directly instead of re-indexing and dropping a column
    # in place on a filtered slice.
    return df.loc[is_common_stock & on_wanted_exchange, 'Code'].tolist()

In [141]:
# # get etf tickers
# tickers = get_tickers()

In [142]:
# # save ticker list to csv
# df = pd.DataFrame(tickers)
# df.to_csv('tickers.csv')

In [143]:
# pull in s&p 500 tickers: first column of the header-less CSV, as a plain list
tickers = list(
    pd.read_csv(
        'ticker_list.csv',
        header=None,
        usecols=[0],
        names=['symbols'],
    )['symbols']
)

## Get Stock Price Data

In [257]:
# from tqdm.notebook import tqdm
from progressbar import ProgressBar

def get_historical_price_test(tickers, data_type):
    """Download per-ticker records from EOD Historical Data into one frame.

    For every symbol in ``tickers`` this requests
    ``https://eodhistoricaldata.com/api/<data_type>/<ticker>.US``
    (``data_type='eod'`` pulls historical daily OHLC prices and volume),
    indexes the returned records by their ``date`` field and stacks the
    per-ticker frames on top of each other.

    Parameters
    ----------
    tickers : sequence of str
        Symbols without the ``.US`` suffix.
    data_type : str
        API path segment selecting the dataset, e.g. ``'eod'``.

    Returns
    -------
    pandas.DataFrame
        Frame with a two-level index: ticker (outer) and ``date`` exactly as
        returned by the API (inner). An empty frame when ``tickers`` is empty.

    Raises
    ------
    requests.HTTPError
        If any request answers with a 4xx/5xx status.
    ValueError
        If the API returns no records for a ticker.
    """
    frames = {}
    pbar = ProgressBar()

    # Wrap the tickers themselves rather than enumerate(...): enumerate has no
    # len(), so the bar could only show an open-ended spinner.
    for ticker in pbar(tickers):

        # `with` releases the connection even if status/JSON handling raises;
        # the timeout keeps one stalled request from blocking the whole run.
        with requests.get(
            f'https://eodhistoricaldata.com/api/{data_type}/{ticker}.US',
            params={'api_token': EOD_TOKEN, 'fmt': 'json'},
            timeout=30,
        ) as r:
            r.raise_for_status()
            data = r.json()

        if not data:
            # An empty payload has no 'date' column; fail with the ticker name
            # instead of an opaque KeyError from set_index.
            raise ValueError(
                f"no '{data_type}' records returned for ticker {ticker!r}"
            )

        frames[ticker] = pd.DataFrame.from_records(data).set_index('date')

    if not frames:
        # pd.concat refuses an empty collection.
        return pd.DataFrame()

    return pd.concat(list(frames.values()), axis=0, keys=list(frames.keys()))

In [258]:
# placeholder tickers
# tickers = ['SPXS', 'SPXL']

# stores list of random tickers of given length 
#tickers_sample = tickers[0:11]

#tickers_sample = ['MSFT', 'AMZN', 'AAPL', 'JBHT', 'GS', 'JPM', 'CSX']

# Download end-of-day ('eod') records for every symbol in `tickers` and stack
# them into one frame indexed by (ticker, date).
df1 = get_historical_price_test(tickers, 'eod')

/ |#                                                  | 0 Elapsed Time: 0:00:00
- |       #                                           | 1 Elapsed Time: 0:00:00
\ |             #                                     | 2 Elapsed Time: 0:00:01
| |                  #                                | 3 Elapsed Time: 0:00:01
/ |                       #                           | 4 Elapsed Time: 0:00:02
- |                           #                       | 5 Elapsed Time: 0:00:02
\ |                                 #                 | 6 Elapsed Time: 0:00:03
| |                                       #           | 7 Elapsed Time: 0:00:03
/ |                                           #       | 8 Elapsed Time: 0:00:04
- |                                                 # | 9 Elapsed Time: 0:00:04
\ |                                           #      | 10 Elapsed Time: 0:00:05
| |                                    #             | 11 Elapsed Time: 0:00:06
/ |                              #      

In [259]:
# Summarize the stacked price frame: index range, column dtypes, memory use.
df1.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4003688 entries, ('AAPL', '1980-12-12') to ('NWS', '2023-02-22')
Data columns (total 6 columns):
 #   Column          Dtype  
---  ------          -----  
 0   open            float64
 1   high            float64
 2   low             float64
 3   close           float64
 4   adjusted_close  float64
 5   volume          int64  
dtypes: float64(5), int64(1)
memory usage: 199.2+ MB


In [260]:
# Persist the stacked daily price frame in the HDF5 store.
with pd.HDFStore(DATA_STORE) as h5:
    h5.put(key='stocks/prices/daily', value=df1)

## S&P 500 Price Data

In [244]:
# Load the local Stooq S&P 500 CSV with its first column as a parsed date
# index, keep 1950 through 2019, and lower-case the column names.
sp500_stooq = (
    pd.read_csv('^spx_d.csv', index_col=0, parse_dates=True)
    .loc['1950':'2019']
    .rename(columns=str.lower)
)
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
sp500_stooq.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 17700 entries, 1950-01-03 to 2019-12-31
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    17700 non-null  float64
 1   high    17700 non-null  float64
 2   low     17700 non-null  float64
 3   close   17700 non-null  float64
 4   volume  17700 non-null  float64
dtypes: float64(5)
memory usage: 829.7 KB
None


In [245]:
# Persist the S&P 500 index history in the HDF5 store.
with pd.HDFStore(DATA_STORE) as h5:
    h5.put(key='sp500/stooq', value=sp500_stooq)

## S&P 500 Constituents

In [246]:
# Take the first HTML table on the Wikipedia constituents page and replace its
# headers with snake_case names (the table must have exactly these 8 columns).
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
df = pd.read_html(io=url, header=0)[0]

df.columns = [
    'ticker',
    'name',
    'gics_sector',
    'gics_sub_industry',
    'location',
    'first_added',
    'cik',
    'founded',
]

In [247]:
# Persist the constituents table in the HDF5 store, then summarize it.
with pd.HDFStore(DATA_STORE) as h5:
    h5.put(key='sp500/stocks', value=df)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 503 entries, 0 to 502
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   ticker             503 non-null    object
 1   name               503 non-null    object
 2   gics_sector        503 non-null    object
 3   gics_sub_industry  503 non-null    object
 4   location           503 non-null    object
 5   first_added        497 non-null    object
 6   cik                503 non-null    int64 
 7   founded            503 non-null    object
dtypes: int64(1), object(7)
memory usage: 31.6+ KB


## Fundamental Data

In [234]:
from progressbar import ProgressBar

def get_stock_fundmentals(tickers):
    """Download selected fundamentals sections for each ticker as one Series.

    For every symbol in ``tickers`` this requests
    ``https://eodhistoricaldata.com/api/fundamentals/<ticker>.US`` and keeps
    the ``General``, ``Highlights``, ``Valuation``, ``SharesStats``,
    ``Technicals``, ``SplitsDividends`` and ``AnalystRatings`` sections.

    Parameters
    ----------
    tickers : sequence of str
        Symbols without the ``.US`` suffix.

    Returns
    -------
    pandas.Series
        Series with a three-level index (ticker, section, field). An empty
        object-dtype Series when ``tickers`` is empty.

    Raises
    ------
    requests.HTTPError
        If any request answers with a 4xx/5xx status.
    KeyError
        If a response lacks one of the sections listed above.
    """
    # Loop-invariant: the sections copied out of every response.
    sections = ['General', 'Highlights', 'Valuation', 'SharesStats',
                'Technicals', 'SplitsDividends', 'AnalystRatings']

    multi_ticker_dict = {}
    pbar = ProgressBar()

    for ticker in pbar(tickers):

        # Authenticate with the token loaded from .env instead of a literal
        # key embedded in the notebook. NOTE(review): the key that used to be
        # hard-coded here has been exposed and should be rotated.
        with requests.get(
            'https://eodhistoricaldata.com/api/fundamentals/' + ticker + '.US',
            params={'api_token': EOD_TOKEN, 'fmt': 'json'},
            timeout=30,
        ) as r:
            r.raise_for_status()
            data = r.json()

        # Discard these entries before building the per-section Series
        # (presumably nested structures that do not fit a flat Series;
        # confirm against the API response).
        for field in ('Officers', 'Listings', 'AddressData'):
            data['General'].pop(field, None)
        data['SplitsDividends'].pop('NumberDividendsByYear', None)

        single_ticker_dict = {name: pd.Series(data[name]) for name in sections}
        multi_ticker_dict[ticker] = pd.concat(single_ticker_dict)

    if not multi_ticker_dict:
        # pd.concat refuses an empty collection.
        return pd.Series(dtype=object)

    return pd.concat(multi_ticker_dict)

In [240]:
# placeholder tickers
# tickers = ['SPXS', 'SPXL']

# stores list of random tickers of given length 
# tickers_sample = tickers[0:11]

# tickers_sample = ['MSFT', 'AMZN']#, 'AAPL', 'JBHT', 'GS', 'JPM', 'CSX']

# Download the fundamentals Series for every symbol in `tickers`
# (this is fundamentals data, not daily prices).
df = get_stock_fundmentals(tickers)

  0% (0 of 503) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--
  0% (1 of 503) |                        | Elapsed Time: 0:00:00 ETA:   0:03:32
  0% (2 of 503) |                        | Elapsed Time: 0:00:00 ETA:   0:03:27
  0% (3 of 503) |                        | Elapsed Time: 0:00:01 ETA:   0:03:55
  0% (4 of 503) |                        | Elapsed Time: 0:00:01 ETA:   0:03:43
  0% (5 of 503) |                        | Elapsed Time: 0:00:02 ETA:   0:03:39
  1% (6 of 503) |                        | Elapsed Time: 0:00:02 ETA:   0:03:39
  1% (7 of 503) |                        | Elapsed Time: 0:00:04 ETA:   0:06:29
  1% (8 of 503) |                        | Elapsed Time: 0:00:04 ETA:   0:06:30
  1% (9 of 503) |                        | Elapsed Time: 0:00:05 ETA:   0:06:32
  1% (10 of 503) |                       | Elapsed Time: 0:00:05 ETA:   0:05:54
  2% (11 of 503) |                       | Elapsed Time: 0:00:05 ETA:   0:05:25
  2% (12 of 503) |                      

In [241]:
# Display the combined fundamentals Series indexed by (ticker, section, field).
df

AAPL  General         Code                    AAPL
                      Type            Common Stock
                      Name               Apple Inc
                      Exchange              NASDAQ
                      CurrencyCode             USD
                                          ...     
NWS   AnalystRatings  StrongBuy                3.0
                      Buy                      2.0
                      Hold                     2.0
                      Sell                     0.0
                      StrongSell               0.0
Length: 48288, dtype: object

In [242]:
# Persist the fundamentals Series in the HDF5 store.
with pd.HDFStore(DATA_STORE) as h5:
    h5.put(key='stocks/base_fundementals', value=df)

## Bond Price Indexes

In [243]:
# FRED series id -> readable column name.
securities = {
    'BAMLCC0A0CMTRIV': 'US Corp Master TRI',
    'BAMLHYH0A0HYM2TRIV': 'US High Yield TRI',
    'BAMLEMCBPITRIV': 'Emerging Markets Corporate Plus TRI',
    # 'GOLDAMGBD228NLBM': 'Gold (London, USD)',
    'DGS10': '10-Year Treasury CMR',
}

# Fetch all series from FRED starting in 2000, rename the columns, drop dates
# where every series is missing, and average onto a business-day index.
df = (
    web.DataReader(name=list(securities), data_source='fred', start=2000)
    .rename(columns=securities)
    .dropna(how='all')
    .resample('B')
    .mean()
)

with pd.HDFStore(DATA_STORE) as h5:
    h5.put(key='fred/assets', value=df)