In [1]:
# necessary when running from jupyter lab o.g. docker image
!pip install sqlalchemy_utils psycopg2-binary yfinance

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.8.6-cp37-cp37m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 2.5 MB/s eta 0:00:01
Installing collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.8.6


In [1]:
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2
import pandas as pd
import numpy as np

import yfinance as yf

In [2]:
from sqlalchemy import create_engine
# psycopg2
# 192.168.86.27 local static ip
# connects to Docker SQL database
engine = create_engine('postgresql+psycopg2://postgres:DataScience@192.168.86.27:54320/finance')

In [3]:
# read Postgresql data into python as Pandas df
stocks = pd.read_sql_table(table_name = 'equities', schema='public', con=engine)
stocks

Unnamed: 0,ticker
0,AAPL
1,ABBV
2,ADM
3,AFL
4,AFRM
...,...
71,VZ
72,WFC
73,WM
74,WMT


In [4]:
# the sp500 + our holdings
yahoo = pd.read_sql("SELECT DISTINCT symbol FROM yahoo.sp500", con = engine)
yahoo

Unnamed: 0,symbol
0,ABC
1,VFC
2,RSG
3,EXPD
4,COG
...,...
500,CARR
501,GD
502,DIS
503,NTRS


In [11]:
# what symbols are in the large database, but not our current holdings list?
# aka, is there anything new that needs to be added?
missing_tickers = pd.read_sql("SELECT ticker AS missing \
FROM public.equities \
EXCEPT SELECT symbol AS missing \
FROM yahoo.sp500", con = engine)
stocks = list(missing_tickers['missing'])
stocks

['VIGIX',
 'IPOE',
 'AQN',
 'FTEC',
 'SHOP',
 'CURLF',
 'ELY',
 'BX',
 'SNXFX',
 'VEA',
 'CVNA',
 'VSGIX',
 'VOO',
 'SNOW',
 'VTI',
 'UL',
 'SWPPX',
 'VGT',
 'UBER',
 'TRI',
 'VB',
 'MGK',
 'SWTSX',
 'GMBTU',
 'BABA',
 'GIX',
 'VGK',
 'PHG',
 'SQ',
 'SCHA',
 'TCNNF',
 'CASY',
 'VTWO',
 'SPY',
 'VT',
 'AFRM']

In [6]:
# get symbols and equity type
pd.read_sql('''SELECT "symbol", "quoteType" \
FROM yahoo.sp500''', con = engine)

Unnamed: 0,symbol,quoteType
0,MMM,EQUITY
1,ABT,EQUITY
2,ABBV,EQUITY
3,ABMD,EQUITY
4,ACN,EQUITY
...,...,...
500,ZBH,EQUITY
501,ZION,EQUITY
502,ZTS,EQUITY
503,BRK-B,EQUITY


In [7]:
# return all rows that are different between two tables
pd.read_sql("SELECT * \
FROM   public.equities \
FULL   OUTER JOIN yahoo.sp500 ON public.equities.ticker = yahoo.sp500.symbol \
WHERE  yahoo.sp500.symbol IS NULL OR \
       public.equities.ticker IS NULL", con = engine)

Unnamed: 0,ticker,symbol,zip,sector,fullTimeEmployees,longBusinessSummary,city,phone,state,country,...,sharesShortPriorMonth,category,fiveYearAverageReturn,regularMarketPrice,logo_url,address2,fax,toCurrency,lastDividendDate,impliedSharesOutstanding
0,,MMM,55144-1000,Industrials,96163.0,"3M Company develops, manufactures, and markets...",St. Paul,651-733-1110,MN,United States,...,7707047.0,,,167.27,https://logo.clearbit.com/3m.com,,,,,
1,,ABT,60064,Healthcare,107000.0,"Abbott Laboratories discovers, develops, manuf...",North Chicago,224 667 6100,IL,United States,...,9768079.0,,,109.09,https://logo.clearbit.com/abbott.com,"Department 377 Building AP6A-1, Abbott park",,,,
2,,ABMD,01923,Healthcare,1536.0,"Abiomed, Inc. engages in the research, develop...",Danvers,978 646 1400,MA,United States,...,1935637.0,,,323.53,https://logo.clearbit.com/abiomed.com,,978 777 8411,,,
3,,ACN,2,Technology,514000.0,"Accenture plc, a professional services company...",Dublin,353 1 646 2000,,Ireland,...,5688424.0,,,256.45,https://logo.clearbit.com/accenture.com,Grand Canal Harbour,353 1 646 2020,,,
4,,ATVI,90405,Communication Services,9080.0,"Activision Blizzard, Inc., together with its s...",Santa Monica,310 255 2000,CA,United States,...,8045686.0,,,89.64,https://logo.clearbit.com/activisionblizzard.com,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,PHG,,,,,,,,,,...,,,,,,,,,,
497,IPOE,,,,,,,,,,...,,,,,,,,,,
498,ELY,,,,,,,,,,...,,,,,,,,,,
499,SPY,,,,,,,,,,...,,,,,,,,,,


In [4]:
# get list of S&P 500
import bs4 as bs
import pickle
import requests

def save_sp500_tickers():
    resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text
        tickers.append(ticker)
        
    with open("sp500tickers.pickle","wb") as f:
        pickle.dump(tickers,f)
        
    return tickers

sp = save_sp500_tickers()

# initialize character 
char = '\n'
  
# Remove character from Strings list 
# using list comprehension + replace() 
sp = [ele.replace(char, '') for ele in sp] 

In [5]:
# fix Berkshire and others (can't read the period, needs the hyphen)
for i in sp:
    sp = [i.replace('.','-') for i in sp]

In [6]:
# find problematic strings
matching = [s for s in sp if "-" in s]
print(matching)

# remove the problems
for i in matching:
    sp.remove(i)

['BRK-B', 'BF-B']


In [7]:
sp[:10] # first 10 from the list

['MMM', 'ABT', 'ABBV', 'ABMD', 'ACN', 'ATVI', 'ADBE', 'AMD', 'AAP', 'AES']

In [8]:
# read Postgresql data into python as Pandas df
stocks = pd.read_sql_table(table_name = 'equities', schema='public', con=engine)
stocks

Unnamed: 0,ticker
0,AAPL
1,ABBV
2,ADM
3,AFL
4,AFRM
...,...
71,VZ
72,WFC
73,WM
74,WMT


In [12]:
stocks = list(stocks['ticker'])
stocks

TypeError: list indices must be integers or slices, not str

In [10]:
# establish tickers object with all 500 stocks of sp500
tickers = yf.Tickers(sp) # using list of these 500 from wikipedia

NameError: name 'sp' is not defined

In [13]:
# for missing tickers
tickers = yf.Tickers(stocks) # using list of current holdings

In [14]:
tickers

yfinance.Tickers object <VIGIX,IPOE,AQN,FTEC,SHOP,CURLF,ELY,BX,SNXFX,VEA,CVNA,VSGIX,VOO,SNOW,VTI,UL,SWPPX,VGT,UBER,TRI,VB,MGK,SWTSX,GMBTU,BABA,GIX,VGK,PHG,SQ,SCHA,TCNNF,CASY,VTWO,SPY,VT,AFRM>

In [15]:
# make list of api urls to iterate over
tick_list = []
for i in stocks:
    row = "tickers.tickers." + str(i) + ".info"
    tick_list.append(row)

In [16]:
tick_list[:10] # first 10

['tickers.tickers.VIGIX.info',
 'tickers.tickers.IPOE.info',
 'tickers.tickers.AQN.info',
 'tickers.tickers.FTEC.info',
 'tickers.tickers.SHOP.info',
 'tickers.tickers.CURLF.info',
 'tickers.tickers.ELY.info',
 'tickers.tickers.BX.info',
 'tickers.tickers.SNXFX.info',
 'tickers.tickers.VEA.info']

## Get basic info for each company and save to sql

In [17]:
# start with empty dataframe
empty = pd.DataFrame()
empty

#def getStockInfo(tick_list):

for i in tick_list:
        try:
            # using list of strings over which to evaluate, created above
            # must use eval(i) becuase specifying a variable, which cannot be a string
            df = pd.DataFrame.from_dict(eval(i), orient='index').T
            df = df.set_index('symbol')

            # full join to avoid pesky issues with columns not alwasy matching
            empty = pd.concat([empty,df])
    
        except:
            print('Error obtaining info for ' + str(i))
            pass

In [32]:
#empty = empty.drop(['address3'], axis=1)

In [33]:
empty

Unnamed: 0_level_0,previousClose,regularMarketOpen,twoHundredDayAverage,trailingAnnualDividendYield,payoutRatio,volume24Hr,regularMarketDayHigh,navPrice,averageDailyVolume10Day,totalAssets,...,heldPercentInstitutions,heldPercentInsiders,shortRatio,sharesShortPreviousMonthDate,floatShares,dateShortInterest,shortPercentOfFloat,sharesShortPriorMonth,impliedSharesOutstanding,fax
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VIGIX,125.09,,125.215,0.0136702,,,,,0,144793616384.0,...,,,,,,,,,,
IPOE,17.48,18.28,17.4049,,,,19.48,,9301200,,...,,,1.37,1610668800.0,77874720.0,1613088000.0,,2111750.0,100625000.0,
AQN,15.4,15.44,15.7396,0.0393507,0.4426,,15.695,,1648642,,...,0.54397,0.00159,9.62,1611878400.0,598332445.0,1614297600.0,,7923743.0,,905-465-4514
FTEC,104.07,105.4,99.3314,,,,105.418,104.11,506285,5393951744.0,...,,,,,,,,,,
SHOP,1130.74,1169.64,1098.95,,0.0,,1169.97,,2098528,,...,0.67333,0.00402,0.73,1611878400.0,110839759.0,1614297600.0,0.0178,1084950.0,122721000.0,
CURLF,16.37,16.3,11.4638,,0.0,,16.74,,1814771,,...,0.00387,0.3539,,,383514302.0,,,,683059968.0,
ELY,30.28,30.45,22.8348,0.000330251,,,30.59,,2068314,,...,1.05758,0.02005,5.9,1611878400.0,92053454.0,1614297600.0,0.1677,16178607.0,,
GBTC,49.12,51.14,25.4105,,,,52.05,,13214642,,...,,,,,,,,,,
BX,69.35,70.39,60.1112,0.0325883,1.2733,,71.07,,3736071,,...,0.63631,0.00697,2.29,1611878400.0,659301765.0,1614297600.0,0.0097,5155613.0,693974016.0,212-583-5749
SNXFX,84.86,,81.2397,,,,,,0,11773452288.0,...,,,,,,,,,,


In [34]:
# when finished, print the data to sql database
empty.to_sql(name = 'sp500', schema = 'yahoo', con=engine, if_exists='append')

In [37]:
#empty[empty.index == 'BAC']

In [36]:
# read Postgresql data into python as Pandas df
stocks = pd.read_sql_table(table_name = 'sp500', schema='yahoo', con=engine)
stocks

Unnamed: 0,symbol,zip,sector,fullTimeEmployees,longBusinessSummary,city,phone,state,country,companyOfficers,...,sharesShortPriorMonth,category,fiveYearAverageReturn,regularMarketPrice,logo_url,address2,fax,toCurrency,lastDividendDate,impliedSharesOutstanding
0,MMM,55144-1000,Industrials,96163.0,"3M Company develops, manufactures, and markets...",St. Paul,651-733-1110,MN,United States,{},...,7707047.0,,,167.27,https://logo.clearbit.com/3m.com,,,,,
1,ABT,60064,Healthcare,107000.0,"Abbott Laboratories discovers, develops, manuf...",North Chicago,224 667 6100,IL,United States,{},...,9768079.0,,,109.09,https://logo.clearbit.com/abbott.com,"Department 377 Building AP6A-1, Abbott park",,,,
2,ABBV,60064,Healthcare,47000.0,"AbbVie Inc. discovers, develops, manufactures,...",North Chicago,847 932 7900,IL,United States,{},...,13270647.0,,,109.8,https://logo.clearbit.com/abbvie.com,,,,,
3,ABMD,01923,Healthcare,1536.0,"Abiomed, Inc. engages in the research, develop...",Danvers,978 646 1400,MA,United States,{},...,1935637.0,,,323.53,https://logo.clearbit.com/abiomed.com,,978 777 8411,,,
4,ACN,2,Technology,514000.0,"Accenture plc, a professional services company...",Dublin,353 1 646 2000,,Ireland,{},...,5688424.0,,,256.45,https://logo.clearbit.com/accenture.com,Grand Canal Harbour,353 1 646 2020,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537,SCHA,,,,The investment seeks to track as closely as po...,,877-824-5615,,,{},...,,Small Blend,0.16450001,101.23,,,,,,
538,TCNNF,32351,Healthcare,4000.0,"Trulieve Cannabis Corp., together with its sub...",Quincy,844 878 5438,FL,United States,{},...,,,,47.49,https://logo.clearbit.com/trulieve.com,,,,,117794000.0
539,CASY,50021,Consumer Defensive,17282.0,"Casey's General Stores, Inc., together with it...",Ankeny,515 965 6100,IA,United States,{},...,873147.0,,,198.39,https://logo.clearbit.com/caseys.com,,,,1.611878e+09,
540,VTWO,,,,The investment seeks to track the performance ...,,866-499-8473,,,{},...,,Small Blend,0.1758,182.26,,,,,,


In [None]:
# get info for missing tickers
brk = yf.Ticker(matching[0]).info
bf = yf.Ticker(matching[1]).info

brk = pd.DataFrame.from_dict(brk, orient='index').T
brk = brk.set_index('symbol')

bf = pd.DataFrame.from_dict(bf, orient='index').T
bf = bf.set_index('symbol')

data = pd.concat([brk,bf])

In [None]:
data

In [None]:
# when finished, print the data to sql database
data.to_sql(name = 'sp500', schema = 'yahoo', con=engine, if_exists='append')

In [None]:
# read Postgresql data into python as Pandas df
#stocks = pd.read_sql_table('yf', con=engine)
#stocks

## Get OHLC historical data for each company - EOD (end of day)

In [None]:
# get historical market data
hist = msft.history(period="max")
hist

In [None]:
matching

In [None]:
sp[0:25]

In [None]:
for i in matching:
    print(i)

In [None]:
len(sp)

In [None]:
import time
for i in sp[401:503]:
    try:
        ohlc = yf.Ticker(i).history(period="max")
        ohlc.to_sql(name = i, schema = 'ohlc', con=engine, if_exists='append')
        time.sleep(1)
    except:
        print('Error obtaining OHLC for ' + str(i))
        pass

In [None]:
ohlc = yf.Ticker(matching[0]).history(period="max")
ohlc.to_sql(name = matching[0], schema = 'ohlc', con=engine, if_exists='append')

time.sleep(1)
ohlc = yf.Ticker(matching[1]).history(period="max")
ohlc.to_sql(name = matching[1], schema = 'ohlc', con=engine, if_exists='append')

In [88]:
# what do we have data for already in database?
# list all tables in schema
db_tables = pd.read_sql("SELECT table_name FROM information_schema.tables \
       WHERE table_schema = 'ohlc'", con = engine)
tables = db_tables['table_name'].tolist()
tables[1:10]

['ARE', 'ALXN', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'AAPL', 'ATVI']

In [65]:
# ledger stocks
stocks[1:10]

['ABBV', 'ADM', 'AFL', 'AMH', 'ANCUF', 'AQN', 'BABA', 'BBY', 'BRK-B']

In [75]:
# what companies do we still need data for? 
# compare sp500 with owned stocks from ledger
import numpy as np
main_list = list(np.setdiff1d(stocks,tables))
main_list

['AMH',
 'ANCUF',
 'AQN',
 'BABA',
 'BX',
 'CASY',
 'COTY',
 'CRSR',
 'CVNA',
 'DKNG',
 'ELY',
 'FTEC',
 'GMBTU',
 'GS-P-A',
 'GSEU',
 'HIMS',
 'IPOE',
 'MGK',
 'PHG',
 'PLTR',
 'RKT',
 'SCHA',
 'SHOP',
 'SNOW',
 'SNXFX',
 'SPY',
 'SWPPX',
 'SWTSX',
 'TRI',
 'UBER',
 'UL',
 'VB',
 'VEA',
 'VGK',
 'VGT',
 'VIGIX',
 'VOO',
 'VSGIX',
 'VT',
 'VTI',
 'VTWO',
 'W',
 'WFC-P-Z']

In [76]:
missing_tickers = yf.Tickers(main_list) # using list of current holdings
missing_tickers

yfinance.Tickers object <AMH,ANCUF,AQN,BABA,BX,CASY,COTY,CRSR,CVNA,DKNG,ELY,FTEC,GMBTU,GS-P-A,GSEU,HIMS,IPOE,MGK,PHG,PLTR,RKT,SCHA,SHOP,SNOW,SNXFX,SPY,SWPPX,SWTSX,TRI,UBER,UL,VB,VEA,VGK,VGT,VIGIX,VOO,VSGIX,VT,VTI,VTWO,W,WFC-P-Z>

In [79]:
import time
for i in main_list:
    try:
        ohlc = yf.Ticker(i).history(period="max")
        ohlc.to_sql(name = i, schema = 'ohlc', con=engine, if_exists='append')
        time.sleep(1)
    except:
        print('Error obtaining OHLC for ' + str(i))
        pass

Error obtaining OHLC for AMH
- GS-P-A: No data found, symbol may be delisted
- WFC-P-Z: No data found, symbol may be delisted


In [147]:
test

Unnamed: 0,symbol,last_date,today
1,ABBV,2021-01-14,2021-02-06
2,ADM,2021-01-14,2021-02-06


In [90]:
MSFT = pd.read_sql_table('MSFT', engine, schema='ohlc')
MSFT

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1986-03-13,0.056367,0.064656,0.056367,0.061893,1031788800,0.0,0.0
1,1986-03-14,0.061893,0.065209,0.061893,0.064103,308160000,0.0,0.0
2,1986-03-17,0.064103,0.065761,0.064103,0.065209,133171200,0.0,0.0
3,1986-03-18,0.065209,0.065761,0.062998,0.063551,67766400,0.0,0.0
4,1986-03-19,0.063551,0.064103,0.061893,0.062446,47894400,0.0,0.0
...,...,...,...,...,...,...,...,...
8778,2021-01-08,218.679993,220.580002,217.029999,219.619995,22956200,0.0,0.0
8779,2021-01-11,218.470001,218.910004,216.729996,217.490005,23047000,0.0,0.0
8780,2021-01-12,216.500000,217.100006,213.320007,214.929993,23249300,0.0,0.0
8781,2021-01-13,214.020004,216.759995,213.929993,216.339996,20049900,0.0,0.0


In [91]:
MSFT['Date'].max()

Timestamp('2021-01-14 00:00:00')

In [118]:
tickers.ticker

0        AAPL
1        ABBV
2         ADM
3         AFL
4         AMH
       ...   
79    WFC-P-Z
80         WM
81        WMT
82        XOM
83        XYL
Name: ticker, Length: 84, dtype: object

In [115]:
last_dates = list()
tickers_list = list()

for i in tickers.ticker:
    try:
        ticker_df = pd.read_sql_table(i, engine, schema='ohlc')
        
        last_date = ticker_df['Date'].max()
        ticker_symbol = i
        
        last_dates.append(last_date)
        tickers_list.append(ticker_symbol)

        #ohlc_ticker_df = ohlc_ticker_df.append(df)
    
        #ohlc = yf.Ticker(i).history(period="max")
        #ohlc.to_sql(name = i, schema = 'ohlc', con=engine, if_exists='append')
        #time.sleep(1)
    except:
        print('Error obtaining data for ' + str(i))
        pass

Error obtaining data for AMH


In [129]:
from datetime import date
today = date.today()

df = pd.DataFrame()
df['symbol'] = tickers_list
df['last_date'] = last_dates
df['today'] = today

df

Unnamed: 0,symbol,last_date,today
0,AAPL,2021-01-14,2021-02-06
1,ABBV,2021-01-14,2021-02-06
2,ADM,2021-01-14,2021-02-06
3,AFL,2021-01-14,2021-02-06
4,ANCUF,2021-02-05,2021-02-06
...,...,...,...
78,WFC-P-Z,NaT,2021-02-06
79,WM,2021-01-14,2021-02-06
80,WMT,2021-01-14,2021-02-06
81,XOM,2021-01-14,2021-02-06


In [125]:
df[df['symbol'] == 'ANCUF']

Unnamed: 0,symbol,last_date
4,ANCUF,2021-02-05


In [None]:
# for each ticker, grab data for the missing dates up to today
# and paste them to sql database

In [151]:
for a, b, c in df.itertuples(index=False):
    try:
        ohlc = yf.Ticker(a).history(start = b, end = c)
        ohlc.to_sql(name = a, schema = 'ohlc', con=engine, if_exists='append')
        time.sleep(1)
    except:
        print('Error obtaining OHLC for ' + str(a))
        pass

Error obtaining OHLC for GS-P-A
Error obtaining OHLC for WFC-P-Z


In [80]:
# list all tables in schema
pd.read_sql("SELECT table_name FROM information_schema.tables \
       WHERE table_schema = 'ohlc'", con = engine)

Unnamed: 0,table_name
0,ALB
1,ARE
2,ALXN
3,ALGN
4,ALLE
...,...
537,VT
538,VTI
539,VTWO
540,W


In [None]:
# start with empty dataframe
empty = pd.DataFrame()
empty

#def getStockInfo(tick_list):

for i in matching:
        try:
            # using list of strings over which to evaluate, created above
            # must use eval(i) becuase specifying a variable, which cannot be a string
            df = pd.DataFrame.from_dict(eval(i), orient='index').T
            df = df.set_index('symbol')

            # full join to avoid pesky issues with columns not alwasy matching
            empty = pd.concat([empty,df])
    
        except:
            print('Error obtaining info for ' + str(i))
            pass

In [None]:
# when finished, print the data to sql database
data.to_sql(name = i, schema = 'ohlc', con=engine, if_exists='append')

In [None]:
# get stock info
msft = yf.Ticker("BRK-B")
basic_data = msft.info
basic_data

In [None]:
df = pd.DataFrame.from_dict(basic_data, orient='index').T
df = df.set_index('symbol')
df

df.to_sql('yahoo', con=engine, if_exists='append')

## ETF compositions

In [8]:
from bs4 import BeautifulSoup
import re
import requests

In [9]:
!pip install selenium webdriver_manager



In [10]:
# for running with concurrent docker image of selenium
# docker run -d -p 192.168.86.27:4444:4444 selenium/standalone-chrome

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

options = webdriver.ChromeOptions()
options.add_argument('--disable-logging')

#options = Options()
#This will run Selenium headless, meaning you won't see the browser window open. If you want to see the browser open, set it to False
options.headless = True

browser = webdriver.Remote("http://192.168.86.27:4444/wd/hub", DesiredCapabilities.CHROME, options = options)

In [11]:
# for running on local computer

#from selenium import webdriver
#from webdriver_manager.chrome import ChromeDriverManager

#options = webdriver.ChromeOptions()
#options.add_argument('--disable-logging')

#options = Options()
#This will run Selenium headless, meaning you won't see the browser window open. If you want to see the browser open, set it to False
#options.headless = True

# breaks in Docker Jupyter Lab
#driver = webdriver.Chrome(ChromeDriverManager().install(), options = options)

In [12]:
def get_table(soup):
    for t in soup.select('table'):
        header = t.select('thead tr th')
        if len(header) > 2:
            if (header[0].get_text().strip() == 'Symbol'
                and header[2].get_text().strip().startswith('% Holding')):
                return t
    raise Exception('could not find symbol list table')

In [13]:
etf_symbol = 'SPY'

url = 'https://www.barchart.com/stocks/quotes/{}/constituents?page=all'.format(etf_symbol)

# Loads the ETF constituents page and reads the holdings table
browser.get(url)
html = browser.page_source
soup = BeautifulSoup(html, 'html')
table = get_table(soup)

In [14]:
# Reads the holdings table line by line and appends each asset to a
    # dictionary along with the holdings percentage
asset_dict = {}
for row in table.select('tr')[1:-1]:
    try:
        cells = row.select('td')
        # print(row)
        symbol = cells[0].get_text().strip()
        # print(symbol)
        name = cells[1].text.strip()
        celltext = cells[2].get_text().strip()
        percent = float(celltext.rstrip('%'))
        shares = int(cells[3].text.strip().replace(',', ''))
        if symbol != "" and percent != 0.0:
            asset_dict[symbol] = {
                'name': name,
                'percent': percent,
                'shares': shares,
            }
    except BaseException as ex:
        print(ex)
        
browser.quit()

df = pd.DataFrame(asset_dict).T
#df = df.set_index(etf_symbol)

In [15]:
df

Unnamed: 0,name,percent,shares
AAPL,Apple Inc,7.04,163598370
MSFT,Microsoft Corp,5.41,77393940
AMZN,AMAZON COM INC,4.37,4365764
FB,Facebook Inc.,2.09,24608548
TSLA,Tesla Inc,2.06,7762561
...,...,...,...
HFC,HollyFrontier Corp,0.01,1528056
XRX,Xerox Corp,0.01,1709864
UAA,UNDER ARMOUR A,0.01,1922612
UA,UNDER ARMOUR C,0.01,1995145


In [None]:
# get historical market data
hist = msft.history(period="max")
hist

In [None]:
# show dividends
msft.dividends

In [None]:
# show actions (dividends, splits)
msft.actions

In [None]:




# show financials
msft.financials
msft.quarterly_financials

# show major holders
msft.major_holders

# show institutional holders
msft.institutional_holders

# show balance sheet
msft.balance_sheet
msft.quarterly_balance_sheet

# show cashflow
msft.cashflow
msft.quarterly_cashflow

# show earnings
msft.earnings
msft.quarterly_earnings

# show sustainability
msft.sustainability

# show analysts recommendations
msft.recommendations