In [26]:
import requests
from requests_html import HTMLSession, HTML
import pandas as pd
import yfinance as yf
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Wikipedia page for the S&P 500 Dividend Aristocrats (not requested by any cell shown below).
url1 = 'https://en.wikipedia.org/wiki/S%26P_500_Dividend_Aristocrats'
# Wikipedia list of S&P 500 companies; this is the page that gets fetched and parsed below.
url2 = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

In [3]:
# Fetch the constituents page. A timeout keeps the notebook from hanging forever
# on a stalled connection (requests has no default timeout), and raise_for_status()
# makes an HTTP error fail here instead of surfacing later as a missing table.
s = HTMLSession()
r = s.get(url2, timeout=30)
r.raise_for_status()

In [4]:
r.html

<HTML url='https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'>

In [6]:
# Select the constituents table by its id rather than assuming it is the first
# <table> on the page, so a layout change on the page cannot silently pick a different table.
table = r.html.find('table#constituents', first=True)
assert table is not None, "constituents table not found on the page"
table

<Element 'table' class=('wikitable', 'sortable') id='constituents'>

In [7]:
rows = table.find('tr')

In [8]:
# Keep the last table row as a sample for inspecting the cell layout.
row = rows[-1]

In [9]:
len(rows)

504

In [10]:
row

<Element 'tr' >

In [11]:
# Print the text of every <td> cell in the sample row, one per line.
for cell in row.find('td'):
    print(cell.text)


ZTS
Zoetis
reports
Health Care
Pharmaceuticals
Parsippany, New Jersey
2013-06-21
0001555280
1952


In [12]:
# One list of stripped <td> texts per table row; rows[0] is skipped
# (presumably the header row, which carries no data cells -- verify if the page changes).
table_details = [
    [cell.text.strip() for cell in tr.find('td')]
    for tr in rows[1:]
]

In [None]:
# Preview only the first few parsed rows; dumping the whole nested list floods the output.
table_details[:5]

In [14]:
# Column labels for the scraped table, in the order the <td> cells appear in each row.
# 'Headquarters Location' was previously misspelled 'Headquartes Location'.
columns = [
    'Symbol', 'Security', 'SEC Filings',
    'GICS Sector', 'GICS Sub-Industry',
    'Headquarters Location', 'Date First Added',
    'CIK', 'Founded']

In [15]:
# Build the constituents frame: one row per parsed table row, labelled with `columns`.
df = pd.DataFrame(table_details, columns=columns)
df

Unnamed: 0,Symbol,Security,SEC Filings,GICS Sector,GICS Sub-Industry,Headquartes Location,Date First Added,CIK,Founded
0,MMM,3M,reports,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1976-08-09,0000066740,1902
1,AOS,A. O. Smith,reports,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,0000091142,1916
2,ABT,Abbott,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,0000001800,1888
3,ABBV,AbbVie,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,0001551152,2013 (1888)
4,ABMD,Abiomed,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,0000815094,1981
...,...,...,...,...,...,...,...,...,...
498,YUM,Yum! Brands,reports,Consumer Discretionary,Restaurants,"Louisville, Kentucky",1997-10-06,0001041061,1997
499,ZBRA,Zebra Technologies,reports,Information Technology,Electronic Equipment & Instruments,"Lincolnshire, Illinois",2019-12-23,0000877212,1969
500,ZBH,Zimmer Biomet,reports,Health Care,Health Care Equipment,"Warsaw, Indiana",2001-08-07,0001136869,1927
501,ZION,Zions Bancorporation,reports,Financials,Regional Banks,"Salt Lake City, Utah",2001-06-22,0000109380,1873


In [16]:
def ticker_info(tickers, period="ytd", interval="1d"):
    """Download price history for one or more tickers via ``yf.download``.

    Parameters
    ----------
    tickers : str or list of str
        Ticker symbol(s) to download.
    period : str, optional
        Look-back window, used instead of explicit start/end dates.
        Valid periods: 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max.
        Defaults to "ytd", the value that used to be hard-coded here.
    interval : str, optional
        Bar size (intraday intervals require period < 60 days).
        Valid intervals: 1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo.
        Defaults to "1d", the value that used to be hard-coded here.

    Returns
    -------
    The object returned by ``yf.download``. Because ``group_by='ticker'`` is
    used, the data is grouped per ticker and can be accessed as ``data['MMM']``.
    """
    return yf.download(
        tickers=tickers,
        period=period,
        interval=interval,
        # Do not ignore timezones when aligning data from different timezones.
        ignore_tz=False,
        # Group by ticker so each symbol can be looked up directly.
        group_by='ticker',
        # Adjust all OHLC values automatically.
        auto_adjust=True,
        # Include pre/post regular market hours data.
        prepost=True,
        # Use threads for mass downloading.
        threads=True,
        # `proxy=None` is intentionally no longer passed: None is the default, so
        # omitting it is equivalent. NOTE(review): newer yfinance releases appear
        # to deprecate the argument -- confirm against the installed version.
    )

In [19]:
df.loc[:5, 'Symbol']

0     MMM
1     AOS
2     ABT
3    ABBV
4    ABMD
5     ACN
Name: Symbol, dtype: object

In [20]:
# .loc slicing is label-based and inclusive, so [:5] selects the first six symbols.
sample_symbols = df.loc[:5, 'Symbol'].tolist()
test = ticker_info(sample_symbols)

[*********************100%***********************]  6 of 6 completed


In [25]:
# Per-ticker frame for MMM (Open/High/Low/Close/Volume); the previous key was garbled and would raise KeyError.
test['MMM']

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-03 00:00:00-05:00,173.069700,173.817018,170.662708,172.506775,1930700
2022-01-04 00:00:00-05:00,173.224972,175.923119,172.788225,174.923447,2522200
2022-01-05 00:00:00-05:00,171.914746,176.427830,171.788569,174.205261,2952400
2022-01-06 00:00:00-05:00,175.554333,175.942550,172.312662,172.759125,2505400
2022-01-07 00:00:00-05:00,172.885297,175.175812,172.002087,174.651703,2800200
...,...,...,...,...,...
2022-10-19 00:00:00-04:00,115.099998,115.680000,113.239998,114.199997,2578600
2022-10-20 00:00:00-04:00,113.980003,115.650002,112.889999,113.110001,3142900
2022-10-21 00:00:00-04:00,113.150002,116.940002,113.050003,116.809998,3342000
2022-10-24 00:00:00-04:00,117.529999,119.139999,117.190002,118.379997,3394100


In [None]:
# Push one of those columns into yfinance
# Explore the data

In [28]:
# Same MMM values as a plain NumPy array (no date index or column labels),
# which is what the recorded output of this cell shows.
test['MMM'].to_numpy()

array([[1.73069700e+02, 1.73817018e+02, 1.70662708e+02, 1.72506775e+02,
        1.93070000e+06],
       [1.73224972e+02, 1.75923119e+02, 1.72788225e+02, 1.74923447e+02,
        2.52220000e+06],
       [1.71914746e+02, 1.76427830e+02, 1.71788569e+02, 1.74205261e+02,
        2.95240000e+06],
       ...,
       [1.13150002e+02, 1.16940002e+02, 1.13050003e+02, 1.16809998e+02,
        3.34200000e+06],
       [1.17529999e+02, 1.19139999e+02, 1.17190002e+02, 1.18379997e+02,
        3.39410000e+06],
       [1.13510002e+02, 1.18599998e+02, 1.13150002e+02, 1.18500000e+02,
        5.01010000e+06]])

In [32]:
# Line chart of MMM's opening price; the title identifies the ticker so the
# figure can be told apart from the other single-ticker charts.
mmm_prices = test['MMM']
fig = px.line(mmm_prices, x=mmm_prices.index, y="Open", title="MMM - daily open price")
fig.show()

In [33]:
# Line chart of AOS's opening price; the title identifies the ticker so the
# figure can be told apart from the other single-ticker charts.
aos_prices = test['AOS']
fig = px.line(aos_prices, x=aos_prices.index, y="Open", title="AOS - daily open price")
fig.show()

In [38]:
# Overlay the opening prices of several tickers on one figure.
# One trace per symbol is added in a loop instead of copy-pasting the add_trace call.
fig = go.Figure()

for symbol in ("AOS", "MMM"):
    prices = test[symbol]
    fig.add_trace(go.Scatter(
        name=symbol,
        mode='lines',
        x=prices.index,
        y=prices["Open"],
    ))

# Title and axis labels so the figure stands on its own when the notebook is skimmed.
fig.update_layout(
    title="AOS vs MMM - daily open price",
    xaxis_title="Date",
    yaxis_title="Open",
)

fig.show()

In [37]:
test['MMM']["Open"]

Date
2022-01-03 00:00:00-05:00    173.069700
2022-01-04 00:00:00-05:00    173.224972
2022-01-05 00:00:00-05:00    171.914746
2022-01-06 00:00:00-05:00    175.554333
2022-01-07 00:00:00-05:00    172.885297
                                ...    
2022-10-19 00:00:00-04:00    115.099998
2022-10-20 00:00:00-04:00    113.980003
2022-10-21 00:00:00-04:00    113.150002
2022-10-24 00:00:00-04:00    117.529999
2022-10-25 00:00:00-04:00    113.510002
Name: Open, Length: 205, dtype: float64