## Get Stock Symbols from S&P 500

In [12]:
import yfinance as yf
import pandas as pd
import datetime as dt
import json

In [13]:
data = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')

In [14]:
sp500 = data[0]
sp500 = sp500[[
    'Symbol', 'Security', 'GICS Sector', 'GICS Sub-Industry',
    'Date first added'
]]
sp500.head(10)

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Date first added
0,MMM,3M Company,Industrials,Industrial Conglomerates,1976-08-09
1,ABT,Abbott Laboratories,Health Care,Health Care Equipment,1964-03-31
2,ABBV,AbbVie Inc.,Health Care,Pharmaceuticals,2012-12-31
3,ABMD,Abiomed,Health Care,Health Care Equipment,2018-05-31
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,2011-07-06
5,ATVI,Activision Blizzard,Communication Services,Interactive Home Entertainment,2015-08-31
6,ADBE,Adobe Inc.,Information Technology,Application Software,1997-05-05
7,AMD,Advanced Micro Devices,Information Technology,Semiconductors,2017-03-20
8,AAP,Advance Auto Parts,Consumer Discretionary,Automotive Retail,2015-07-09
9,AES,AES Corp,Utilities,Independent Power Producers & Energy Traders,1998-10-02


In [15]:
sp500_symbols = sp500['Symbol'].values.tolist()
sp500_sectors = list(set(sp500['GICS Sector'].values.tolist()))

sp500_symbols_new = []

for symbol in sp500_symbols:
    if '.' in symbol:
        sp500_symbols_new.append(symbol.replace('.', '-'))
    else:
        sp500_symbols_new.append(symbol)

sp500_symbols = sp500_symbols_new
#print(sp500_symbols)

## Retrieve Dividend Data

In [16]:
div_data = {}
count = 0
min_div_pay_years = 20

#sp500_symbols = ['HPQ']

for symbol in sp500_symbols:
    count += 1
    if (count % 100 == 0):
        print(count)
    #print(f"getting dividend data from {symbol}")
    annual_div = {}
    prices = yf.Ticker(symbol).history(period='max')
    dividends = prices[prices['Dividends'] > 0]
    if len(dividends) > 0:
        first_year = dividends.index[0].year
        last_year = dt.datetime.today().year
    else:
        continue

    # print(dividends)

    # get annual dividend sum from first year it paid out div
    for year in range(first_year, last_year):
        div_sum = dividends[dividends.index.year == year]['Dividends'].sum()
        annual_div[year] = div_sum
    # min # years
    # div_data[symbol][0] = annual dividend sum
    # div_data[symbol][1] = # years dividends were paid out

    # if len(annual_div) >= year_threshold:
    if 0 in list(annual_div.values()):
        continue
    if len(annual_div) > min_div_pay_years:
        div_data[symbol] = []
        additionals = {}
        additionals['consecutive_yrs'] = len(annual_div)
        div_data[symbol].append(annual_div)
        div_data[symbol].append(additionals)

100
200
300
400
500


In [6]:
div_data

{'MMM': [{1970: 0.082,
   1971: 0.115752,
   1972: 0.120376,
   1973: 0.13125,
   1974: 0.1565,
   1975: 0.16875,
   1976: 0.181,
   1977: 0.2125,
   1978: 0.25,
   1979: 0.3,
   1980: 0.35,
   1981: 0.375,
   1982: 0.4,
   1983: 0.4125,
   1984: 0.425,
   1985: 0.4375,
   1986: 0.45,
   1987: 0.465,
   1988: 0.53,
   1989: 0.65,
   1990: 0.73,
   1991: 0.78,
   1992: 0.8,
   1993: 0.83,
   1994: 0.88,
   1995: 0.94,
   1996: 2.16,
   1997: 1.06,
   1998: 1.1,
   1999: 1.12,
   2000: 1.16,
   2001: 1.2,
   2002: 1.24,
   2003: 1.32,
   2004: 1.44,
   2005: 1.68,
   2006: 1.84,
   2007: 1.92,
   2008: 2.0,
   2009: 2.04,
   2010: 2.1,
   2011: 2.2,
   2012: 2.36,
   2013: 2.54,
   2014: 3.42,
   2015: 4.1,
   2016: 4.44,
   2017: 4.7,
   2018: 5.44,
   2019: 5.76,
   2020: 5.88},
  {'consecutive_yrs': 51}],
 'ABT': [{1983: 0.026934,
   1984: 0.032265,
   1985: 0.037877,
   1986: 0.045172000000000004,
   1987: 0.03984,
   1988: 0.06453,
   1989: 0.075751,
   1990: 0.09034,
   1991: 0.107

## Calculate Average Dividend Growth Rate Since Inception

In [17]:
for symbol in div_data.keys():
    start_year = list(div_data[symbol][0].keys())[0]
    last_year = dt.datetime.today().year - 1
    prev_five_yrs = last_year - 6
    prev_fifteen_yrs = last_year - 16
    print(f"working on {symbol} | start year = {start_year}")
    rate_change = []
    for year in range(prev_five_yrs, last_year + 1):
        rate_change.append(
            100 * (div_data[symbol][0][year] - div_data[symbol][0][year - 1]) /
            div_data[symbol][0][year - 1])
    five_yrs_div_growth_avg = sum(rate_change) / len(rate_change)
    div_data[symbol][1]['5YrsDivGrowthAvg'] = five_yrs_div_growth_avg
    
    rate_change = []
    for year in range(prev_fifteen_yrs, last_year + 1):
        rate_change.append(
            100 * (div_data[symbol][0][year] - div_data[symbol][0][year - 1]) /
            div_data[symbol][0][year - 1])
    fifteen_yrs_div_growth_avg = sum(rate_change) / len(rate_change)
    div_data[symbol][1]['15YrsDivGrowthAvg'] = fifteen_yrs_div_growth_avg

working on MMM | start year = 1970
working on ABT | start year = 1983
working on AFL | start year = 1984
working on APD | start year = 1983
working on ALB | start year = 1994
working on ARE | start year = 1998
working on LNT | start year = 1987
working on ALL | start year = 1993
working on MO | start year = 1962
working on AEE | start year = 1998
working on AEP | start year = 1970
working on AXP | start year = 1977
working on AME | start year = 1984
working on AOS | start year = 1986
working on APA | start year = 1982
working on ADM | start year = 1983
working on AJG | start year = 1988
working on T | start year = 1984
working on ATO | start year = 1989
working on ADP | start year = 1983
working on AVB | start year = 1998
working on BKR | start year = 1987
working on BLL | start year = 1985
working on BAC | start year = 1986
working on BK | start year = 1987
working on BAX | start year = 1982
working on BA | start year = 1962
working on BXP | start year = 1997
working on BMY | start ye

In [18]:
div_data

{'MMM': [{1970: 0.082,
   1971: 0.115752,
   1972: 0.120376,
   1973: 0.13125,
   1974: 0.1565,
   1975: 0.16875,
   1976: 0.181,
   1977: 0.2125,
   1978: 0.25,
   1979: 0.3,
   1980: 0.35,
   1981: 0.375,
   1982: 0.4,
   1983: 0.4125,
   1984: 0.425,
   1985: 0.4375,
   1986: 0.45,
   1987: 0.465,
   1988: 0.53,
   1989: 0.65,
   1990: 0.73,
   1991: 0.78,
   1992: 0.8,
   1993: 0.83,
   1994: 0.88,
   1995: 0.94,
   1996: 2.16,
   1997: 1.06,
   1998: 1.1,
   1999: 1.12,
   2000: 1.16,
   2001: 1.2,
   2002: 1.24,
   2003: 1.32,
   2004: 1.44,
   2005: 1.68,
   2006: 1.84,
   2007: 1.92,
   2008: 2.0,
   2009: 2.04,
   2010: 2.1,
   2011: 2.2,
   2012: 2.36,
   2013: 2.54,
   2014: 3.42,
   2015: 4.1,
   2016: 4.44,
   2017: 4.7,
   2018: 5.44,
   2019: 5.76,
   2020: 5.88},
  {'consecutive_yrs': 51,
   '5YrsDivGrowthAvg': 13.198230876467122,
   '15YrsDivGrowthAvg': 9.458024783503907}],
 'ABT': [{1983: 0.026934,
   1984: 0.032265,
   1985: 0.037877,
   1986: 0.045172000000000004,
 

## Export & Save Dividend Data to JSON

In [19]:
with open('data/historical_div_sp500.json', 'w') as fp:
    json.dump(div_data, fp)