## Get Stock Symbols from S&P 500

In [1]:
import yfinance as yf
import pandas as pd
import datetime as dt
import json

In [2]:
data = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')

In [3]:
sp500 = data[0]
sp500 = sp500[[
    'Symbol', 'Security', 'GICS Sector', 'GICS Sub-Industry',
    'Date first added'
]]
sp500.head(10)

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Date first added
0,MMM,3M Company,Industrials,Industrial Conglomerates,1976-08-09
1,ABT,Abbott Laboratories,Health Care,Health Care Equipment,1964-03-31
2,ABBV,AbbVie Inc.,Health Care,Pharmaceuticals,2012-12-31
3,ABMD,Abiomed,Health Care,Health Care Equipment,2018-05-31
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,2011-07-06
5,ATVI,Activision Blizzard,Communication Services,Interactive Home Entertainment,2015-08-31
6,ADBE,Adobe Inc.,Information Technology,Application Software,1997-05-05
7,AMD,Advanced Micro Devices,Information Technology,Semiconductors,2017-03-20
8,AAP,Advance Auto Parts,Consumer Discretionary,Automotive Retail,2015-07-09
9,AES,AES Corp,Utilities,Independent Power Producers & Energy Traders,1998-10-02


In [4]:
sp500_symbols = sp500['Symbol'].values.tolist()
sp500_sectors = list(set(sp500['GICS Sector'].values.tolist()))

sp500_symbols_new = []

for symbol in sp500_symbols:
    if '.' in symbol:
        sp500_symbols_new.append(symbol.replace('.', '-'))
    else:
        sp500_symbols_new.append(symbol)

sp500_symbols = sp500_symbols_new

## Retrieve Dividend Data

In [5]:
div_data = {}
year_threshold = 40

#sp500_symbols = ['ABT']

for symbol in sp500_symbols:
    annual_div = {}
    prices = yf.Ticker(symbol).history(period='max')
    dividends = prices[prices['Dividends'] > 0]
    if len(dividends) > 0:
        first_year = dividends.index[0].year
    else:
        continue
    last_year = dt.datetime.today().year
    

    # get annual dividend sum from first year it paid out div
    for year in range(first_year, last_year):
        div_sum = dividends[dividends.index.year == year]['Dividends'].sum()
        annual_div[year] = div_sum
        if div_sum == 0:  # dividend discontinued
            break
    # min # years
    # div_data[symbol][0] = annual dividend sum
    # div_data[symbol][1] = # years dividends were paid out
    
    if len(annual_div) >= year_threshold:
        div_data[symbol] = []
        additionals = {}
        additionals['consecutive_yrs'] = len(annual_div)
        div_data[symbol].append(annual_div)
        div_data[symbol].append(additionals)

## Calculate 5Y, 10Y, 25Y Average Dividend Growth Rate

In [6]:
last_year = dt.datetime.today().year - 1
five_year = last_year - 5
ten_year = last_year - 10
twenty_five_year = last_year - 25

In [7]:
for symbol in div_data.keys():
    rate_change = []
    for year in range(five_year, last_year + 1):
        rate_change.append(100 * (div_data[symbol][0][year] - div_data[symbol][0][year-1])/div_data[symbol][0][year-1])
    five_year_growth_avg = sum(rate_change) / len(rate_change)

    rate_change = []
    for year in range(ten_year, last_year + 1):
        rate_change.append(100 * (div_data[symbol][0][year] - div_data[symbol][0][year-1])/div_data[symbol][0][year-1])
    ten_year_growth_avg = sum(rate_change) / len(rate_change)

    rate_change = []
    for year in range(twenty_five_year, last_year + 1):
        rate_change.append(100 * (div_data[symbol][0][year] - div_data[symbol][0][year-1])/div_data[symbol][0][year-1])
    twenty_five_year_growth_avg = sum(rate_change) / len(rate_change)

    div_data[symbol][1]['5yrsGrowthAvg'] = five_year_growth_avg
    div_data[symbol][1]['10yrsGrowthAvg'] = ten_year_growth_avg
    div_data[symbol][1]['25yrsGrowthAvg'] = twenty_five_year_growth_avg

In [8]:
div_data

{'MMM': [{1970: 0.082,
   1971: 0.115752,
   1972: 0.120376,
   1973: 0.13125,
   1974: 0.1565,
   1975: 0.16875,
   1976: 0.181,
   1977: 0.2125,
   1978: 0.25,
   1979: 0.3,
   1980: 0.35,
   1981: 0.375,
   1982: 0.4,
   1983: 0.4125,
   1984: 0.425,
   1985: 0.4375,
   1986: 0.45,
   1987: 0.465,
   1988: 0.53,
   1989: 0.65,
   1990: 0.73,
   1991: 0.78,
   1992: 0.8,
   1993: 0.83,
   1994: 0.88,
   1995: 0.94,
   1996: 2.16,
   1997: 1.06,
   1998: 1.1,
   1999: 1.12,
   2000: 1.16,
   2001: 1.2,
   2002: 1.24,
   2003: 1.32,
   2004: 1.44,
   2005: 1.68,
   2006: 1.84,
   2007: 1.92,
   2008: 2.0,
   2009: 2.04,
   2010: 2.1,
   2011: 2.2,
   2012: 2.36,
   2013: 2.54,
   2014: 3.42,
   2015: 4.1,
   2016: 4.44,
   2017: 4.7,
   2018: 5.44,
   2019: 5.76,
   2020: 5.88},
  {'consecutive_yrs': 51,
   '5yrsGrowthAvg': 9.623657807321878,
   '10yrsGrowthAvg': 10.45368575314163,
   '25yrsGrowthAvg': 10.340858794164397}],
 'MO': [{1962: 0.006252,
   1963: 0.006252,
   1964: 0.006252,

## Export & Save Dividend Data to JSON

In [9]:
with open('data/historical_div_sp500.json', 'w') as fp:
    json.dump(div_data, fp)