# polygon.io

    - Using API to get stock dividend data
    - Note: the free account is limited to 5 API calls per minute
<!-- link to polygon website-->
[Polygon](https://polygon.io)

## import libraries

In [None]:
import datetime
import os
import time

import pandas as pd
import requests

## get all symbols which are in the dividend dashboard current holdings sheet 

In [None]:
# location of the Dividend Dashboard workbook
# (raw string: backslashes are already literal, so they must not be doubled)
dashboard_path = r'C:\Dividend_Projects\DIVIDEND DASHBOARD\data\Dividend_Dashboard.xlsx'
# read in the current_holdings sheet from the dividend dashboard
dividend_dashboard_df = pd.read_excel(dashboard_path, sheet_name='current_holdings')
# keep only the following columns: Ticker, Shares
dividend_dashboard_df = dividend_dashboard_df[['Ticker', 'Shares']]
dividend_dashboard_df

In [None]:
# save the tickers as a list; blank Ticker cells (NaN) are skipped so they are
# never sent to the API as a symbol
tickers = dividend_dashboard_df['Ticker'].dropna().tolist()
print('Length - ',len(tickers))
print('\n',tickers)
print('\n[ Make sure there are no duplicates ]'.upper())

# make sure the list is unique
# dict.fromkeys keeps the first occurrence of each ticker in sheet order,
# whereas set() would return the tickers in a different order on every kernel start
tickers = list(dict.fromkeys(tickers))
print('\nLength - ',len(tickers))
print('\n',tickers)


In [None]:
'''NOTE:
    - The API key is limited to 5 requests per minute and 1000 requests per day.
    - The key is read from the POLYGON_API_KEY environment variable so that it
      is never stored in (or shared together with) the notebook.
'''
# all_results is a dictionary of dividend records with the key being the ticker
all_results = {}
# tickers whose request failed or whose response held no usable dividend record
symbol_error_lst = []
# polygon.io api key
api = os.environ.get('POLYGON_API_KEY')
if not api:
    raise RuntimeError('Set the POLYGON_API_KEY environment variable before running this cell.')
# seconds to pause between requests: 5 calls per minute needs at least 12,
# 25 leaves a safety margin
sleep = 25
# seconds to wait for a response before the ticker is recorded as an error
request_timeout = 30
dividend_url = 'https://api.polygon.io/v3/reference/dividends'

for position, symbol in enumerate(tickers, start=1):
    try:
        response = requests.get(
            dividend_url,
            params={'ticker': symbol, 'limit': 1, 'sort': 'ex_dividend_date', 'apiKey': api},
            timeout=request_timeout,
        )
        # if the status code is 200 then the request was successful
        if response.status_code == 200:
            # an empty 'results' list raises IndexError and is recorded as an error below
            latest_dividend = response.json()['results'][0]
            # store the record in the all_results dictionary under its ticker
            all_results[latest_dividend['ticker']] = latest_dividend
            print(f'Finished {symbol}')
        else:
            print(f'Error for {symbol}')
            print(f'Status Code: {response.status_code}')
            symbol_error_lst.append(symbol)
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as error:
        # Only the exception type is printed: the message of a request error
        # can contain the full URL, which includes the API key.
        # KeyboardInterrupt is deliberately not caught so the loop can be stopped.
        print(f'Error for {symbol} ({type(error).__name__})')
        symbol_error_lst.append(symbol)

    # pause before the next request, but not after the last ticker
    if position < len(tickers):
        print("\tNext Run Time is:",(datetime.datetime.now() + datetime.timedelta(seconds=sleep)).strftime("%H:%M:%S"))
        time.sleep(sleep)

print('Finished Gathering Data!')
    


In [76]:
# columns to keep from every dividend record, in display order
dividend_columns = ['ticker', 'cash_amount', 'frequency', 'ex_dividend_date', 'pay_date']
# convert the dictionary to a dataframe with one row per ticker
# passing columns= keeps the cell working when all_results is empty or when a
# field (for example pay_date) is missing from the records: the column is
# still created and filled with missing values instead of raising KeyError
dividend_df = pd.DataFrame(list(all_results.values()), columns=dividend_columns)
# convert the date columns to datetime
for date_column in ('ex_dividend_date', 'pay_date'):
    dividend_df[date_column] = pd.to_datetime(dividend_df[date_column])
dividend_df

Unnamed: 0,ticker,cash_amount,frequency,ex_dividend_date,pay_date
0,DALN,0.1600,4,2023-11-09,2023-12-01
1,MSB,0.3500,4,2023-10-27,2023-11-20
2,AGNC,0.1200,12,2023-11-29,2023-12-11
3,JHI,0.2084,4,2023-09-08,2023-09-29
4,BKCC,0.1000,4,2023-12-14,2024-01-08
...,...,...,...,...,...
59,CLM,0.1086,12,2024-03-14,2024-03-29
60,NEWT,0.1800,4,2023-10-09,2023-10-20
61,GPMT,0.2000,4,2023-09-29,2023-10-16
62,HIMX,0.4800,1,2023-06-29,2023-07-12


In [77]:
# drop every holding whose ticker is listed in symbol_error_lst,
# then summarise what is left
had_api_error = dividend_dashboard_df['Ticker'].isin(symbol_error_lst)
dividend_dashboard_df = dividend_dashboard_df[~had_api_error]
dividend_dashboard_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 66 entries, 0 to 67
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Ticker  66 non-null     object 
 1   Shares  66 non-null     float64
dtypes: float64(1), object(1)
memory usage: 1.5+ KB


In [78]:
# list every row of dividend_dashboard_df whose Ticker occurs more than once
repeated_ticker = dividend_dashboard_df.duplicated(subset=['Ticker'], keep=False)
dividend_dashboard_df[repeated_ticker]


Unnamed: 0,Ticker,Shares
25,CLM,10.0
43,CLM,7.0
52,ARR,2.8
53,ARR,1.6


In [79]:
# number of rows that repeat a Ticker already seen earlier in the frame
repeats_after_first = dividend_dashboard_df.duplicated(subset=['Ticker'])
duplicates_count = repeats_after_first.sum()

# every row (first occurrence included) whose Ticker appears more than once
appears_more_than_once = dividend_dashboard_df.duplicated(subset=['Ticker'], keep=False)
duplicates_df = dividend_dashboard_df[appears_more_than_once]

# one row per Ticker, with the remaining columns summed
totals_by_ticker = dividend_dashboard_df.groupby('Ticker').sum()
grouped_df = totals_by_ticker.reset_index()

# report the duplicate count, the duplicated rows and the grouped frame
print(f'''
Duplicates Count: {duplicates_count}\n
Duplicates DataFrame:
{duplicates_df}\n
Grouped DataFrame: 
{grouped_df}\n
''')


Duplicates Count: 2

Duplicates DataFrame:
   Ticker  Shares
25    CLM    10.0
43    CLM     7.0
52    ARR     2.8
53    ARR     1.6

Grouped DataFrame: 
   Ticker  Shares
0    ACRE     7.0
1    AGNC     5.0
2     ARR     4.4
3     AVK     7.0
4    BKCC    25.0
..    ...     ...
59      T     4.0
60    TWO     5.0
61    UAN     1.0
62    VIA     3.0
63    ZTR    12.0

[64 rows x 2 columns]




In [80]:
# summarise both frames, with a blank spacer printed between the two reports
for position, frame in enumerate((grouped_df, dividend_df)):
    if position > 0:
        print('\n')
    frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Ticker  64 non-null     object 
 1   Shares  64 non-null     float64
dtypes: float64(1), object(1)
memory usage: 1.1+ KB


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   ticker            64 non-null     object        
 1   cash_amount       64 non-null     float64       
 2   frequency         64 non-null     int64         
 3   ex_dividend_date  64 non-null     datetime64[ns]
 4   pay_date          64 non-null     datetime64[ns]
dtypes: datetime64[ns](2), float64(1), int64(1), object(1)
memory usage: 2.6+ KB


In [81]:
# merge the grouped holdings (grouped_df) with the dividend records (dividend_df)
# how='left' keeps every row of grouped_df even if no dividend record matches
# validate='one_to_one' raises MergeError if a ticker is repeated on either
# side, instead of silently duplicating rows
merged_data = grouped_df.merge(
    dividend_df,
    how='left',
    left_on='Ticker',
    right_on='ticker',
    validate='one_to_one',
)
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 64 entries, 0 to 63
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Ticker            64 non-null     object        
 1   Shares            64 non-null     float64       
 2   ticker            64 non-null     object        
 3   cash_amount       64 non-null     float64       
 4   frequency         64 non-null     int64         
 5   ex_dividend_date  64 non-null     datetime64[ns]
 6   pay_date          64 non-null     datetime64[ns]
dtypes: datetime64[ns](2), float64(2), int64(1), object(2)
memory usage: 4.0+ KB


In [82]:
# display the merged frame produced by grouped_df.merge(dividend_df, ...)
merged_data

Unnamed: 0,Ticker,Shares,ticker,cash_amount,frequency,ex_dividend_date,pay_date
0,ACRE,7.0,ACRE,0.33000,4,2023-12-28,2024-01-17
1,AGNC,5.0,AGNC,0.12000,12,2023-11-29,2023-12-11
2,ARR,4.4,ARR,0.40000,12,2023-12-14,2023-12-28
3,AVK,7.0,AVK,0.11720,12,2023-11-14,2023-11-30
4,BKCC,25.0,BKCC,0.10000,4,2023-12-14,2024-01-08
...,...,...,...,...,...,...,...
59,T,4.0,T,0.27750,4,2023-10-06,2023-11-01
60,TWO,5.0,TWO,0.45000,4,2023-10-02,2023-10-27
61,UAN,1.0,UAN,1.55000,4,2023-11-10,2023-11-20
62,VIA,3.0,VIA,0.90625,4,2023-02-28,2023-03-15


In [88]:
# midnight at the start of today, so dividends paid today are kept
# (a timestamp with the current time of day would drop them, because pay_date is stored at 00:00)
start_of_today = pd.Timestamp.today().normalize()

# assign() returns a new frame, so re-running this cell never writes into a
# filtered slice and never raises SettingWithCopyWarning
merged_data = (
    merged_data
    .assign(
        # 'next_div_earned' is the product of the 'Shares' and 'cash_amount' columns
        next_div_earned=lambda frame: frame['Shares'].mul(frame['cash_amount']),
        # 'est_yr_yield' is the product of 'next_div_earned' and 'frequency'
        # (a cash amount per year; the column name is kept because it is written to the saved workbook)
        est_yr_yield=lambda frame: frame['next_div_earned'].mul(frame['frequency']),
    )
    # sort by pay_date in ascending order (earliest payment first)
    .sort_values(by='pay_date')
    # filter out any pay_date that is before today's date
    .loc[lambda frame: frame['pay_date'] >= start_of_today]
)
merged_data

Unnamed: 0,Ticker,Shares,ticker,cash_amount,frequency,ex_dividend_date,pay_date,next_div_earned,est_yr_yield
14,DSX,17.0,DSX,0.15,4,2023-11-24,2023-12-04,2.55,10.2
1,AGNC,5.0,AGNC,0.12,12,2023-11-29,2023-12-11,0.6,7.2
30,GOGL,7.0,GOGL,0.1,4,2023-12-05,2023-12-13,0.7,2.8
57,SJT,7.0,SJT,0.01691,12,2023-11-29,2023-12-14,0.11837,1.42044
56,SBLK,4.0,SBLK,0.22,4,2023-12-04,2023-12-18,0.88,3.52
19,EURN,6.0,EURN,0.57,4,2023-12-12,2023-12-20,3.42,13.68
26,GGB,20.0,GGB,0.096166,4,2023-11-20,2023-12-20,1.923312,7.693248
41,LGI,6.0,LGI,0.09339,12,2023-12-11,2023-12-22,0.56034,6.72408
24,GAB,17.0,GAB,0.15,4,2023-12-14,2023-12-22,2.55,10.2
37,IEP,2.0,IEP,1.0,4,2023-11-16,2023-12-27,2.0,8.0


In [89]:
# write merged_data to an Excel workbook, leaving the frame's index out of the sheet
output_file = 'dividend_info.xlsx'
merged_data.to_excel(output_file, index=False)