In [1]:
import time
import pandas as pd
import requests
import json
import concurrent.futures
from decouple import config

# Silence urllib3's InsecureRequestWarning for the rest of this kernel session.
# NOTE(review): no request visible in this notebook passes verify=False, so this
# suppression may be unnecessary — confirm before relying on it.
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [2]:
def get_jsonparsed_data(url, timeout=30):
    """
    Send a GET request to ``url`` and return the decoded JSON body.

    Parameters
    ----------
    url : str
        Fully formed endpoint URL, including any query string.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Defaults to 30 so that a stalled connection cannot block the calling
        thread indefinitely.

    Returns
    -------
    list or dict
        Whatever the JSON document decodes to.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    ValueError
        If the response body is not valid JSON.

    Notes
    -----
    The HTTP status code is not checked: the body of an error response is
    decoded and returned like any other payload, so callers must validate
    the shape of the result themselves.
    """
    # Without an explicit timeout, requests waits forever on a silent server.
    response = requests.get(url=url, timeout=timeout)
    return response.json()

In [3]:
# Pull the FMP API key from the .env file (python-decouple's config)
FMP_API_KEY = config("FMP_API_KEY")

# Load the NASDAQ listing from CSV and summarise its columns
nasdaq_list = pd.read_csv("nasdaq_list.csv")
nasdaq_list.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8202 entries, 0 to 8201
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Symbol      8202 non-null   object 
 1   Name        8202 non-null   object 
 2   Market Cap  7729 non-null   float64
 3   Country     7458 non-null   object 
 4   IPO Year    4820 non-null   float64
 5   Sector      6178 non-null   object 
 6   Industry    6195 non-null   object 
dtypes: float64(2), object(5)
memory usage: 448.7+ KB


In [4]:
# Keep only the rows whose "Market Cap" is above one billion
# (rows with a missing market cap compare as False and are dropped too)
is_large_cap = nasdaq_list["Market Cap"].gt(1_000_000_000)
filtered_df = nasdaq_list.loc[is_large_cap]
print(filtered_df)

     Symbol                                               Name    Market Cap  \
0         A             Agilent Technologies Inc. Common Stock  4.520558e+10   
1        AA                    Alcoa Corporation Common Stock   8.245636e+09   
2       AAC  Ares Acquisition Corporation Class A Ordinary ...  1.223750e+09   
12      AAL          American Airlines Group Inc. Common Stock  1.076169e+10   
16     AAON                             AAON Inc. Common Stock  4.169077e+09   
...     ...                                                ...           ...   
8187    ZTO  ZTO Express (Cayman) Inc. American Depositary ...  2.333182e+10   
8189    ZTS                   Zoetis Inc. Class A Common Stock  1.056630e+11   
8190   ZUMZ                           Zumiez Inc. Common Stock  1.144194e+09   
8191    ZUO                    Zuora Inc. Class A Common Stock  2.382352e+09   
8197    ZWS      Zurn Water Solutions Corporation Common Stock  4.301840e+09   

            Country  IPO Year          

In [5]:
# Build one historical earning-calendar request URL per filtered ticker symbol
symbols = filtered_df["Symbol"]

endpoint = "https://financialmodelingprep.com/api/v3/historical/earning_calendar/{}?limit=80&apikey={}"
url_list = [endpoint.format(ticker, FMP_API_KEY) for ticker in symbols]

In [6]:
# Fetch every URL through a thread pool; `res` keeps one Future per URL,
# in the same order as url_list.
# The original author measured roughly 211 seconds for this cell ... be patient
with concurrent.futures.ThreadPoolExecutor() as pool:
    res = []
    for endpoint_url in url_list:
        res.append(pool.submit(get_jsonparsed_data, endpoint_url))
    # Block here until every submitted request has finished
    concurrent.futures.wait(res)

In [7]:
# Spot-check one response: the parsed JSON returned by the fourth submitted request
res[3].result()

[{'date': '2022-01-26',
  'symbol': 'AAL',
  'eps': None,
  'epsEstimated': -1.76,
  'time': 'bmo',
  'revenue': 0,
  'revenueEstimated': 9412170000},
 {'date': '2021-10-21',
  'symbol': 'AAL',
  'eps': -0.99,
  'epsEstimated': -1.04,
  'time': 'bmo',
  'revenue': 8969000000,
  'revenueEstimated': 9412170000},
 {'date': '2021-07-22',
  'symbol': 'AAL',
  'eps': -1.69,
  'epsEstimated': -1.71,
  'time': 'bmo',
  'revenue': 7478000000,
  'revenueEstimated': 9478960000},
 {'date': '2021-04-22',
  'symbol': 'AAL',
  'eps': -4.32,
  'epsEstimated': -4.3,
  'time': 'bmo',
  'revenue': 4008000000,
  'revenueEstimated': 4049410000},
 {'date': '2021-01-28',
  'symbol': 'AAL',
  'eps': -3.86,
  'epsEstimated': -4.11,
  'time': 'bmo',
  'revenue': 4028000000,
  'revenueEstimated': 3874850000},
 {'date': '2020-10-22',
  'symbol': 'AAL',
  'eps': -5.54,
  'epsEstimated': -5.88,
  'time': 'bmo',
  'revenue': 3173000000,
  'revenueEstimated': 3367732851.9855595},
 {'date': '2020-07-23',
  'symbol': '

In [19]:
# Combine the parsed API payloads into one DataFrame.
# Frames are gathered in a list and concatenated once at the end; calling
# pd.concat inside the loop re-copies the accumulated rows on every iteration.
frames = []
ve_num = 0  # number of responses that could not be converted into a table
# Iterate the futures directly so the loop cannot drift out of step with `res`
for future in res:
    try:
        res_df = pd.DataFrame.from_records(future.result())
    # If value error occurs skip the stock
    except ValueError:
        ve_num += 1
        continue
    # An empty payload contributes no rows, so it is left out of the concat
    if not res_df.empty:
        frames.append(res_df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
df = pd.concat(frames) if frames else pd.DataFrame()
print("There were {} stocks skipped.".format(ve_num))

In [20]:
# Preview the combined earnings table (pandas abbreviates the printed rows)
print(df)

          date symbol   eps epsEstimated time       revenue   revenueEstimated
0   2021-11-22      A  1.21         1.17  amc  1.660000e+09       1461300000.0
1   2021-08-17      A   1.1         0.99  amc  1.586000e+09       1659210000.0
2   2021-05-25      A  0.71         0.83  amc  1.525000e+09  1745481927.710843
3   2021-02-16      A  0.94         0.89  amc  1.548000e+09  1461033707.865169
4   2020-11-23      A  0.72         0.93  amc  1.483000e+09  1817870967.741935
..         ...    ...   ...          ...  ...           ...                ...
12  2018-11-29    ZUO  -0.1        -0.13  amc  6.163700e+07         80128100.0
13  2018-08-30    ZUO -0.13        -0.16  amc  5.775400e+07    71081846.153846
14  2018-05-31    ZUO -0.32         -0.4  amc  5.174400e+07         62092800.0
15  2018-04-12    ZUO   NaN          NaN  bmo  0.000000e+00                0.0
0   2021-10-26    ZWS  0.55         0.26  bmo  5.572000e+08               None

[133881 rows x 7 columns]


In [21]:
# Summarise column dtypes and non-null counts of the combined table
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 133881 entries, 0 to 0
Data columns (total 7 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   date              133881 non-null  object 
 1   symbol            133881 non-null  object 
 2   eps               129630 non-null  object 
 3   epsEstimated      130399 non-null  object 
 4   time              133881 non-null  object 
 5   revenue           133881 non-null  float64
 6   revenueEstimated  133036 non-null  object 
dtypes: float64(1), object(6)
memory usage: 8.2+ MB


In [23]:
# Filter earnings: keep only the rows dated after 2012-01-01
report_dates = pd.to_datetime(df["date"])
is_recent = report_dates > "2012-01-01"
filtered_earnings = df.loc[is_recent]
filtered_earnings.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 81230 entries, 0 to 0
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   date              81230 non-null  object 
 1   symbol            81230 non-null  object 
 2   eps               77194 non-null  object 
 3   epsEstimated      77772 non-null  object 
 4   time              81230 non-null  object 
 5   revenue           81230 non-null  float64
 6   revenueEstimated  80385 non-null  object 
dtypes: float64(1), object(6)
memory usage: 5.0+ MB


In [24]:
# Remove the two revenue columns; only the EPS fields are kept from here on
revenue_columns = ["revenue", "revenueEstimated"]
filtered_earnings = filtered_earnings.drop(columns=revenue_columns)
filtered_earnings.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 81230 entries, 0 to 0
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   date          81230 non-null  object
 1   symbol        81230 non-null  object
 2   eps           77194 non-null  object
 3   epsEstimated  77772 non-null  object
 4   time          81230 non-null  object
dtypes: object(5)
memory usage: 3.7+ MB


In [25]:
# Discard every row that has a missing value in any remaining column
filtered_earnings = filtered_earnings.dropna(axis="index", how="any")
filtered_earnings.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 76721 entries, 0 to 0
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   date          76721 non-null  object
 1   symbol        76721 non-null  object
 2   eps           76721 non-null  object
 3   epsEstimated  76721 non-null  object
 4   time          76721 non-null  object
dtypes: object(5)
memory usage: 3.5+ MB


In [27]:
# Preview the cleaned table. Only the first rows are printed: calling
# to_string() on the whole frame writes every row into the notebook output.
PREVIEW_ROWS = 20
print(filtered_earnings.head(PREVIEW_ROWS).to_string())

          date symbol        eps epsEstimated time
0   2021-11-22      A       1.21         1.17  amc
1   2021-08-17      A        1.1         0.99  amc
2   2021-05-25      A       0.71         0.83  amc
3   2021-02-16      A       0.94         0.89  amc
4   2020-11-23      A       0.72         0.93  amc
5   2020-08-18      A   0.644013         0.66  amc
6   2020-05-21      A       0.33         0.61  amc
7   2020-02-18      A       0.64         0.81  amc
8   2019-11-25      A       0.63         0.86  amc
9   2019-08-14      A       0.61         0.72  amc
10  2019-05-14      A   0.574132         0.72  amc
11  2019-02-20      A       1.58         0.73  amc
12  2018-11-19      A   0.611285         0.74  amc
13  2018-08-14      A       0.74         0.63  amc
14  2018-05-14      A       0.64         0.64  amc
15  2018-02-14      A      -0.99         0.58  amc
16  2017-11-20      A   0.551402         0.62  amc
17  2017-08-15      A       0.55         0.52  amc
18  2017-05-22      A       0.5

In [28]:
# Persist the cleaned earnings table as CSV; index=False leaves the row index out of the file
filtered_earnings.to_csv("historical_earnings.csv", index=False)