In [1]:
import time
import pandas as pd
import requests
import json
import concurrent.futures
from decouple import config

# Disable warnings
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [2]:
def get_jsonparsed_data(url):
    """
    Sends a GET request to FMP's Earning Surprise API and returns the resulting data in a dictionary
    """
    # sending get request and saving the response as response object
    response = requests.get(url=url)
    data = json.loads(response.text)
    return data

In [3]:
# Read in nasdaq data and extract symbols column
earnings_df = pd.read_csv("historical_earnings.csv")
# Pull API key from .env file
FMP_API_KEY = config("FMP_API_KEY")
# Gather dates to iterate over
dates = earnings_df["date"]
earnings_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76721 entries, 0 to 76720
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   date          76721 non-null  object 
 1   symbol        76721 non-null  object 
 2   eps           76721 non-null  float64
 3   epsEstimated  76721 non-null  float64
 4   time          76720 non-null  object 
dtypes: float64(2), object(3)
memory usage: 2.9+ MB


In [4]:

# Build a list of url's that we will make API requests to:
url_list = []
for idx, val in enumerate(dates):
    ticker = earnings_df.symbol[idx]
    url = "https://financialmodelingprep.com/api/v3/historical-price-full/{}?from={}&to={}&apikey={}".format(ticker, val, val, FMP_API_KEY)
    url_list.append(url)

In [5]:
# For testing
urls = url_list[:4]

In [6]:
# Call FMP API for each URL using Concurrent library
# Run takes 211 seconds ... be patient
with concurrent.futures.ThreadPoolExecutor() as executor:
    res = [executor.submit(get_jsonparsed_data, url) for url in url_list]
    concurrent.futures.wait(res)

In [7]:
res[1].result()

{'symbol': 'A',
 'historical': [{'date': '2021-08-17',
   'open': 161.729996,
   'high': 161.839996,
   'low': 159.289993,
   'close': 160.910004,
   'adjClose': 160.498489,
   'volume': 2614200.0,
   'unadjustedVolume': 2614200.0,
   'change': -0.81999,
   'changePercent': -0.507,
   'vwap': 160.68,
   'label': 'August   17, 21',
   'changeOverTime': -0.00507}]}

In [20]:
res[7000].result()

{'symbol': 'XYL',
 'historical': [{'date': '2016-02-04',
   'open': 36.060001,
   'high': 38.080002,
   'low': 35.470001,
   'close': 37.080002,
   'adjClose': 34.510849,
   'volume': 3974600.0,
   'unadjustedVolume': 3974600.0,
   'change': 1.02,
   'changePercent': 2.829,
   'vwap': 36.87667,
   'label': 'February 04, 16',
   'changeOverTime': 0.02829}]}

In [22]:
import numpy as np

na_row = {
    'symbol': [np.nan],
    'date': [np.nan],
    'open': [np.nan],
    'high': [np.nan],
    'low': [np.nan],
    'close': [np.nan],
    'adjClose': [np.nan],
    'volume': [np.nan],
    'unadjustedVolume': [np.nan],
    'change': [np.nan],
    'changePercent': [np.nan],
    'vwap': [np.nan],
    'label': [np.nan],
    'changeOverTime': [np.nan]
}

na_df = pd.DataFrame.from_dict(na_row)
print(na_df)

   symbol  date  open  high  low  close  adjClose  volume  unadjustedVolume  \
0     NaN   NaN   NaN   NaN  NaN    NaN       NaN     NaN               NaN   

   change  changePercent  vwap  label  changeOverTime  
0     NaN            NaN   NaN    NaN             NaN  


In [23]:
df = pd.DataFrame()
ve_num = 0
ke_num = 0
for x in range(len(url_list)):
    try:
        res_df = pd.DataFrame.from_records(res[x].result()["historical"])
        stock = pd.DataFrame.from_records(res[x].result())["symbol"]
        res_df.insert(0, "symbol", stock)
        df = pd.concat([df, res_df])
    # If value error occurs skip the stock
    except ValueError as ve:
        ve_num += 1
        print(ve)
        pass
    except KeyError as ke:
        ke_num += 1
        df = pd.concat([df, na_df])
        print("KeyError at index: {}, occurence number: {}".format(x, ke))
print("There were {} stocks skipped.".format(ve_num))

KeyError at index: 244, occurence number: 'historical'
KeyError at index: 1193, occurence number: 'historical'
KeyError at index: 1281, occurence number: 'historical'
KeyError at index: 1572, occurence number: 'historical'
KeyError at index: 1855, occurence number: 'historical'
KeyError at index: 2153, occurence number: 'historical'
KeyError at index: 2274, occurence number: 'historical'
KeyError at index: 2275, occurence number: 'historical'
KeyError at index: 2276, occurence number: 'historical'
KeyError at index: 2277, occurence number: 'historical'
KeyError at index: 2278, occurence number: 'historical'
KeyError at index: 2279, occurence number: 'historical'
KeyError at index: 2280, occurence number: 'historical'
KeyError at index: 2281, occurence number: 'historical'
KeyError at index: 2282, occurence number: 'historical'
KeyError at index: 2283, occurence number: 'historical'
KeyError at index: 2284, occurence number: 'historical'
KeyError at index: 2285, occurence number: 'histo

In [24]:
print(ke_num)

682


In [25]:
print(df)

   symbol        date        open        high         low       close  \
0       A  2021-11-22  165.000000  165.679993  162.779999  162.779999   
0       A  2021-08-17  161.729996  161.839996  159.289993  160.910004   
0       A  2021-05-25  133.410004  134.800003  133.009995  133.229996   
0       A  2021-02-16  129.309998  131.080002  127.849998  127.949997   
0       A  2020-11-23  111.400002  112.419998  109.559998  112.209999   
..    ...         ...         ...         ...         ...         ...   
0     ZUO  2019-03-21   23.820000   24.434000   23.680000   24.330000   
0     ZUO  2018-11-29   20.240000   20.690001   19.709999   20.559999   
0     ZUO  2018-08-30   33.419998   34.750000   32.014999   34.009998   
0     ZUO  2018-05-31   21.840000   22.959999   21.670000   22.180000   
0     ZWS  2021-10-26   36.490000   36.850000   35.910000   36.110000   

      adjClose     volume  unadjustedVolume   change  changePercent  \
0   162.565887  2110400.0         2110400.0 -2.22000

In [26]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 76721 entries, 0 to 0
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   symbol            76039 non-null  object 
 1   date              76039 non-null  object 
 2   open              76039 non-null  float64
 3   high              76039 non-null  float64
 4   low               76039 non-null  float64
 5   close             76039 non-null  float64
 6   adjClose          76039 non-null  float64
 7   volume            75956 non-null  float64
 8   unadjustedVolume  75956 non-null  float64
 9   change            76039 non-null  float64
 10  changePercent     76039 non-null  float64
 11  vwap              75956 non-null  float64
 12  label             76039 non-null  object 
 13  changeOverTime    76039 non-null  float64
dtypes: float64(11), object(3)
memory usage: 8.8+ MB


In [None]:
df.to_csv("OUTPUT.csv", index=False)