# Get Historical Market Cap Data

https://site.financialmodelingprep.com/developer/docs#historical-market-cap-company-information

format: https://financialmodelingprep.com/api/v3/historical-market-capitalization/AAPL?limit=100&from=2023-10-10&to=2023-12-10

Each query can cover at most 5 years, so longer date ranges must be split across several requests.

In [5]:
import pandas as pd
import requests

# Read the combined credit-rating table from disk
df = pd.read_csv("~/Box/STAT 222 Capstone/Intermediate Data/Credit_Rating/combined_credit_rating_data.csv")

# Unique values of the "Symbol" column
distinct_symbols = pd.unique(df["Symbol"])

# Same symbols as a plain Python list (not a pandas object)
distinct_symbols_list = distinct_symbols.tolist()

# Report how many distinct symbols were found
print(len(distinct_symbols_list))

886


In [6]:
# [start, end] date windows; every symbol gets one request per window
group_1_dates = ["2010-01-01", "2013-12-31"]
group_2_dates = ["2014-01-01", "2016-12-31"]

# One row per symbol for the first window
df1 = pd.DataFrame(distinct_symbols_list, columns=["Symbol"]).assign(
    Start_Date=group_1_dates[0],
    End_Date=group_1_dates[1],
)

# One row per symbol for the second window
df2 = pd.DataFrame(distinct_symbols_list, columns=["Symbol"]).assign(
    Start_Date=group_2_dates[0],
    End_Date=group_2_dates[1],
)

# Stack both windows, order by symbol then window start, and renumber the rows
api_requests_df = pd.concat([df1, df2])
api_requests_df = api_requests_df.sort_values(by=["Symbol", "Start_Date"])
api_requests_df = api_requests_df.reset_index(drop=True)
api_requests_df


Unnamed: 0,Symbol,Start_Date,End_Date
0,AA,2010-01-01,2013-12-31
1,AA,2014-01-01,2016-12-31
2,AAL,2010-01-01,2013-12-31
3,AAL,2014-01-01,2016-12-31
4,AAP,2010-01-01,2013-12-31
...,...,...,...
1767,YUM,2014-01-01,2016-12-31
1768,ZBRA,2010-01-01,2013-12-31
1769,ZBRA,2014-01-01,2016-12-31
1770,ZTS,2010-01-01,2013-12-31


In [7]:
# API key for accessing the financial data
# NOTE(review): this key is stored in plain text in the notebook. Consider
# rotating it and loading it from an environment variable or an untracked
# config file instead of committing it.
api_key = "GzzXB8zIm8u1NJzcFKZNOwcOyxarZEmi"

# Function to fetch market cap data during period for company
def fetch_market_cap(company, start_date, end_date):
    """Fetch historical market-cap data for one company over one date window.

    Args:
        company: Ticker symbol placed in the request path (e.g. "AAPL").
        start_date: Window start, passed as the ``from`` query parameter
            (the notebook uses "YYYY-MM-DD" strings).
        end_date: Window end, passed as the ``to`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    response = requests.get(
        f"https://financialmodelingprep.com/api/v3/historical-market-capitalization/{company}",
        # Let requests build and encode the query string.
        params={"from": start_date, "to": end_date, "apikey": api_key},
        # Without a timeout a single stalled connection blocks the run forever.
        timeout=30,
    )
    # Fail loudly on HTTP errors instead of handing an error body to the caller.
    response.raise_for_status()
    return response.json()

# tqdm for progress bar
#from tqdm.notebook import tqdm

In [8]:
# Rough runtime estimate for the full download:
# time a single request, then scale by the number of rows in api_requests_df
import time

start_time = time.time()
test = fetch_market_cap('AAPL', '2010-01-01', '2013-12-31')
end_time = time.time()

# Seconds per request * number of requests, expressed in minutes
print(len(api_requests_df) * (end_time - start_time) / 60)

21.977516428629556


In [9]:
# Iterate over the request table, one API call per (symbol, date window) row
# May need to break this up into smaller chunks to avoid API rate limits
market_cap_data = []  # Records collected from every successful request
failed_requests = []  # (symbol, start, end, payload) for responses that were not a list
for row in api_requests_df.itertuples(index=False):

    # Fetch market cap data
    company = row.Symbol
    start_date = row.Start_Date
    end_date = row.End_Date
    market_cap = fetch_market_cap(company, start_date, end_date)

    # list.extend() on a dict would silently append its keys as strings, so a
    # non-list payload (e.g. a JSON error object) is recorded and skipped
    # instead of being mixed into the data.
    if not isinstance(market_cap, list):
        failed_requests.append((company, start_date, end_date, market_cap))
        continue

    # Append fetched data to list
    market_cap_data.extend(market_cap)

# Surface skipped requests so they can be retried
if failed_requests:
    print(f"{len(failed_requests)} request(s) did not return a list of records; see failed_requests")

# Combine data into a single DataFrame
market_cap_data = pd.DataFrame(market_cap_data)
market_cap_data

Unnamed: 0,symbol,date,marketCap
0,AA,2013-12-31,4.640618e+09
1,AA,2013-12-30,4.597010e+09
2,AA,2013-12-27,4.667873e+09
3,AA,2013-12-26,4.553402e+09
4,AA,2013-12-24,4.524330e+09
...,...,...,...
1282801,ZTS,2014-01-08,1.587733e+10
1282802,ZTS,2014-01-07,1.605742e+10
1282803,ZTS,2014-01-06,1.599739e+10
1282804,ZTS,2014-01-03,1.603240e+10


In [10]:
# Save parquet file
# Raw string: in a normal literal the backslashes form invalid escape
# sequences (\B, \S, \R, \M, \m), which Python warns about. The resulting
# path value is unchanged.
market_cap_data.to_parquet(r"~\Box\STAT 222 Capstone\Raw Data\Market Cap Data\market_cap_data.parquet")

  market_cap_data.to_parquet("~\Box\STAT 222 Capstone\Raw Data\Market Cap Data\market_cap_data.parquet")
