In [13]:
import csv
import io
import os
import re

import pandas as pd
import requests

In [45]:
# Base URL of the Financial Modeling Prep REST API.
FMP_API_ENDPOINT = "https://financialmodelingprep.com/api"
# The API key is a credential and must not be stored in the notebook: it is read
# from the FMP_API_KEY environment variable (empty string when the variable is
# unset). NOTE(review): the key that used to be hardcoded here has been exposed
# and should be revoked.
FMP_API_KEY = os.environ.get("FMP_API_KEY", "")

In [46]:
def process_fmp_financial_statements(file_path):
    """Reshape a raw statement CSV so that rows are periods and columns are metrics.

    The file at ``file_path`` must contain a ``date`` column and an unnamed
    second column (read by pandas as ``Unnamed: 1``) holding the metric names;
    every remaining column is treated as one period.

    Rows without a metric name are discarded and the ``date`` column is
    dropped. The table is then transposed, any ``_Q1``..``_Q4`` marker is
    removed from the period labels, and the rows are sorted by label in
    descending order.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the raw CSV.

    Returns:
        A DataFrame indexed by period label with one column per metric.
    """
    quarter_tag = re.compile(r"_Q[1-4]")

    statements = pd.read_csv(file_path).rename(columns={"Unnamed: 1": "metric"})
    has_metric = statements["metric"].notna()

    # The metric column temporarily takes the name "date" so that, after the
    # transpose, the column axis carries that name.
    by_period = (
        statements[has_metric]
        .drop(columns="date")
        .rename(columns={"metric": "date"})
        .set_index("date")
        .T
    )
    by_period.index = by_period.index.map(lambda label: quarter_tag.sub("", label))
    return by_period.sort_index(ascending=False)


In [59]:
def _write_rows_with_data(csv_text, path):
    """Write to ``path`` only the CSV rows of ``csv_text`` whose third field is non-empty."""
    # newline="" is required by the csv module for files handed to csv.writer;
    # without it every row ends in "\r\r\n" on Windows.
    with open(path, "w", encoding="utf-8", newline="") as f:
        # Parsing from a StringIO (instead of str.splitlines()) keeps quoted
        # fields that contain line breaks or other Unicode line separators intact.
        reader = csv.reader(io.StringIO(csv_text, newline=""))
        csv.writer(f).writerows(
            row for row in reader if len(row) >= 3 and row[2]
        )


def pull_fmp_financial_statements(stock_symbols, output_dir, timeout=30):
    """Download FMP statements for each symbol and merge them into one CSV per symbol.

    For every symbol the income statement, balance sheet and cash-flow
    statement are requested as CSV (``period=quarter``, ``limit=1``) from
    ``{FMP_API_ENDPOINT}/v3/<statement-type>/<symbol>``. Each response is
    filtered down to rows with a non-empty third field and saved as
    ``<output_dir>/<symbol>-<statement-type>-raw.csv``. That file is reshaped
    with ``process_fmp_financial_statements`` and the three frames are
    outer-joined on their index; columns that already exist in the merged frame
    get the suffix ``_<statement-type>``. The merged frame is sorted by index
    in descending order and written to ``<output_dir>/<symbol>-all.csv``.

    Args:
        stock_symbols: Iterable of ticker strings. Any "/" is replaced by "-"
            before the symbol is used in the URL and in file names.
        output_dir: Directory the CSV files are written to. It is not created
            by this function.
        timeout: Value passed to ``requests`` as the per-request timeout
            (seconds), so a stalled connection cannot block forever.

    Raises:
        RuntimeError: If a request returns an HTTP status other than 200.
    """
    statement_types = ("income-statement", "balance-sheet-statement", "cash-flow-statement")

    # The context manager closes the session (and its pooled connections) even
    # when a request or the processing below raises.
    with requests.Session() as session:
        for raw_sym in stock_symbols:
            sym = raw_sym.replace("/", "-")
            merged_df = None
            for stmt_type in statement_types:
                resp = session.get(
                    url=f"{FMP_API_ENDPOINT}/v3/{stmt_type}/{sym}",
                    params={
                        "apikey": FMP_API_KEY,
                        "period": "quarter",
                        "limit": 1,
                        "datatype": "csv",
                    },
                    timeout=timeout,
                )
                if resp.status_code != 200:
                    raise RuntimeError(f"http status is {resp.status_code}")

                raw_file_path = f"{output_dir}/{sym}-{stmt_type}-raw.csv"
                _write_rows_with_data(resp.content.decode("utf-8"), raw_file_path)
                df = process_fmp_financial_statements(raw_file_path)

                if merged_df is None:
                    merged_df = df
                else:
                    merged_df = merged_df.join(other=df, how="outer", rsuffix=f"_{stmt_type}")

            merged_df = merged_df.sort_index(ascending=False)
            merged_df.to_csv(f"{output_dir}/{sym}-all.csv", index=True)

In [60]:
# Download and merge the statements for AAPL into the shared output folder.
output_dir = "../../output"
pull_fmp_financial_statements(stock_symbols=["AAPL"], output_dir=output_dir)

# To re-run only the reshaping step on an already downloaded raw file:
# process_fmp_financial_statements(file_path="../../output/AAPL-balance-sheet-statement-raw.csv")

In [62]:
# Reload the merged AAPL CSV (first column as the index) and show its column labels.
df = pd.read_csv(filepath_or_buffer="../../output/AAPL-all.csv", index_col=0)
df.columns

Index(['revenue', 'costOfRevenue', 'grossProfit', 'grossProfitRatio',
       'ResearchAndDevelopmentExpenses', 'GeneralAndAdministrativeExpenses',
       'SellingAndMarketingExpenses', 'otherExpenses', 'operatingExpenses',
       'costAndExpenses', 'interestExpense', 'depreciationAndAmortization',
       'EBITDA', 'EBITDARatio', 'operatingIncome', 'operatingIncomeRatio',
       'totalOtherIncomeExpensesNet', 'incomeBeforeTax',
       'incomeBeforeTaxRatio', 'incomeTaxExpense', 'netIncome',
       'netIncomeRatio', 'EPS', 'EPSDiluted', 'weightedAverageShsOut',
       'weightedAverageShsOutDil', 'cashAndCashEquivalents',
       'shortTermInvestments', 'cashAndShortTermInvestments', 'netReceivables',
       'inventory', 'otherCurrentAssets', 'totalCurrentAssets',
       'propertyPlantEquipmentNet', 'goodwill', 'intangibleAssets',
       'goodwillAndIntangibleAssets', 'longTermInvestments', 'taxAssets',
       'otherNonCurrentAssets', 'totalNonCurrentAssets', 'otherAssets',
       'totalAs