In [1]:
import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import requests

In [2]:
# Keys
# To get an API key, create an account on the data provider's website: https://site.financialmodelingprep.com
# Then paste the key over every `__PASTE_KEY_HERE__` placeholder in the cells below.

## Data Staging

In [3]:
def get_income_statement(ticker):
    """Download the annual income statements for ``ticker``.

    Args:
        ticker: Symbol that is appended to the endpoint path.

    Returns:
        The decoded JSON body of the response (expected to be a list with
        one dict per report).

    Raises:
        RuntimeError: If the API answers with a non-success HTTP status.
        requests.exceptions.RequestException: On connection problems or when
            the 30-second timeout expires.
    """
    url = "https://financialmodelingprep.com/api/v3/income-statement/" + ticker + "?period=annual&apikey=__PASTE_KEY_HERE__"
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=30)
    print(f"Income statement response: {response} for {ticker}")
    if not response.ok:
        # Stop here instead of returning an error payload that only breaks
        # later; the URL is left out of the message because it embeds the key.
        raise RuntimeError(
            f"Income statement request for {ticker} failed with HTTP {response.status_code}"
        )

    return response.json()

In [4]:
def get_balance_sheet(ticker):
    """Download the annual balance sheet statements for ``ticker``.

    Args:
        ticker: Symbol that is appended to the endpoint path.

    Returns:
        The decoded JSON body of the response (expected to be a list with
        one dict per report).

    Raises:
        RuntimeError: If the API answers with a non-success HTTP status.
        requests.exceptions.RequestException: On connection problems or when
            the 30-second timeout expires.
    """
    url = "https://financialmodelingprep.com/api/v3/balance-sheet-statement/" + ticker + "?period=annual&apikey=__PASTE_KEY_HERE__"
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=30)
    print(f"Balance sheet response: {response} for {ticker}")
    if not response.ok:
        # Stop here instead of returning an error payload that only breaks
        # later; the URL is left out of the message because it embeds the key.
        raise RuntimeError(
            f"Balance sheet request for {ticker} failed with HTTP {response.status_code}"
        )

    return response.json()

In [5]:
def get_cashflow_statement(ticker):
    """Download the annual cash-flow statements for ``ticker``.

    Args:
        ticker: Symbol that is appended to the endpoint path.

    Returns:
        The decoded JSON body of the response (expected to be a list with
        one dict per report).

    Raises:
        RuntimeError: If the API answers with a non-success HTTP status.
        requests.exceptions.RequestException: On connection problems or when
            the 30-second timeout expires.
    """
    url = "https://financialmodelingprep.com/api/v3/cash-flow-statement/" + ticker + "?period=annual&apikey=__PASTE_KEY_HERE__"
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=30)
    print(f"Cashflow statement response: {response} for {ticker}")
    if not response.ok:
        # Stop here instead of returning an error payload that only breaks
        # later; the URL is left out of the message because it embeds the key.
        raise RuntimeError(
            f"Cashflow statement request for {ticker} failed with HTTP {response.status_code}"
        )

    return response.json()

In [6]:
def get_market_cap2(date_list, ticker, max_lookback_days=10):
    """Collect one market-cap record per report date.

    For every date the single-day history endpoint is queried. When it
    returns nothing for that day, the query is repeated for the previous
    calendar day, up to ``max_lookback_days`` days back.

    Args:
        date_list: Report dates as ``YYYY-MM-DD`` strings.
        ticker: Symbol that is appended to the endpoint path.
        max_lookback_days: How many earlier calendar days may be tried for a
            date that has no data before giving up.

    Returns:
        A list with the records returned by the API, in ``date_list`` order.
        The records carry the day that actually had data, which may be
        earlier than the requested one.

    Raises:
        ValueError: If a date is not an ISO date string, or if no data is
            found within the lookback window.
        RuntimeError: If the API answers with a non-success HTTP status.
        requests.exceptions.RequestException: On connection problems or when
            the 30-second timeout expires.
    """
    marketCap = []
    response = None  # stays None when date_list is empty, so the final print still works
    for report_date in date_list:
        # Real date arithmetic: editing the characters of the string cannot
        # cross a month boundary and produces invalid dates such as "...-00".
        day = datetime.date.fromisoformat(report_date)
        for _ in range(max_lookback_days + 1):
            stamp = day.isoformat()
            url = "https://financialmodelingprep.com/api/v3/historical-market-capitalization/" + ticker + "?from=" + stamp + "&to=" + stamp + "&apikey=__PASTE_KEY_HERE__"
            response = requests.get(url, timeout=30)
            if not response.ok:
                # The URL is left out of the message because it embeds the key.
                raise RuntimeError(
                    f"Market cap request for {ticker} on {stamp} failed with HTTP {response.status_code}"
                )
            dailyCap = response.json()
            if dailyCap:
                marketCap += dailyCap
                break
            day -= datetime.timedelta(days=1)
        else:
            # Bounded on purpose: an unlimited walk back would issue requests
            # forever for a date that lies before the available history.
            raise ValueError(
                f"No market cap found for {ticker} within {max_lookback_days} days before {report_date}"
            )

    print(f"Market cap processing response: {response} for {ticker}")
    return marketCap

#### Aggregate function

In [7]:
def get_reports(ticker):
    """Fetch the three statements for ``ticker`` plus a market cap per report date.

    Args:
        ticker: Symbol passed on to each of the fetch functions.

    Returns:
        A tuple ``(income, balance, cashflow, marketCap)``.

    Raises:
        ValueError: If the three statements do not list the same dates in the
            same order.
    """
    income = get_income_statement(ticker)
    balance = get_balance_sheet(ticker)
    cashflow = get_cashflow_statement(ticker)

    date_list = [report["date"] for report in cashflow]
    income_dates = [report["date"] for report in income]
    balance_dates = [report["date"] for report in balance]
    if not (income_dates == balance_dates == date_list):
        # The statements are combined by position afterwards, so mismatched
        # dates must stop the run here with an explicit error.
        raise ValueError(f"Dates on reports don't match for {ticker}!")
    marketCap = get_market_cap2(date_list, ticker)

    return income, balance, cashflow, marketCap

## Data Enrichment

In [8]:
def make_dataframe(income, balance, cashflow, marketCap):
    """Merge the per-report dicts of all four sources into one DataFrame.

    Row ``i`` is the union of ``income[i]``, ``balance[i]``, ``cashflow[i]``
    and ``marketCap[i]``; when a key occurs in several of them the rightmost
    source wins. The number of rows is ``len(cashflow)``.

    Args:
        income: Sequence of dicts, one per report.
        balance: Sequence of dicts, one per report.
        cashflow: Sequence of dicts, one per report; its length sets the row count.
        marketCap: Sequence of dicts, one per report.

    Returns:
        A DataFrame indexed 0..n-1, or an empty DataFrame when ``cashflow``
        is empty.

    Raises:
        IndexError: If any of the other sequences is shorter than ``cashflow``.
    """
    # Build all records first and construct the frame once; concatenating a
    # one-row frame per report copies the accumulated data on every iteration.
    records = [
        income[i] | balance[i] | cashflow[i] | marketCap[i]
        for i in range(len(cashflow))
    ]

    return pd.DataFrame(records)

In [9]:
def remove_statics(df, keys=["reportedCurrency", "cik", "fillingDate", "acceptedDate", "calendarYear", "period", "link", "finalLink"]):
    """Delete the listed columns from ``df`` in place and return ``df``.

    Names in ``keys`` that are not columns of ``df`` are ignored.
    """
    for column in keys:
        if column not in df.columns:
            continue
        del df[column]

    return df

In [10]:
def get_target(df):
    """Add a ``target`` column to ``df`` in place and return ``df``.

    For every index label ``i`` other than 0, ``target`` is 1 when the
    ``marketCap`` stored under label ``i - 1`` is greater than the one under
    label ``i``, and 0 otherwise. Label 0 always gets 0.
    """
    for label in df.index:
        is_greater = False
        if label != 0:
            # Lookups are by index label, so the labels must be consecutive integers.
            is_greater = df["marketCap"][label - 1] > df["marketCap"][label]
        df.at[label, "target"] = 1 if is_greater else 0

    return df

In [11]:
def drop_oldest_date(df):
    """Remove the first row of ``df`` in place and return ``df``.

    NOTE(review): the name says "oldest", but the code drops whatever row
    comes first; confirm the row order of the incoming frame.
    """
    leading_label = df.index[:1]
    df.drop(index=leading_label, inplace=True)

    return df

#### Aggregate function

In [12]:
def enrich_report(income, balance, cashflow, marketCap):
    """Build the combined frame and run it through the enrichment steps.

    The steps run in this order: ``make_dataframe``, ``remove_statics``,
    ``get_target``, ``drop_oldest_date``. The result of the last step is
    returned.
    """
    report = make_dataframe(income, balance, cashflow, marketCap)
    for step in (remove_statics, get_target, drop_oldest_date):
        report = step(report)

    return report

## Data Processing

In [13]:
def prepare_report(ticker):
    """Fetch all reports for ``ticker`` and return the enriched DataFrame.

    Prints "Report prepared" once the frame has been built.
    """
    income_rows, balance_rows, cashflow_rows, cap_rows = get_reports(ticker)
    report = enrich_report(income_rows, balance_rows, cashflow_rows, cap_rows)
    print("Report prepared")

    return report

In [14]:
def create_parquet():
    """Create ``training_data.parquet`` holding an empty DataFrame if it is missing.

    Nothing happens when a regular file with that name already exists in the
    current working directory.
    """
    filename = "training_data.parquet"
    if Path(filename).is_file():
        return
    pd.DataFrame().to_parquet(filename)

In [15]:
def join_and_export(df):
    """Append ``df`` to the rows stored in ``training_data.parquet`` and rewrite the file.

    The file is created first when it does not exist yet. Every call appends;
    nothing is de-duplicated, so exporting the same report twice stores its
    rows twice.

    Args:
        df: DataFrame whose rows are added after the stored ones.

    Returns:
        The combined DataFrame, re-indexed from 0, exactly as written to disk.
    """
    create_parquet()
    # Always start from what is on disk. Probing for an existing
    # `training_data` variable cannot work here: the name is assigned in this
    # function, which makes it local and therefore unbound at this point.
    training_data = pd.read_parquet("training_data.parquet")

    training_data = pd.concat([training_data, df]).reset_index(drop=True)
    training_data.to_parquet("training_data.parquet")

    return training_data

In [16]:
def prepare_and_export_report(ticker):
    """Prepare the report for ``ticker``, append it to the stored data and return the result.

    The return value is whatever ``join_and_export`` returns for the prepared
    frame.
    """
    return join_and_export(prepare_report(ticker))

In [None]:
# Symbols to process. The list is empty here, so the loop below does nothing
# until tickers are added.
ticker_list = []

# Each iteration appends one ticker's rows to the parquet file; `training_data`
# ends up holding the frame returned by the last call.
for ticker in ticker_list:
    training_data = prepare_and_export_report(ticker)