In [1]:
import pandas as pd
import requests
import time

from etl_resources import sqlite_connection, get_api_key, get_symbol_list

In [2]:
def build_statements_dataset(request_delay=20, request_timeout=30):
    
    '''
    Pull quarterly financial statements from the alphavantage api and append
    them to database tables.

    For each statement type (income statement, balance sheet, cash flow) and
    each symbol returned by get_symbol_list(), the 'quarterlyReports' list in
    the JSON response is loaded into a DataFrame, tagged with a 'ticker'
    column, and appended to the table named after the statement type in lower
    case ('income_statement', 'balance_sheet', 'cash_flow').

    Parameters:
        request_delay:   seconds to sleep before every request so the API
                         rate limiter is not tripped (default 20).
        request_timeout: seconds to wait for the HTTP response before the
                         request is treated as failed (default 30).

    Notes:
        * Rows are appended, so running this twice stores every report twice.
        * A failure for one symbol is printed and skipped; the run continues.
        * The API key is never printed: it is masked in failure messages.
    '''
    
    api_key = get_api_key()
    symbol_list = get_symbol_list()
    con = sqlite_connection()
    
    base_url = "https://www.alphavantage.co/query"
    
    # Maps the API `function` name to the response key holding the reports.
    statements = {'INCOME_STATEMENT':'quarterlyReports',
                  'BALANCE_SHEET':'quarterlyReports',
                  'CASH_FLOW':'quarterlyReports'}
    
    def redact(text):
        # Exceptions raised by requests embed the full URL (including the
        # apikey query parameter), so mask the key before anything is printed.
        key = str(api_key)
        return text.replace(key, '<apikey>') if key else text
    
    for statement, report_key in statements.items(): 
        
        table_name = statement.lower()
        
        for symbol in symbol_list:

            time.sleep(request_delay) # Handle the rate limiter
            print(f"Parsing {statement} for {symbol}")

            try:
                
                response = requests.get(
                    base_url,
                    params={'function': statement, 'symbol': symbol, 'apikey': api_key},
                    timeout=request_timeout,
                )
                response.raise_for_status()
                payload = response.json()

                # When the reports key is absent or empty there is nothing to
                # store; appending an empty frame could create a table that
                # has only the 'ticker' column.
                reports = payload.get(report_key)
                if not reports:
                    detail = redact(str(payload))[:200]
                    print(f"Failed: {statement} for {symbol}: no '{report_key}' in response: {detail}")
                    continue

                df = pd.DataFrame(reports)
                df['ticker'] = symbol

                df.to_sql(name=table_name, if_exists='append', index=False, con=con)

            except Exception as exc:
                # Best effort: report the reason and move on to the next symbol.
                # `Exception` (not a bare except) lets Ctrl-C stop the run.
                print(f"Failed: {statement} for {symbol}: {redact(repr(exc))}")
 


In [3]:
def process_financials():
    
    '''
    Type-cast the raw statement tables and store them as '<table>_clean'.

    For each of balance_sheet, cash_flow and income_statement: every column
    except fiscalDateEnding, reportedCurrency and ticker is converted with
    pd.to_numeric (values that cannot be parsed become NaN), fiscalDateEnding
    is parsed to datetime, column names are lower-cased and stripped, and the
    result replaces the '<table>_clean' table. The DataFrame index is written
    along with the data (to_sql default).
    '''
    
    con = sqlite_connection()
    
    # Columns that must not be coerced to numbers.
    text_columns = ['fiscalDateEnding','reportedCurrency','ticker']
    
    for table in ('balance_sheet', 'cash_flow', 'income_statement'):
        print(f'Processing {table}')
        frame = pd.read_sql(f"select * from {table}", con=con)
        
        for column in frame.columns:
            if column in text_columns:
                continue
            frame[column] = pd.to_numeric(frame[column], errors='coerce')
        
        frame['fiscalDateEnding'] = pd.to_datetime(frame['fiscalDateEnding'])
        
        # Normalise the header only after the camelCase lookups above.
        frame = frame.rename(columns=lambda name: name.lower().strip())
        
        frame.to_sql(name=f"{table}_clean", con=con, if_exists='replace')
        
    

In [4]:
def process_timeseries_differences():
    
    '''
    Rebuild the balance_sheet_qtr table with period-over-period changes.

    For every distinct ticker in balance_sheet_clean, the ticker's rows are
    joined to the calendar table (to pick up quarter and year), ordered by
    year and quarter, and extended with two derived columns per field:

        <field>_pct  - fractional change from the previous row (pct_change)
        <field>_val  - absolute change from the previous row (diff), computed
                       after missing values are replaced with 0

    The result is indexed by fiscaldateending, quarter, year and ticker and
    appended to balance_sheet_qtr, which is dropped first so reruns do not
    accumulate duplicate rows.
    '''
    
    con = sqlite_connection()
    cur = con.cursor()
    
    # `if exists` makes the rebuild safe on a first run without hiding real
    # failures (e.g. a locked database) behind a catch-all except.
    cur.execute("drop table if exists balance_sheet_qtr")
    
    cur.execute("select distinct ticker from balance_sheet_clean")
    tickers = [row[0] for row in cur.fetchall()]
    
    keys = ['fiscaldateending', 'quarter', 'year', 'ticker']
    
    # The ticker is bound as a parameter rather than formatted into the SQL.
    qry = '''
    select b.*,c.quarter, c.year from balance_sheet_clean b

    left join calendar c on b.fiscaldateending = c.date
    where b.ticker = ?
    order by b.ticker, c.year, c.quarter
    '''
    
    for ticker in tickers:

        df = pd.read_sql(qry, con=con, params=(ticker,))
        if df.empty:
            # Nothing to difference (e.g. a NULL ticker matches no rows).
            continue

        # 'index' is the DataFrame index that was written with the _clean table.
        df = df.drop(columns=['reportedcurrency','index'])
        df = df.set_index(keys)
        perc_df = df.pct_change()
        # NOTE: missing values are treated as 0 here, so a gap in the source
        # data shows up as a large swing in the *_val columns.
        diff_df = df.fillna(0).diff()

        # Explicit suffixes name the derived columns directly instead of
        # string-replacing pandas' default '_x'/'_y' anywhere in a column name.
        combined_df = pd.merge(perc_df, diff_df, on=keys, suffixes=('_pct', '_val'))
        final_df = pd.merge(df, combined_df, on=keys)

        final_df.to_sql(name='balance_sheet_qtr',if_exists='append', con=con)

In [5]:
# Pipeline stages, in order. The first two are left commented out here:
# build_statements_dataset() sleeps before every API request and appends to
# the raw tables, and process_financials() writes the *_clean tables that
# process_timeseries_differences() reads. Uncomment them to rebuild from scratch.
#build_statements_dataset()
#process_financials()
process_timeseries_differences()

balance_sheet_qtr doesnt exist


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,totalassets,totalcurrentassets,cashandcashequivalentsatcarryingvalue,cashandshortterminvestments,inventory,currentnetreceivables,totalnoncurrentassets,propertyplantequipment,accumulateddepreciationamortizationppe,intangibleassets,...,currentlongtermdebt_val,longtermdebtnoncurrent_val,shortlongtermdebttotal_val,othercurrentliabilities_val,othernoncurrentliabilities_val,totalshareholderequity_val,treasurystock_val,retainedearnings_val,commonstock_val,commonstocksharesoutstanding_val
fiscaldateending,quarter,year,ticker,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
2016-09-30 00:00:00,3,2016,AAPL,321686000000,106869000000,20484000000,67155000000.0,2132000000.0,29299000000.0,385247000000.0,27010000000.0,34235000000.0,8620000000.0,...,,,,,,,,,,
2016-12-31 00:00:00,4,2016,AAPL,331141000000,103332000000,16371000000,60452000000.0,2712000000.0,27977000000.0,413447000000.0,26510000000.0,36249000000.0,8271000000.0,...,-1000000.0,-1870000000.0,-76989000000.0,-191000000.0,1827000000.0,4141000000.0,0.0,3637000000.0,893000000.0,-80743000.0
2017-03-31 00:00:00,1,2017,AAPL,334532000000,101990000000,15157000000,15157000000.0,2910000000.0,,232542000000.0,27163000000.0,37961000000.0,8090000000.0,...,500000000.0,10974000000.0,10817000000.0,-207000000.0,1569000000.0,1692000000.0,0.0,924000000.0,1435000000.0,-49608000.0
2017-06-30 00:00:00,2,2017,AAPL,345173000000,112875000000,18571000000,18571000000.0,3146000000.0,,232298000000.0,29286000000.0,39695000000.0,8105000000.0,...,2496000000.0,5333000000.0,9817000000.0,-74000000.0,-872000000.0,-1657000000.0,0.0,-2400000000.0,866000000.0,-36033000.0
2017-09-30 00:00:00,3,2017,AAPL,375319000000,128645000000,20289000000,74181000000.0,4855000000.0,35673000000.0,246674000000.0,33783000000.0,41293000000.0,8015000000.0,...,1000000.0,7343000000.0,111455000000.0,22943000000.0,1817000000.0,1622000000.0,0.0,-195000000.0,1422000000.0,-43581000.0
2017-12-31 00:00:00,4,2017,AAPL,406794000000,143810000000,27491000000,77153000000.0,4421000000.0,50899000000.0,470928000000.0,33679000000.0,43431000000.0,8038000000.0,...,2000000.0,6715000000.0,-97027000000.0,-22507000000.0,3339000000.0,6152000000.0,0.0,6263000000.0,580000000.0,-44550000.0
2018-03-31 00:00:00,1,2018,AAPL,367502000000,130053000000,45059000000,87940000000.0,7662000000.0,22408000000.0,416735000000.0,35077000000.0,45425000000.0,,...,2000000000.0,-2560000000.0,114000000.0,-269000000.0,3101000000.0,-13321000000.0,0.0,-12695000000.0,1597000000.0,-138369000.0
2018-06-30 00:00:00,2,2018,AAPL,349197000000,115761000000,31971000000,70970000000.0,5936000000.0,26367000000.0,406209000000.0,38117000000.0,47251000000.0,,...,-3000000000.0,-4234000000.0,-7010000000.0,-372000000.0,-1161000000.0,-11929000000.0,0.0,-12462000000.0,580000000.0,-100365000.0
2018-09-30 00:00:00,3,2018,AAPL,365725000000,131339000000,25913000000,25913000000.0,3956000000.0,48995000000.0,234386000000.0,41304000000.0,49099000000.0,,...,3286000000.0,-3393000000.0,104261000000.0,25924000000.0,3220000000.0,-7802000000.0,0.0,-9036000000.0,1577000000.0,-87931000.0
2018-12-31 00:00:00,4,2018,AAPL,373719000000,140828000000,44771000000,86427000000.0,4988000000.0,36981000000.0,232891000000.0,39597000000.0,51929000000.0,,...,988000000.0,-746000000.0,-104603000000.0,3376000000.0,5641000000.0,10745000000.0,0.0,10110000000.0,769000000.0,-25183000.0


In [6]:
con = sqlite_connection()

# `res` is only assigned as a local variable inside
# process_timeseries_differences(), so it is not defined at notebook scope on
# a fresh kernel. Rebuild the same list (distinct tickers in
# balance_sheet_clean) here before displaying it.
cur = con.cursor()
cur.execute("select distinct ticker from balance_sheet_clean")
res = [row[0] for row in cur.fetchall()]

res

['MMM',
 'BKNG',
 'ABT',
 'ABBV',
 'ACN',
 'ADBE',
 'MO',
 'AMZN',
 'AXP',
 'AIG',
 'AMGN',
 'AAPL',
 'T',
 'BAC',
 'BK',
 'BIIB',
 'BLK',
 'BMY',
 'COF',
 'CAT',
 'CHTR',
 'CVX',
 'CSCO',
 'C',
 'CL',
 'CMCSA',
 'COP',
 'COST',
 'CVS',
 'DHR',
 'DUK',
 'LLY',
 'EMR',
 'EXC',
 'XOM',
 'FB',
 'FDX',
 'F',
 'GD',
 'GE',
 'GM',
 'GILD',
 'GOOG',
 'GOOGL',
 'HD',
 'HON',
 'INTC',
 'IBM',
 'JNJ',
 'JPM',
 'KMI',
 'LMT',
 'LOW',
 'MA',
 'MCD',
 'MDT',
 'MRK',
 'MET',
 'MSFT',
 'MDLZ',
 'MS',
 'NFLX',
 'NEE',
 'NKE',
 'NVDA',
 'OXY',
 'ORCL',
 'PYPL',
 'PEP',
 'PFE',
 'PM',
 'PG',
 'QCOM',
 'SLB',
 'SPG',
 'SO',
 'SBUX',
 'TGT',
 'TXN',
 'ALL',
 'BA',
 'KO',
 'DOW',
 'GS',
 'KHC',
 'UNP',
 'UPS',
 'UNH',
 'USB',
 'VZ',
 'V',
 'WMT',
 'WBA',
 'DIS',
 'WFC']