In [1]:
# Import Dependencies
import io

import lxml
from lxml import html
import numpy as np
import pandas as pd
import requests

In [2]:
# Input
symbol = ['AMZN','TSLA','GOOGL', 'MSFT', 'AAPL']
url = {}
balance_sheet = {}
financials = {}
cashflows = {}
first_time = 0

# Manual Removal of Headers
bs_column_removal = ['Period Ending',
                     'Current Assets',
                     'Current Liabilities',
                     "Stockholders' Equity"]

fin_column_removal = ['Revenue',
                      'Operating Expenses',
                      'Income from Continuing Operations',
                      "Non-recurring Events",
                      "Net Income"]

cf_column_removal = ['Period Ending',
                     'Operating Activities, Cash Flows Provided By or Used In',
                     'Investing Activities, Cash Flows Provided By or Used In',
                     'Financing Activities, Cash Flows Provided By or Used In']

In [3]:
for s in symbol:
    url[s,'Balance Sheet'] = 'https://finance.yahoo.com/quote/' + s + '/balance-sheet?p=' + s
    url[s,'Financials'] = 'https://finance.yahoo.com/quote/' + s + '/financials?p=' + s
    url[s,'Cashflows'] = 'https://finance.yahoo.com/quote/' + s + '/cash-flow?p=' + s
    
    # Balance Sheet
    page = requests.get(url[s,'Balance Sheet'])
    tree = html.fromstring(page.content)
    table = tree.xpath('//table')
    assert len(table) == 1
    
    if first_time == 0:
        balance_sheet[s] = pd.read_html(lxml.etree.tostring(table[0], method='html'))[0].set_index(0).transpose()
        balance_sheet_columns = balance_sheet[s].columns
        bs_index = balance_sheet[s][balance_sheet[s].columns[0]].values
        first_time = 1
        del balance_sheet[s]
    
    for c in balance_sheet_columns:
        balance_sheet[s, c] = pd.read_html(lxml.etree.tostring(table[0], method='html'))[0].set_index(0).transpose()[c]
        
    for r in bs_column_removal:
         del balance_sheet[s,r]
                
    # Financials / Income Statement
    page = requests.get(url[s,'Financials'])
    tree = html.fromstring(page.content)
    table = tree.xpath('//table')
    assert len(table) == 1
    
    if first_time == 1:
        financials[s] = pd.read_html(lxml.etree.tostring(table[0], method='html'))[0].set_index(0).transpose()
        financials_columns = financials[s].columns
        fin_index = financials[s][financials[s].columns[0]].values
        first_time = 2
        del financials[s]
    
    for c in financials_columns:
        financials[s, c] = pd.read_html(lxml.etree.tostring(table[0], method='html'))[0].set_index(0).transpose()[c]
        
    for r in fin_column_removal:
         del financials[s,r]
    
    # Cashflows
    page = requests.get(url[s,'Cashflows'])
    tree = html.fromstring(page.content)
    table = tree.xpath('//table')
    assert len(table) == 1
    
    if first_time == 2:
        cashflows[s] = pd.read_html(lxml.etree.tostring(table[0], method='html'))[0].set_index(0).transpose()
        cashflows_columns = cashflows[s].columns
        cf_index = cashflows[s][cashflows[s].columns[0]].values
        first_time = 3
        del cashflows[s]
    
    for c in cashflows_columns:
        cashflows[s, c] = pd.read_html(lxml.etree.tostring(table[0], method='html'))[0].set_index(0).transpose()[c]
        
    for r in cf_column_removal:
         del cashflows[s,r]

In [4]:
# Convert to DataFrames
balance_sheet = pd.DataFrame(balance_sheet).set_index(bs_index)
financials = pd.DataFrame(financials).set_index(fin_index)
cashflows = pd.DataFrame(cashflows).set_index(cf_index)

In [5]:
# Show Data
display(balance_sheet)
display(financials)
display(cashflows)

Unnamed: 0_level_0,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,...,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,Cash And Cash Equivalents,Short Term Investments,Net Receivables,Inventory,Other Current Assets,Total Current Assets,Long Term Investments,Property Plant and Equipment,Goodwill,Intangible Assets,...,Misc. Stocks Options Warrants,Redeemable Preferred Stock,Preferred Stock,Common Stock,Retained Earnings,Treasury Stock,Capital Surplus,Other Stockholder Equity,Total Stockholder Equity,Net Tangible Assets
12/31/2018,31750000,9500000,16677000,17174000,418000,75101000,440000,61797000,14548000,4110000,...,-,-,-,40201000,70400000,-3454000,-,-3454000,107147000,107147000
12/31/2017,20522000,10464000,11835000,16047000,1329000,60197000,441000,48866000,13350000,3371000,...,-,-,-,35867000,98330000,-150000,-,-150000,134047000,134047000
12/31/2016,19334000,6647000,8339000,11461000,-,45781000,223000,29114000,3784000,854000,...,-,-,-,31251000,96364000,634000,-,634000,128249000,119629000
12/31/2015,15890000,3918000,5654000,10243000,-,35705000,16000,21838000,3759000,992000,...,-,-,-,27416000,92284000,-345000,-,-345000,119355000,110346000


Unnamed: 0_level_0,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,...,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,Total Revenue,Cost of Revenue,Gross Profit,Research Development,Selling General and Administrative,Non Recurring,Others,Total Operating Expenses,Operating Income or Loss,Total Other Income/Expenses Net,...,Income Before Tax,Income Tax Expense,Minority Interest,Net Income From Continuing Ops,Discontinued Operations,Extraordinary Items,Effect Of Accounting Changes,Other Items,Preferred Stock And Other Adjustments,Net Income Applicable To Common Shares
12/31/2018,232887000,139156000,93731000,28837000,52177000,-,296000,220466000,12421000,-1145000,...,72903000,13372000,-,59531000,-,-,-,-,-,59531000
12/31/2017,177866000,111934000,65932000,22620000,38992000,-,214000,173760000,4106000,-304000,...,64089000,15738000,-,48351000,-,-,-,-,-,48351000
12/31/2016,135987000,88265000,47722000,16085000,27284000,-,167000,131801000,4186000,-390000,...,61372000,15685000,-,45687000,-,-,-,-,-,45687000
12/31/2015,107006000,71651000,35355000,12540000,20411000,-,171000,104773000,2233000,-687000,...,72515000,19121000,-,53394000,-,-,-,-,-,53394000


Unnamed: 0_level_0,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,AMZN,...,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,Net Income,Depreciation,Adjustments To Net Income,Changes In Accounts Receivables,Changes In Liabilities,Changes In Inventories,Changes In Other Operating Activities,Total Cash Flow From Operating Activities,Capital Expenditures,Investments,...,Investments,Other Cash flows from Investing Activities,Total Cash Flows From Investing Activities,Dividends Paid,Sale Purchase of Stock,Net Borrowings,Other Cash Flows from Financing Activities,Total Cash Flows From Financing Activities,Effect Of Exchange Rate Changes,Change In Cash and Cash Equivalents
12/31/2018,10073000,15341000,6352000,-4615000,4414000,-1314000,472000,30723000,-13427000,1140000,...,30845000,-745000,16066000,-13712000,-,432000,-,-87876000,-,5624000
12/31/2017,3033000,11478000,4096000,-4780000,7838000,-3583000,283000,18365000,-11955000,-3054000,...,-33542000,-124000,-46446000,-12769000,-,29014000,-,-17974000,-,-195000
12/31/2016,2371000,8116000,2869000,-3436000,6985000,-1426000,1724000,17203000,-7804000,-2663000,...,-32022000,-924000,-45977000,-12150000,-,22057000,-,-20890000,-,-636000
12/31/2015,596000,5646000,2605000,-1755000,5586000,-2187000,913000,12039000,-5387000,-1066000,...,-44417000,-26000,-56274000,-11561000,-,29305000,749000,-17716000,-,7276000
