In [14]:
from datetime import datetime
import lxml
from lxml import html
import requests
import numpy as np
import pandas as pd

symbol = 'NVO'

url = 'https://finance.yahoo.com/quote/' + symbol + '/balance-sheet?p=' + symbol

# Set up the request headers that we're going to use, to simulate
# a request by the Chrome browser. Simulating a request from a browser
# is generally good practice when building a scraper
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'close',
    'DNT': '1', # Do Not Track Request Header 
    'Pragma': 'no-cache',
    'Referrer': 'https://google.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
}

# Fetch the page that we're going to parse, using the request headers
# defined above
page = requests.get(url, headers=headers)

# Parse the page with LXML, so that we can start doing some XPATH queries
# to extract the data that we want
tree = html.fromstring(page.content)

# Smoke test that we fetched the page by fetching and displaying the H1 element
tree.xpath("//h1/text()")

['Novo Nordisk A/S (NVO)']

In [15]:
table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")

# Ensure that some table rows are found; if none are found, then it's possible
# that Yahoo Finance has changed their page layout, or have detected
# that you're scraping the page.
assert len(table_rows) > 0

parsed_rows = []

for table_row in table_rows:
    parsed_row = []
    el = table_row.xpath("./div")
    
    none_count = 0
    
    for rs in el:
        try:
            (text,) = rs.xpath('.//span/text()[1]')
            parsed_row.append(text)
        except ValueError:
            parsed_row.append(np.NaN)
            none_count += 1

    if (none_count < 4):
        parsed_rows.append(parsed_row)

df = pd.DataFrame(parsed_rows)
df

Unnamed: 0,0,1,2,3,4
0,Breakdown,12/31/2021,12/31/2020,12/31/2019,12/31/2018
1,Total Assets,194508000,144922000,125612000,110769000
2,Total Liabilities Net Minority Interest,123762000,81597000,68019000,58930000
3,Total Equity Gross Minority Interest,70746000,63325000,57593000,51839000
4,Total Capitalization,80400000,63325000,57593000,51839000
5,Common Stock Equity,70746000,63325000,57593000,51839000
6,Capital Lease Obligations,4129000,3672000,3824000,
7,Net Tangible Assets,27575000,42668000,51758000,46694000
8,Working Capital,-13921000,-4464000,3473000,4903000
9,Invested Capital,93262000,70009000,58252000,52354000


In [16]:
df = pd.DataFrame(parsed_rows)
df = df.set_index(0) # Set the index to the first column: 'Period Ending'.
df = df.transpose() # Transpose the DataFrame, so that our header contains the account names

# Rename the "Breakdown" column to "Date"
cols = list(df.columns)
cols[0] = 'Date'
df = df.set_axis(cols, axis='columns', inplace=False)

df

Unnamed: 0,Date,Total Assets,Total Liabilities Net Minority Interest,Total Equity Gross Minority Interest,Total Capitalization,Common Stock Equity,Capital Lease Obligations,Net Tangible Assets,Working Capital,Invested Capital,Tangible Book Value,Total Debt,Net Debt,Share Issued,Ordinary Shares Number,Treasury Shares Number
1,12/31/2021,194508000,123762000,70746000,80400000,70746000,4129000.0,27575000,-13921000,93262000,27575000,26645000,11796000.0,2341100,2310000,31100
2,12/31/2020,144922000,81597000,63325000,63325000,63325000,3672000.0,42668000,-4464000,70009000,42668000,10356000,,2388139,2350000,38139
3,12/31/2019,125612000,68019000,57593000,57593000,57593000,3824000.0,51758000,3473000,58252000,51758000,4483000,,2448843,2400000,48843
4,12/31/2018,110769000,58930000,51839000,51839000,51839000,,46694000,4903000,52354000,46694000,515000,,2506332,2450000,56332


In [17]:
numeric_columns = list(df.columns)[1::] # Take all columns, except the first (which is the 'Date' column)

for column_name in numeric_columns:
    df[column_name] = df[column_name].str.replace(',', '') # Remove the thousands separator
    df[column_name] = df[column_name].astype(np.float64) # Convert the column to float64

df.dtypes

Date                                        object
Total Assets                               float64
Total Liabilities Net Minority Interest    float64
Total Equity Gross Minority Interest       float64
Total Capitalization                       float64
Common Stock Equity                        float64
Capital Lease Obligations                  float64
Net Tangible Assets                        float64
Working Capital                            float64
Invested Capital                           float64
Tangible Book Value                        float64
Total Debt                                 float64
Net Debt                                   float64
Share Issued                               float64
Ordinary Shares Number                     float64
Treasury Shares Number                     float64
dtype: object

In [18]:
from datetime import datetime
import lxml
from lxml import html
import requests
import numpy as np
import pandas as pd

def get_page(url):
    # Set up the request headers that we're going to use, to simulate
    # a request by the Chrome browser. Simulating a request from a browser
    # is generally good practice when building a scraper
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'close',
        'DNT': '1', # Do Not Track Request Header 
        'Pragma': 'no-cache',
        'Referrer': 'https://google.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
    }

    return requests.get(url, headers=headers)

def parse_rows(table_rows):
    parsed_rows = []

    for table_row in table_rows:
        parsed_row = []
        el = table_row.xpath("./div")

        none_count = 0

        for rs in el:
            try:
                (text,) = rs.xpath('.//span/text()[1]')
                parsed_row.append(text)
            except ValueError:
                parsed_row.append(np.NaN)
                none_count += 1

        if (none_count < 4):
            parsed_rows.append(parsed_row)
            
    return pd.DataFrame(parsed_rows)

def clean_data(df):
    df = df.set_index(0) # Set the index to the first column: 'Period Ending'.
    df = df.transpose() # Transpose the DataFrame, so that our header contains the account names
    
    # Rename the "Breakdown" column to "Date"
    cols = list(df.columns)
    cols[0] = 'Date'
    df = df.set_axis(cols, axis='columns', inplace=False)
    
    numeric_columns = list(df.columns)[1::] # Take all columns, except the first (which is the 'Date' column)

    for column_index in range(1, len(df.columns)): # Take all columns, except the first (which is the 'Date' column)
        df.iloc[:,column_index] = df.iloc[:,column_index].str.replace(',', '') # Remove the thousands separator
        df.iloc[:,column_index] = df.iloc[:,column_index].astype(np.float64) # Convert the column to float64
        
    return df

def scrape_table(url):
    # Fetch the page that we're going to parse
    page = get_page(url);

    # Parse the page with LXML, so that we can start doing some XPATH queries
    # to extract the data that we want
    tree = html.fromstring(page.content)

    # Fetch all div elements which have class 'D(tbr)'
    table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")
    
    # Ensure that some table rows are found; if none are found, then it's possible
    # that Yahoo Finance has changed their page layout, or have detected
    # that you're scraping the page.
    assert len(table_rows) > 0
    
    df = parse_rows(table_rows)
    df = clean_data(df)
        
    return df

In [19]:
df_income=scrape_table('https://finance.yahoo.com/quote/' + symbol + '/financials?p=' + symbol)

In [20]:
df_income

Unnamed: 0,Date,Total Revenue,Cost of Revenue,Gross Profit,Operating Expense,Operating Income,Net Non Operating Interest Income Expense,Other Income Expense,Pretax Income,Tax Provision,...,Net Interest Income,EBIT,Reconciled Cost of Revenue,Reconciled Depreciation,Net Income from Continuing Operation Net Minority Interest,Total Unusual Items Excluding Goodwill,Total Unusual Items,Normalized EBITDA,Tax Rate for Calcs,Tax Effect of Unusual Items
1,ttm,157251000.0,25286000.0,131965000.0,65544000.0,66421000.0,-4098000.0,,62939000.0,12400000.0,...,-4098000.0,67169000.0,25286000.0,6717000.0,50539000.0,640000.0,640000.0,73246000.0,0.0,126090.0
2,12/31/2021,140800000.0,23658000.0,117142000.0,58498000.0,58644000.0,-180000.0,616000.0,59080000.0,11323000.0,...,-180000.0,59369000.0,23658000.0,6025000.0,47757000.0,640000.0,640000.0,64754000.0,0.0,122880.0
3,12/31/2020,126946000.0,20932000.0,106014000.0,51888000.0,54126000.0,-203000.0,-793000.0,53130000.0,10992000.0,...,-203000.0,53520000.0,20932000.0,5753000.0,42138000.0,-942000.0,-942000.0,60215000.0,0.0,-194994.0
4,12/31/2019,122021000.0,20088000.0,101933000.0,49450000.0,52483000.0,-436000.0,-3494000.0,48553000.0,9602000.0,...,-436000.0,48773000.0,20088000.0,5661000.0,38951000.0,-3357000.0,-3357000.0,57791000.0,0.0,-664686.0
5,12/31/2018,111831000.0,17617000.0,94214000.0,46966000.0,47248000.0,-106000.0,473000.0,47615000.0,8987000.0,...,-106000.0,47700000.0,17617000.0,3925000.0,38628000.0,461000.0,461000.0,51164000.0,0.0,87129.0


In [21]:
df_income.columns

Index(['Date', 'Total Revenue', 'Cost of Revenue', 'Gross Profit',
       'Operating Expense', 'Operating Income',
       'Net Non Operating Interest Income Expense', 'Other Income Expense',
       'Pretax Income', 'Tax Provision', 'Net Income Common Stockholders',
       'Diluted NI Available to Com Stockholders', 'Basic Average Shares',
       'Diluted Average Shares', 'Total Operating Income as Reported',
       'Total Expenses', 'Net Income from Continuing & Discontinued Operation',
       'Normalized Income', 'Interest Income', 'Interest Expense',
       'Net Interest Income', 'EBIT', 'Reconciled Cost of Revenue',
       'Reconciled Depreciation',
       'Net Income from Continuing Operation Net Minority Interest',
       'Total Unusual Items Excluding Goodwill', 'Total Unusual Items',
       'Normalized EBITDA', 'Tax Rate for Calcs',
       'Tax Effect of Unusual Items'],
      dtype='object')

In [22]:
BS_analysis = df# copy columns of dataframe
BS_analysis

Unnamed: 0,Date,Total Assets,Total Liabilities Net Minority Interest,Total Equity Gross Minority Interest,Total Capitalization,Common Stock Equity,Capital Lease Obligations,Net Tangible Assets,Working Capital,Invested Capital,Tangible Book Value,Total Debt,Net Debt,Share Issued,Ordinary Shares Number,Treasury Shares Number
1,12/31/2021,194508000.0,123762000.0,70746000.0,80400000.0,70746000.0,4129000.0,27575000.0,-13921000.0,93262000.0,27575000.0,26645000.0,11796000.0,2341100.0,2310000.0,31100.0
2,12/31/2020,144922000.0,81597000.0,63325000.0,63325000.0,63325000.0,3672000.0,42668000.0,-4464000.0,70009000.0,42668000.0,10356000.0,,2388139.0,2350000.0,38139.0
3,12/31/2019,125612000.0,68019000.0,57593000.0,57593000.0,57593000.0,3824000.0,51758000.0,3473000.0,58252000.0,51758000.0,4483000.0,,2448843.0,2400000.0,48843.0
4,12/31/2018,110769000.0,58930000.0,51839000.0,51839000.0,51839000.0,,46694000.0,4903000.0,52354000.0,46694000.0,515000.0,,2506332.0,2450000.0,56332.0


In [23]:
current_assets = BS_analysis['Total Assets']
current_liabilities = BS_analysis['Total Liabilities Net Minority Interest']
working_capital = current_assets - current_liabilities
working_capital
total_sales = df_income['Total Revenue']
working_capital_per_dollar_of_sales = working_capital / total_sales
BS_analysis['Working Capital per Dollar of Sales'] = working_capital # copy columns of dataframe
BS_analysis

Unnamed: 0,Date,Total Assets,Total Liabilities Net Minority Interest,Total Equity Gross Minority Interest,Total Capitalization,Common Stock Equity,Capital Lease Obligations,Net Tangible Assets,Working Capital,Invested Capital,Tangible Book Value,Total Debt,Net Debt,Share Issued,Ordinary Shares Number,Treasury Shares Number,Working Capital per Dollar of Sales
1,12/31/2021,194508000.0,123762000.0,70746000.0,80400000.0,70746000.0,4129000.0,27575000.0,-13921000.0,93262000.0,27575000.0,26645000.0,11796000.0,2341100.0,2310000.0,31100.0,70746000.0
2,12/31/2020,144922000.0,81597000.0,63325000.0,63325000.0,63325000.0,3672000.0,42668000.0,-4464000.0,70009000.0,42668000.0,10356000.0,,2388139.0,2350000.0,38139.0,63325000.0
3,12/31/2019,125612000.0,68019000.0,57593000.0,57593000.0,57593000.0,3824000.0,51758000.0,3473000.0,58252000.0,51758000.0,4483000.0,,2448843.0,2400000.0,48843.0,57593000.0
4,12/31/2018,110769000.0,58930000.0,51839000.0,51839000.0,51839000.0,,46694000.0,4903000.0,52354000.0,46694000.0,515000.0,,2506332.0,2450000.0,56332.0,51839000.0


In [24]:
current_ratio = current_assets / current_liabilities
BS_analysis['Current Ratio'] = current_ratio
BS_analysis

Unnamed: 0,Date,Total Assets,Total Liabilities Net Minority Interest,Total Equity Gross Minority Interest,Total Capitalization,Common Stock Equity,Capital Lease Obligations,Net Tangible Assets,Working Capital,Invested Capital,Tangible Book Value,Total Debt,Net Debt,Share Issued,Ordinary Shares Number,Treasury Shares Number,Working Capital per Dollar of Sales,Current Ratio
1,12/31/2021,194508000.0,123762000.0,70746000.0,80400000.0,70746000.0,4129000.0,27575000.0,-13921000.0,93262000.0,27575000.0,26645000.0,11796000.0,2341100.0,2310000.0,31100.0,70746000.0,1.571629
2,12/31/2020,144922000.0,81597000.0,63325000.0,63325000.0,63325000.0,3672000.0,42668000.0,-4464000.0,70009000.0,42668000.0,10356000.0,,2388139.0,2350000.0,38139.0,63325000.0,1.77607
3,12/31/2019,125612000.0,68019000.0,57593000.0,57593000.0,57593000.0,3824000.0,51758000.0,3473000.0,58252000.0,51758000.0,4483000.0,,2448843.0,2400000.0,48843.0,57593000.0,1.846719
4,12/31/2018,110769000.0,58930000.0,51839000.0,51839000.0,51839000.0,,46694000.0,4903000.0,52354000.0,46694000.0,515000.0,,2506332.0,2450000.0,56332.0,51839000.0,1.879671


In [25]:
total_liabilities = df['Total Liabilities Net Minority Interest']
shareholders_equity = df['Common Stock Equity']
debt2equity_ratio = total_liabilities / shareholders_equity
BS_analysis['Debt to Equity Ratio'] = debt2equity_ratio
BS_analysis

Unnamed: 0,Date,Total Assets,Total Liabilities Net Minority Interest,Total Equity Gross Minority Interest,Total Capitalization,Common Stock Equity,Capital Lease Obligations,Net Tangible Assets,Working Capital,Invested Capital,Tangible Book Value,Total Debt,Net Debt,Share Issued,Ordinary Shares Number,Treasury Shares Number,Working Capital per Dollar of Sales,Current Ratio,Debt to Equity Ratio
1,12/31/2021,194508000.0,123762000.0,70746000.0,80400000.0,70746000.0,4129000.0,27575000.0,-13921000.0,93262000.0,27575000.0,26645000.0,11796000.0,2341100.0,2310000.0,31100.0,70746000.0,1.571629,1.749385
2,12/31/2020,144922000.0,81597000.0,63325000.0,63325000.0,63325000.0,3672000.0,42668000.0,-4464000.0,70009000.0,42668000.0,10356000.0,,2388139.0,2350000.0,38139.0,63325000.0,1.77607,1.288543
3,12/31/2019,125612000.0,68019000.0,57593000.0,57593000.0,57593000.0,3824000.0,51758000.0,3473000.0,58252000.0,51758000.0,4483000.0,,2448843.0,2400000.0,48843.0,57593000.0,1.846719,1.181029
4,12/31/2018,110769000.0,58930000.0,51839000.0,51839000.0,51839000.0,,46694000.0,4903000.0,52354000.0,46694000.0,515000.0,,2506332.0,2450000.0,56332.0,51839000.0,1.879671,1.136789


In [26]:
net_credit_sales = df_income['Net Income from Continuing Operation Net Minority Interest']
average_net_receivables_for_the_period = df['Tangible Book Value']
receivable_turnover = net_credit_sales / average_net_receivables_for_the_period
BS_analysis['Receivable Turnover'] = receivable_turnover
BS_analysis

Unnamed: 0,Date,Total Assets,Total Liabilities Net Minority Interest,Total Equity Gross Minority Interest,Total Capitalization,Common Stock Equity,Capital Lease Obligations,Net Tangible Assets,Working Capital,Invested Capital,Tangible Book Value,Total Debt,Net Debt,Share Issued,Ordinary Shares Number,Treasury Shares Number,Working Capital per Dollar of Sales,Current Ratio,Debt to Equity Ratio,Receivable Turnover
1,12/31/2021,194508000.0,123762000.0,70746000.0,80400000.0,70746000.0,4129000.0,27575000.0,-13921000.0,93262000.0,27575000.0,26645000.0,11796000.0,2341100.0,2310000.0,31100.0,70746000.0,1.571629,1.749385,1.832783
2,12/31/2020,144922000.0,81597000.0,63325000.0,63325000.0,63325000.0,3672000.0,42668000.0,-4464000.0,70009000.0,42668000.0,10356000.0,,2388139.0,2350000.0,38139.0,63325000.0,1.77607,1.288543,1.11927
3,12/31/2019,125612000.0,68019000.0,57593000.0,57593000.0,57593000.0,3824000.0,51758000.0,3473000.0,58252000.0,51758000.0,4483000.0,,2448843.0,2400000.0,48843.0,57593000.0,1.846719,1.181029,0.814135
4,12/31/2018,110769000.0,58930000.0,51839000.0,51839000.0,51839000.0,,46694000.0,4903000.0,52354000.0,46694000.0,515000.0,,2506332.0,2450000.0,56332.0,51839000.0,1.879671,1.136789,0.834176
