In [433]:
import pandas as pd
import requests

from datetime import datetime
import time

from scrapy import Selector

pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [200]:
url = 'https://en.wikipedia.org/wiki/Nasdaq-100'

html = requests.get(url).text
sel = Selector(text = html)

nasdaq100_df = pd.DataFrame()

for n in range(2,104):
    ticker = sel.xpath('//table[@id = "constituents"]/tbody/tr[{}]/td[2]'.format(n)).extract()[0].strip('</td>')
    company_name = sel.xpath('//table[@id = "constituents"]/tbody/tr[{}]/td[1]/a/@title'.format(n)).get()
    industry = sel.xpath('//table[@id = "constituents"]/tbody/tr[{}]/td[3]'.format(n)).get().strip('</td>')
    sub_industry = sel.xpath('//table[@id = "constituents"]/tbody/tr[{}]/td[3]'.format(n)).get().strip('</td>')
    new_list = [ticker, company_name, industry, sub_industry]
    pd_series_row = pd.Series(new_list, index = ['ticker', 'company_name', 'industry', 'sub_industry'])
    nasdaq100_df = nasdaq100_df.append(pd_series_row, ignore_index=True)
    print(ticker, end = ' ')

nasdaq100_df.head(4)

ATVI ADBE AMD ALGN GOOGL GOOG AMZN AEP AMGN ADI ANSS AAPL AMAT ASML TEAM ADSK ADP BIDU BIIB BKNG AVGO CDNS CDW CERN CHTR CHKP CTAS CSCO CTSH CMCSA CPRT COST CRWD CSX DXCM DOCU DLTR EBAY EA EXC FAST FISV FOXA FOX GILD HON IDXX ILMN INCY INTC INTU ISRG JD KDP KLAC KHC LRCX LULU MAR MRVL MTCH MELI FB MCHP MU MSFT MRNA MDLZ MNST NTES NFLX NVDA NXPI ORLY OKTA PCAR PAYX PYPL PTON PEP PDD QCOM REGN ROST SGEN SIRI SWKS SPLK SBUX SNPS TMUS TSLA TXN TCOM VRSN VRSK VRTX WBA WDAY XEL XLNX ZM 

Unnamed: 0,company_name,industry,sub_industry,ticker
0,Activision Blizzard,Communication Services,Communication Services,ATVI
1,Adobe Inc.,Information Technology,Information Technology,ADBE
2,Advanced Micro Devices,Information Technology,Information Technology,AMD
3,Align Technology,Health Care,Health Care,ALGN


In [438]:
def unix_to_date(unix_timestamp):
    ts = int(unix_timestamp)
    return datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d')

In [439]:
with open('/Users/ilya/Desktop/keys/yahoo_finance.txt','r') as file:
    key = file.readlines()

KEY = key[0]

In [440]:
def get_income_statement(ticker, period):
    financials_url = "https://yh-finance.p.rapidapi.com/stock/v2/get-financials"

    querystring = {"symbol":ticker,"region":"US"}

    headers = {
        'x-rapidapi-host': "yh-finance.p.rapidapi.com",
        'x-rapidapi-key': KEY
        }

    financials_response = requests.request("GET", financials_url, headers=headers, params=querystring).json()

    ##these are quarterly earnings specifically
    income_statment_df = pd.DataFrame()

    for quarter in financials_response['incomeStatementHistoryQuarterly']['incomeStatementHistory']:
        revenue_line = pd.Series([quarter['endDate']['fmt'],
                                  round(quarter['totalRevenue']['raw']/1000000000,3),
                                  round(quarter['grossProfit']['raw']/1000000000,3),
                                  round(quarter['netIncome']['raw']/1000000000,3),
                                  ],
        index = ['quarter','revenue_billion','gross_profit_billion','net_income_billion'])
        income_statment_df = income_statment_df.append(revenue_line, ignore_index=True)

    income_statment_df['quarter'] = [pd.Timestamp(quarter) for quarter in income_statment_df['quarter']]
    income_statment_df['quarter'] = ['4Q2020' if quarter <= pd.Timestamp('2021-01-01T12') else
                                     '1Q2021' if quarter > pd.Timestamp('2021-01-02T12') and quarter < pd.Timestamp('2021-04-01T12') else
                                     '2Q2021' if quarter > pd.Timestamp('2021-04-02T12') and quarter < pd.Timestamp('2021-07-01T12') else
                                     '3Q2021' if quarter > pd.Timestamp('2021-07-02T12') and quarter < pd.Timestamp('2021-10-02T12') else
                                     '4Q2021' for quarter in income_statment_df['quarter']]

    income_statment_df.loc[:,'gross_profit_margin'] = income_statment_df['gross_profit_billion']/income_statment_df['revenue_billion']
    income_statment_df.loc[:,'net_profit_margin'] = income_statment_df['net_income_billion']/income_statment_df['revenue_billion']

    income_statment_df['ticker'] = ticker

    return income_statment_df.head(period)

In [411]:
def get_la_ratio(symbol, quarters):
  balance_sheet_url = "https://yh-finance.p.rapidapi.com/stock/v2/get-balance-sheet"

  querystring = {"symbol":symbol,"region":"US"}

  headers = {
            'x-rapidapi-host': "yh-finance.p.rapidapi.com",
            'x-rapidapi-key': KEY
            }

  response = requests.request("GET", balance_sheet_url, headers=headers, params=querystring)

  balance_sheet_response = response.json()

  balance_df = pd.DataFrame()

  for item in balance_sheet_response['balanceSheetHistoryQuarterly']['balanceSheetStatements']:
    balance_line = pd.Series([item['endDate']['fmt'],
                              item['totalCurrentLiabilities']['raw'],
                              item['totalCurrentAssets']['raw']],
                              index = ['quarter','liabilities','assets'])

    balance_df = balance_df.append(balance_line, ignore_index = True)

  balance_df.loc[:,'la_ratio'] = round(balance_df['liabilities']/balance_df['assets'],2)
  balance_df.loc[:,'assets_billion'] = round(balance_df['assets']/1000000000,2)
  balance_df.loc[:,'liabilities_billion'] = round(balance_df['liabilities']/1000000000,2)
  balance_df = balance_df.drop(columns = ['assets','liabilities'])
  balance_df['ticker'] = symbol

  return balance_df.head(quarters)

In [412]:
def get_pe_ratio(symbol, quarters):
    stats_url = "https://yh-finance.p.rapidapi.com/stock/v2/get-statistics"

    querystring = {"symbol":symbol,"region":"US"}

    headers = {
              'x-rapidapi-host': "yh-finance.p.rapidapi.com",
              'x-rapidapi-key': KEY
              }

    stats_response = requests.request("GET", stats_url, headers=headers, params=querystring).json()

    ratio_df = pd.DataFrame()

    if stats_response['timeSeries']['quarterlyPeRatio'] != []:
        for quarter in stats_response['timeSeries']['quarterlyPeRatio']:
            if quarter is not None:
                pd_row = pd.Series([quarter['asOfDate'], quarter['reportedValue']['fmt']],
                index = ['quarter','pe_ratio'])
                ratio_df = ratio_df.append(pd_row, ignore_index=True)

    if ratio_df.empty == True:
        ratio_df['ticker'] = [symbol]
        ratio_df['pe_ratio'] = [None]
        ratio_df['quarter'] = [datetime.now().date()]

    else:
        pass

    ratio_df['ticker'] = symbol

    return ratio_df.sort_values(by = 'quarter', ascending = False).head(quarters)

In [413]:
def list_perfomance(symbol):
    weekly_history_df = get_weekly_history_df(symbol)

    performance_list = [symbol,\
                        round((weekly_history_df.iloc[0]['close'] - weekly_history_df.iloc[-1]['close'])/weekly_history_df.loc[0]['close'],3),\
                        round(weekly_history_df['close'].std(),2),
                        round(weekly_history_df['close'].mean(),2),
                        round(weekly_history_df['close'].std()/weekly_history_df['close'].mean(),2),
                        round(weekly_history_df.iloc[0]['close'],1)
                        ]

    return performance_list

In [428]:
counter = 0

attributes = ['ticker','yoy_growth','std','mean_price','volatility','last_close_price']
comparison_df = pd.DataFrame(columns = attributes)

print('Geting year stock prices...')
for ticker_name in list(nasdaq100_df['ticker'][0:25]):
    series_row = pd.Series(list_perfomance(ticker_name), index = attributes)
    comparison_df = comparison_df.append(series_row, ignore_index=True)
    print(ticker_name, end = ' ')

print('Done!')

la_ratio_main_df = pd.DataFrame()

print('Geting liabilities to assets ratio...')
for ticker_name in list(nasdaq100_df['ticker'][0:25]):
    mini_df = get_la_ratio(ticker_name,1)
    la_ratio_main_df = la_ratio_main_df.append(mini_df)
    print(ticker_name, end = ' ')

print('Done!')

income_statement_main_df = pd.DataFrame()

print('Geting profitability ratio...')
for ticker_name in list(nasdaq100_df['ticker'][0:25]):
    mini_df = get_income_statement(ticker_name,1)
    income_statement_main_df = income_statement_main_df.append(mini_df)
    print(ticker_name, end = ' ')

print('Done!')

pe_ratio_main_df = pd.DataFrame()

print('Geting price per earning ratio...')
for ticker_name in list(nasdaq100_df['ticker'][0:25]):
    mini_df = get_pe_ratio(ticker_name,1)
    pe_ratio_main_df = pe_ratio_main_df.append(mini_df)
    print(ticker_name, end = ' ')
    tim

print('Done!')

Geting year stock prices...
ATVI ADBE AMD ALGN GOOGL GOOG AMZN AEP AMGN ADI ANSS AAPL AMAT ASML TEAM ADSK ADP BIDU BIIB BKNG AVGO CDNS CDW CERN CHTR Done!
Geting liabilities to assets ratio...
ATVI ADBE AMD ALGN GOOGL GOOG AMZN AEP AMGN ADI ANSS AAPL AMAT ASML TEAM ADSK ADP BIDU BIIB BKNG AVGO CDNS CDW CERN CHTR Done!
Geting profitability ratio...
ATVI ADBE AMD ALGN GOOGL GOOG AMZN AEP AMGN ADI ANSS AAPL AMAT ASML TEAM ADSK ADP BIDU BIIB BKNG AVGO CDNS CDW CERN CHTR Done!
Geting price per earning ratio...
ATVI ADBE AMD ALGN GOOGL GOOG AMZN 

JSONDecodeError: Extra data: line 1 column 5 - line 1 column 19 (char 4 - 18)

In [430]:
pe_ratio_main_df = pd.DataFrame()

print('Geting price per earning ratio...')
for ticker_name in list(nasdaq100_df['ticker'][0:25]):
    mini_df = get_pe_ratio(ticker_name,1)
    pe_ratio_main_df = pe_ratio_main_df.append(mini_df)
    print(ticker_name, end = ' ')

print('Done!')

Geting price per earning ratio...
ATVI ADBE AMD ALGN GOOGL GOOG AMZN AEP AMGN ADI ANSS AAPL AMAT ASML TEAM ADSK ADP BIDU BIIB BKNG AVGO CDNS CDW CERN CHTR Done!


In [431]:
final_df = comparison_df.merge(la_ratio_main_df, left_on = ['ticker'], right_on = ['ticker'])\
                        .merge(income_statement_main_df, left_on = ['ticker'], right_on = ['ticker'])\
                        .merge(pe_ratio_main_df, left_on = ['ticker'], right_on = ['ticker'])

final_df = final_df.merge(nasdaq100_df, left_on = ['ticker'], right_on = ['ticker'])

final_df = final_df[['ticker','company_name','industry','sub_industry','last_close_price','yoy_growth','volatility',
                    'assets_billion','liabilities_billion','la_ratio','gross_profit_margin','net_profit_margin',
                    'pe_ratio']]

In [432]:
final_df

Unnamed: 0,ticker,company_name,industry,sub_industry,last_close_price,yoy_growth,volatility,assets_billion,liabilities_billion,la_ratio,gross_profit_margin,net_profit_margin,pe_ratio
0,ATVI,Activision Blizzard,Communication Services,Communication Services,60.6,-0.289,0.11,11.21,2.02,0.18,0.745,0.309,23.24
1,ADBE,Adobe Inc.,Information Technology,Information Technology,662.1,0.28,0.14,8.62,6.19,0.72,0.881,0.308,57.56
2,AMD,Advanced Micro Devices,Information Technology,Information Technology,154.8,0.437,0.19,7.99,3.56,0.45,0.484,0.214,36.49
3,ALGN,Align Technology,Health Care,Health Care,634.2,0.25,0.11,2.46,1.79,0.73,0.743,0.178,76.22
4,GOOGL,Alphabet Inc.,Communication Services,Communication Services,2843.7,0.372,0.17,184.11,61.78,0.34,0.576,0.291,29.01
5,GOOG,Alphabet Inc.,Communication Services,Communication Services,2856.1,0.372,0.17,184.11,61.78,0.34,0.576,0.291,28.92
6,AMZN,Amazon (company),Consumer Discretionary,Consumer Discretionary,3504.6,0.088,0.05,138.53,123.99,0.9,0.432,0.028,57.2
7,AEP,American Electric Power,Utilities,Utilities,82.6,-0.028,0.04,5.79,9.95,1.72,0.469,0.172,17.31
8,AMGN,Amgen,Health Care,Health Care,201.1,-0.118,0.06,24.38,14.84,0.61,0.76,0.281,21.61
9,ADI,Analog Devices,Information Technology,Information Technology,178.0,0.226,0.07,5.38,2.77,0.52,0.479,0.032,37.96
