## Libraries 

In [15]:
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm_notebook as tqdm
from time import sleep
import pandas as pd
import yfinance as yf
import datetime


In [16]:
# Ticker symbols of the top 60 companies by market capitalization,
# written as whitespace-separated rows of ten and split into a list.
tickers = """
AAPL MSFT NVDA TSM ASML ADBE CRM ORCL CSCO ACN
INTC AVGO SHOP SAP TXN INTU QCOM SONY SQ AMAT
IBM SNOW TEAM INFY ADI UBER DELL LRCX MU WDAY
ADSK DOCU NXPI FTNT KLAC PLTR SNPS DDOG PANW WIT
TEL CDNS APH XLNX STM ZS U DIDI MSI MCHP
CTSH TTD ERIC OKTA HUBS EPAM APP HPQ MDB NOK
""".split()

In [17]:
# Names of the per-stock metrics to collect; each one is used as a lookup key
# into a ticker's info mapping and as a column name in the results frame.
keys = [
    # identification and size
    'shortName', 'currentPrice', 'marketCap', 'sector', 'industry',
    # margins and growth
    'profitMargins', 'grossMargins', 'revenueGrowth', 'grossProfits',
    # returns and leverage
    'returnOnAssets', 'debtToEquity', 'returnOnEquity',
    # balance-sheet totals
    'totalDebt', 'totalCash', 'totalRevenue',
    # listing venue
    'exchange', 'market',
    # book value
    'bookValue', 'priceToBook',
]

In [18]:
# Empty results frame: a 'ticker' column followed by one column per requested metric.
stock_columns = ['ticker', *keys]
df_stocks = pd.DataFrame(columns=stock_columns)

In [19]:
# Fetch the metrics listed in `keys` for every ticker and collect them into df_stocks.
# Rows are gathered in a plain list and the frame is built once after the loop, so
# re-running this cell replaces df_stocks instead of adding duplicate rows to it.
stock_rows = []
for ticker in tqdm(tickers):
    try:
        # creating the Ticker object; `.info` is looked up once per ticker
        # instead of once per metric
        stock = yf.Ticker(ticker.lower())
        info = stock.info
        # a metric missing from `info` raises KeyError, which skips this ticker
        stats = [info[key] for key in keys]
    except Exception as exc:
        # Catch Exception rather than using a bare `except:`: the bare form also
        # traps KeyboardInterrupt, so the loop could not be stopped by the user.
        # The exception itself is reported so the failure reason is visible.
        print('{} raised an exception: {!r}'.format(ticker, exc))
        continue

    print('{} stats extracted'.format(ticker))
    stock_rows.append([ticker] + stats)
    print('{} info extracted'.format(ticker))

# One row per successfully fetched ticker; an empty frame with the same columns
# if every ticker failed.
df_stocks = pd.DataFrame(stock_rows, columns=['ticker'] + keys)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


  0%|          | 0/60 [00:00<?, ?it/s]

AAPL stats extracted
AAPL info extracted
MSFT stats extracted
MSFT info extracted
NVDA stats extracted
NVDA info extracted
TSM stats extracted
TSM info extracted
ASML stats extracted
ASML info extracted
ADBE stats extracted
ADBE info extracted
CRM stats extracted
CRM info extracted
ORCL stats extracted
ORCL info extracted
CSCO stats extracted
CSCO info extracted
ACN stats extracted
ACN info extracted
INTC stats extracted
INTC info extracted
AVGO stats extracted
AVGO info extracted
SHOP stats extracted
SHOP info extracted
SAP stats extracted
SAP info extracted
TXN stats extracted
TXN info extracted
INTU stats extracted
INTU info extracted
QCOM stats extracted
QCOM info extracted
SONY stats extracted
SONY info extracted
SQ stats extracted
SQ info extracted
AMAT stats extracted
AMAT info extracted
IBM stats extracted
IBM info extracted
SNOW stats extracted
SNOW info extracted
TEAM stats extracted
TEAM info extracted
INFY stats extracted
INFY info extracted
ADI stats extracted
ADI info ext

In [20]:
# Display the metrics frame filled by the ticker loop (one row per ticker).
df_stocks

Unnamed: 0,ticker,shortName,currentPrice,marketCap,sector,industry,profitMargins,grossMargins,revenueGrowth,grossProfits,returnOnAssets,debtToEquity,returnOnEquity,totalDebt,totalCash,totalRevenue,exchange,market,bookValue,priceToBook
0,AAPL,Apple Inc.,149.8,2476224020480.0,Technology,Consumer Electronics,0.25882,0.41779,0.288,152836000000,0.20179,216.392,1.47443,136521998336.0,62639001600.0,365817004032.0,NMS,us_market,3.841,39.00026
1,MSFT,Microsoft Corporation,331.62,2489796263936.0,Technology,Software—Infrastructure,0.38515,0.68865,0.22,115856000000,0.14589,51.938,0.49303,78934999040.0,130584002560.0,176250994688.0,NMS,us_market,20.242,16.382769
2,NVDA,NVIDIA Corporation,255.67,637129654272.0,Technology,Semiconductors,0.32324,0.63758,0.683,10557000000,0.1436,60.486,0.40375,12791000064.0,19654000640.0,21897000960.0,NMS,us_market,8.472,30.178234
3,TSM,Taiwan Semiconductor Manufactur,113.7,589648166912.0,Technology,Semiconductors,0.37933,0.51897,0.163,711146502000,0.13083,27.839,0.29666,578576973824.0,975818981376.0,1510758809600.0,NYQ,us_market,400.305,0.284033
4,ASML,ASML Holding N.V. - New York Re,812.88,338236932096.0,Technology,Semiconductor Equipment & Materials,0.3054,0.52124,0.324,6797200000,0.14755,35.695,0.43259,4105799936.0,4455699968.0,17879599104.0,NMS,us_market,28.199,28.826555
5,ADBE,Adobe Inc.,650.36,309441265664.0,Technology,Software—Infrastructure,0.38671,0.88171,0.22,11146000000,0.142,32.503,0.44697,4685000192.0,6163999744.0,15098999808.0,NMS,us_market,30.282,21.476784
6,CRM,Salesforce.com Inc,299.69,293396512768.0,Technology,Software—Application,0.09992,0.74336,0.231,15814000000,0.00988,28.411,0.05006,15774000128.0,9649999872.0,23538999296.0,NYQ,us_market,56.769,5.279114
7,ORCL,Oracle Corporation,95.94,262270238720.0,Technology,Software—Infrastructure,0.34163,0.8022,0.039,32624000000,0.08318,,3.097,82717999104.0,39310000128.0,40839999488.0,NYQ,us_market,-0.562,
8,CSCO,"Cisco Systems, Inc.",55.97,236060753920.0,Technology,Communication Equipment,0.21259,0.64021,0.08,31894000000,0.08945,30.755,0.26747,12693999616.0,24526999552.0,49818001408.0,NMS,us_market,9.788,5.718226
9,ACN,Accenture plc,356.32,226493743104.0,Technology,Information Technology Services,0.12051,0.31989,0.207,13976158000,0.11673,17.331,0.3221,3447866112.0,10013812736.0,47949377536.0,NYQ,us_market,28.802,12.371364


In [21]:
# Time frame of the study: both bounds are 1 January, of 2011 and 2021 respectively.
# They are passed as start/end to the price downloads.
start, end = (datetime.datetime(year, 1, 1) for year in (2011, 2021))

In [22]:

# Download the price history of every ticker for the [start, end] window and stack
# the per-ticker frames into df_history, tagging each row with its ticker in 'Name'.
#
# The frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 (and the old `except Exception: None` would have hidden the
# resulting AttributeError, leaving df_history silently empty), and growing a frame
# inside the loop copies all previous rows on every iteration.
history_frames = []
for position, ticker in enumerate(tqdm(tickers)):
    # print the position and symbol which is being downloaded
    print(str(position) + ' : ' + ticker, end=',', flush=True)

    try:
        # download the stock price
        stock = yf.download(ticker, start=start, end=end, progress=False)
    except Exception as exc:
        # Best effort: report the failure and carry on with the next ticker.
        print('{} download failed: {!r}'.format(ticker, exc))
        continue

    # tickers with no rows in the window contribute nothing
    if len(stock) == 0:
        continue

    stock['Name'] = ticker
    history_frames.append(stock)

# pd.concat needs at least one frame; fall back to an empty frame otherwise.
df_history = pd.concat(history_frames, sort=False) if history_frames else pd.DataFrame()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


  0%|          | 0/60 [00:00<?, ?it/s]

0 : AAPL,1 : MSFT,2 : NVDA,3 : TSM,4 : ASML,5 : ADBE,6 : CRM,7 : ORCL,8 : CSCO,9 : ACN,10 : INTC,11 : AVGO,12 : SHOP,13 : SAP,14 : TXN,15 : INTU,16 : QCOM,17 : SONY,18 : SQ,19 : AMAT,20 : IBM,21 : SNOW,22 : TEAM,23 : INFY,24 : ADI,25 : UBER,26 : DELL,27 : LRCX,28 : MU,29 : WDAY,30 : ADSK,31 : DOCU,32 : NXPI,33 : FTNT,34 : KLAC,35 : PLTR,36 : SNPS,37 : DDOG,38 : PANW,39 : WIT,40 : TEL,41 : CDNS,42 : APH,43 : XLNX,44 : STM,45 : ZS,46 : U,47 : DIDI,
1 Failed download:
- DIDI: Data doesn't exist for startDate = 1293832800, endDate = 1609452000
48 : MSI,49 : MCHP,50 : CTSH,51 : TTD,52 : ERIC,53 : OKTA,54 : HUBS,55 : EPAM,56 : APP,
1 Failed download:
- APP: Data doesn't exist for startDate = 1293832800, endDate = 1609452000
57 : HPQ,58 : MDB,59 : NOK,

## NASDAQ-100

In [40]:
# Collect the historical data of the Nasdaq-100 index (Yahoo symbol ^NDX) for the
# [start, end] window. df_nasdaq stays an empty frame if the download fails.
df_nasdaq = pd.DataFrame()

try:
    # download the index prices
    nasdaq = yf.download('^NDX', start=start, end=end, progress=False)
    # DataFrame.append was removed in pandas 2.0; appending to an empty frame was
    # only ever a copy of the downloaded data, so copy it directly.
    df_nasdaq = nasdaq.copy()
except Exception as exc:
    # Report the failure instead of swallowing it silently.
    print('^NDX download failed: {!r}'.format(exc))

In [41]:
# Display the frame holding the downloaded ^NDX history.
df_nasdaq

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-12-31,2223.280029,2225.000000,2209.379883,2217.860107,2217.860107,1026600000
2011-01-03,2238.659912,2268.300049,2237.790039,2254.229980,2254.229980,1919660000
2011-01-04,2261.649902,2263.030029,2237.729980,2251.459961,2251.459961,2015440000
2011-01-05,2245.399902,2271.080078,2243.879883,2270.399902,2270.399902,2060750000
2011-01-06,2272.330078,2279.449951,2268.590088,2277.510010,2277.510010,2095490000
...,...,...,...,...,...,...
2020-12-24,12668.200195,12732.549805,12665.799805,12711.009766,12711.009766,3305950000
2020-12-28,12813.969727,12861.910156,12747.049805,12838.860352,12838.860352,5076340000
2020-12-29,12909.860352,12925.530273,12816.589844,12843.490234,12843.490234,4680780000
2020-12-30,12900.030273,12917.450195,12828.790039,12845.360352,12845.360352,5292210000


## S&P 500

In [44]:
# Collect the historical data of the S&P 500 index (Yahoo symbol ^GSPC) for the
# [start, end] window. df_sp500 stays an empty frame if the download fails.
df_sp500 = pd.DataFrame()

try:
    # download the index prices
    sp500 = yf.download('^GSPC', start=start, end=end, progress=False)
    # DataFrame.append was removed in pandas 2.0; appending to an empty frame was
    # only ever a copy of the downloaded data, so copy it directly.
    df_sp500 = sp500.copy()
except Exception as exc:
    # Report the failure instead of swallowing it silently.
    print('^GSPC download failed: {!r}'.format(exc))

In [45]:
# Display the frame holding the downloaded ^GSPC history.
df_sp500

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-12-31,1256.760010,1259.339966,1254.189941,1257.640015,1257.640015,1799770000
2011-01-03,1257.619995,1276.170044,1257.619995,1271.869995,1271.869995,4286670000
2011-01-04,1272.949951,1274.119995,1262.660034,1270.199951,1270.199951,4796420000
2011-01-05,1268.780029,1277.630005,1265.359985,1276.560059,1276.560059,4764920000
2011-01-06,1276.290039,1278.170044,1270.430054,1273.849976,1273.849976,4844100000
...,...,...,...,...,...,...
2020-12-24,3694.030029,3703.820068,3689.320068,3703.060059,3703.060059,1885090000
2020-12-28,3723.030029,3740.510010,3723.030029,3735.360107,3735.360107,3527460000
2020-12-29,3750.010010,3756.120117,3723.310059,3727.040039,3727.040039,3387030000
2020-12-30,3736.189941,3744.629883,3730.209961,3732.040039,3732.040039,3145200000


## Exporting the csv files

In [32]:
# Write the per-ticker metrics as CSV to a file named 'stock_data'
# (relative path; note the name carries no .csv extension).
df_stocks.to_csv('stock_data')

In [36]:
# Write the stacked per-ticker price history as CSV to 'stock_history_data'
# (relative path; note the name carries no .csv extension).
df_history.to_csv('stock_history_data')

In [42]:
# Write the ^NDX index history as CSV to 'nasdaq100_data'
# (relative path; note the name carries no .csv extension).
df_nasdaq.to_csv('nasdaq100_data')

In [38]:
# Write the ^GSPC index history as CSV to 'sp500_data'
# (relative path; note the name carries no .csv extension).
df_sp500.to_csv('sp500_data')