In [2]:
import pandas as pd

# to download stock prices and other information
from yahoofinancials import YahooFinancials
import yfinance as yf

# to handle dates 
from datetime import date, datetime

import os

# to parallelise things 
import ray

# useful util to check the time on running a function 
from timebudget import timebudget

In [3]:
# Load the portfolio export and keep one row per USD-denominated symbol.
os.chdir("/Users/safishajjouz/Google Drive")
df_tickers = (
    pd.read_csv('Portfolio-09-03-2021.csv')[['Symbol', 'Name', 'Portfolio', 'Currency']]
    # drop the listed index symbols
    .loc[lambda frame: ~frame['Symbol'].isin(['^FTSE', '^FTAS', '^FTMC', 'DJI'])]
    # keep only USD-denominated rows
    .loc[lambda frame: frame['Currency'] == 'USD']
    # a symbol may appear on several rows; keep its first occurrence
    .drop_duplicates('Symbol')
)
tickers = df_tickers['Symbol'].to_list()

# Download window: fixed start, today's date as the end (both 'YYYY-MM-DD' strings).
start_date = '1990-01-01'
date_end = date.today().strftime("%Y-%m-%d")
time_interval = 'daily'  # other options listed by the original cell: 'weekly', 'monthly'

In [4]:
@ray.remote
def get_stock_data(ticker, start_date, end_date, time_interval):
    """Ray task: fetch one ticker's price history plus descriptive columns.

    Parameters
    ----------
    ticker :
        Symbol handed to both ``YahooFinancials`` and ``yf.Ticker``.
    start_date, end_date :
        Range bounds forwarded to ``get_historical_price_data``.
    time_interval :
        Forwarded unchanged as ``time_interval``.

    Returns
    -------
    pandas.DataFrame
        The ``'prices'`` records for ``ticker`` with ``adjclose`` rescaled
        wherever it jumps by roughly 100x between consecutive rows, plus
        constant ``Stock``, ``Type`` and ``Sector`` columns.
    """
    client = YahooFinancials(ticker)
    history = client.get_historical_price_data(
        start_date=start_date,
        end_date=end_date,
        time_interval=time_interval,
    )
    # the quote-type payload supplies the short name and the instrument type
    quote_type = client.get_stock_quote_type_data()

    df_prices = pd.DataFrame(history[ticker]['prices'])

    # fix faulty yahoo data that jumps 100x:
    # every up-jump (ratio > 50) divides all later rows by 100,
    # every down-jump (ratio < .02) multiplies all later rows by 100.
    adj = df_prices['adjclose']
    step_ratio = adj / adj.shift()
    net_down_jumps = (step_ratio < .02).cumsum() - (step_ratio > 50).cumsum()
    df_prices['adjclose'] = adj * 100.**net_down_jumps

    ticker_quote = quote_type[ticker]
    df_prices['Stock'] = ticker_quote['shortName']
    df_prices['Type'] = ticker_quote['quoteType']

    # use yahoo finance to get other information
    info = yf.Ticker(ticker).info
    df_prices['Sector'] = info["sector"] if "sector" in info else 'unclassified'

    return df_prices


# Start a Ray runtime capped at six CPUs; a repeated init (e.g. re-running the cell) is tolerated.
ray.init(num_cpus=6, ignore_reinit_error=True)


@timebudget
def download_my_stocks(operation, input):
    """Launch ``operation`` remotely once per ticker and collect the results.

    Parameters
    ----------
    operation :
        Object exposing ``.remote(ticker, start_date, end_date, time_interval)``.
    input :
        Iterable of tickers; one remote call is submitted per element.

    Returns
    -------
    Whatever ``ray.get`` returns for the list of submitted calls.

    Notes
    -----
    Reads the module-level ``start_date``, ``date_end`` and ``time_interval``.
    """
    pending = []
    for ticker in input:
        pending.append(operation.remote(ticker, start_date, date_end, time_interval))
    return ray.get(pending)

# Fan the per-ticker downloads out over Ray. The runtime is shut down in a
# `finally` so a failed download does not leave it running.
try:
    mydata = download_my_stocks(get_stock_data, tickers)
finally:
    ray.shutdown()

# Stack the per-ticker frames and index the result by calendar date.
df = pd.concat(mydata)
df = df.rename(columns={'formatted_date': 'Date'})
df = df.drop('date', axis=1)
# `infer_datetime_format` is deprecated in pandas 2.x (format inference is the
# default there), so it is no longer passed; the parsed values are unchanged.
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')
df.head()



download_my_stocks took 83.023sec


Unnamed: 0_level_0,high,low,open,close,volume,adjclose,Stock,Type,Sector
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-06-29,5.0,3.508,3.8,4.778,93831500,4.778,"Tesla, Inc.",EQUITY,Consumer Cyclical
2010-06-30,6.084,4.66,5.158,4.766,85935500,4.766,"Tesla, Inc.",EQUITY,Consumer Cyclical
2010-07-01,5.184,4.054,5.0,4.392,41094000,4.392,"Tesla, Inc.",EQUITY,Consumer Cyclical
2010-07-02,4.62,3.742,4.6,3.84,25699000,3.84,"Tesla, Inc.",EQUITY,Consumer Cyclical
2010-07-06,4.0,3.166,4.0,3.222,34334500,3.222,"Tesla, Inc.",EQUITY,Consumer Cyclical


In [5]:
# distinct short names present in the downloaded frame
set(df['Stock'])

{'ASML Holding N.V. - New York Re',
 'Agilent Technologies, Inc.',
 'Alibaba Group Holding Limited',
 'Alphabet Inc.',
 'Amazon.com, Inc.',
 'American Tower Corporation (REI',
 'Apple Inc.',
 'Astrazeneca PLC',
 'Autodesk, Inc.',
 'Baidu, Inc.',
 'Berkshire Hathaway Inc. New',
 'CBOE Volatility Index',
 'Caribou Biosciences, Inc.',
 'Costco Wholesale Corporation',
 'Coupa Software Incorporated',
 'DexCom, Inc.',
 'Diageo plc',
 'Dow Jones Industrial Average',
 'Enphase Energy, Inc.',
 'Estee Lauder Companies, Inc. (T',
 'Facebook, Inc.',
 'Fiverr International Ltd.',
 'IDEXX Laboratories, Inc.',
 'Illumina, Inc.',
 'Invesco NASDAQ 100 ETF',
 'JP Morgan Chase & Co.',
 'Johnson & Johnson',
 'Lemonade, Inc.',
 'Linde plc',
 'MercadoLibre, Inc.',
 'Microsoft Corporation',
 'Moderna, Inc.',
 "Moody's Corporation",
 'NASDAQ Composite',
 'NIO Inc.',
 'NVIDIA Corporation',
 'Nike, Inc.',
 'Novo Nordisk A/S',
 'Pfizer, Inc.',
 'Pinduoduo Inc.',
 'Procter & Gamble Company (The)',
 'Redfin Corpor

In [None]:
@timebudget
def download_yahoo_data(tickers,start_date, end_date, time_interval, dataframe_format = 'long'):
    """Download price history and metadata for several tickers in one request.

    Parameters
    ----------
    tickers : str or iterable of str
        Symbols to download. A single symbol may be given as a plain string.
    start_date, end_date :
        Range bounds forwarded to ``get_historical_price_data``.
    time_interval :
        Forwarded unchanged as ``time_interval``.
    dataframe_format : {'long', 'wide'}, default 'long'
        ``'wide'`` pivots the result to one ``adjclose`` column per ``Stock``;
        any other value returns the long frame.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Date``. In long form each row carries the price fields
        plus ``Symbol``, ``Stock``, ``Type`` and ``Sector``.
    """
    # A bare string would be iterated character by character in the loop
    # below; wrap it. Other iterables are materialised once so they can be
    # both passed to YahooFinancials and looped over.
    if isinstance(tickers, str):
        tickers = [tickers]
    else:
        tickers = list(tickers)

    yahoo_financials = YahooFinancials(tickers)
    data = yahoo_financials.get_historical_price_data(start_date=start_date,
                                                      end_date=end_date,
                                                      time_interval=time_interval)
    # the quote-type payload supplies the short name and the instrument type
    quote_types = yahoo_financials.get_stock_quote_type_data()

    data_list = []
    for tik in tickers:
        prices = pd.DataFrame.from_dict(data[tik]['prices'])
        prices['Symbol'] = tik
        prices['Stock'] = quote_types[tik]['shortName']
        prices['Type'] = quote_types[tik]['quoteType']

        # use yahoo finance to get the sector, when it is reported
        info = yf.Ticker(tik).info
        prices['Sector'] = info["sector"] if "sector" in info else 'unclassified'

        # fix faulty yahoo data that jumps 100x
        prices['adjclose'] = _fix_100x_jumps(prices['adjclose'])
        data_list.append(prices)

    # collect
    df = pd.concat(data_list)
    df = df.rename(columns={'formatted_date':'Date'})
    df = df.drop('date', axis = 1)
    # `infer_datetime_format` is deprecated in pandas 2.x (format inference is
    # the default there), so it is no longer passed.
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')

    if dataframe_format == 'wide':
        df = df.pivot_table(index=["Date"],
                            columns='Stock',
                            values='adjclose')

    return df


def _fix_100x_jumps(adjclose):
    """Rescale a price series that jumps by roughly 100x between consecutive rows.

    A row-over-row ratio above 50 counts as an up-jump and a ratio below .02
    as a down-jump. From each up-jump onward values are divided by 100, and
    from each down-jump onward they are multiplied by 100.
    """
    ratio = adjclose / adjclose.shift()
    jumps_up = ratio > 50
    jumps_down = ratio < .02
    correction_factor = 100.**(jumps_down.cumsum() - jumps_up.cumsum())
    return adjclose * correction_factor

In [None]:
# Download data for every symbol in the filtered portfolio
# (a hand-written list such as ['^GSPC'] can be substituted here).
tickers = df_tickers['Symbol'].to_list()

df = download_yahoo_data(
    tickers=tickers,
    start_date=start_date,
    end_date=date_end,
    time_interval=time_interval,
    dataframe_format='long',
)
df.head()