In [1]:
import pandas_datareader.data as web
import datetime
import numpy as np
import pandas as pd
import statsmodels.formula.api as sm
%matplotlib inline

This class handles downloading the OHLC data from Yahoo.com. The problem is that downloads sometimes fail for whatever reason (sucky internet connection in China???), so we want a class that handles the download for each symbol. If a download fails, it keeps retrying up to a preset number of times. At the end, it stops downloading once the data has been retrieved successfully, or it reports which symbols could not be retrieved.

In [2]:
class ohlc_downloader(object):
    """Download OHLC data for a set of symbols, retrying failed downloads.

    Every symbol gets a fixed budget of attempts. A symbol is "pending" while
    it has not been downloaded and still has attempts left; callers are
    expected to keep calling ``download`` for each symbol while
    ``continue_downloading()`` returns True.

    Attributes:
        symbols_download_status_dict: maps each symbol to
            ``{"downloaded": bool, "tries": int}``. ``downloaded`` is True only
            after a successful download; ``tries`` is the number of attempts
            still available.
        ohlc_end_date / ohlc_start_date: date range passed to the reader.
        download_tries: the per-symbol attempt budget given at construction.
        data_reader: optional callable used instead of ``web.DataReader``.
    """

    def __init__(self, ohlc_symbols, ohlc_end_date, ohlc_start_date, download_tries=5,
                 data_reader=None):
        """Set up the per-symbol status table.

        Args:
            ohlc_symbols: iterable of ticker symbols (duplicates collapse into
                a single status entry).
            ohlc_end_date: end of the requested date range.
            ohlc_start_date: start of the requested date range.
            download_tries: maximum number of attempts per symbol.
            data_reader: optional callable accepting the keyword arguments
                ``name``, ``data_source``, ``start`` and ``end``. When omitted,
                ``web.DataReader`` is used, exactly as before.
        """
        self.symbols_download_status_dict = self.build_download_status_dict(
            symbols=ohlc_symbols,
            download_tries=download_tries)
        self.ohlc_end_date = ohlc_end_date
        self.ohlc_start_date = ohlc_start_date
        self.download_tries = download_tries
        self.data_reader = data_reader

    def build_download_status_dict(self, symbols, download_tries):
        """Return a fresh ``{symbol: {"downloaded": False, "tries": n}}`` table."""
        return {symbol: {"downloaded": False, "tries": download_tries}
                for symbol in symbols}

    def continue_downloading(self):
        """Return True while at least one symbol is still worth attempting.

        A symbol whose attempts are used up no longer counts as pending, so
        the caller's loop terminates even when some downloads never succeed
        (or when ``download_tries`` was zero or negative to begin with).
        """
        return any(not status['downloaded'] and status['tries'] > 0
                   for status in self.symbols_download_status_dict.values())

    def failed_symbols(self):
        """Return the symbols that ran out of attempts without succeeding."""
        return [symbol
                for symbol, status in self.symbols_download_status_dict.items()
                if not status['downloaded'] and status['tries'] <= 0]

    def download(self, ohlc_symbol):
        """Make one download attempt for ``ohlc_symbol``.

        Returns:
            Whatever the reader returned on success, or None when the symbol
            was already downloaded, has no attempts left, or this attempt
            failed.

        Raises:
            KeyError: if ``ohlc_symbol`` was not passed to the constructor.
        """
        status = self.symbols_download_status_dict[ohlc_symbol]
        if status['downloaded'] or status['tries'] <= 0:
            return None

        # Use the parameter here, not a same-named global from the caller.
        print("Attempting to download {}".format(ohlc_symbol))

        # Resolved outside the try block so a missing/misnamed reader surfaces
        # as an error instead of being counted as a failed download.
        reader = self.data_reader if self.data_reader is not None else web.DataReader
        try:
            ohlc_data = reader(name=ohlc_symbol, data_source='yahoo',
                               start=self.ohlc_start_date, end=self.ohlc_end_date)
        except Exception as error:
            # `Exception` rather than a bare except: Ctrl-C (KeyboardInterrupt)
            # and SystemExit must still be able to stop a long retry loop.
            status['tries'] -= 1
            print("Problem downloading OHLC data for {} from yahoo.com".format(ohlc_symbol))
            print("Reason: {}: {}".format(type(error).__name__, error))

            if status['tries'] > 0:
                print("Don't worry. Will attempt to download {} more times\n".format(status['tries']))
            else:
                # 'downloaded' stays False so the status table tells the truth;
                # continue_downloading() stops on the exhausted 'tries' count.
                print("Have failed to download {} after {} attempts.\n".format(ohlc_symbol, self.download_tries))
            return None

        status['downloaded'] = True
        print("{} successfully downloaded\n".format(ohlc_symbol))
        return ohlc_data
            

In [3]:
# Tickers whose daily returns are compared below.
symbols = ['USO', 'NFLX', 'EBAY', 'GOOG', 'TWTR', 'TSLA', 'CAT', 'MS', 'FB', 'IBM', 'AMZN', 'GM', 'UAL', 'X', 'BA', 'CMG', 'HAL', 'UAL', 'QCOM', 'MCD', 'BIDU', 'GILD', 'FSLR', 'MA']
# For a quick trial run, uncomment:  symbols = symbols[:1]

# Request the trailing 365 days of history, with up to 5 attempts per symbol.
my_ohlc_downloader = ohlc_downloader(
    ohlc_symbols=symbols,
    ohlc_end_date=datetime.datetime.today(),
    ohlc_start_date=datetime.datetime.today() - datetime.timedelta(days=365),
    download_tries=5,
)

# Keep sweeping over the symbols until the downloader reports nothing pending.
# The loop variable is deliberately named `symbol`: it is a notebook-level
# global, and renaming it is not safe while other cells may read that name.
ohlc_data = {}
while my_ohlc_downloader.continue_downloading():
    for symbol in symbols:
        fetched = my_ohlc_downloader.download(symbol)
        if not isinstance(fetched, pd.DataFrame):
            continue
        ohlc_data[symbol] = fetched

# Daily log returns, one column per symbol that was actually retrieved.
returns_df = pd.DataFrame()
for symbol in symbols:
    if symbol not in ohlc_data:
        continue
    adj_close = ohlc_data[symbol]['Adj Close']
    # The first row has no previous close to compare against, so it is dropped.
    returns_df[symbol] = np.log(adj_close / adj_close.shift(1)).iloc[1:]

# Correlation matrix of the log returns (displayed as the cell's output).
returns_df.corr()

Attempting to download USO
USO successfully downloaded

Attempting to download NFLX
NFLX successfully downloaded

Attempting to download EBAY
EBAY successfully downloaded

Attempting to download GOOG
GOOG successfully downloaded

Attempting to download TWTR
Problem downloading OHLC data for TWTR from yahoo.com
Don't worry. Will attempt to download 4 more times

Attempting to download TSLA
TSLA successfully downloaded

Attempting to download CAT
CAT successfully downloaded

Attempting to download MS
MS successfully downloaded

Attempting to download FB
FB successfully downloaded

Attempting to download IBM
Problem downloading OHLC data for IBM from yahoo.com
Don't worry. Will attempt to download 4 more times

Attempting to download AMZN
Problem downloading OHLC data for AMZN from yahoo.com
Don't worry. Will attempt to download 4 more times

Attempting to download GM
GM successfully downloaded

Attempting to download UAL
UAL successfully downloaded

Attempting to download X
X successfull

Unnamed: 0,USO,NFLX,EBAY,GOOG,TWTR,TSLA,CAT,MS,FB,IBM,...,X,BA,CMG,HAL,QCOM,MCD,BIDU,GILD,FSLR,MA
USO,1.0,0.181233,0.205769,0.186533,0.043201,0.125324,0.346544,0.206383,0.20414,0.110379,...,0.219838,0.229266,0.173465,0.555437,0.14728,0.191828,0.146794,0.06429,0.181702,0.167596
NFLX,0.181233,1.0,0.277036,0.570716,0.295822,0.321009,0.334549,0.350973,0.501826,0.290317,...,0.272852,0.258607,0.136719,0.210465,0.336766,0.21672,0.346557,0.303768,0.26955,0.524942
EBAY,0.205769,0.277036,1.0,0.466984,0.271645,0.198993,0.29322,0.35789,0.325704,0.254828,...,0.264387,0.341485,-0.021632,0.316249,0.213929,0.214107,0.272288,0.30704,0.137691,0.464944
GOOG,0.186533,0.570716,0.466984,1.0,0.364984,0.361712,0.514771,0.526067,0.706982,0.434332,...,0.33302,0.428252,0.178483,0.330332,0.457115,0.301957,0.426021,0.405369,0.332644,0.672236
TWTR,0.043201,0.295822,0.271645,0.364984,1.0,0.183123,0.090912,0.094876,0.293499,0.124944,...,0.216151,0.032657,-0.02453,0.053696,0.172729,0.091288,0.063756,0.099976,0.155973,0.321086
TSLA,0.125324,0.321009,0.198993,0.361712,0.183123,1.0,0.305458,0.33418,0.338771,0.20944,...,0.203089,0.309455,0.100233,0.192345,0.208038,0.172667,0.325776,0.242289,0.103569,0.397692
CAT,0.346544,0.334549,0.29322,0.514771,0.090912,0.305458,1.0,0.586562,0.430485,0.307901,...,0.332357,0.571812,0.178604,0.40749,0.378136,0.44761,0.329333,0.274706,0.215052,0.520244
MS,0.206383,0.350973,0.35789,0.526067,0.094876,0.33418,0.586562,1.0,0.356685,0.438198,...,0.335806,0.485098,0.21845,0.337568,0.349371,0.312813,0.374907,0.367677,0.204288,0.542018
FB,0.20414,0.501826,0.325704,0.706982,0.293499,0.338771,0.430485,0.356685,1.0,0.31323,...,0.251484,0.265521,0.278972,0.34109,0.381772,0.297525,0.345643,0.292088,0.343685,0.611351
IBM,0.110379,0.290317,0.254828,0.434332,0.124944,0.20944,0.307901,0.438198,0.31323,1.0,...,0.240766,0.320875,0.194459,0.190488,0.293386,0.259429,0.281369,0.323023,0.041544,0.373812


In [None]:
# Sum every symbol's correlations with all symbols, then subtract 1 to take out
# the diagonal entry (a symbol's correlation with itself).
returns_df.corr().sum().sub(1)

In [None]:
# Show the downloader's per-symbol bookkeeping: each entry holds the
# 'downloaded' flag and the remaining 'tries' counter.
my_ohlc_downloader.symbols_download_status_dict