In [2]:
import os
import math
import Quandl
import pickle
import numpy as np
import pandas as pd
import scipy.optimize as spo
import matplotlib.pyplot as plt


def _load_pickle(path):
    """Open `path` in binary mode and return the single object pickled in it."""
    # The `with` block closes the handle even when pickle.load() raises.
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file -- only load pickle files that this project produced itself.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def retrieve_data():
    """Load the pickled stock-data dictionaries from the working directory.

    Reads five pickle files -- 'dowfile.pickle', 'spfile.pickle',
    'nasdtotalfile.pickle', 'nysetotalfile.pickle' and 'featfile.pickle' --
    and returns the objects stored in them. Per the original notes, these
    files were written by an earlier retrieval step that pulled the data from
    the Quandl server; to add more data, re-run that step or fetch a single
    series with Quandl.get("CODE"), where CODE is a specific Quandl code.

    Returns:
        tuple: (dowJonesDict, sp500Dict, featuresDict, nasdDict, nyseDict).
        Note the order: the features dictionary is third, ahead of the
        NASDAQ and NYSE dictionaries.

    Raises:
        IOError: if one of the pickle files cannot be opened.
    """
    dowJonesDict = _load_pickle('dowfile.pickle')
    sp500Dict = _load_pickle('spfile.pickle')
    nasdDict = _load_pickle('nasdtotalfile.pickle')
    nyseDict = _load_pickle('nysetotalfile.pickle')
    featuresDict = _load_pickle('featfile.pickle')

    return dowJonesDict, sp500Dict, featuresDict, nasdDict, nyseDict

# Load every pickled dictionary once; later cells read these module-level names.
# The unpacking order must match retrieve_data()'s return order (features is third).
dowJonesDict, sp500Dict, featuresDict, nasdDict, nyseDict = retrieve_data()
# Parenthesised form prints the same text under Python 2 and also runs on Python 3.
print("Done")

Done


In [175]:
def get_rolling_mean(values, window):
    """Return rolling mean of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame (rolled column by column).
        window: number of consecutive observations in each window.

    Returns:
        Object shaped like `values` holding the rolling mean.
    """
    # pd.rolling_mean() was deprecated in pandas 0.18 and later removed, so
    # prefer the .rolling() accessor whenever the input provides it.
    if hasattr(values, 'rolling'):
        return values.rolling(window=window).mean()
    # Fallback for old pandas installs (or plain arrays) that lack .rolling().
    return pd.rolling_mean(values, window=window)

def get_rolling_std(values, window):
    """Return rolling standard deviation of given values, using specified window size.

    Args:
        values: pandas Series or DataFrame (rolled column by column).
        window: number of consecutive observations in each window.

    Returns:
        Object shaped like `values` holding the rolling standard deviation.
    """
    # pd.rolling_std() was deprecated in pandas 0.18 and later removed, so
    # prefer the .rolling() accessor whenever the input provides it.
    if hasattr(values, 'rolling'):
        return values.rolling(window=window).std()
    # Fallback for old pandas installs (or plain arrays) that lack .rolling().
    return pd.rolling_std(values, window=window)
    
def get_bollinger_bands(rm, rstd, num_std=2):
    """Return upper and lower Bollinger Bands.

    Args:
        rm: rolling mean (scalar, Series or DataFrame).
        rstd: rolling standard deviation, shaped like `rm`.
        num_std: how many standard deviations the bands sit away from the
            mean. Defaults to 2, the value that was previously hard-coded.

    Returns:
        tuple: (upper_band, lower_band), i.e. rm + num_std * rstd and
        rm - num_std * rstd.
    """
    band_width = rstd * num_std
    upper_band = rm + band_width
    lower_band = rm - band_width
    return upper_band, lower_band
    
def compute_daily_returns(df):
    """Compute and return the daily return values.

    Each entry is value[t] / value[t-1] - 1. The first row has no previous
    observation, so it is set to 0 instead of being left as NaN.

    Args:
        df: pandas DataFrame (or Series) of prices, one row per period.

    Returns:
        New object shaped like `df` holding the period-over-period returns.
    """
    daily_returns = (df / df.shift(1)) - 1
    # .iloc is strictly positional. The previous `.ix[0, :]` was label-based
    # on integer indexes (and could append a row labelled 0), and `.ix` no
    # longer exists in current pandas. Skip the assignment for empty input.
    if len(daily_returns) > 0:
        daily_returns.iloc[0] = 0
    return daily_returns

def normalize_data(df_dict):
    """Normalize the adjusted_closing_price dataframes.

    Every dataframe is divided by its own first row, so each column starts
    at 1.0.

    Args:
        df_dict: iterable of pandas DataFrames. Despite the name, the items
            yielded by iterating it must be dataframes themselves (e.g. a
            list); iterating a plain dict would yield its keys instead.

    Returns:
        list: one normalized DataFrame per input, in iteration order.
    """
    # .iloc[0] is the positional first row; the previous `.ix[0, :]` is
    # label-based on integer indexes and was removed from current pandas.
    return [frame / frame.iloc[0] for frame in df_dict]

def _filter_and_fill(frames_by_name, min_records):
    """Keep frames with more than `min_records` rows and fill their gaps."""
    kept = {}
    for name, frame in frames_by_name.items():
        if len(frame) <= min_records:
            continue
        if frame.isnull().values.any():
            # Forward-fill first, then back-fill whatever is still missing at
            # the very start. The old code passed inplace="TRUE" and assigned
            # the (None) result, so the second fill crashed on None.
            frame = frame.ffill().bfill()
        kept[name] = frame
    return kept


def adj_for_stk_record_nums(nasdDict, nyseDict, sp500Dict, dowJonesDict, min_records=4000):
    """Drop short price histories and fill missing values in the rest.

    For each of the four dictionaries (name -> DataFrame), keeps only the
    entries whose dataframe has more than `min_records` rows. Any kept
    dataframe containing nulls is forward-filled and then back-filled. The
    input dictionaries and their dataframes are not modified.

    Args:
        nasdDict: NASDAQ dataframes keyed by name.
        nyseDict: NYSE dataframes keyed by name.
        sp500Dict: S&P 500 dataframes keyed by name.
        dowJonesDict: Dow Jones dataframes keyed by name.
        min_records: a dataframe must have strictly more rows than this to
            be kept. Defaults to 4000, the previously hard-coded threshold.

    Returns:
        list: four new dictionaries ordered [dow, sp500, nasd, nyse]. Note
        that this differs from the argument order.
    """
    return [
        _filter_and_fill(source, min_records)
        for source in (dowJonesDict, sp500Dict, nasdDict, nyseDict)
    ]

In [176]:
def _merge_company_columns(frames_by_name, close_col, vol_col):
    """Return [close_df, volume_df] with one column per entry of `frames_by_name`."""
    close_df, vol_df = None, None
    for name, frame in frames_by_name.items():
        # Pull out the single column of interest and rename it after the entry.
        next_close = frame[[close_col]].rename(columns={close_col: name})
        next_vol = frame[[vol_col]].rename(columns={vol_col: name})
        if close_df is None:
            close_df, vol_df = next_close, next_vol
        else:
            # Index-on-index merge (inner join): only rows whose index value
            # is present in every frame merged so far are kept.
            close_df = pd.merge(close_df, next_close, left_index=True, right_index=True)
            vol_df = pd.merge(vol_df, next_vol, left_index=True, right_index=True)
    if close_df is None:
        # Empty dictionary: return empty frames rather than leaking the
        # frames built for a previous dictionary (or hitting a NameError).
        close_df, vol_df = pd.DataFrame(), pd.DataFrame()
    return [close_df, vol_df]


def create_adj_vol_and_close_dfs(adjDicts, normDicts):
    """Build wide close-price and volume dataframes, one column per company.

    Args:
        adjDicts: sequence of dictionaries (name -> DataFrame) whose frames
            have 'Adj. Close' and 'Adj. Volume' columns. The first dictionary
            is stored under 'adjDowDict'; every later one is stored under
            'adjSpDict' (so a third would overwrite the second).
        normDicts: sequence of dictionaries (name -> DataFrame) whose frames
            have 'Close' and 'Volume' columns. The first is stored under
            'normNasdDict'; every later one under 'normNyseDict'.

    Returns:
        tuple: (adjustedDicts, normalDicts). Each value is a two-element list
        [close_df, volume_df]; both frames have one column per key of the
        source dictionary and are joined on the frames' indexes.
    """
    adjustedDicts, normalDicts = {}, {}

    for position, each_dict in enumerate(adjDicts):
        key = 'adjDowDict' if position == 0 else 'adjSpDict'
        adjustedDicts[key] = _merge_company_columns(each_dict, 'Adj. Close', 'Adj. Volume')

    for position, each_norm_dict in enumerate(normDicts):
        key = 'normNasdDict' if position == 0 else 'normNyseDict'
        normalDicts[key] = _merge_company_columns(each_norm_dict, 'Close', 'Volume')

    return adjustedDicts, normalDicts

# adj_for_stk_record_nums() returns its dictionaries ordered [dow, sp500, nasd, nyse].
dateAdjustedDicts = adj_for_stk_record_nums(nasdDict,nyseDict,sp500Dict,dowJonesDict)
# Dow and S&P 500 frames are read through their 'Adj. Close'/'Adj. Volume' columns...
adjCloseDicts = [dateAdjustedDicts[0],dateAdjustedDicts[1]]
# ...while the NASDAQ and NYSE frames are read through 'Close'/'Volume'.
normCloseDicts = [dateAdjustedDicts[2], dateAdjustedDicts[3]]
adjustedDicts,normalDicts = create_adj_vol_and_close_dfs(adjCloseDicts, normCloseDicts)
# Parenthesised form prints the same text under Python 2 and also runs on Python 3.
print("Done")

Done


In [74]:
def compute_bollinger_bands(adj_close_df_indexes, window):
    """Compute Bollinger Bands for every column of each price dataframe.

    Args:
        adj_close_df_indexes: sequence of pandas DataFrames of adjusted
            closing prices, one column per company.
        window: rolling window size, in rows, used for the mean and the
            standard deviation.

    Returns:
        dict: maps 'dow' (first dataframe) and 'sp500' (every later
        dataframe, so a third would overwrite the second) to a list
        [upper_band_df, lower_band_df, rolling_mean_df]. Each frame has the
        same index and columns as its input. The leading rows, where a full
        window is not yet available, are blank (NaN), so consumers should
        start reading `window` rows in.
    """
    eachIndexBolBandsDict = {}
    for position, each_index in enumerate(adj_close_df_indexes):
        # The rolling helpers work column-wise on a whole DataFrame, so there
        # is no need to roll each company separately and merge the results
        # back together one column at a time.
        rm_company_df = get_rolling_mean(each_index, window)
        rstd_company_df = get_rolling_std(each_index, window)
        upper_band_df, lower_band_df = get_bollinger_bands(rm_company_df, rstd_company_df)

        key = 'dow' if position == 0 else 'sp500'
        eachIndexBolBandsDict[key] = [upper_band_df, lower_band_df, rm_company_df]

    return eachIndexBolBandsDict

def get_momentum(adj_close_df_indexes, window):
    """Compute `window`-row momentum for every column of each price dataframe.

    Momentum at row t is price[t] / price[t - window] - 1. The first
    `window` rows have no look-back value and are set to 0.

    Args:
        adj_close_df_indexes: sequence of pandas DataFrames of prices, one
            column per company.
        window: look-back distance in rows; must not be negative.

    Returns:
        dict: maps 'dow' (first dataframe) and 'sp500' (every later
        dataframe, so a third would overwrite the second) to a DataFrame of
        momentum values with the same index and columns as its input.

    Raises:
        ValueError: if `window` is negative.
    """
    if window < 0:
        raise ValueError("window must be non-negative")

    eachIndexMomentumDict = {}
    for position, each_index in enumerate(adj_close_df_indexes):
        # Vectorised form of the per-element loop: shift() lines every price
        # up with the one `window` rows earlier.
        momentum = each_index / each_index.shift(window) - 1.0
        # Only rows 0..window-1 lack a look-back price. The earlier
        # `i > window` test also zeroed row `window`, whose look-back is the
        # perfectly valid row 0.
        momentum.iloc[:window] = 0.0

        key = 'dow' if position == 0 else 'sp500'
        eachIndexMomentumDict[key] = momentum

    return eachIndexMomentumDict

def pe_ratio(adj_close_df_indexes, window):
    """Compute price divided by its `window`-row price change, per column.

    Despite the name, no earnings data is involved: the value at row t is
    price[t] / (price[t] - price[t - window]). The first `window` rows have
    no look-back value and are set to 0. A zero price change produces an
    infinite (or NaN) entry rather than an error.

    Args:
        adj_close_df_indexes: sequence of pandas DataFrames of prices, one
            column per company.
        window: look-back distance in rows; must not be negative.

    Returns:
        dict: maps 'dow' (first dataframe) and 'sp500' (every later
        dataframe, so a third would overwrite the second) to a DataFrame of
        ratios with the same index and columns as its input.

    Raises:
        ValueError: if `window` is negative.
    """
    if window < 0:
        raise ValueError("window must be non-negative")

    eachIndexPeRatioDict = {}
    for position, each_index in enumerate(adj_close_df_indexes):
        # Vectorised form of the per-element loop: shift() lines every price
        # up with the one `window` rows earlier.
        price_change = each_index - each_index.shift(window)
        pe_df = each_index / price_change
        # Only rows 0..window-1 lack a look-back price. The earlier
        # `i > window` test also zeroed row `window`, whose look-back is the
        # perfectly valid row 0.
        pe_df.iloc[:window] = 0.0

        key = 'dow' if position == 0 else 'sp500'
        eachIndexPeRatioDict[key] = pe_df

    return eachIndexPeRatioDict

In [75]:
# Adjusted-close frames for the Dow (first) and the S&P 500 (second). They are
# read from the dictionaries built by create_adj_vol_and_close_dfs(), where
# element 0 of each entry is the close-price frame. The names previously used
# here (dow_adjusted / sp_adjusted) are not defined anywhere in the visible
# notebook, so the cell failed with a NameError on a fresh kernel.
# NOTE(review): confirm these are the frames the old variables referred to.
adj_close_df = [adjustedDicts['adjDowDict'][0], adjustedDicts['adjSpDict'][0]]
eachIndexBolBandsDict = compute_bollinger_bands(adj_close_df, 20)
eachIndexMomentumDict = get_momentum(adj_close_df, 20)
eachIndexPeRatioDict = pe_ratio(adj_close_df, 20)
# Parenthesised form prints the same text under Python 2 and also runs on Python 3.
print("DONE")

DONE


In [31]:
def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
    """Draw `df` as a line chart with a title and labelled axes, then show it.

    Args:
        df: pandas object exposing .plot() (each column becomes a line).
        title: text shown above the chart.
        xlabel: label for the horizontal axis.
        ylabel: label for the vertical axis.
    """
    axes = df.plot(title=title, fontsize=12)
    # Apply both axis labels through their respective setters.
    for apply_label, text in ((axes.set_xlabel, xlabel), (axes.set_ylabel, ylabel)):
        apply_label(text)
    plt.show()
    
def plot_bollinger(adj_close_df, window=20, label=None):
    """Plot a price series together with its rolling mean and Bollinger Bands.

    The first `window` rows are skipped and the chart stops at row 160, so
    with the default window it covers at most 140 rows. The same row range
    is applied to the price, the rolling mean and both bands so the four
    lines always cover the same dates.

    Args:
        adj_close_df: pandas Series of prices for a single company.
        window: rolling window size in rows. Defaults to 20, the previously
            hard-coded value.
        label: legend label for the price line. Defaults to the series'
            own name (falling back to 'Price') instead of the former fixed
            'IBM', which mislabelled every other company.
    """
    rolling_mean = get_rolling_mean(adj_close_df, window=window)
    rolling_std = get_rolling_std(adj_close_df, window=window)
    upper_band, lower_band = get_bollinger_bands(rolling_mean, rolling_std)

    if label is None:
        series_name = getattr(adj_close_df, 'name', None)
        label = 'Price' if series_name is None else str(series_name)

    # One shared positional range for every line on the chart.
    shown_rows = slice(window, 160)

    # Plot raw prices, rolling mean and Bollinger Bands on the same axes
    ax = adj_close_df[shown_rows].plot(title="Bollinger Bands", label=label)
    rolling_mean[shown_rows].plot(label='Rolling mean', ax=ax)
    upper_band[shown_rows].plot(label='upper band', ax=ax)
    lower_band[shown_rows].plot(label='lower band', ax=ax)
    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()
      
def compute_and_plot_daily_returns(adj_close_df):
    """Chart the daily returns of the given adjusted closing prices.

    The returns come from compute_daily_returns() and are drawn with
    plot_data(). The period shown is simply whatever rows the caller passes
    in, so slice `adj_close_df` beforehand to lengthen or shorten it.
    """
    plot_data(
        compute_daily_returns(adj_close_df),
        title="Daily returns",
        ylabel="Daily returns",
    )
    
# Bollinger chart for one Dow company, limited to its first 160 rows.
plot_bollinger(adj_close_df[0]['Nike'][:160])
# Daily returns of three Dow companies over the first 20 rows.
compute_and_plot_daily_returns(adj_close_df[0][['IBM','Nike','Visa']][0:20])
# Parenthesised form prints the same text under Python 2 and also runs on Python 3.
print("Done")

Done


In [179]:
# Row count of the merged Dow adjusted-close frame (element 0 of the entry).
# Parenthesised form prints the same text under Python 2 and also runs on Python 3.
print(len(adjustedDicts['adjDowDict'][0]))

4003


False