In [1]:
import os
import math
import Quandl
import pickle
import numpy as np
import pandas as pd
import scipy.optimize as spo
import matplotlib.pyplot as plt


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed even when
    unpickling raises.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point this at pickle files you created yourself or otherwise trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def retrieve_data():
    """Load the pickled datasets from the current working directory.

    Twelve pickle files are read, one object each. Per the original notes
    these files were produced by an earlier retrieval step that pulled the
    data from the Quandl server; to add more data, re-run that step or use
    Quandl.get("CODE") with a specific Quandl code.

    Returns a 4-tuple:
        index_dict_array     -- [dowJonesDict, sp500Dict, nasdDict, nyseDict]
        adjusted_dict_array  -- [adjCloseDict, normCloseDict, adjVolumeDict,
                                 normVolumeDict]
        featuresDict         -- object stored in 'featfile.pickle'
        technical_indicators -- [bolBandsDict, momentumDict, peRatioDict]

    Raises IOError/OSError if a file is missing, and whatever pickle raises
    for a corrupt file.
    """
    # Index-level data
    dowJonesDict = _load_pickle('dowfile.pickle')
    sp500Dict = _load_pickle('spfile.pickle')
    nasdDict = _load_pickle('nasdtotalfile.pickle')
    nyseDict = _load_pickle('nysetotalfile.pickle')
    featuresDict = _load_pickle('featfile.pickle')

    # Price and volume data
    adjCloseDict = _load_pickle('adjclosingpricesfile.pickle')
    normCloseDict = _load_pickle('closingpricesfile.pickle')
    adjVolumeDict = _load_pickle('adjvolumefile.pickle')
    normVolumeDict = _load_pickle('volumefile.pickle')

    # Precomputed technical indicators
    bolBandsDict = _load_pickle('bolbandsfile.pickle')
    momentumDict = _load_pickle('momentumfile.pickle')
    peRatioDict = _load_pickle('peratiofile.pickle')

    index_dict_array = [dowJonesDict, sp500Dict, nasdDict, nyseDict]
    adjusted_dict_array = [adjCloseDict, normCloseDict, adjVolumeDict, normVolumeDict]
    technical_indicators = [bolBandsDict, momentumDict, peRatioDict]

    return index_dict_array, adjusted_dict_array, featuresDict, technical_indicators

# Load every pickled dataset once; later cells read these module-level names
# (adjusted_dict_array and technical_indicators in particular).
index_dict_array, adjusted_dict_array, featuresDict, technical_indicators = retrieve_data()
print "Done"

Done


In [27]:
def get_rolling_mean(values, window):
    """Return the rolling mean of ``values`` over ``window`` observations.

    values -- pandas Series or DataFrame (anything exposing ``.rolling``);
              other inputs are handed to the legacy ``pd.rolling_mean``.
    window -- number of consecutive observations in each window.

    The first ``window - 1`` entries of the result are NaN because a full
    window is not yet available there.
    """
    if hasattr(values, 'rolling'):
        # pd.rolling_mean is deprecated since pandas 0.18 and absent from
        # newer releases; the .rolling() accessor is its replacement.
        return values.rolling(window=window).mean()
    # Fallback for pandas versions that predate .rolling()
    return pd.rolling_mean(values, window=window)

def get_rolling_std(values, window):
    """Return the rolling standard deviation of ``values`` over ``window`` observations.

    values -- pandas Series or DataFrame (anything exposing ``.rolling``);
              other inputs are handed to the legacy ``pd.rolling_std``.
    window -- number of consecutive observations in each window.

    The first ``window - 1`` entries of the result are NaN because a full
    window is not yet available there.
    """
    if hasattr(values, 'rolling'):
        # pd.rolling_std is deprecated since pandas 0.18 and absent from
        # newer releases; the .rolling() accessor is its replacement.
        return values.rolling(window=window).std()
    # Fallback for pandas versions that predate .rolling()
    return pd.rolling_std(values, window=window)
    
def get_bollinger_bands(rm, rstd):
    """Build the Bollinger Bands around a rolling mean.

    rm   -- rolling mean
    rstd -- rolling standard deviation

    Returns (upper, lower): the mean plus and minus two standard deviations.
    """
    band_width = rstd * 2
    return rm + band_width, rm - band_width
    
def compute_daily_returns(df):
    """Compute row-over-row returns: value / previous value - 1.

    df -- pandas DataFrame or Series of prices, one row per period.

    Returns an object of the same shape. The first row has no predecessor,
    so it is set to 0 instead of being left as NaN. Empty input is returned
    as an empty result.
    """
    daily_returns = (df / df.shift(1)) - 1
    if len(daily_returns) > 0:
        # Positional access: .ix[0, :] is label-based on an integer index
        # (it could hit the wrong row or add a new one) and is missing from
        # newer pandas releases.
        daily_returns.iloc[0] = 0
    return daily_returns

def normalize_data(df_dict):
    """Scale each DataFrame so that its first row becomes 1.

    df_dict -- iterable of DataFrames. Despite the name, the argument is
               iterated directly, so passing a dict would iterate its keys;
               pass the frames themselves (e.g. ``some_dict.values()``).

    Returns a list with one normalized DataFrame per input frame, in
    iteration order. Each column is divided by that column's first value.
    """
    # .iloc[0] selects the first row by position; the former .ix[0, :] was
    # label-based on integer indexes and is missing from newer pandas.
    return [each / each.iloc[0] for each in df_dict]

def retrieve_daily_returns(df):
    """Compute daily returns for every entry of every dictionary in ``df``.

    df -- iterable of dictionaries; each value is passed to
          compute_daily_returns().

    Returns one flat dictionary keyed like the inputs. When the same key
    appears in more than one dictionary, the later one wins.
    """
    returns_by_key = {}
    for frames_by_key in df:
        returns_by_key.update(
            (key, compute_daily_returns(frame))
            for key, frame in frames_by_key.items()
        )
    return returns_by_key

# Daily returns for the first two entries of adjusted_dict_array, which
# retrieve_data() builds as [adjCloseDict, normCloseDict, ...].
df = [adjusted_dict_array[0],adjusted_dict_array[1]]
dailyReturnsDict = retrieve_daily_returns(df)
print "Done"

Done


In [38]:
def _combine_band_columns(frames, index):
    """Place single-column frames side by side; empty input gives a column-less frame on ``index``."""
    if not frames:
        return pd.DataFrame(index=index)
    return pd.concat(frames, axis=1)


def compute_bollinger_bands(adjCloseDict, window):
    """Compute Bollinger Bands for every column of every price frame.

    adjCloseDict -- dict mapping a key to a DataFrame of adjusted closing
                    prices, one column per company.
    window       -- rolling window length (the notebook calls this with 20).

    Returns a dict with the same keys; each value is the list
    [upper_band_df, lower_band_df, rolling_mean_df], each frame having one
    column per company of the input frame.

    Note: the first rows of every result are blank (NaN) because no full
    window exists yet, so consumers should start reading after the window.
    """
    eachIndexBolBandsDict = {}
    for each_key in adjCloseDict.keys():
        each_index = adjCloseDict[each_key]

        # Fresh accumulators per key: the old single-flag approach left the
        # frames undefined (or carried over from the previous key) when a
        # price frame had no columns.
        upper_frames, lower_frames, mean_frames = [], [], []
        for each in each_index.keys():
            rm_company = get_rolling_mean(each_index[each], window)
            rstd_company = get_rolling_std(each_index[each], window)
            upper_band, lower_band = get_bollinger_bands(rm_company, rstd_company)

            upper_frames.append(pd.DataFrame(upper_band))
            lower_frames.append(pd.DataFrame(lower_band))
            mean_frames.append(pd.DataFrame(rm_company))

        # One concat per band instead of a join per company.
        eachIndexBolBandsDict[each_key] = [
            _combine_band_columns(upper_frames, each_index.index),
            _combine_band_columns(lower_frames, each_index.index),
            _combine_band_columns(mean_frames, each_index.index),
        ]

    return eachIndexBolBandsDict

def get_momentum(adjCloseDict, window):
    """Compute ``window``-row momentum for every column of every price frame.

    adjCloseDict -- either a single dict mapping a key to a price DataFrame,
                    or an iterable of such dicts (the original form). A
                    single dict is accepted because that is what the
                    notebook passes in.
    window       -- look-back distance in rows.

    Momentum at row i is price[i] / price[i - window] - 1, computed in
    floating point. Rows 0..window (inclusive) are filled with 0.0.
    NOTE(review): row ``window`` itself has a valid look-back value but is
    left at 0.0, as in the original ``i > window`` test -- confirm whether
    that is intended.

    Returns a dict mapping each key to a DataFrame with the same index and
    columns as the corresponding price frame. If a key occurs in several
    dicts, the last occurrence wins.
    """
    if isinstance(adjCloseDict, dict):
        dict_list = [adjCloseDict]
    else:
        dict_list = adjCloseDict

    eachIndexMomentumDict = {}
    for each_dict in dict_list:
        for each_key in each_dict.keys():
            each_index = each_dict[each_key]
            # .values works across pandas versions (.as_matrix() does not).
            prices = np.asarray(each_index.values, dtype=float)
            n_rows = prices.shape[0]

            momentum = np.zeros(prices.shape)
            first = window + 1  # first row that receives a computed value
            if n_rows > first:
                # Row i is paired with row i - window, for all columns at once.
                momentum[first:] = prices[first:] / prices[1:n_rows - window] - 1

            eachIndexMomentumDict[each_key] = pd.DataFrame(
                momentum, index=each_index.index, columns=each_index.columns)

    return eachIndexMomentumDict

def pe_ratio(adjCloseDict, window):
    """Compute a price-over-price-change ratio for every column of every frame.

    adjCloseDict -- dict mapping a key to a price DataFrame, one column per
                    company.
    window       -- look-back distance in rows.

    Despite the name, no earnings data is involved: the value at row i is
    price[i] / (price[i] - price[i - window]), computed in floating point.
    A zero price change therefore yields inf/NaN. Rows 0..window (inclusive)
    are filled with 0.0.
    NOTE(review): row ``window`` itself has a valid look-back value but is
    left at 0.0, as in the original ``i > window`` test -- confirm whether
    that is intended.

    Returns a dict mapping each key to a DataFrame with the same index and
    columns as the corresponding price frame.
    """
    eachIndexPeRatioDict = {}
    for each_key in adjCloseDict.keys():
        each_index = adjCloseDict[each_key]
        # .values works across pandas versions (.as_matrix() does not).
        prices = np.asarray(each_index.values, dtype=float)
        n_rows = prices.shape[0]

        ratios = np.zeros(prices.shape)
        first = window + 1  # first row that receives a computed value
        if n_rows > first:
            # Row i is paired with row i - window, for all columns at once.
            price_change = prices[first:] - prices[1:n_rows - window]
            ratios[first:] = prices[first:] / price_change

        # Building the frame per key (rather than joining column by column)
        # also covers frames without columns, which previously reused a
        # stale result or raised.
        eachIndexPeRatioDict[each_key] = pd.DataFrame(
            ratios, index=each_index.index, columns=each_index.columns)

    return eachIndexPeRatioDict

In [155]:
eachIndexBolBandsDict = compute_bollinger_bands(adjusted_dict_array[0], 20)
eachIndexMomentumDict = get_momentum(adjusted_dict_array[0], 10)
eachIndexPeRatioDict = pe_ratio(adjusted_dict_array[0], 20)
print "DONE"

DONE


In [169]:
def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
    """Draw ``df`` with the given title and axis labels, then show the figure.

    df     -- object with a pandas-style ``.plot`` method
    title  -- figure title
    xlabel -- x-axis label
    ylabel -- y-axis label
    """
    axes = df.plot(fontsize=12, title=title)
    axes.set_ylabel(ylabel)
    axes.set_xlabel(xlabel)
    plt.show()
    
def plot_bollinger(adj_close_df, window=20, end=160):
    """Plot prices together with their rolling mean and Bollinger Bands.

    adj_close_df -- price DataFrame/Series to plot (the notebook passes a
                    single-column frame).
    window       -- rolling window length; also the number of leading rows
                    skipped, since the bands are NaN there. Default 20.
    end          -- row position at which plotting stops (exclusive).
                    Default 160, i.e. a 140-row graph with the default
                    window. Pass None to plot through the last row.

    All four lines are drawn over the same rows [window:end], so the bands
    never extend past the price line when a longer frame is supplied.
    """
    rolling_mean = get_rolling_mean(adj_close_df, window=window)
    rolling_std = get_rolling_std(adj_close_df, window=window)
    upper_band, lower_band = get_bollinger_bands(rolling_mean, rolling_std)

    # Raw prices first, then the derived lines on the same axes
    ax = adj_close_df[window:end].plot(title="Bollinger Bands", label='IBM')
    rolling_mean[window:end].plot(label='Rolling mean', ax=ax)
    upper_band[window:end].plot(label='upper band', ax=ax)
    lower_band[window:end].plot(label='lower band', ax=ax)

    # Axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()
      
def compute_and_plot_daily_returns(adj_close_df):
    """Plot the daily returns of the given price frame.

    The returns come from compute_daily_returns() and are drawn with
    plot_data(). The period shown is whatever slice of prices the caller
    passes in (the notebook uses 20 rows).
    """
    plot_data(
        compute_daily_returns(adj_close_df),
        title="Daily returns",
        ylabel="Daily returns",
    )
    
# Bollinger plot for the 'IBM' column, first 160 rows of the 'adjDowDict' frame.
plot_bollinger(adjusted_dict_array[0]['adjDowDict'][['IBM']][:160])
# Daily returns for every column, first 20 rows of the same frame.
compute_and_plot_daily_returns(adjusted_dict_array[0]['adjDowDict'][0:20])
print "Done"

Done


In [32]:
def sort_by_timestep(technical_indicators, index_key='adjDowDict', company='IBM',
                     start=21, stop=100, n_bins=10):
    """Derive evenly spaced cutoff values for five technical indicators.

    technical_indicators -- [bolBandsDict, momentumDict, peRatioDict], where
        bolBandsDict[index_key] is [upper_df, lower_df, rolling_mean_df] and
        the other two map index_key to a DataFrame.
    index_key -- key selecting the frames to use (default 'adjDowDict').
    company   -- column to use (default 'IBM').
    start, stop -- row positions delimiting the sample (default 21:100).
    n_bins    -- number of cutoffs to produce (default 10).

    Each indicator's sample is sorted, and the value at position
    (i + 1) * step is taken for i in 0..n_bins-1, where step is the sample
    size of the upper band divided by n_bins (integer division). Positions
    past the end are clamped to the last element, which matters when the
    sample size is a multiple of n_bins.

    Each cutoff row is printed, and the result is returned as a dict
    {i: [upper, lower, rolling_mean, momentum, pe]}.

    Raises IndexError if the selected sample is empty.
    """
    bolBandsDict = technical_indicators[0]
    momentumDict = technical_indicators[1]
    peRatioDict = technical_indicators[2]

    bands = bolBandsDict[index_key]
    selected = [
        bands[0][company],
        bands[1][company],
        bands[2][company],
        momentumDict[index_key][company],
        peRatioDict[index_key][company],
    ]
    sorted_samples = [np.sort(np.array(series.iloc[start:stop])) for series in selected]

    # Floor division keeps the step an integer on Python 2 and 3 alike.
    step_size = sorted_samples[0].size // n_bins

    t = {}
    for i in range(n_bins):
        position = (i + 1) * step_size
        t[i] = [sample[min(position, sample.size - 1)] for sample in sorted_samples]
        # Single-argument parenthesised print gives the same output under
        # the Python 2 print statement used elsewhere in this notebook.
        print(t[i])
    return t

def _bin_index(value, cutoffs):
    """Return the position of the first cutoff that is >= value.

    Values above every cutoff get len(cutoffs); NaN gets -1.
    """
    if value != value:  # NaN never compares equal to itself
        return -1
    for position, cutoff in enumerate(cutoffs):
        if value <= cutoff:
            return position
    return len(cutoffs)


def descretize_indicators(technical_indicators, t, index_key='adjDowDict',
                          company='IBM', start=21, stop=100, n_days=5):
    """Convert indicator values into bin indices using the cutoffs in ``t``.

    technical_indicators -- [bolBandsDict, momentumDict, peRatioDict], where
        bolBandsDict[index_key] is [upper_df, lower_df, rolling_mean_df] and
        the other two map index_key to a DataFrame.
    t         -- dict {i: [upper, lower, rolling_mean, momentum, pe]} of
                 cutoffs keyed 0..len(t)-1, as produced by
                 sort_by_timestep(). The last entry is not used as a cutoff;
                 the final bin is open-ended.
    index_key -- key selecting the frames to use (default 'adjDowDict').
    company   -- column to use (default 'IBM').
    start, stop -- row positions delimiting the sample (default 21:100).
    n_days    -- number of leading sample rows to convert (default 5);
                 None converts every row.

    For each row, each of the five indicators is mapped to a bin:
    bin 0 is value <= t[0], bin k is t[k-1] < value <= t[k], and the last
    bin holds everything above t[len(t)-2]. A NaN value maps to -1 so every
    row still yields five entries.

    Returns a list with one [upper, lower, rolling_mean, momentum, pe] list
    of bin indices per converted row.

    Raises IndexError if fewer than ``n_days`` rows are available.
    """
    bolBandsDict = technical_indicators[0]
    momentumDict = technical_indicators[1]
    peRatioDict = technical_indicators[2]

    bands = bolBandsDict[index_key]
    upper = pd.DataFrame(bands[0][company].iloc[start:stop])
    lower = pd.DataFrame(bands[1][company].iloc[start:stop])
    rolling = pd.DataFrame(bands[2][company].iloc[start:stop])
    momentum = pd.DataFrame(momentumDict[index_key][company].iloc[start:stop])
    peratio = pd.DataFrame(peRatioDict[index_key][company].iloc[start:stop])

    # Column order fixes the feature order: upper, lower, rolling, momentum, pe.
    grouped = upper.join(lower, how='outer', rsuffix='Low')
    grouped = grouped.join(rolling, how='outer', rsuffix='Roll')
    grouped = grouped.join(momentum, how='outer', rsuffix='Moment')
    grouped = grouped.join(peratio, how='outer', rsuffix='Pe')

    # One ascending cutoff list per feature. The outermost bounds used to be
    # min()/max() of a DataFrame, which yields column labels rather than
    # values; open-ended first and last bins need no such bounds.
    n_cutoffs = len(t) - 1
    cutoffs_per_feature = [[t[i][feature] for i in range(n_cutoffs)]
                           for feature in range(5)]

    print(" ")
    if n_days is None:
        n_days = len(grouped)

    day_threshs = []
    for num in range(n_days):
        row = grouped.iloc[num]
        day_threshs.append([_bin_index(value, cutoffs)
                            for value, cutoffs in zip(row, cutoffs_per_feature)])
    return day_threshs
        
# Derive the per-indicator cutoffs, then use them to turn indicator rows
# into bin indices.
threshold = sort_by_timestep(technical_indicators)
inputs = descretize_indicators(technical_indicators, threshold)

[89.985726491515706, 78.486664848380968, 85.826299240907559, -0.1034482758620664, -31.789625360229916]
[92.604203450180762, 78.874848516994717, 86.000001476605775, -0.035400343015005609, -14.227642276423504]
[93.701800100819398, 79.797047304459781, 86.117924599698512, -0.010726518839016208, -6.0570071258907374]
[94.539710749857349, 80.836071097845718, 86.89559222314125, 0.011291706258814926, 6.9964726631394392]
[96.397676028540076, 81.685659504406047, 88.076834284364281, 0.026294606922453001, 7.8644067796614499]
[97.591023436402963, 81.929166404063849, 89.313851687432617, 0.064220183486235705, 8.6355810616930686]
[98.035790102565699, 82.55562663341604, 90.439716802109302, 0.082777159807190692, 10.464968152863836]
[98.694290802581079, 84.231786806633934, 91.322560784460165, 0.12320000000001596, 15.315930388220487]
[99.333739329110799, 85.754689038397871, 92.437459270939897, 0.13787820758329672, 20.79553903345758]
[102.61503659119364, 88.256809091582284, 95.435922841387963, 0.15435035187



In [33]:
#buy/sell/nothing == actions
#holding/shorting/bollinger/momentum/pe/rewardsinceentry == state
#dailyreturn == reward