In [6]:
import os
import math
import Quandl
import pickle
import numpy as np
import pandas as pd
import scipy.optimize as spo
import matplotlib.pyplot as plt
 

def retrieve_data():
    """Load the three pickled data dictionaries from the working directory.

    Reads 'dowfile.pickle', 'spfile.pickle' and 'featfile.pickle' (paths are
    relative to the current working directory). According to the original
    notes, the first two hold one dataframe per company of the Dow Jones and
    S&P 500 indexes, and the third holds several feature measures; the files
    were produced by an earlier retrieval step that pulled the data from the
    Quandl server. Additional series can be fetched with Quandl.get("CODE"),
    where CODE is a specific Quandl code.

    Returns:
        tuple: (dowJonesDict, sp500Dict, featuresDict), in that order.

    Raises:
        IOError/OSError: if one of the pickle files cannot be opened.
    """

    def _load_pickle(path):
        # `with` guarantees the handle is closed even when unpickling fails.
        # SECURITY: pickle.load can execute arbitrary code; only load files
        # that were produced locally by a trusted process.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    dowJonesDict = _load_pickle('dowfile.pickle')
    sp500Dict = _load_pickle('spfile.pickle')
    featuresDict = _load_pickle('featfile.pickle')

    return dowJonesDict, sp500Dict, featuresDict

# Load the pickled dictionaries once; later cells read these module-level names.
dowJonesDict, sp500Dict, featuresDict = retrieve_data()
# Parenthesised form prints the same text on Python 2 and Python 3.
print("Done")

Done


In [7]:
def get_rolling_mean(values, window):
    """Return the rolling mean of `values` over `window` observations.

    Args:
        values: pandas Series or DataFrame (column-wise for a DataFrame).
        window: number of observations per window.

    Returns:
        Object of the same shape as `values`; the first `window - 1`
        positions are NaN because a full window is required.
    """
    # pd.rolling_mean was removed from pandas; prefer the .rolling() accessor
    # and keep the legacy call only for inputs/versions that lack it.
    if hasattr(values, 'rolling'):
        return values.rolling(window=window).mean()
    return pd.rolling_mean(values, window=window)

def get_rolling_std(values, window):
    """Return the rolling standard deviation of `values` over `window` observations.

    Args:
        values: pandas Series or DataFrame (column-wise for a DataFrame).
        window: number of observations per window.

    Returns:
        Object of the same shape as `values`; the first `window - 1`
        positions are NaN because a full window is required.
    """
    # pd.rolling_std was removed from pandas; prefer the .rolling() accessor
    # and keep the legacy call only for inputs/versions that lack it.
    if hasattr(values, 'rolling'):
        return values.rolling(window=window).std()
    return pd.rolling_std(values, window=window)
    
def get_bollinger_bands(rm, rstd, num_std=2):
    """Return the upper and lower Bollinger Bands.

    Args:
        rm: rolling mean (scalar, Series or DataFrame).
        rstd: rolling standard deviation, aligned with `rm`.
        num_std: band half-width in standard deviations; defaults to 2,
            which was previously hard-coded.

    Returns:
        tuple: (upper_band, lower_band) = (rm + num_std * rstd, rm - num_std * rstd).
    """
    band_width = rstd * num_std
    upper_band = rm + band_width
    lower_band = rm - band_width
    return upper_band, lower_band
    
def compute_daily_returns(df):
    """Compute and return the daily return values.

    Each value is price[t] / price[t-1] - 1. The first row has no
    predecessor, so it is set to 0 instead of being left as NaN.

    Args:
        df: pandas DataFrame (or Series) of prices, ordered in time.

    Returns:
        New object of the same shape as `df`; the input is not modified.
    """
    daily_returns = (df / df.shift(1)) - 1
    # Use positional .iloc: the removed .ix indexer was label-based on integer
    # indexes, so `.ix[0, :]` could zero the row *labelled* 0 rather than the
    # first row. The length guard keeps an empty input from raising.
    if len(daily_returns) > 0:
        daily_returns.iloc[0] = 0
    return daily_returns
    
def create_adj_vol_and_close_dfs(dowJonesDict):
    """Build one adjusted-close table and one adjusted-volume table.

    For every entry of `dowJonesDict` the 'Adj. Close' and 'Adj. Volume'
    columns are extracted, renamed to the entry's key, and inner-joined on the
    index with the columns collected so far. The result therefore has one
    column per company and only the index values shared by all companies.

    Args:
        dowJonesDict: mapping of company name -> DataFrame containing at
            least the columns 'Adj. Close' and 'Adj. Volume'.

    Returns:
        tuple: (adj_close_df, adj_vol_df). Two empty DataFrames are returned
        when the mapping is empty (this used to raise UnboundLocalError).
    """
    adj_close_df = None
    adj_vol_df = None

    for company in dowJonesDict.keys():
        company_df = dowJonesDict[company]
        close_column = company_df[['Adj. Close']].rename(columns={'Adj. Close': company})
        volume_column = company_df[['Adj. Volume']].rename(columns={'Adj. Volume': company})

        if adj_close_df is None:
            # First company seeds the result tables.
            adj_close_df = close_column
            adj_vol_df = volume_column
        else:
            # Inner join on the index keeps only dates present for every company.
            adj_close_df = pd.merge(adj_close_df, close_column, left_index=True, right_index=True)
            adj_vol_df = pd.merge(adj_vol_df, volume_column, left_index=True, right_index=True)

    if adj_close_df is None:
        return pd.DataFrame(), pd.DataFrame()

    return adj_close_df, adj_vol_df

In [8]:
def compute_bollinger_bands(adj_close_df, window=20):
    """Compute the Bollinger Bands and rolling mean for every company column.

    The rolling statistics are computed column-wise on the whole dataframe in
    one pass, which yields the same per-company columns as computing each
    company separately and joining the results on the (shared) index.

    Args:
        adj_close_df: DataFrame of adjusted closing prices, one column per
            company, time going down.
        window: rolling window length in rows; defaults to 20, the value that
            was previously hard-coded.

    Returns:
        tuple: (upper_band_df, lower_band_df, rm_company_df).
        The two band frames have their first `window` rows removed; the
        rolling-mean frame is returned untrimmed, so it still starts with NaN
        rows and is `window` rows longer than the bands.
    """
    # 1. Rolling mean and 2. rolling standard deviation, per column.
    rm_company_df = get_rolling_mean(adj_close_df, window=window)
    rstd_company_df = get_rolling_std(adj_close_df, window=window)
    # 3. Upper and lower bands.
    upper_band_df, lower_band_df = get_bollinger_bands(rm_company_df, rstd_company_df)

    # Drop the warm-up rows that have no full window behind them.
    # NOTE(review): the first complete window ends at position window - 1, so
    # slicing from `window` also discards one valid row; kept as-is because
    # downstream cells may rely on the existing row count.
    upper_band_df = upper_band_df[window:]
    lower_band_df = lower_band_df[window:]

    return upper_band_df, lower_band_df, rm_company_df

def get_momentum(values, window):
    """Return the `window`-row momentum of every column in `values`.

    momentum[t] = values[t] / values[t - window] - 1, computed column-wise.
    The first `window` rows have no value `window` rows earlier and are set
    to 0.

    Args:
        values: DataFrame of prices, one column per company, time going down.
        window: look-back distance in rows (non-negative integer).

    Returns:
        DataFrame with the same index and columns as `values`.

    Raises:
        ValueError: if `window` is negative.
    """
    if window < 0:
        raise ValueError("window must be non-negative")

    # Vectorised replacement for the per-element Python loop. This also fixes
    # an off-by-one: the old `i > window` test left row `window` at 0 even
    # though values[0] is a valid reference for it.
    momentum = values / values.shift(window) - 1
    # Rows without a reference value come out as NaN; report them as 0.
    momentum.iloc[:window] = 0.0
    return momentum

In [9]:
# Build the per-company price/volume tables, then derive the indicators from
# the adjusted closing prices (Bollinger Bands and 20-row momentum).
adj_close_df, adj_vol_df = create_adj_vol_and_close_dfs(dowJonesDict)
upper_band_df, lower_band_df, rm_company_df = compute_bollinger_bands(adj_close_df)
momentum_df = get_momentum(adj_close_df, 20)
# Parenthesised form prints the same text on Python 2 and Python 3.
print("DONE")

DONE


In [10]:
def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
    """Draw `df` with pandas' plot() and show the figure.

    Args:
        df: object exposing a pandas-style .plot() method.
        title: figure title.
        xlabel: label for the x axis.
        ylabel: label for the y axis.
    """
    axes = df.plot(fontsize=12, title=title)
    # Label both axes so the figure can be read on its own.
    axes.set_ylabel(ylabel)
    axes.set_xlabel(xlabel)
    plt.show()
    
def plot_bollinger(adj_close_df, window=20, end=160):
    """Plot prices together with their rolling mean and Bollinger Bands.

    Args:
        adj_close_df: adjusted closing prices to plot (a single company's
            Series in the existing call).
        window: rolling window length in rows; the first `window` rows are
            skipped in the plot. Defaults to 20 (previously hard-coded).
        end: row position at which the plot stops. Defaults to 160
            (previously hard-coded), i.e. a 140-row graph with the defaults.
    """
    rolling_mean = get_rolling_mean(adj_close_df, window=window)
    rolling_std = get_rolling_std(adj_close_df, window=window)
    upper_band, lower_band = get_bollinger_bands(rolling_mean, rolling_std)

    # Label the price line after the data actually passed in; the legend used
    # to say 'IBM' regardless of which company was plotted.
    price_label = 'Price'
    if isinstance(adj_close_df, pd.Series) and adj_close_df.name is not None:
        price_label = str(adj_close_df.name)

    # Plot raw values, rolling mean and Bollinger Bands over the same rows
    # (the bands were previously sliced without the upper bound).
    ax = adj_close_df[window:end].plot(title="Bollinger Bands", label=price_label)
    rolling_mean[window:end].plot(label='Rolling mean', ax=ax)
    upper_band[window:end].plot(label='upper band', ax=ax)
    lower_band[window:end].plot(label='lower band', ax=ax)

    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()
      
def compute_and_plot_daily_returns(adj_close_df):
    """Plot the daily returns of the given adjusted closing prices.

    The returns come from compute_daily_returns() and are drawn with
    plot_data(). The plotted period is whatever span of rows the caller
    passes in, so slice `adj_close_df` beforehand to widen or narrow it.
    """
    plot_data(
        compute_daily_returns(adj_close_df),
        title="Daily returns",
        ylabel="Daily returns",
    )
    
# Bollinger Bands for the 'Nike' column, restricted to the first 160 rows.
plot_bollinger(adj_close_df['Nike'][:160])
# Daily returns of three selected companies over the first 20 rows.
compute_and_plot_daily_returns(adj_close_df[['IBM','Nike','Visa']][0:20])

Done
