## Import Libraries

In [1]:
import pandas_datareader.data as pdr
import datetime
import numpy as np

# seems to be a problem with pandas_datareader
# use this fix to get data from Yahoo! Finance
import fix_yahoo_finance as yf
yf.pdr_override()

    Auto-overriding of pandas_datareader's get_data_yahoo() is deprecated and will be removed in future versions.
    Use pdr_override() to explicitly override it.


## Get the data

Download the price data from Yahoo! Finance and load it into memory.

In [2]:
def download_data_yahoo(symbol,start_date,end_date):
    """Fetch the price history of ``symbol`` through pandas_datareader's Yahoo reader.

    Parameters
    ----------
    symbol
        Ticker to request, e.g. ``'SPY'``.
    start_date, end_date
        Bounds of the requested period; forwarded unchanged as the ``start``
        and ``end`` arguments of ``pdr.get_data_yahoo``.

    Returns
    -------
    Whatever ``pdr.get_data_yahoo`` returns for the request.
    """
    return pdr.get_data_yahoo(symbol, start=start_date, end=end_date)

In [3]:
# test download_data: fetch one year of SPY prices.
# The date bounds are defined once and passed through, so the range that is
# requested can never drift from the variables declared here.
start = datetime.datetime(2010,1,1)
end = datetime.datetime(2011,1,1)
test_data = download_data_yahoo('SPY',start_date=start,end_date=end)

[*********************100%***********************]  1 of 1 downloaded

In [4]:
# Preview the first rows of the downloaded prices to sanity-check the columns.
test_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,112.370003,113.389999,111.510002,113.330002,97.314163,118944600
2010-01-05,113.260002,113.68,112.849998,113.629997,97.571754,111579900
2010-01-06,113.519997,113.989998,113.43,113.709999,97.640457,116074400
2010-01-07,113.5,114.330002,113.18,114.190002,98.052658,131091100
2010-01-08,113.889999,114.620003,113.660004,114.57,98.378929,126402800


## Preprocess the data
Preprocess the data into inputs and outputs.
For example, for each time step, use the prices from the past 10 days as the input and the price on the next day as the output. Also do any necessary data cleaning, such as filling in missing prices.

In [7]:
def preprocess_data(yahoo_data,number_of_days=10):
    """Turn a price history into sliding-window samples for next-day prediction.

    Each sample pairs ``number_of_days`` consecutive adjusted closing prices
    (oldest first, "today" last) with the adjusted close of the following day.

    Parameters
    ----------
    yahoo_data : pandas.DataFrame or mapping
        Must provide an ``'Adj Close'`` entry holding the prices. The rows are
        used in the order given, so they are assumed to be chronological.
    number_of_days : int, optional
        Number of past prices (including today's) in each input window.

    Returns
    -------
    input_x : numpy.ndarray of float, shape (num_samples, number_of_days)
        One price window per row.
    output_y : numpy.ndarray of float, shape (num_samples,)
        The price on the day right after each window.

    ``num_samples`` is ``len(prices) - number_of_days``; when the history is
    too short to form a single sample, both arrays are returned empty.
    """
    # Work on a plain float array so every lookup below is positional;
    # integer indexing on a date-indexed Series is deprecated in pandas.
    prices = np.asarray(yahoo_data['Adj Close'], dtype=float)

    # One sample for every day that has a full window ending on it and a
    # following day to predict. Clamp at zero so a too-short history yields
    # empty arrays instead of a negative array dimension.
    num_samples = max(len(prices) - number_of_days, 0)

    # The builtin float replaces the np.float alias removed in NumPy 1.24.
    input_x = np.empty((num_samples, number_of_days), dtype=float)
    output_y = np.empty(num_samples, dtype=float)

    for sample_i in range(num_samples):
        # the window covers [sample_i, tomorrow_i); tomorrow_i is the target day
        tomorrow_i = sample_i + number_of_days
        input_x[sample_i, :] = prices[sample_i:tomorrow_i]
        output_y[sample_i] = prices[tomorrow_i]

    return input_x, output_y
        
# Build the sample windows from the downloaded test prices, using the
# default window length of preprocess_data.
input_x, output_y = preprocess_data(test_data)

In [15]:
# Scratch check: a descending range that counts from 10 down to 0 inclusive.
list(range(10,-1,-1))

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

In [20]:
# look at some of the data
def print_row(i,input_x,output_y):
    print("Row %d Input Prices (today's price %.2f):" % (i,input_x[i,-1]))
    print(input_x[i,:])
    print("Row %d Target (tomorrow's price): %f\n" % (i,output_y[i]))
    
def print_row(i,input_x,output_y):
    print("Row %d Raw Data" % (i))
    print("-------------------")
    print("Input:")
    print(input_x[i,:])
    print("Output:")
    print(output_y[i])
    print("")
    
    print("Data Interpretation")
    print("-------------------")
    num_inputs = input_x.shape[1]
    
    print("Today's Price : %f" % (input_x[i,-1]))
    
    for xi in range(num_inputs-2,-1,-1):
        days_in_past = num_inputs - xi - 1
        print("Today - %d days: %f" % (days_in_past,input_x[i,xi]))
        
    print("Tomorrow's Price (Price to Predict): %f\n" % (output_y[i]))
    

# Inspect the first two samples and the final one (index -1 is the last row)
for row_i in (0, 1, -1):
    print_row(row_i,input_x,output_y)




Row 0 Raw Data
-------------------
Input:
[ 97.314163  97.571754  97.640457  98.052658  98.378929  98.516312
  97.597534  98.421867  98.688065  97.580353]
Output:
98.79969

Data Interpretation
-------------------
Today's Price : 97.580353
Today - 1 days: 98.688065
Today - 2 days: 98.421867
Today - 3 days: 97.597534
Today - 4 days: 98.516312
Today - 5 days: 98.378929
Today - 6 days: 98.052658
Today - 7 days: 97.640457
Today - 8 days: 97.571754
Today - 9 days: 97.314163
Tomorrow's Price (Price to Predict): 98.799690

Row 1 Raw Data
-------------------
Input:
[ 97.571754  97.640457  98.052658  98.378929  98.516312  97.597534
  98.421867  98.688065  97.580353  98.79969 ]
Output:
97.795021

Data Interpretation
-------------------
Today's Price : 98.799690
Today - 1 days: 97.580353
Today - 2 days: 98.688065
Today - 3 days: 98.421867
Today - 4 days: 97.597534
Today - 5 days: 98.516312
Today - 6 days: 98.378929
Today - 7 days: 98.052658
Today - 8 days: 97.640457
Today - 9 days: 97.571754
Tomor

## Divide data into training, test, and validation sets


In [None]:
def split_dataset(data, train_fraction=0.6, test_fraction=0.2):
    """Split ``data`` in order into training, test, and validation parts.

    The split is positional and never shuffles, so with time-ordered rows the
    training part comes first, then the test part, then the validation part.

    Parameters
    ----------
    data
        Any object supporting ``len()`` and slice indexing, e.g. a list or a
        NumPy array. Objects exposing ``.iloc`` (pandas Series/DataFrame) are
        sliced through it so the split is always by row position.
    train_fraction : float, optional
        Share of the rows placed in the training part (rounded down).
    test_fraction : float, optional
        Share of the rows placed in the test part (rounded down).
        Everything left over becomes the validation part.

    Returns
    -------
    train_data, test_data, validation_data
        Three consecutive slices of ``data``.

    Raises
    ------
    ValueError
        If a fraction is negative or the two fractions sum to more than 1.
    """
    if train_fraction < 0 or test_fraction < 0 or train_fraction + test_fraction > 1:
        raise ValueError(
            "train_fraction and test_fraction must be non-negative and sum to at most 1"
        )

    num_rows = len(data)
    train_end = int(num_rows * train_fraction)
    # never run past the end, whatever the rounding of the two shares does
    test_end = min(train_end + int(num_rows * test_fraction), num_rows)

    # pandas objects are sliced by position via .iloc; plain sequences directly
    rows = data.iloc if hasattr(data, "iloc") else data

    train_data = rows[:train_end]
    test_data = rows[train_end:test_end]
    validation_data = rows[test_end:]
    return train_data, test_data, validation_data

## Create a modular function that can be called to get data
Combine the previous functions into a simple function that can be called to get the data

In [1]:
def get_data(symbol,train_start_date,train_end_date,
             test_start_date,test_end_date,
             validation_start_date,validation_end_date):
    """Download and preprocess the prices of ``symbol`` for three date ranges.

    Each range is fetched with ``download_data_yahoo`` and converted into
    samples with ``preprocess_data`` (using its default window length).

    Parameters
    ----------
    symbol
        Ticker to request.
    train_start_date, train_end_date
        Bounds of the training period.
    test_start_date, test_end_date
        Bounds of the test period.
    validation_start_date, validation_end_date
        Bounds of the validation period.

    Returns
    -------
    train_data, test_data, validation_data
        One ``(input_x, output_y)`` pair per period, exactly as returned by
        ``preprocess_data``.
    """
    def load_range(start_date, end_date):
        # fetch the raw prices for one period, then window them into samples
        return preprocess_data(download_data_yahoo(symbol, start_date, end_date))

    train_data = load_range(train_start_date, train_end_date)
    test_data = load_range(test_start_date, test_end_date)
    validation_data = load_range(validation_start_date, validation_end_date)
    return train_data, test_data, validation_data

## Create your model
Set up whatever model you plan to use for price prediction, e.g. a neural network or linear regression.

In [2]:
def setup_model():
    """Placeholder for model construction; not implemented yet.

    NOTE(review): ``model`` is not assigned inside this function, so the
    return statement looks it up as a global. No such global is defined in the
    visible part of the notebook, in which case calling this raises
    ``NameError``.
    TODO: build the chosen predictor here and return it.
    """
    return model

## Train your model
Define the function to train your model using the training data and the test data

In [6]:
def train(model,train_data,test_data):
    """Placeholder for the training step; not implemented yet.

    The body is empty, so the arguments are ignored and ``None`` is returned.
    TODO: fit ``model`` here; per the notebook text this is meant to use both
    the training data and the test data.
    """
    pass

## Evaluate your model on the validation set
Return the accuracy of your model

In [None]:
def evaluate(model,validation_data):
    """Placeholder for the evaluation step; not implemented yet.

    The body is empty, so the arguments are ignored and ``None`` is returned.
    TODO: per the notebook text this is meant to return the accuracy of
    ``model`` on ``validation_data``.
    """
    pass

## Display the results
Show graphs and metrics of your model accuracy

In [None]:
def results():
    """Placeholder for the reporting step; not implemented yet.

    The body is empty, so nothing is displayed and ``None`` is returned.
    TODO: per the notebook text this is meant to show graphs and metrics of
    the model's accuracy.
    """
    pass

## Train and evaluate your model here
Call the functions to get the data, train the model, and evaluate it on the validation data