Skip to content
Permalink
Browse files

added python files

  • Loading branch information...
VivekPa committed Sep 10, 2018
1 parent 6685499 commit e3f3ba86aef59409ca0b118827dd1093deffb0e4
Showing with 224 additions and 0 deletions.
  1. +45 −0 LSTM_model.py
  2. +38 −0 MLP_model.py
  3. +35 −0 backtest.py
  4. +63 −0 get_prices.py
  5. +43 −0 preprocessing.py
@@ -0,0 +1,45 @@
import pandas as pd
import numpy as np
import get_prices as hist
import tensorflow as tf
from preprocessing import DataProcessing
import pandas_datareader.data as pdr
import fix_yahoo_finance as fix
fix.pdr_override()

# --- Run configuration -------------------------------------------------------
TICKER = "AAPL"
SEQ_LEN = 10       # number of consecutive prices fed to the model per sample
PRICE_SCALE = 200  # prices are divided by this before training and
                   # predictions are multiplied by it afterwards

start = "2003-01-01"
end = "2018-01-01"

# get_stock_data() writes its output to "<ticker>_prices.csv", so that is the
# file that has to be read back here (previously a fixed "stock_prices.csv"
# was read, which ignored the data that had just been downloaded).
hist.get_stock_data(TICKER, start_date=start, end_date=end)
process = DataProcessing(TICKER + "_prices.csv", 0.9)
process.gen_test(SEQ_LEN)
process.gen_train(SEQ_LEN)

# The LSTM below is declared with input_shape=(SEQ_LEN, 1), i.e. every sample
# is a (SEQ_LEN, 1) array. Using -1 lets NumPy infer the number of samples
# instead of hard-coding the counts for one particular date range and split.
X_train = process.X_train.reshape((-1, SEQ_LEN, 1)) / PRICE_SCALE
Y_train = process.Y_train / PRICE_SCALE

X_test = process.X_test.reshape((-1, SEQ_LEN, 1)) / PRICE_SCALE
Y_test = process.Y_test / PRICE_SCALE

# Two stacked LSTM layers followed by a single-unit dense output.
# return_sequences=True makes the first LSTM emit one output per time step so
# the second LSTM receives a sequence.
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(20, input_shape=(SEQ_LEN, 1), return_sequences=True))
model.add(tf.keras.layers.LSTM(20))
model.add(tf.keras.layers.Dense(1, activation=tf.nn.relu))

model.compile(optimizer="adam", loss="mean_squared_error")

model.fit(X_train, Y_train, epochs=50)

# Loss on the held-out split (in scaled units).
print(model.evaluate(X_test, Y_test))

# Single prediction from the most recent SEQ_LEN prices of a short window.
data = pdr.get_data_yahoo(TICKER, "2017-12-19", "2018-01-03")
stock = data["Adj Close"]
# Keep only the last SEQ_LEN rows so the reshape still works if the date range
# happens to return more rows than one window.
recent_prices = np.array(stock)[-SEQ_LEN:]
X_predict = recent_prices.reshape((1, SEQ_LEN, 1)) / PRICE_SCALE

print(model.predict(X_predict) * PRICE_SCALE)

# To check how accurate the model is for one particular prediction instead of
# running a full backtest, run the following. The LSTM needs a 3-D input, so
# the window is reshaped to (1, 10, 1):
# data = pdr.get_data_yahoo("AAPL", "2017-12-19", "2018-01-03")
# stock = data["Adj Close"]
# X_predict = np.array(stock).reshape((1, 10, 1)) / 200
# print(model.predict(X_predict)*200)
@@ -0,0 +1,38 @@
import get_prices as hist
import tensorflow as tf
from preprocessing import DataProcessing
# import pandas_datareader.data as pdr if using the single test below
import fix_yahoo_finance as fix
fix.pdr_override()

# --- Run configuration -------------------------------------------------------
TICKER = "AAPL"
SEQ_LEN = 10       # number of consecutive prices fed to the model per sample
PRICE_SCALE = 200  # prices are divided by this before training

start = "2003-01-01"
end = "2018-01-01"

# get_stock_data() writes its output to "<ticker>_prices.csv", so that is the
# file that has to be read back here (previously a fixed "stock_prices.csv"
# was read, which ignored the data that had just been downloaded).
hist.get_stock_data(TICKER, start_date=start, end_date=end)
process = DataProcessing(TICKER + "_prices.csv", 0.9)
process.gen_test(SEQ_LEN)
process.gen_train(SEQ_LEN)

# The dense network consumes the windows as produced by DataProcessing
# (one row per sample), so only the scaling is applied.
X_train = process.X_train / PRICE_SCALE
Y_train = process.Y_train / PRICE_SCALE

X_test = process.X_test / PRICE_SCALE
Y_test = process.Y_test / PRICE_SCALE

# Two hidden layers of 100 ReLU units and a single-unit output layer.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(100, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(100, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(1, activation=tf.nn.relu))

model.compile(optimizer="adam", loss="mean_squared_error")

model.fit(X_train, Y_train, epochs=100)

# Loss on the held-out split (in scaled units).
print(model.evaluate(X_test, Y_test))

# To check how accurate the model is for one particular prediction instead of
# running a full backtest, run the following (it additionally needs
# `import numpy as np` and `import pandas_datareader.data as pdr`):
# data = pdr.get_data_yahoo("AAPL", "2017-12-19", "2018-01-03")
# stock = data["Adj Close"]
# X_predict = np.array(stock).reshape((1, 10)) / 200
# print(model.predict(X_predict)*200)

@@ -0,0 +1,35 @@
import pandas_datareader.data as pdr
import fix_yahoo_finance as fix
import numpy as np
fix.pdr_override()


def back_test(strategy, seq_len, ticker, start_date, end_date, dim):
    """
    A simple back test for a given date period.

    Downloads the adjusted close prices of ``ticker`` and, for every window of
    ``seq_len`` consecutive prices, compares the model's prediction with the
    price located ``seq_len + 1`` rows after the start of the window (the same
    offset ``DataProcessing`` uses when it builds the training targets).
    Prices are divided by 200 before being passed to the model.

    :param strategy: the chosen strategy; any object with a ``predict(x)``
        method. It must already be built and fitted with training data.
    :param seq_len: length of the days used for prediction
    :type seq_len: int, at least 1
    :param ticker: company ticker
    :param start_date: starting date
    :type start_date: "YYYY-mm-dd"
    :param end_date: ending date
    :type end_date: "YYYY-mm-dd"
    :param dim: dimension required for strategy: 3dim for LSTM and 2dim for MLP
    :type dim: tuple
    :return: array holding the error of every test in the given date range
    :raises ValueError: if ``seq_len`` is smaller than 1
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")

    data = pdr.get_data_yahoo(ticker, start_date, end_date)
    stock_data = data["Adj Close"]

    # A model that outputs exactly 0 is asked again, but only a bounded number
    # of times: a deterministic model would otherwise loop forever.
    max_zero_retries = 3

    errors = []
    # Same window count as DataProcessing: length rounded down to a multiple of
    # seq_len, minus the room needed for one window plus the target offset.
    usable_rows = (len(stock_data) // seq_len) * seq_len
    for i in range(usable_rows - seq_len - 1):
        # stock_data is a single column, so it is indexed by position only.
        x = np.array(stock_data.iloc[i: i + seq_len]).reshape(dim) / 200
        y = np.array(stock_data.iloc[i + seq_len + 1]) / 200

        predict = strategy.predict(x)
        retries = 0
        while np.all(predict == 0) and retries < max_zero_retries:
            predict = strategy.predict(x)
            retries += 1

        # NOTE(review): this is the scaled difference divided by 100, not a
        # relative (percentage) error -- confirm the intended metric.
        error = (predict - y) / 100
        errors.append(error)

    total_error = np.array(errors)
    print(f"Average error = {total_error.mean()}")
    # The full list of errors is returned so callers can inspect or print it.
    return total_error
@@ -0,0 +1,63 @@
import pandas_datareader.data as pdr
import fix_yahoo_finance as fix
import time
fix.pdr_override()


def _download_adj_close(ticker, start_date, end_date, quick_attempts=5,
                        short_wait=10, long_wait=120):
    """
    Download the "Adj Close" data for ``ticker``, retrying on ValueError.

    Up to ``quick_attempts`` downloads are tried with ``short_wait`` seconds
    between them. If all of them fail, the function waits ``long_wait`` seconds
    and tries one last time; a failure of that last attempt is propagated to
    the caller instead of retrying without end.

    :param ticker: ticker (or list of tickers) passed to ``get_data_yahoo``
    :param start_date: starting date, "YYYY-mm-dd"
    :param end_date: end date, "YYYY-mm-dd"
    :return: the "Adj Close" part of the downloaded data
    :raises ValueError: if the final attempt fails as well
    """
    for attempt in range(1, quick_attempts + 1):
        try:
            all_data = pdr.get_data_yahoo(ticker, start_date, end_date)
        except ValueError:
            print("ValueError, trying again")
            if attempt < quick_attempts:
                time.sleep(short_wait)
        else:
            return all_data["Adj Close"]

    print("Tried 5 times, Yahoo error. Trying after 2 minutes")
    time.sleep(long_wait)
    # Last attempt: no handler, so a persistent error surfaces to the caller.
    return pdr.get_data_yahoo(ticker, start_date, end_date)["Adj Close"]


def get_stock_data(ticker, start_date, end_date):
    """
    Gets historical stock data of given tickers between dates and saves the
    adjusted close prices to ``<ticker>_prices.csv``.

    :param ticker: company, or companies whose data is to be fetched
    :type ticker: string or list of strings
    :param start_date: starting date for stock prices
    :type start_date: string of date "YYYY-mm-dd"
    :param end_date: end date for stock prices
    :type end_date: string of date "YYYY-mm-dd"
    :return: None; the data is written to ``<ticker>_prices.csv``
    :raises ValueError: if the download keeps failing after all retries
    """
    stock_data = _download_adj_close(ticker, start_date, end_date)
    stock_data.to_csv(f"{ticker}_prices.csv")


def get_sp500(start_date, end_date):
    """
    Gets sp500 price data (downloaded under the ticker "SPY") and saves the
    adjusted close prices to ``sp500_data.csv``.

    :param start_date: starting date for sp500 prices
    :type start_date: string of date "Y-m-d"
    :param end_date: end date for sp500 prices
    :type end_date: string of date "Y-m-d"
    :return: None; the data is written to ``sp500_data.csv``
    :raises ValueError: if the download keeps failing after all retries
    """
    sp500_data = _download_adj_close("SPY", start_date, end_date)
    sp500_data.to_csv("sp500_data.csv")


if __name__ == "__main__":
    get_stock_data("AAPL", "2018-05-01", "2018-06-01")
    # get_sp500("2018-05-01", "2018-06-01")
@@ -0,0 +1,43 @@
import pandas as pd
import numpy as np


class DataProcessing:
    """
    Splits a price CSV into a training and a test part and turns each part
    into sliding-window samples.

    The values are read from the second column (position 1) of the CSV. The
    first ``train`` fraction of the rows forms the training part, the rest the
    test part.
    """

    def __init__(self, file, train):
        """
        :param file: path (or file-like object) handed to ``pandas.read_csv``
        :param train: fraction of the rows used for training, e.g. 0.9
        """
        self.file = pd.read_csv(file)
        self.train = train
        # Row position at which the data is split into train / test.
        self.i = int(self.train * len(self.file))
        self.stock_train = self.file[0: self.i]
        self.stock_test = self.file[self.i:]
        self.input_train = []
        self.output_train = []
        self.input_test = []
        self.output_test = []
        # Filled by gen_train() / gen_test(); None until then.
        self.X_train = None
        self.Y_train = None
        self.X_test = None
        self.Y_test = None

    @staticmethod
    def _make_windows(frame, seq_len):
        """
        Build the (window, target) samples for one part of the data.

        Each window holds ``seq_len`` consecutive values starting at row ``i``;
        its target is the value at row ``i + seq_len + 1``, so one row lies
        between the end of a window and its target.

        :param frame: DataFrame whose column at position 1 holds the values
        :param seq_len: length of window
        :return: tuple ``(inputs, outputs)`` of lists of numpy arrays
        """
        # Length rounded down to a multiple of seq_len, minus the room needed
        # for one window plus the target offset.
        usable_rows = (len(frame) // seq_len) * seq_len
        inputs = []
        outputs = []
        for i in range(usable_rows - seq_len - 1):
            inputs.append(np.array(frame.iloc[i: i + seq_len, 1]))
            outputs.append(np.array([frame.iloc[i + seq_len + 1, 1]], np.float64))
        return inputs, outputs

    def gen_train(self, seq_len):
        """
        Generates training data

        The samples are rebuilt from scratch on every call, so calling the
        method repeatedly does not duplicate them.

        :param seq_len: length of window
        :return: X_train and Y_train (also stored on the instance)
        """
        self.input_train, self.output_train = self._make_windows(self.stock_train, seq_len)
        self.X_train = np.array(self.input_train)
        self.Y_train = np.array(self.output_train)
        return self.X_train, self.Y_train

    def gen_test(self, seq_len):
        """
        Generates test data

        The samples are rebuilt from scratch on every call, so calling the
        method repeatedly does not duplicate them.

        :param seq_len: Length of window
        :return: X_test and Y_test (also stored on the instance)
        """
        self.input_test, self.output_test = self._make_windows(self.stock_test, seq_len)
        self.X_test = np.array(self.input_test)
        self.Y_test = np.array(self.output_test)
        return self.X_test, self.Y_test

0 comments on commit e3f3ba8

Please sign in to comment.
You can’t perform that action at this time.