-
Notifications
You must be signed in to change notification settings - Fork 0
/
stockpricepredictorver5.py
182 lines (127 loc) · 4.83 KB
/
stockpricepredictorver5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# -*- coding: utf-8 -*-
"""StockPricePredictorVer5.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/18TaH5Pkq3Y6YtHj7tsPgt7at1AY1poB4
"""
# NOTE(review): the '!' lines below are IPython/Colab shell magics, not valid
# Python syntax — this file only runs as-is inside a notebook environment.
!pip install --upgrade pandas # Upgrade of pandas is necessary to use DataReader
!pip install --upgrade pandas-datareader # Upgrade of pandas-datareader is necessary to use DataReader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as data
import datetime
from datetime import date
from datetime import datetime
from dateutil.relativedelta import relativedelta
# --- Data acquisition ---
# Pull roughly ten years of daily TSLA prices from Yahoo Finance.
# (Requires the pandas / pandas-datareader upgrades performed above.)
start = datetime.now() - relativedelta(years=10)
end = date.today()
df = data.DataReader('TSLA', 'yahoo', start, end)
df.head()  # Preview first rows (notebook display only)
df.tail()  # Preview last rows (notebook display only)
# The model only needs the closing price on a plain positional index,
# so swap the Date index for 0..n-1 and drop Date / Adj Close columns.
df = df.reset_index()
df = df.drop(columns=['Date', 'Adj Close'])
df.head()
plt.plot(df.Close)  # Quick visual check of the closing-price series
df
# --- Moving averages ---
# Simple moving averages of the close over 100-day and 200-day windows.
# The first window-1 entries are NaN until the window fills.
ma100 = df.Close.rolling(window=100).mean()
ma100
# Overlay the 100-day MA on the closing-price curve.
plt.figure(figsize=(12, 6))
plt.plot(df.Close)
plt.plot(ma100, 'r')
ma200 = df.Close.rolling(window=200).mean()
ma200
# Overlay both moving averages on the closing-price curve.
plt.figure(figsize=(12, 6))
plt.plot(df.Close)
plt.plot(ma100, 'r')  # 100-day MA
plt.plot(ma200, 'g')  # 200-day MA
df.shape  # (rows, columns) of the price frame
# --- Train/test split (chronological, 70/30) ---
# Time-series data must be split in order, never shuffled: train on the
# earlier 70% of closing prices, evaluate on the later 30%.
# (Fixes: removed the redundant `max_percent = int(len(df))` — len() is
# already an int and open-ended slices don't need an explicit stop.)
split_percent = int(len(df) * 0.70)  # Row index where the training slice ends
data_training = pd.DataFrame(df['Close'][:split_percent])   # First 70% of rows
data_testing = pd.DataFrame(df['Close'][split_percent:])    # Remaining 30%
# Sanity check: the two row counts should add up to len(df).
print(data_training.shape)
print(data_testing.shape)
data_training.head()
data_testing.head()
# --- Feature scaling and supervised-window construction ---
# Normalize closing prices into [0, 1]; the fitted scaler is reused later
# to transform the test data and to undo the scaling on predictions.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))
data_training_array = scaler.fit_transform(data_training)
data_training_array
data_training_array.shape
len(data_training_array)

# Sliding windows: each sample holds the previous 100 scaled closes and the
# target is the close that immediately follows that window.
window = 100
x_train = np.array([data_training_array[i - window:i]
                    for i in range(window, len(data_training_array))])
y_train = np.array([data_training_array[i, 0]
                    for i in range(window, len(data_training_array))])
x_train.shape
# --- LSTM model ---
# Four stacked LSTM layers of increasing width, each followed by dropout,
# ending in a single dense unit that predicts the next scaled close.
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential

model = Sequential()
# (units, dropout_rate) per stacked layer. Every LSTM except the last must
# return its full sequence so the following LSTM receives 3-D input.
layer_specs = [(50, 0.2), (60, 0.3), (80, 0.4), (120, 0.5)]
for idx, (units, rate) in enumerate(layer_specs):
    not_last = idx < len(layer_specs) - 1
    if idx == 0:
        # First layer declares the input shape: 100 time steps, 1 feature.
        model.add(LSTM(units=units, activation='relu',
                       return_sequences=not_last,
                       input_shape=(x_train.shape[1], 1)))
    else:
        model.add(LSTM(units=units, activation='relu',
                       return_sequences=not_last))
    model.add(Dropout(rate))
model.add(Dense(units=1))
model.summary()
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, epochs=50)
model.save('keras_model.h5')
# --- Test-set preparation ---
# Each test prediction needs the 100 closes that precede it, so prepend the
# last 100 training rows to the testing frame before windowing.
data_testing.head()
data_training.tail(100)
past_100_days = data_training.tail(100)
# FIX: DataFrame.append was removed in pandas 2.0 — use pd.concat instead.
final_df = pd.concat([past_100_days, data_testing], ignore_index=True)
final_df.head()
# FIX: the scaler must only be *fit* on training data; re-fitting here
# (fit_transform) leaked test-set statistics into the transform. Reuse the
# training fit and only transform.
input_data = scaler.transform(final_df)
input_data
input_data.shape
# Build the same 100-step sliding windows used for training.
x_test = []
y_test = []
for i in range(100, len(input_data)):
    x_test.append(input_data[i - 100:i])
    y_test.append(input_data[i, 0])
x_test, y_test = np.array(x_test), np.array(y_test)
print(x_test.shape)
print(y_test.shape)
# --- Prediction and evaluation plot ---
y_predicted = model.predict(x_test)
y_predicted.shape
y_test
y_predicted
scaler.scale_
# FIX: the un-scaling factor was hard-coded as 1/0.00093567, which silently
# breaks whenever the downloaded date range — and hence the fitted scaler —
# changes. Derive it from the scaler itself. (NOTE(review): like the
# original, this multiplies by 1/scale_ only and ignores the scaler's
# additive min_ offset; a full inverse would use scaler.inverse_transform.)
scale_factor = 1 / scaler.scale_[0]
y_predicted = y_predicted * scale_factor
y_test = y_test * scale_factor
# Plot actual vs. predicted prices over the test period.
plt.figure(figsize=(12, 6))
plt.plot(y_test, 'b', label='Original Price')
plt.plot(y_predicted, 'r', label='Predicted Price')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()