-
Notifications
You must be signed in to change notification settings - Fork 0
/
stockpricepredictorver3 (2).py
184 lines (149 loc) · 5.18 KB
/
stockpricepredictorver3 (2).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# -*- coding: utf-8 -*-
"""StockPricePredictorVer3.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1pc9He62ArZrtPHfnC5c0m_t4VwgnsOIP
"""
# This program predicts closing stock prices using Recurrent Neural Network (RNN), Long Short Term Memory (LSTM)
# Import necessary libraries
# Upgrade pandas and pandas-datareader before they are imported below
# (the upgrade is necessary to use web.DataReader).
# The notebook used IPython "!pip ..." shell escapes for this, which are a
# SyntaxError under the plain Python interpreter. Calling pip through the
# running interpreter works both in a notebook and as a script, and always
# targets the environment this code is actually running in.
import subprocess
import sys

for _package in ('pandas', 'pandas-datareader'):
    # check=False keeps the notebook's best-effort behaviour: a failed
    # upgrade is reported by pip but does not abort the run.
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '--upgrade', _package],
        check=False,
    )
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
import matplotlib.pyplot as plt
# Apply the FiveThirtyEight look to every figure drawn from here on
plt.style.use('fivethirtyeight')

# Download the TSLA quote history for the fixed date range below.
# NOTE(review): this depends on pandas-datareader's 'yahoo' source being
# reachable at run time - confirm it still works before relying on it.
df = web.DataReader(
    'TSLA',
    data_source='yahoo',
    start='2012-01-01',
    end='2022-04-13',
)
# Bare expressions: shown as cell output in a notebook, no effect in a script
df
# (rows, columns) of the downloaded table
df.shape

# Line chart of the closing price over the whole downloaded period
closing_prices = df['Close']
plt.figure(figsize=(16, 8))
plt.plot(closing_prices)
plt.title('Closing Price History')
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.xlabel('Date', fontsize=18)
plt.show()
# Keep only the 'Close' column; everything below models this single series
data = df.filter(['Close'])
# The same values as a numpy array (one row per quote, one column)
dataset = data.values
# The first 80% of the rows, rounded up, are used for training
training_data_len = math.ceil(0.8 * len(dataset))
training_data_len
# Rescale the prices into the range 0..1.
# NOTE(review): the scaler is fitted on the full series, including the rows
# later used for testing - confirm that this look-ahead is acceptable.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset)
scaled_data = scaler.transform(dataset)
scaled_data
# Training portion of the scaled series: the first training_data_len rows
train_data = scaled_data[0:training_data_len, :]
# Every sample pairs a window of 60 consecutive scaled closes (x_train) with
# the scaled close that immediately follows that window (y_train)
window_ends = range(60, len(train_data))
x_train = [train_data[end - 60:end, 0] for end in window_ends]
y_train = [train_data[end, 0] for end in window_ends]
# Debug output: the two lists as they stand after the first sample and after
# the second sample
for shown in range(1, min(2, len(x_train)) + 1):
    print(x_train[:shown])
    print(y_train[:shown])
    print()
# Turn both lists into numpy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)
x_train.shape
# Add a trailing axis of length 1 so the array is
# (number of samples, 60 values per window, 1 value per step), matching the
# input_shape given to the first LSTM layer
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_train.shape
# Build the LSTM model
from keras.layers import Dense, LSTM

# Layer stack, in order: a 50-unit LSTM that returns the full sequence, a
# 50-unit LSTM that returns only its last output, a 25-unit dense layer and a
# single-unit dense output layer
window_length = x_train.shape[1]
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(window_length, 1)),
    LSTM(50, return_sequences=False),
    Dense(25),
    Dense(1),
])
# Adam optimizer, mean squared error loss
model.compile(optimizer='adam', loss='mean_squared_error')
# Fit for a single epoch with a batch size of one sample
model.fit(x_train, y_train, batch_size=1, epochs=1)
# Create the testing dataset
# Start 60 rows before the train/test boundary so that the very first test
# window holds the 60 scaled closes preceding row training_data_len
test_data = scaled_data[training_data_len - 60:, :]
# x_test: one 60-value window of scaled closes per test row.
# Built directly as a numpy array so its shape can be printed below (the
# previous version printed x_test.shape while x_test was still a list, which
# raises AttributeError).
x_test = np.array([test_data[i - 60:i, 0] for i in range(60, len(test_data))])
# y_test: the actual, unscaled closes the windows are meant to predict
y_test = dataset[training_data_len:, :]
# Sanity-check the sizes of everything involved in the split
print(training_data_len)
print(scaled_data.shape)
print(dataset.shape)
print(test_data.shape)
print(x_test.shape)
print(y_test.shape)
# Add the trailing length-1 axis, as was done for x_train
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
# Run the model on the test windows; its outputs are on the scaler's 0..1 scale
scaled_predictions = model.predict(x_test)
# Map the outputs back to the original price scale
predictions = scaler.inverse_transform(scaled_predictions)
# Root mean squared error (RMSE) between predicted and actual closes
residuals = predictions - y_test
rmse = np.sqrt(np.mean(residuals ** 2))
rmse
# Split the Close column at the same boundary that was used for training
train = data[:training_data_len]
# Take an explicit copy of the validation rows: a column is added to it on the
# next line, and adding a column to a bare slice of `data` triggers pandas'
# SettingWithCopyWarning
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions
# Plot training closes, actual validation closes and predicted closes together
plt.figure(figsize=(16, 8))
plt.title('Stock Price Forecast LSTM Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')  # loc = location
plt.show()
# Actual and predicted closes side by side (shown as cell output in a notebook)
valid
# Predict the close that follows the last downloaded quote
# Get the quote (the variable name says "apple", but the ticker requested is TSLA)
apple_quote = web.DataReader('TSLA', data_source='yahoo', start='2012-01-01', end='2022-04-12')
# Keep only the Close column
new_df = apple_quote.filter(['Close'])
# Last 60 closing prices as a numpy array
last_60_days = new_df[-60:].values
# Scale them with the already-fitted scaler (transform only, no refit)
last_60_days_scaled = scaler.transform(last_60_days)
# Wrap the window in a leading axis so it is a batch holding one sample:
# (1, 60, 1). This replaces the earlier list/append/array/reshape sequence,
# which produced the same array.
X_test = last_60_days_scaled[np.newaxis, :, :]
# Get the predicted scaled price
predicted_price = model.predict(X_test)
# Undo the scaling
predicted_price = scaler.inverse_transform(predicted_price)
print(predicted_price)
# Get the actual price for the day being predicted.
# The input window ends on 2022-04-12, so the model output is for the next
# close, 2022-04-13. The previous code fetched 2022-04-12 here, i.e. the last
# value of the input window rather than the value being predicted.
# NOTE(review): assumes 2022-04-13 is the next trading day after 2022-04-12.
actual_price = web.DataReader('TSLA', data_source='yahoo', start='2022-04-13', end='2022-04-13')
print(actual_price['Close'])
# Per-feature scale learned by the scaler (shown as cell output in a notebook)
scaler.scale_
# Plot actual test closes against the model's predictions
plt.figure(figsize=(16, 8))
plt.plot(y_test, 'b', label='Original Price')
# Convert the model's 0..1 outputs back to prices with the fitted scaler.
# The previous version multiplied by a hand-copied constant (1/0.00081609),
# which drops the scaler's offset term and silently goes stale whenever the
# downloaded data (and therefore the fitted scale) changes.
y_predicted = scaler.inverse_transform(model.predict(x_test))
plt.plot(y_predicted, 'r', label='Predicted Price')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()