-
Notifications
You must be signed in to change notification settings - Fork 0
/
stockpricepredictorver5.py
182 lines (127 loc) · 4.83 KB
/
stockpricepredictorver5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# -*- coding: utf-8 -*-
"""StockPricePredictorVer5.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/18TaH5Pkq3Y6YtHj7tsPgt7at1AY1poB4
"""
# NOTE(review): the '!' lines below are IPython/Colab shell magics, not valid
# Python syntax — this file only runs as-is inside a notebook environment.
!pip install --upgrade pandas # Upgrade of pandas is necessary to use DataReader
!pip install --upgrade pandas-datareader # Upgrade of pandas-datareader is necessary to use DataReader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as data
import datetime
from datetime import date
from datetime import datetime
from dateutil.relativedelta import relativedelta
# --- Data acquisition ---
# Pull roughly ten years of daily TSLA prices from Yahoo Finance.
# (Requires the pandas / pandas-datareader upgrades performed above.)
start = datetime.now() - relativedelta(years=10)
end = date.today()
df = data.DataReader('TSLA', 'yahoo', start, end)
df.head()  # Preview first rows (notebook display only)
df.tail()  # Preview last rows (notebook display only)
# The model only needs the closing price on a plain positional index,
# so swap the Date index for 0..n-1 and drop Date / Adj Close columns.
df = df.reset_index()
df = df.drop(columns=['Date', 'Adj Close'])
df.head()
plt.plot(df.Close)  # Quick visual check of the closing-price series
df
# --- Moving averages ---
# Simple moving averages of the close over 100-day and 200-day windows.
# The first window-1 entries are NaN until the window fills.
ma100 = df.Close.rolling(window=100).mean()
ma100
# Overlay the 100-day MA on the closing-price curve.
plt.figure(figsize=(12, 6))
plt.plot(df.Close)
plt.plot(ma100, 'r')
ma200 = df.Close.rolling(window=200).mean()
ma200
# Overlay both moving averages on the closing-price curve.
plt.figure(figsize=(12, 6))
plt.plot(df.Close)
plt.plot(ma100, 'r')  # 100-day MA
plt.plot(ma200, 'g')  # 200-day MA
df.shape  # (rows, columns) of the price frame
# --- Train/test split (chronological, 70/30) ---
# Time-series data must be split in order, never shuffled: train on the
# earlier 70% of closing prices, evaluate on the later 30%.
# (Fixes: removed the redundant `max_percent = int(len(df))` — len() is
# already an int and open-ended slices don't need an explicit stop.)
split_percent = int(len(df) * 0.70)  # Row index where the training slice ends
data_training = pd.DataFrame(df['Close'][:split_percent])   # First 70% of rows
data_testing = pd.DataFrame(df['Close'][split_percent:])    # Remaining 30%
# Sanity check: the two row counts should add up to len(df).
print(data_training.shape)
print(data_testing.shape)
data_training.head()
data_testing.head()
# --- Feature scaling and supervised-window construction ---
# Normalize closing prices into [0, 1]; the fitted scaler is reused later
# to transform the test data and to undo the scaling on predictions.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))
data_training_array = scaler.fit_transform(data_training)
data_training_array
data_training_array.shape
len(data_training_array)

# Sliding windows: each sample holds the previous 100 scaled closes and the
# target is the close that immediately follows that window.
window = 100
x_train = np.array([data_training_array[i - window:i]
                    for i in range(window, len(data_training_array))])
y_train = np.array([data_training_array[i, 0]
                    for i in range(window, len(data_training_array))])
x_train.shape
# --- LSTM model ---
# Four stacked LSTM layers of increasing width, each followed by dropout,
# ending in a single dense unit that predicts the next scaled close.
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential

model = Sequential()
# (units, dropout_rate) per stacked layer. Every LSTM except the last must
# return its full sequence so the following LSTM receives 3-D input.
layer_specs = [(50, 0.2), (60, 0.3), (80, 0.4), (120, 0.5)]
for idx, (units, rate) in enumerate(layer_specs):
    not_last = idx < len(layer_specs) - 1
    if idx == 0:
        # First layer declares the input shape: 100 time steps, 1 feature.
        model.add(LSTM(units=units, activation='relu',
                       return_sequences=not_last,
                       input_shape=(x_train.shape[1], 1)))
    else:
        model.add(LSTM(units=units, activation='relu',
                       return_sequences=not_last))
    model.add(Dropout(rate))
model.add(Dense(units=1))
model.summary()
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, epochs=50)
model.save('keras_model.h5')
# --- Test-set preparation ---
# Each test prediction needs the 100 closes that precede it, so prepend the
# last 100 training rows to the testing frame before windowing.
data_testing.head()
data_training.tail(100)
past_100_days = data_training.tail(100)
# FIX: DataFrame.append was removed in pandas 2.0 — use pd.concat instead.
final_df = pd.concat([past_100_days, data_testing], ignore_index=True)
final_df.head()
# FIX: the scaler must only be *fit* on training data; re-fitting here
# (fit_transform) leaked test-set statistics into the transform. Reuse the
# training fit and only transform.
input_data = scaler.transform(final_df)
input_data
input_data.shape
# Build the same 100-step sliding windows used for training.
x_test = []
y_test = []
for i in range(100, len(input_data)):
    x_test.append(input_data[i - 100:i])
    y_test.append(input_data[i, 0])
x_test, y_test = np.array(x_test), np.array(y_test)
print(x_test.shape)
print(y_test.shape)
# --- Prediction and evaluation plot ---
y_predicted = model.predict(x_test)
y_predicted.shape
y_test
y_predicted
scaler.scale_
# FIX: the un-scaling factor was hard-coded as 1/0.00093567, which silently
# breaks whenever the downloaded date range — and hence the fitted scaler —
# changes. Derive it from the scaler itself. (NOTE(review): like the
# original, this multiplies by 1/scale_ only and ignores the scaler's
# additive min_ offset; a full inverse would use scaler.inverse_transform.)
scale_factor = 1 / scaler.scale_[0]
y_predicted = y_predicted * scale_factor
y_test = y_test * scale_factor
# Plot actual vs. predicted prices over the test period.
plt.figure(figsize=(12, 6))
plt.plot(y_test, 'b', label='Original Price')
plt.plot(y_predicted, 'r', label='Predicted Price')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()