In [1]:
import pandas_datareader as web
import datetime as dt
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Dropout, LSTM

In [68]:
# 1. Prepare the data

In [3]:
# Ticker symbol used for the Yahoo downloads and the plot titles below.
# NOTE(review): this line assigns `stock` to itself; no earlier definition is visible
# in this notebook, so on a fresh kernel (Restart & Run All) it raises NameError.
# Replace the right-hand side with the ticker symbol string to analyse.
stock = stock

In [4]:
# Training window boundaries: 1 January 2012 through 1 January 2021
train_start_date = dt.datetime(year=2012, month=1, day=1)
train_end_date = dt.datetime(year=2021, month=1, day=1)

In [5]:
# Download the price history for the training window from the 'yahoo' data source
stock_prices = web.DataReader(
    stock,
    data_source='yahoo',
    start=train_start_date,
    end=train_end_date,
)

## Descriptive methods visualization

In [20]:
# Closing price over the training window, drawn as a line
plt.plot(stock_prices.loc[:, "Close"])
plt.ylabel("Price")
plt.title(f"Line graph for {stock} stock prices")
plt.xlabel("Time")

In [2]:
# Distribution of closing prices: how many days fell into each price bin
plt.hist(stock_prices.loc[:, "Close"])
plt.ylabel("Number of days")
plt.title(f"Histogram for {stock} stock prices")
plt.xlabel("Price")

In [3]:
# One bar per row of the price table, positioned by its index value
plt.bar(x=stock_prices.index, height=stock_prices["Close"])
plt.ylabel("Price")
plt.title(f"Bar graph for {stock} stock prices")
plt.xlabel("Time")

In [7]:
# Fit a min-max scaler on the closing prices and map them into the range [0, 1];
# the double-bracket selection yields the single-column 2-D array the scaler expects
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_close_prices = scaler.fit_transform(stock_prices[['Close']].values)

In [8]:
# Start with empty containers for the training inputs (x) and targets (y)
x_train, y_train = [], []

In [9]:
# Length of the look-back window: how many past days feed each prediction
training_days: int = 30

In [10]:
# Build one sample per day: the preceding `training_days` scaled closes form the
# input window and the day's own scaled close is the value to predict
for target_day in range(training_days, len(scaled_close_prices)):
    window_start = target_day - training_days
    x_train.append(scaled_close_prices[window_start:target_day, 0])
    y_train.append(scaled_close_prices[target_day, 0])

In [11]:
# Turn the accumulated lists into numpy arrays
x_train, y_train = np.array(x_train), np.array(y_train)

In [12]:
# Append a trailing axis of size 1 so x_train becomes (samples, window length, 1)
n_samples, n_steps = x_train.shape[0], x_train.shape[1]
x_train = x_train.reshape(n_samples, n_steps, 1)

In [26]:
# 2. Choose and build model

In [13]:
# Empty Sequential container; layers are added to it in the next cell
model = Sequential()

In [14]:
# Stack three LSTM layers, each followed by Dropout(0.2), and finish with a
# single-unit Dense layer. Layers are created and added in the listed order.
for layer in (
    LSTM(units=45, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
):
    model.add(layer)

# Adam optimizer with mean squared error as the loss
model.compile(loss='mean_squared_error', optimizer='adam')

## Training the model with the stock data

In [21]:
# 3. Fit the model

# Train on the prepared windows: 20 epochs, batches of 24 samples
model.fit(x=x_train, y=y_train, batch_size=24, epochs=20)

In [None]:
# 4. Evaluate model test accuracy

In [16]:
# Analyze accuracy of the ML model on the training data (~80% of total data)
#
# Fixes relative to the previous version of this cell:
#   * the model's own predictions for x_train are evaluated; before, the
#     inverse-transformed targets were compared with the very prices they were
#     derived from, so the model was never consulted
#   * the average covers every per-day value (it used to add element 0 repeatedly)
#   * no hard-coded 30-day offset and no integer lookups on the date-indexed Series

# y_inverse holds the training targets mapped back from [0, 1] to real prices
y_inverse = scaler.inverse_transform(np.asarray(y_train).reshape(-1, 1))

# Prices the fitted model produces for the same training windows
predicted_train_prices = scaler.inverse_transform(model.predict(x_train))

# Per-day accuracy is smaller price / larger price: 1 for an exact match and
# below 1 otherwise, the same measure the test-set evaluation uses
actual_train_flat = y_inverse.ravel()
predicted_train_flat = predicted_train_prices.ravel()
accuracy_array = (
    np.minimum(actual_train_flat, predicted_train_flat)
    / np.maximum(actual_train_flat, predicted_train_flat)
)

# Average over all training days; a plain float keeps the printed value clean
accuracy = float(accuracy_array.mean())


## Model accuracy for training and testing data sets

In [22]:
# Report the averaged training accuracy
training_report = f"Training accuracy = {(accuracy)}"
print(training_report)

In [17]:
# Run model on test data ~20% of total data

# Download the test window: 1 January 2021 up to the moment this cell runs
test_start_date = dt.datetime(2021, 1, 1)
test_end_date = dt.datetime.now()
test_data = web.DataReader(stock, 'yahoo', test_start_date, test_end_date)
actual_prices = test_data['Close'].values

# Training closes followed by test closes, as one series
total_price_set = pd.concat((stock_prices['Close'], test_data['Close']), axis=0)

# Keep the test rows plus the `training_days` rows before them, so the first
# test day has a full look-back window, then scale with the already-fitted scaler
first_input_row = len(total_price_set) - len(test_data) - training_days
test_inputs = scaler.transform(total_price_set[first_input_row:].values.reshape(-1, 1))

# One look-back window per test day
x_test = np.array([
    test_inputs[window_end - training_days:window_end, 0]
    for window_end in range(training_days, len(test_inputs))
])

# Append the trailing axis of size 1, matching the layout of x_train
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# Predict, then undo the scaling to get prices back
predicted_test_prices = scaler.inverse_transform(model.predict(x_test))

In [23]:
# Analyze model accuracy on test data

# Flatten both series to 1-D so predictions and actual prices pair up day by day
predicted_test_flat = np.asarray(predicted_test_prices).ravel()
actual_test_flat = np.asarray(actual_prices).ravel()

# Per-day accuracy is smaller price / larger price: 1 for an exact match and
# below 1 otherwise, whichever side the prediction misses on
accuracy_test_array = (
    np.minimum(predicted_test_flat, actual_test_flat)
    / np.maximum(predicted_test_flat, actual_test_flat)
)

# Average over the number of days actually evaluated. The test window ends at
# "now", so its length changes from run to run and must not be hard-coded.
accuracy_test = float(accuracy_test_array.mean())
print(f"Testing accuracy = {round(accuracy_test, 2)}")

In [33]:
# 5. Graph non-descriptive model

## Non-descriptive graph

In [7]:
# Overlay predicted and actual test prices on one set of axes
plt.plot(predicted_test_prices, label="Predicted Prices", color="blue")
plt.plot(actual_prices, label="Actual Prices", color="orange")
plt.title(f"Non-descriptive graph for predicted {stock} stock prices")
plt.legend()
plt.xlabel("Time")
plt.ylabel("Price")

In [45]:
# 6. Experiment with the model by predicting tomorrow's stock price

In [5]:
# Run model to predict tomorrow's stock price

# Take the most recent `training_days` scaled prices as the input window.
# The previous slice started one row too late and so held training_days - 1
# values, a shorter window than the one the model was trained on.
current_prices = np.array([test_inputs[-training_days:, 0]])

# Reshape to (1, training_days, 1): one sample with one value per time step
current_prices = np.reshape(current_prices, (current_prices.shape[0], current_prices.shape[1], 1))

# Predict the next scaled price and map it back to a real price
prediction = model.predict(current_prices)
prediction = scaler.inverse_transform(prediction)
tomorrows_price = float(prediction[0][0])

## Tomorrow's Predicted Stock Price

In [66]:
# Report the predicted price rounded to two decimal places
price_report = f"Tomorrows stock price for {stock} is: ${round(tomorrows_price, 2)}"
print(price_report)