In [1]:
# Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from google.cloud import storage
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# Function to get data

def get_stock_data_from_gcp(nrows=10000, local=False, optimize=False, **kwargs):
    """Load the stock-price table from a local CSV or from the GCS bucket.

    Parameters
    ----------
    nrows : int, optional
        Accepted for backward compatibility but currently NOT applied: the
        whole file is always read. Pass ``nrows=nrows`` to ``pd.read_csv``
        below if a row limit is wanted.
    local : bool, optional
        If True, read ``data/data_data_10Mill.csv``; otherwise read
        ``gs://stock-news-pred-bucket/sorted_final_stock_prices``.
    optimize : bool, optional
        Accepted but currently unused.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV content.
    """
    # No storage.Client() is created here: the client object was never passed
    # to pd.read_csv (which receives the path/URL directly), and constructing
    # it can fail when no GCP credentials are configured -- which needlessly
    # broke the local=True code path.
    if local:
        path = "data/data_data_10Mill.csv"
    else:
        path = "gs://stock-news-pred-bucket/sorted_final_stock_prices"
    return pd.read_csv(path)

In [3]:
# Load the price table once with the default arguments (remote bucket path).
# predict_model() and model_training() read this module-level `df` directly.
df = get_stock_data_from_gcp()

In [4]:
# Display the loaded frame to check its columns and date range.
df

Unnamed: 0,date,AAPL,MSFT,GOOGL,AMZN
0,1999-11-01,77.62,92.37,,69.13
1,1999-11-02,80.25,92.56,,66.44
2,1999-11-03,81.50,92.00,,65.81
3,1999-11-04,83.62,91.75,,63.06
4,1999-11-05,88.31,91.56,,64.94
...,...,...,...,...,...
5553,2021-11-24,161.94,337.91,2922.40,3580.41
5554,2021-11-26,156.81,329.68,2843.66,3504.56
5555,2021-11-29,160.24,336.63,2910.61,3561.57
5556,2021-11-30,165.30,330.59,2837.95,3507.07


In [5]:
# Function to scale data

def scale_stock_data(df, ticker):
    """Min-max scale a single ticker column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a column named ``ticker``.
    ticker : str
        Name of the column to scale.

    Returns
    -------
    sc : MinMaxScaler
        The scaler fitted on the selected column (needed later to invert
        the scaling of predictions).
    X_sc : array
        The scaled column as returned by ``fit_transform`` (2-D, one column).
    """
    # Double brackets keep a single-column 2-D selection, which the scaler expects.
    X = df[[ticker]]
    sc = MinMaxScaler()
    # fit_transform both fits and transforms; a separate fit() beforehand
    # only repeated the same work.
    X_sc = sc.fit_transform(X)
    return sc, X_sc

In [6]:
# Function to split scaled data

def split_stock_data(X_sc, train_size):
    """Cut the scaled data by position into a leading and a trailing part.

    Parameters
    ----------
    X_sc : array
        Scaled data; rows are split along the first axis.
    train_size : float
        Fraction of rows assigned to the first (training) part. The cut
        position is ``round(train_size * number_of_rows)``.

    Returns
    -------
    tuple
        ``(X_sc_train, X_sc_test)``: the rows before the cut and the rows
        from the cut onwards.
    """
    n_rows = X_sc.shape[0]
    cut = round(train_size * n_rows)
    return X_sc[:cut], X_sc[cut:]

In [7]:
# Function to reshape scaled train data

def reshape_train_data(X_sc_train, obs):
    """Turn the scaled training series into sliding-window samples.

    Each sample holds ``obs`` consecutive values from column 0 and its target
    is the value that immediately follows the window.

    Parameters
    ----------
    X_sc_train : numpy.ndarray
        2-D array; only column 0 is used.
    obs : int
        Window length (number of past observations per sample).

    Returns
    -------
    X_train : numpy.ndarray
        Shape ``(n_samples, obs, 1)``.
    y_train : numpy.ndarray
        Shape ``(n_samples,)``.
    """
    n_rows = X_sc_train.shape[0]
    if n_rows <= obs:
        # Not enough rows for even one window. Previously np.array([]) came
        # out 1-D and the reshape below crashed with IndexError; return
        # correctly shaped empty arrays instead.
        return np.empty((0, obs, 1)), np.empty((0,))
    positions = range(obs, n_rows)
    X_train = np.array([X_sc_train[i - obs:i, 0] for i in positions])
    y_train = np.array([X_sc_train[i, 0] for i in positions])
    # Add a trailing feature axis of size 1.
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    return X_train, y_train

In [8]:
# Function to reshape scaled test data

def reshape_test_data(X_sc_test, obs):
    """Turn the scaled test series into sliding-window samples.

    Each sample holds ``obs`` consecutive values from column 0 and its target
    is the value that immediately follows the window.

    Parameters
    ----------
    X_sc_test : numpy.ndarray
        2-D array; only column 0 is used.
    obs : int
        Window length (number of past observations per sample).

    Returns
    -------
    X_test : numpy.ndarray
        Shape ``(n_samples, obs, 1)``.
    y_test : numpy.ndarray
        Shape ``(n_samples,)``.
    """
    n_rows = X_sc_test.shape[0]
    if n_rows <= obs:
        # Too few rows for a single window: avoid the IndexError that the
        # reshape of a 1-D empty array used to raise and return empty,
        # correctly shaped arrays.
        return np.empty((0, obs, 1)), np.empty((0,))
    positions = range(obs, n_rows)
    X_test = np.array([X_sc_test[i - obs:i, 0] for i in positions])
    y_test = np.array([X_sc_test[i, 0] for i in positions])
    # Add a trailing feature axis of size 1.
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    return X_test, y_test

In [9]:
# Function to inverse transform

def inverse_transformer(y, sc=None):
    """Map scaled values back to original units and return them as a 1-D array.

    Parameters
    ----------
    y : array-like
        Scaled values, either 1-D ``(n,)`` or 2-D ``(n, k)``.
    sc : fitted scaler, optional
        Object exposing ``inverse_transform`` (for example the scaler returned
        by ``scale_stock_data``). When omitted, the module-level ``scaler``
        is used, which was the only behaviour before this parameter existed;
        a NameError is raised if no such global is defined.

    Returns
    -------
    numpy.ndarray
        Column 0 of the inverse-transformed data.
    """
    if sc is None:
        # Backward-compatible fallback to the global the original relied on.
        sc = scaler
    y = np.asarray(y)
    if y.ndim == 1:
        y = y.reshape(-1, 1)
    # Pad with columns of ones only up to the number of features the scaler
    # was fitted on. The old code always appended exactly one column, which
    # breaks for a scaler fitted on a single column.
    n_features = getattr(sc, "n_features_in_", None)
    if n_features is None:
        # Scaler does not report its width: keep the historical single pad column.
        n_features = y.shape[1] + 1
    missing = n_features - y.shape[1]
    if missing > 0:
        y = np.c_[y, np.ones((len(y), missing))]
    y = sc.inverse_transform(y)
    return y[:, 0]

In [10]:
# Function to build the lstm model

def lstm_model():
    """Build and compile the stacked LSTM regressor.

    The network is four LSTM layers of 50 units, each followed by a
    ``Dropout(0.2)`` layer, and a final ``Dense`` layer with one unit. The
    first three LSTM layers return full sequences so the next LSTM can consume
    them; the fourth does not. Compiled with the ``adam`` optimizer and
    ``mean_squared_error`` loss.

    Returns
    -------
    Sequential
        The compiled, untrained model.
    """
    layers = []
    # Three sequence-returning LSTM layers, each paired with dropout.
    for _ in range(3):
        layers.append(LSTM(units=50, return_sequences=True))
        layers.append(Dropout(0.2))
    # Last recurrent layer, its dropout, and the single-unit output layer.
    layers.extend([LSTM(units=50), Dropout(0.2), Dense(units=1)])

    network = Sequential(layers)
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

In [11]:
# Function to train the model

def train_model(X_train, y_train):
    """Fit a freshly built ``lstm_model()`` on the given samples and return it.

    Training runs for up to 100 epochs with batch size 32, holds out 20% of
    the samples via ``validation_split``, and uses an ``EarlyStopping``
    callback (``patience=30``, ``restore_best_weights=True``).

    Parameters
    ----------
    X_train : array
        Input windows passed to ``model.fit``.
    y_train : array
        Targets passed to ``model.fit``.

    Returns
    -------
    The fitted model.
    """
    early_stop = EarlyStopping(patience=30, restore_best_weights=True)
    network = lstm_model()
    network.fit(
        X_train,
        y_train,
        validation_split=0.2,
        epochs=100,
        batch_size=32,
        callbacks=[early_stop],
        verbose=1,
    )
    return network

In [12]:
# Function to predict results

def predict_model(ticker, train_size=0.8, obs=30):
    """Train an LSTM on one ticker and predict the held-out tail of the series.

    Reads the module-level ``df``; it must be loaded before calling.

    Parameters
    ----------
    ticker : str
        Column of ``df`` to model.
    train_size : float, optional
        Fraction of rows used for training (default 0.8, the value that was
        previously hard-coded).
    obs : int, optional
        Window length fed to the model (default 30, previously hard-coded).

    Returns
    -------
    array
        Predictions for the test windows, mapped back to the original scale
        with the scaler's ``inverse_transform``.
    """
    # NOTE(review): the scaler is fitted on the full column before the split,
    # so the test rows influence the scaling -- confirm this is intended.
    sc, X_sc = scale_stock_data(df, ticker)
    X_sc_train, X_sc_test = split_stock_data(X_sc, train_size)
    X_train, y_train = reshape_train_data(X_sc_train, obs)
    # The test targets are not needed to produce predictions.
    X_test, _ = reshape_test_data(X_sc_test, obs)
    model = train_model(X_train, y_train)
    predicted_stock_price = model.predict(X_test)
    return sc.inverse_transform(predicted_stock_price)

In [13]:
# New function to train model for then extracting predictions

def model_training(ticker, train_size=0.8, obs=30):
    """Train and return an LSTM for one ticker without making predictions.

    Reads the module-level ``df``; it must be loaded before calling.

    Parameters
    ----------
    ticker : str
        Column of ``df`` to model.
    train_size : float, optional
        Fraction of rows used for training (default 0.8, the value that was
        previously hard-coded).
    obs : int, optional
        Window length fed to the model (default 30, previously hard-coded).

    Returns
    -------
    The fitted model returned by ``train_model``.
    """
    _, X_sc = scale_stock_data(df, ticker)
    # Only the training part is needed here; the test windows used to be
    # built and then thrown away.
    X_sc_train, _ = split_stock_data(X_sc, train_size)
    X_train, y_train = reshape_train_data(X_sc_train, obs)
    return train_model(X_train, y_train)

In [14]:
# New reshape function for final predictions on X

def reshape_data(X_sc, obs):
    """Build sliding-window inputs over the whole scaled series (no targets).

    Parameters
    ----------
    X_sc : numpy.ndarray
        2-D array; only column 0 is used.
    obs : int
        Window length (number of consecutive observations per sample).

    Returns
    -------
    numpy.ndarray
        Shape ``(n_rows - obs, obs, 1)``; window ``j`` covers rows
        ``j .. j + obs - 1``. Empty with shape ``(0, obs, 1)`` when the series
        has ``obs`` rows or fewer.
    """
    n_rows = X_sc.shape[0]
    if n_rows <= obs:
        # No complete window fits. np.array([]) would be 1-D and the reshape
        # below would raise IndexError, so return a well-shaped empty array.
        return np.empty((0, obs, 1))
    X_final = np.array([X_sc[i - obs:i, 0] for i in range(obs, n_rows)])
    # Add a trailing feature axis of size 1.
    X_final = np.reshape(X_final, (X_final.shape[0], X_final.shape[1], 1))
    return X_final

In [15]:
#model_amzn = model_training('AMZN')

In [16]:
#sc, X_sc = scale_stock_data(df, 'AMZN')

In [17]:
#X_sc.shape

In [18]:
#X_final = reshape_data(X_sc, 30)

In [19]:
#X_final.shape

In [20]:
#final_predictions = model_amzn.predict(X_final)

In [21]:
#final_predictions

In [22]:
#final_predictions = sc.inverse_transform(final_predictions)
#final_predictions

In [None]:
#final_predictions.shape

In [None]:
#df = df.drop(df.head(30).index)

In [None]:
#df['predictions'] = final_predictions

In [None]:
#df

In [None]:
#df.to_csv('ticker_predictions.csv', index=False)

In [None]:
#plt.figure(figsize=(20, 6))
#plt.plot(df['AMZN'], color = 'black', label = 'AMZN Stock Price')
#plt.plot(df['predictions'], color = 'green', label = 'Predicted AMZN Stock Price')
#plt.title('AMZN Stock Price Prediction')
#plt.xlabel('Time')
#plt.ylabel('AMZN Stock Price')
#plt.legend()
#plt.show()

In [None]:
#from keras.models import load_model

#model.save('stock_price.h5')

In [None]:
#model_test = load_model('lstm_model_stock_price.h5')

In [None]:
#sc, X_sc = scale_stock_data(df, 'AMZN')

In [None]:
#X_sc_train, X_sc_test = split_stock_data(X_sc, 0.8)

In [None]:
#X_train, y_train = reshape_train_data(X_sc_train, 30)

In [None]:
#X_test, y_test = reshape_test_data(X_sc_test, 30)

In [None]:
#y_pred = model_test.predict(X_test)

In [None]:
#y_pred