In [7]:
import re
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import Sequential
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [8]:
def get_stockDF(name):
    """
    Load a stock price CSV and return its rows in reversed order.

    For real-time, only need to modify this function to get real-time dataframe

    @name: path (or any other source accepted by pandas.read_csv) of the CSV
    @return: DataFrame with the row order flipped and the 'Unnamed: 0'
             column removed when the file contains one
    """
    df = pd.read_csv(name)
    # Flip the row order; the original index labels travel with their rows.
    df = df.iloc[::-1]
    # 'Unnamed: 0' is the header pandas gives an unlabeled first column (for
    # example an index written by to_csv). A file exported without it used to
    # raise KeyError on an unconditional delete, so drop it only if present.
    return df.drop(columns=['Unnamed: 0'], errors='ignore')
# Load GOOGL_day.csv (relative path, so it must sit in the working directory).
df = get_stockDF("GOOGL_day.csv")

In [9]:
def preprocess(df):
    """
    Scale the 'close' column of df into the [0, 1] range.

    @df: DataFrame that has a 'close' column
    @return: (y, scaler) - y is the scaled close column as an (n, 1) array,
             scaler is the MinMaxScaler fitted on that column
    """
    close_column = df['close'].values.reshape(-1, 1)
    close_scaler = MinMaxScaler(feature_range=(0, 1))
    # Learn the column's min/max and apply the scaling in a single call.
    scaled_close = close_scaler.fit_transform(close_column)
    return scaled_close, close_scaler
    
# stock: scaled close prices as a column array; scaler: the fitted scaler,
# kept at module level because later cells use it to invert predictions.
stock, scaler = preprocess(df)

In [10]:
n_lookback = 60  # length of input sequences (lookback period)
n_forecast = 30  # length of output sequences (forecast period)

def generate_XY(stock, lookback=None, forecast=None):
    """
    This function will generate X,Y as X train and Y train
    Will be modified to be updated regularly

    Slides a window over `stock`: every X sample holds `lookback`
    consecutive entries and the matching Y sample holds the `forecast`
    entries that immediately follow them.

    @stock: sequence of observations, e.g. the (n, 1) array from preprocess
    @lookback: input window length; defaults to the module-level n_lookback
    @forecast: output window length; defaults to the module-level n_forecast
    @return: X, Y arrays shaped (samples, lookback, ...) and
             (samples, forecast, ...); samples is 0 when the series is
             shorter than lookback + forecast
    @raise ValueError: if lookback or forecast is smaller than 1
    """
    # Resolve defaults at call time so later changes to the module constants
    # are honoured.
    lookback = n_lookback if lookback is None else lookback
    forecast = n_forecast if forecast is None else forecast
    if lookback < 1 or forecast < 1:
        raise ValueError("lookback and forecast must both be >= 1")

    series = np.asarray(stock)
    # i is the index of the first forecast step of each sample.
    split_points = range(lookback, len(series) - forecast + 1)

    if len(split_points) == 0:
        # np.array([]) would have shape (0,), which no longer says anything
        # about the window layout; keep the trailing axes intact instead.
        trailing = series.shape[1:]
        return (np.empty((0, lookback) + trailing, dtype=series.dtype),
                np.empty((0, forecast) + trailing, dtype=series.dtype))

    X = np.array([series[i - lookback: i] for i in split_points])
    Y = np.array([series[i: i + forecast] for i in split_points])
    return X, Y

def train_test_split():
    """
    Build the supervised windows from the module-level `stock` series and
    split them into train and test partitions.

    The final 2.5% of the windows, in series order, are held out. The split
    is deliberately not shuffled: consecutive windows produced by
    generate_XY differ by a single step, so a random split would place
    near-duplicates of every test window in the training set and make the
    test score meaningless.
    NOTE(review): this assumes `stock` runs oldest -> newest (get_stockDF
    reverses the CSV rows, presumably for that reason) - confirm.

    @return: trainX, testX, trainY, testY
    """
    # Aliased so the sklearn helper does not shadow this wrapper's own name.
    from sklearn.model_selection import train_test_split as _sk_train_test_split
    X, Y = generate_XY(stock)
    trainX, testX, trainY, testY = _sk_train_test_split(
        X, Y, test_size=0.025, shuffle=False
    )
    return trainX, testX, trainY, testY


In [11]:
def get_newStockDF():
    """
    Placeholder - not implemented yet; always returns None.

    NOTE(review): presumably intended to supply fresh data for the
    real-time flow mentioned in get_stockDF - confirm before relying on it.
    """
    return None

In [12]:

def train(X, Y, epochs=5, batch_size=32, model_path='traintest.h5'):
    """
    Train model

    Builds four stacked LSTM layers followed by a Dense(n_forecast) head,
    fits the network on (X, Y) with Adam and mean squared error, and saves
    it to `model_path`.

    @X: input windows; the first layer declares input_shape=(n_lookback, 1)
    @Y: target windows, n_forecast values per sample
    @epochs: number of passes over the training data
    @batch_size: mini-batch size handed to fit
    @model_path: file the fitted model is written to
    @return: the fitted model, so callers need not reload it from disk
    """
    # Seed both numpy and TensorFlow so repeated runs start from the same
    # random state.
    np.random.seed(1234)
    tf.random.set_seed(1234)
    model = Sequential()
    # Every LSTM except the last returns the full sequence so the next LSTM
    # receives one vector per time step.
    model.add(LSTM(units=50, return_sequences=True, input_shape=(n_lookback, 1), activation='relu'))
    model.add(LSTM(units=50, return_sequences=True, activation='relu'))
    model.add(LSTM(units=30, return_sequences=True, activation='relu'))
    model.add(LSTM(units=50, activation='relu'))
    # One output unit per forecast step.
    model.add(Dense(n_forecast))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, Y, epochs=epochs, batch_size=batch_size)
    model.save(model_path)
    return model

def eval_train(trainX, trainY):
    """
    Evaluate the saved model on the training windows.

    Only the training data passed in is used. An earlier version also
    predicted on `testX`, a name that is neither a parameter nor defined
    before this function, so calling it on a fresh kernel raised NameError.

    @trainX: training inputs
    @trainY: training targets
    @return: (loss, trainPredict) - the value returned by model.evaluate and
             the predictions for trainX mapped back through the module-level
             scaler, in the same layout model.predict produced
    """
    model = tf.keras.models.load_model('traintest.h5')
    loss = model.evaluate(trainX, trainY, batch_size=32)
    scaled_predict = model.predict(trainX)
    # invert predictions: the scaler was fitted on a single column, so feed
    # it a single column and restore the prediction layout afterwards.
    trainPredict = scaler.inverse_transform(
        scaled_predict.reshape(-1, 1)
    ).reshape(scaled_predict.shape)
    return loss, trainPredict

def eval_test(testX, testY):
    """
    Evaluate the saved model on the held-out windows.

    @testX: test inputs
    @testY: test targets
    @return: the value returned by model.evaluate for the model loaded from
             'traintest.h5'
    """
    model = tf.keras.models.load_model('traintest.h5')
    # Hand the metric back instead of discarding it so callers can record or
    # compare it.
    return model.evaluate(testX, testY, batch_size=32)

def run(stock):
    """
    Run the training algorithm on the historical data

    NOTE: the `stock` argument is not read here - train_test_split() builds
    its windows from the module-level `stock` variable instead.
    """
    # Only the training half of the split is needed for fitting.
    trainX, _, trainY, _ = train_test_split()
    train(trainX, trainY)

# Fit the model on the training split; train() saves it to 'traintest.h5',
# which the evaluation and forecasting cells load.
run(stock)

Epoch 1/5


2022-07-19 15:15:21.673527: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
# trainX, testX, trainY, testY = train_test_split()
# eval_train(trainX, trainY)

In [14]:
# eval_test(testX, testY)

In [15]:
def gen_forecasts(stock):
    """
    Generate forecasts from the saved model
    @stock: preprocessed stock variable; its last n_lookback entries are
            used as the single input window
    @return: the forecast as a column array (one row per predicted step),
             mapped back through the module-level scaler
    """
    forecaster = tf.keras.models.load_model('traintest.h5')

    # Most recent window, shaped as a batch of one: (1, n_lookback, 1).
    latest_window = stock[-n_lookback:].reshape(1, n_lookback, 1)

    # Column layout matches what the scaler was fitted on.
    scaled_forecast = forecaster.predict(latest_window).reshape(-1, 1)
    return scaler.inverse_transform(scaled_forecast)

In [20]:
# Score the saved model on the held-out windows, in original price units.
trainX, testX, trainY, testY = train_test_split()

# Predict for the test windows themselves. gen_forecasts(stock) yields one
# n_forecast-step forecast past the end of the series, which cannot be
# compared with testY: the lengths differ, so the RMSE could not be computed.
eval_model = tf.keras.models.load_model('traintest.h5')
testPredict = eval_model.predict(testX).reshape(-1, 1)
testPredict = scaler.inverse_transform(testPredict)

# Same single-column layout for the targets before inverting the scaling.
testY = testY.reshape(-1, 1)
testY = scaler.inverse_transform(testY)

print(len(testY))
print('Test Root Mean Squared Error:', np.sqrt(mean_squared_error(testY, testPredict)))


900


In [None]:
# """Data frame of the forecasts
# #Append two of them to the variable results
# Plot both
# """
# def gen_dfPast(df):
#     df_past = df[['close']]
#     df_past['date'] = pd.to_datetime(df['date'])
#     df_past['forecast'] = np.nan
#     return df_past
# def gen_dfFuture(df_past, forecast):
#     df_future = pd.DataFrame(columns=['close', 'date', 'forecast'])
#     df_future['date'] = pd.date_range(start=df_past['date'].iloc[-1] + pd.Timedelta(minutes=1), periods=n_forecast)
#     df_future['forecast'] = forecast.flatten()
#     results = df_past.append(df_future).set_index('date')
#     return results, df_future

# def plot_pred(results):
#     plt.figure(figsize=(16,6))
#     plt.title('30 days predictions')
#     plt.plot(results)
#     #Uncomment this when real-time is fully implemented
#     # plt.savefig('real1.png', dpi = 300, bbox_inches = 'tight')

In [None]:
# def plot_pred2(df_future):
#     future = df_future.set_index('date')
#     future.plot()

In [None]:
# def run_stock():
#     forecast = gen_forecasts(stock)
#     dfPast = gen_dfPast(df)
#     results, dfFuture = gen_dfFuture(dfPast, forecast)
#     plot_pred(results)
#     plot_pred2(dfFuture)
# run_stock()