In [1]:
import pandas as pd
from etl_resources import sqlite_connection
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandasql as psql
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM
pd.options.mode.chained_assignment = None 

2021-12-01 10:53:00.817396: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-01 10:53:00.817434: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
def ticker_list():
    
    con = sqlite_connection()
    cur = con.cursor()
    cur.execute('''
    select distinct w.ticker from weekly_prices_clean w
        inner join (select ticker from weekly_prices_clean
        w
        group by ticker
        having max(date) > '2021-01-01') t on t.ticker=w.ticker
        
    ''')
    res = cur.fetchall()
    res = [val[0] for val in res]
    
    return res

In [3]:
def base_df(ticker):
    
    '''
    This function returns the base time series dataframe (date and close)
    '''
    
    con = sqlite_connection()
        
    df = pd.read_sql(f'''select date,close 
    from weekly_prices_clean where ticker='{ticker}' --and date>'2017-12-31' 
    group by date,close
    order by date asc''',con=con)
    
    df = df.filter(['close'])
        
    return df

In [4]:
def mean_abs_perc_err(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [5]:
def perc_change(x, y):
    
    x_array_length = len(x)
    x_last_element = x[x_array_length - 1]
    
    y_array_length = len(y)
    y_last_element = y[y_array_length - 1]
    
    return (y_last_element - x_last_element) / x_last_element

In [6]:
class TFCallback(tf.keras.callbacks.Callback):
    
    def on_epoch_end(self, epochs, logs={}) :
        threshold = 0.002
        if(logs.get('loss') is not None and logs.get('loss') < threshold) :
            print('\nLoss dropped below {}, cancelling further training'.format(str(threshold)))
            self.model.stop_training = True


In [7]:
def build_lstm(x_train, y_train, x_test, y_test, scaler):
    
    
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape= (x_train.shape[1], 1)))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    callbacks = TFCallback()
    
    # Train the model
    model.fit(x_train, y_train, batch_size=1, epochs=15, callbacks=[callbacks])
    
    # Convert the data to a numpy array
    x_test = np.array(x_test)

    # Reshape the data
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))

    # Get the models predicted price values 
    predictions = model.predict(x_test)
    predictions = scaler.inverse_transform(predictions)
    
    return predictions
    

In [8]:
def main():
    # following https://www.kaggle.com/faressayah/stock-market-analysis-prediction-using-lstm
    tickers = ticker_list()
    metrics_list = list()
    
    for ticker in tickers:
        
        print(f"Building model for {ticker}")
        
        metrics = dict()
        
        # Build the dataset and split
        df = base_df(ticker)
        dataset = df.values
        scaler = MinMaxScaler(feature_range=(0,1))
        scaled_data = scaler.fit_transform(dataset)
        training_data_len = int(np.ceil( len(dataset) * .95 ))
        train_data = scaled_data[0:int(training_data_len), :]
    
        # Split the data into x_train and y_train data sets
        x_train = []
        y_train = []
        #print([i for i in range(100, len(train_data))])
        for i in range(60, len(train_data)):
            x_train.append(train_data[i-60:i, 0])
            y_train.append(train_data[i, 0])
       
        x_train, y_train = np.array(x_train), np.array(y_train)
        x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

        test_data = scaled_data[training_data_len - 60: , :]
        
        # Create the data sets x_test and y_test
        x_test = []
        y_test = dataset[training_data_len:, :]
        for i in range(60, len(test_data)):
            x_test.append(test_data[i-60:i, 0])
    
        # Convert the data to a numpy array
        x_test = np.array(x_test)

        # Reshape the data
        x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))
        
        # Train & Test LSTM
        predictions = build_lstm(x_train, y_train, x_test, y_test, scaler)
        
        train = df[:training_data_len]
        valid = df[training_data_len:]
        valid['Predictions'] = predictions
        
        metrics['ticker'] = ticker
        
        rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))
        mape = mean_abs_perc_err(y_test, predictions)
        mse = mean_squared_error(y_true = y_test, y_pred = predictions)
        mae = mean_absolute_error(y_true = y_test, y_pred = predictions)
        #yhat_perc_change = perc_change(x_train, predictions)
        #y_perc_change = perc_change(x_train, valid)

        metrics['RMSE'] = rmse
        metrics['MAPE'] = mape
        metrics['MSE'] = mse
        metrics['MAE'] = mae
        #metrics['yhat_perc_change'] = yhat_perc_change
        #metrics['y_perc_change'] = y_perc_change
        
        metrics_list.append(metrics)
        
        print('\n',ticker,': RMSE:',round(rmse,2),'MAPE:',round(mape,2),'MSE:',round(mse,2),'MAE:',round(mae,2))
                                             
        # Visualize the data
        plt.figure(figsize=(16,6))
        plt.title('Model')
        plt.xlabel('Date', fontsize=18)
        plt.ylabel('Close Price USD ($)', fontsize=18)
        plt.plot(train['close'])
        plt.plot(valid[['close', 'Predictions']])
        plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
        plt.savefig(f'../data/visualization/lstm/{ticker}.png', facecolor='white', transparent=False)
        plt.close('all')
    
        total_metrics = pd.DataFrame(metrics_list)
        total_metrics.to_csv('forecast_metrics_lstm.csv')

main()      

Building model for SPG


2021-12-01 10:53:02.430075: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-12-01 10:53:02.430110: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-12-01 10:53:02.430131: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (jameslaptop-Lenovo-Yoga-2-Pro): /proc/driver/nvidia/version does not exist
2021-12-01 10:53:02.430396: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-12-01 10:53:02.920362: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] N

Epoch 1/15
Epoch 2/15

Loss dropped below 0.002, cancelling further training

 SPG : RMSE: 8.33 MAPE: 5.94 MSE: 69.4 MAE: 6.7
Building model for SO
Epoch 1/15
Epoch 2/15

Loss dropped below 0.002, cancelling further training

 SO : RMSE: 1.51 MAPE: 1.89 MSE: 2.28 MAE: 1.18
Building model for SBUX
Epoch 1/15
Epoch 2/15

Loss dropped below 0.002, cancelling further training

 SBUX : RMSE: 5.17 MAPE: 3.9 MSE: 26.75 MAE: 4.24
Building model for TGT
Epoch 1/15

Loss dropped below 0.002, cancelling further training

 TGT : RMSE: 16.38 MAPE: 6.78 MSE: 268.34 MAE: 14.25
Building model for TXN
Epoch 1/15
Epoch 2/15

Loss dropped below 0.002, cancelling further training

 TXN : RMSE: 5.39 MAPE: 2.4 MSE: 29.05 MAE: 4.3
Building model for ALL
Epoch 1/15
Epoch 2/15

Loss dropped below 0.002, cancelling further training

 ALL : RMSE: 9.38 MAPE: 6.96 MSE: 87.96 MAE: 8.42
Building model for BA
Epoch 1/15
Epoch 2/15

Loss dropped below 0.002, cancelling further training

 BA : RMSE: 22.81 MAPE: 8.3 MSE