In [1]:
import pandas as pd
from etl_resources import sqlite_connection
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandasql as psql
from sklearn.preprocessing import MinMaxScaler
pd.options.mode.chained_assignment = None 

In [2]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM

2021-11-30 19:31:06.425648: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-11-30 19:31:06.425714: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [3]:
def ticker_list():
    
    con = sqlite_connection()
    cur = con.cursor()
    cur.execute('''
    select distinct w.ticker from weekly_prices_clean w
        inner join (select ticker from weekly_prices_clean
        group by ticker
        having max(date) > '2021-01-01') t on t.ticker=w.ticker
        
    ''')
    res = cur.fetchall()
    res = [val[0] for val in res]
    
    return res

In [4]:
def base_df(ticker):
    
    '''
    This function returns the base time series dataframe (date and close)
    '''
    
    con = sqlite_connection()
        
    df = pd.read_sql(f'''select date,close 
    from weekly_prices_clean where ticker='{ticker}' --and date>'2017-12-31' 
    group by date,close
    order by date asc''',con=con)
    
    df = df.filter(['close'])
        
    return df

In [5]:
def mean_abs_perc_err(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [6]:
class TFCallback(tf.keras.callbacks.Callback):
    
    def on_epoch_end(self, epochs, logs={}) :
        threshold = 0.002
        if(logs.get('loss') is not None and logs.get('loss') < threshold) :
            print('\nLoss dropped below {}, cancelling further training'.format(str(threshold)))
            self.model.stop_training = True


In [7]:
def build_lstm(x_train, y_train, x_test, y_test, scaler):
    
    
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape= (x_train.shape[1], 1)))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    callbacks = TFCallback()
    
    # Train the model
    model.fit(x_train, y_train, batch_size=1, epochs=50, callbacks=[callbacks])
    
    # Convert the data to a numpy array
    x_test = np.array(x_test)

    # Reshape the data
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))

    # Get the models predicted price values 
    predictions = model.predict(x_test)
    predictions = scaler.inverse_transform(predictions)

    # Get the root mean squared error (RMSE)
    rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))
    mape = mean_abs_perc_err(y_test, predictions)
    print('RMSE:',rmse)
    print('MAPE:',mape)
    
    return predictions
    

In [None]:
def main():
    # following https://www.kaggle.com/faressayah/stock-market-analysis-prediction-using-lstm
    tickers = ticker_list()
    #metrics_list = list()
    
    for ticker in tickers:
        
        print(f"Building model for {ticker}")
        
        #metrics = dict()
        
        # Build the dataset and split
        df = base_df(ticker)
        dataset = df.values
        scaler = MinMaxScaler(feature_range=(0,1))
        scaled_data = scaler.fit_transform(dataset)
        training_data_len = int(np.ceil( len(dataset) * .95 ))
        train_data = scaled_data[0:int(training_data_len), :]
    
        # Split the data into x_train and y_train data sets
        x_train = []
        y_train = []
        #print([i for i in range(100, len(train_data))])
        for i in range(60, len(train_data)):
            x_train.append(train_data[i-60:i, 0])
            y_train.append(train_data[i, 0])
       
        x_train, y_train = np.array(x_train), np.array(y_train)
        x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

        test_data = scaled_data[training_data_len - 60: , :]
        
        # Create the data sets x_test and y_test
        x_test = []
        y_test = dataset[training_data_len:, :]
        for i in range(60, len(test_data)):
            x_test.append(test_data[i-60:i, 0])
    
        # Convert the data to a numpy array
        x_test = np.array(x_test)

        # Reshape the data
        x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))
        
        # Train & Test LSTM
        predictions = build_lstm(x_train, y_train, x_test, y_test, scaler)
        
        train = df[:training_data_len]
        valid = df[training_data_len:]
        valid['Predictions'] = predictions
        # Visualize the data
        plt.figure(figsize=(16,6))
        plt.title('Model')
        plt.xlabel('Date', fontsize=18)
        plt.ylabel('Close Price USD ($)', fontsize=18)
        plt.plot(train['close'])
        plt.plot(valid[['close', 'Predictions']])
        plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
        plt.savefig(f'../data/visualization/lstm/{ticker}.png', facecolor='white', transparent=False)
        plt.close('all')
        

main()      

Building model for MMM


2021-11-30 19:31:08.116100: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-11-30 19:31:08.116133: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2021-11-30 19:31:08.116151: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (jameslaptop-Lenovo-Yoga-2-Pro): /proc/driver/nvidia/version does not exist
2021-11-30 19:31:08.116339: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-30 19:31:08.569299: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] N

Epoch 1/50
Epoch 2/50

Loss dropped below 0.002, cancelling further training
RMSE: 5.03580128552323
MAPE: 2.2554948232578567
Building model for BKNG
Epoch 1/50
Epoch 2/50

Loss dropped below 0.002, cancelling further training
RMSE: 196.59756307470087
MAPE: 7.68166606268815
Building model for ABT
Epoch 1/50
Epoch 2/50

Loss dropped below 0.002, cancelling further training
RMSE: 7.758655694677558
MAPE: 5.885838584861403
Building model for ABBV
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50

Loss dropped below 0.002, cancelling further training
RMSE: 2.934389979022893
MAPE: 2.113946991875532
Building model for ACN
Epoch 1/50

Loss dropped below 0.002, cancelling further training
RMSE: 12.406388657225442
MAPE: 3.7185955047244477
Building model for ADBE
Epoch 1/50

Loss dropped below 0.002, cancelling further training
RMSE: 50.224806304511425
MAPE: 7.485850439559546
Building model for MO
Epoch 1/50
Epoch

  plt.figure(figsize=(16,6))


Epoch 1/50
Epoch 2/50

Loss dropped below 0.002, cancelling further training
RMSE: 43.91978731716733
MAPE: 4.622280107740823
Building model for CVX
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50

Loss dropped below 0.002, cancelling further training
RMSE: 4.100141998101349
MAPE: 3.2000552780271474
Building model for CSCO
Epoch 1/50

Loss dropped below 0.002, cancelling further training
RMSE: 2.2816881539163396
MAPE: 3.837694355870708
Building model for C
Epoch 1/50
Epoch 2/50
Epoch 3/50

Loss dropped below 0.002, cancelling further training
RMSE: 3.410352431463528
MAPE: 4.515474446485824
Building model for CL
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

Loss dropped below 0.0