In [None]:
import datetime
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import wandb
from sklearn.preprocessing import StandardScaler

%matplotlib inline

### Step 0. Loading dataset

In [None]:
# Start a W&B run and declare version v0 of the raw META price artifact as its input.
run = wandb.init()
artifact = run.use_artifact('ytdteam/ytd-cassandra-forecast/meta-stock-price:v0', type='raw_data')
# Fetch the artifact files locally; the returned value is kept as artifact_dir.
artifact_dir = artifact.download()

In [None]:
# Read the price file from the directory reported by artifact.download() rather than
# from a hard-coded 'artifacts/meta-stock-price-v0' path, so the cell keeps working
# wherever the artifact was actually written.
stock = pd.read_csv(os.path.join(artifact_dir, 'meta.us.txt'))
# Replace the header read from the file with plain column names.
stock.columns = ['TICKER', 'PER', 'DATE', 'TIME', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OPENINT']
# TIME is parsed as HHMMSS and DATE as YYYYMMDD; only the time-of-day / calendar-date
# part of each parsed timestamp is kept.
stock['TIME'] = pd.to_datetime(stock['TIME'], format='%H%M%S').dt.time
stock['DATE'] = pd.to_datetime(stock['DATE'], format='%Y%m%d').dt.date

In [None]:

# Missing values per column.
print(stock.isnull().sum())
# Row counts per time of day. A bare expression in the middle of a cell is evaluated
# and then thrown away, so it is shown explicitly with display().
display(stock.groupby(by=["TIME"]).count())
# Drop the 15:00:00 rows. copy() makes the result an independent frame, so adding a
# column below does not write into a slice of the original (SettingWithCopyWarning).
stock = stock[stock.TIME != datetime.time(15, 0, 0)].copy()
# Combine DATE and TIME into a single timestamp and use it as the index.
stock['Date-time'] = pd.to_datetime(stock.DATE.astype(str) + ' ' + stock.TIME.astype(str))
stock = stock.set_index(pd.DatetimeIndex(stock['Date-time'].values))
stock.head()

In [None]:
# The model is univariate: only the CLOSE column is used, as a NumPy array.
data_to_use = stock['CLOSE'].values

In [None]:
# Sanity check: number of observations available for windowing.
print('Total number of data points in the dataset: {}'.format(len(data_to_use)))

### Step 1. Data preprocessing

#### Step 1.1 Scaling data

In [None]:
# Scaler for the price series; train() reuses it to map predictions back to prices.
scaler = StandardScaler()

In [None]:
# reshape(-1, 1) turns the 1-D series into a single-column 2-D array for the scaler.
# NOTE(review): the scaler is fitted on the whole series, including the part that is
# later held out as the test set — consider fitting on the training portion only.
scaled_dataset = scaler.fit_transform(data_to_use.reshape(-1, 1))

In [None]:
# Visual check of the scaled series, drawn through the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(12, 7), frameon=False, facecolor='brown', edgecolor='blue')
ax.set_title('Scaled META Stock Price Change from December 2021 to October 2022')
ax.set_ylabel('Scaled value of stocks')
ax.plot(scaled_dataset, label='Stocks data')
ax.legend()
plt.show()

In [None]:
def window_data(data, window_size):
    """
    Slice a sequence into overlapping windows and their next-step targets.

    For every start position that still leaves one element after the window,
    the window is data[start:start + window_size] and its target is the
    element immediately following it.

    Returns a pair of lists (X, y) of equal length.
    """
    # Number of windows that still have a following element to use as target.
    n_windows = len(data) - window_size

    X = [data[start:start + window_size] for start in range(n_windows)]
    y = [data[start + window_size] for start in range(n_windows)]

    assert len(X) == len(y)
    return X, y

#### Step 1.2 Windowing the dataset

In [None]:
# Window size 1: each sample is a single scaled value and its target is the next value.
X, y = window_data(scaled_dataset, 1)

#### Step 1.3 Creating Train and Test sets

In [None]:
# Number of leading windows used for training; the remainder is held out for testing.
# The split is positional (no shuffling), so the test set is the tail of the series.
TRAIN_SIZE = 1155

X_train = np.array(X[:TRAIN_SIZE])
y_train = np.array(y[:TRAIN_SIZE])

X_test = np.array(X[TRAIN_SIZE:])
y_test = np.array(y[TRAIN_SIZE:])

print("X_train size: {}".format(X_train.shape))
print("y_train size: {}".format(y_train.shape))
print("X_test size: {}".format(X_test.shape))
print("y_test size: {}".format(y_test.shape))

### Metrics

In [None]:
def calculate_rmse(y_true, y_pred):
    """
    Calculate the Root Mean Squared Error (RMSE).

    Both arguments are converted to float arrays first, so plain Python
    sequences are accepted as well as NumPy arrays (calculate_mape converts
    its inputs in the same way).

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, broadcastable against y_true.

    Returns
    -------
    float
        Square root of the mean squared difference over all elements.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

In [None]:
def calculate_mape(y_true, y_pred):
    """
    Mean Absolute Percentage Error (MAPE), expressed in percent.

    Each absolute error is divided by the corresponding true value, the
    ratios are averaged over all elements and the result is multiplied by 100.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_errors = np.abs((actual - predicted) / actual)
    return np.mean(relative_errors) * 100

### Let's create the RNN

In [None]:
def LSTM_cell(hidden_layer_size, batch_size, number_of_layers, dropout=True, dropout_rate=0.8):
    """
    Build a (possibly stacked) LSTM cell and its all-zero initial state.

    Parameters
    ----------
    hidden_layer_size : int
        Number of units in every LSTM layer.
    batch_size : int
        Batch size used to shape the initial state.
    number_of_layers : int
        Number of LSTM layers to stack.
    dropout : bool
        Whether each layer is wrapped in a DropoutWrapper.
    dropout_rate : float
        Passed to DropoutWrapper as output_keep_prob, i.e. despite its name it
        is the probability of KEEPING an output unit.

    Returns
    -------
    (cell, init_state)
        The MultiRNNCell and the float32 zero state for batch_size.
    """

    def build_layer():
        # One LSTM layer, optionally with output dropout.
        layer = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(hidden_layer_size)
        if dropout:
            layer = tf.compat.v1.nn.rnn_cell.DropoutWrapper(layer, output_keep_prob=dropout_rate)
        return layer

    # A separate cell object is created for every layer. Repeating a single object
    # ([layer] * n) would make all stacked layers the same cell instance, which only
    # works when number_of_layers == 1.
    cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([build_layer() for _ in range(number_of_layers)])

    init_state = cell.zero_state(batch_size, tf.float32)

    return cell, init_state

In [None]:
def output_layer(lstm_output, in_size, out_size):
    """
    Apply a linear layer to the last step of the LSTM output.

    Parameters
    ----------
    lstm_output : Tensor
        Rank-3 tensor; the slice [:, -1, :] (last position along the second
        axis) is what gets projected.
    in_size : int
        Size of the last axis of lstm_output.
    out_size : int
        Number of output values per sample.

    Returns
    -------
    Tensor
        matmul(last_step, weights) + bias, with out_size columns.
    """
    last_step = lstm_output[:, -1, :]
    weights = tf.Variable(tf.compat.v1.random.truncated_normal([in_size, out_size], stddev=0.05), name='output_layer_weights')
    bias = tf.Variable(tf.zeros([out_size]), name='output_layer_bias')

    return tf.matmul(last_step, weights) + bias

In [None]:
def opt_loss(logits, targets, learning_rate, grad_clip_margin, batch_size):
    """
    Build the loss tensor and the training op.

    Parameters
    ----------
    logits : Tensor
        Model predictions.
    targets : Tensor
        Ground-truth values, same leading dimension as logits.
    learning_rate : float
        Learning rate for the Adam optimizer.
    grad_clip_margin : float
        Maximum global norm the gradients are clipped to.
    batch_size : int
        Used to scale the summed squared error.

    Returns
    -------
    (loss, train_optimizer)
        The scalar loss tensor and the op that applies one optimizer step.
    """
    # Sum of squared errors over the batch, scaled by 1 / (2 * batch_size).
    loss = tf.reduce_sum(tf.pow(logits - targets, 2)) / (2 * batch_size)

    trainable_vars = tf.compat.v1.trainable_variables()
    gradients = tf.gradients(loss, trainable_vars)
    # Clip by global norm and apply the CLIPPED gradients; applying the raw gradients
    # would make grad_clip_margin have no effect.
    clipped_gradients, _ = tf.clip_by_global_norm(gradients, grad_clip_margin)
    optimizer = tf.optimizers.Adam(learning_rate)
    train_optimizer = optimizer.apply_gradients(zip(clipped_gradients, trainable_vars))
    return loss, train_optimizer

In [None]:
class StockPredictionRNN:
    """
    Graph-mode LSTM regressor assembled from LSTM_cell, output_layer and opt_loss.

    After construction the instance exposes:
      inputs  - float32 placeholder of shape [batch_size, window_size, 1]
      targets - float32 placeholder of shape [batch_size, 1]
      logits  - output of the final linear layer
      loss    - loss tensor returned by opt_loss
      opt     - training op returned by opt_loss
    """

    def __init__(self, learning_rate=0.0005, batch_size=2, hidden_layer_size=512, number_of_layers=1,
                 dropout=True, dropout_rate=0.8, number_of_classes=1, gradient_clip_margin=4, window_size=1):
        # Placeholders and dynamic_rnn below are TF1 graph constructs.
        tf.compat.v1.disable_eager_execution()

        input_shape = [batch_size, window_size, 1]
        target_shape = [batch_size, 1]
        self.inputs = tf.compat.v1.placeholder(tf.float32, input_shape, name='input_data')
        self.targets = tf.compat.v1.placeholder(tf.float32, target_shape, name='targets')

        # Recurrent part: stacked cell unrolled over the window, starting from the zero state.
        stacked_cell, zero_state = LSTM_cell(
            hidden_layer_size, batch_size, number_of_layers, dropout, dropout_rate
        )
        rnn_outputs, _ = tf.compat.v1.nn.dynamic_rnn(
            stacked_cell, self.inputs, initial_state=zero_state
        )

        # Linear read-out of the recurrent output.
        self.logits = output_layer(rnn_outputs, hidden_layer_size, number_of_classes)

        # Loss and optimisation step.
        loss_tensor, train_op = opt_loss(
            self.logits, self.targets, learning_rate, gradient_clip_margin, batch_size
        )
        self.loss = loss_tensor
        self.opt = train_op

### Weights & Biases Experiments

In [None]:
# Random-search sweep definition handed to wandb.sweep. The objective is the "mape"
# value that train() logs after every epoch.
sweep_config = {
    'method': 'random',
    'metric': {'goal': 'minimize', 'name': 'mape'},
    'parameters': {
        # Batch size sampled between 2 and 64 with quantisation step q = 2.
        'batch_size': {
            'distribution': 'q_log_uniform_values',
            'max': 64,
            'min': 2,
            'q': 2
            },
        # Forwarded to the model as dropout_rate, which LSTM_cell passes to
        # DropoutWrapper as output_keep_prob.
        'dropout_rate': {'values': [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]},
        # Number of passes over the training set performed by train().
        'epochs': {'values': [30, 40, 50, 60, 70, 80]},
        # NOTE(review): a lower bound of 0 allows learning rates arbitrarily close to
        # zero, i.e. runs that barely train — consider a small positive minimum.
        'learning_rate': {
            'distribution': 'uniform',
            'max': 0.0010,
            'min': 0
        },
    }
}

In [None]:
# Register the sweep with W&B; the returned id is passed to wandb.agent to launch runs.
sweep_id = wandb.sweep(sweep_config, project="tensorflow-lstm-sweep", entity="ytdteam")

In [None]:
def train(config=None):
    """
    Train and evaluate one StockPredictionRNN inside a single W&B run.

    Intended to be called by wandb.agent, in which case the hyperparameters
    (learning_rate, batch_size, dropout_rate, epochs) come from wandb.config.
    Relies on the notebook-level X_train, y_train, X_test, scaled_dataset and
    scaler.

    After every epoch the mean training loss, the MAPE (on scaled values) and
    the RMSE (on inverse-transformed values) of the test predictions are
    logged to W&B.

    Notes
    -----
    * Only full batches are used, because the placeholders have a fixed batch
      dimension; trailing samples that do not fill a batch are skipped.
    * The same graph (including any dropout wrapper) is used for evaluation.
    * NOTE(review): MAPE is computed on standardised values, which can be
      close to zero and inflate the percentage — consider computing it on
      inverse-transformed prices.
    """
    # Positions in scaled_dataset, tied to the 1155-window training split with window size 1.
    first_test_target = 1156  # index of the value predicted from X_test[0]
    eval_start = 1158         # first index included in the metrics
    eval_stop = 1446          # one past the last index included in the metrics

    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, this config will be set by the Sweep Controller.
        config = wandb.config
        # int(): the value is used as a tensor dimension and as a slice bound.
        batch_size = int(config.batch_size)

        tf.compat.v1.reset_default_graph()
        model = StockPredictionRNN(
            learning_rate=config.learning_rate,
            batch_size=batch_size,
            dropout_rate=config.dropout_rate,
        )

        # The session must stay open for ALL epochs; the context manager closes it
        # exactly once, after the loop (or on error).
        with tf.compat.v1.Session() as session:
            session.run(tf.compat.v1.global_variables_initializer())

            for i in range(config.epochs):
                # ---- Train on consecutive full batches ----
                epoch_loss = []
                for start in range(0, len(X_train) - batch_size + 1, batch_size):
                    X_batch = X_train[start:start + batch_size]
                    y_batch = y_train[start:start + batch_size]

                    c, _ = session.run(
                        [model.loss, model.opt],
                        feed_dict={model.inputs: X_batch, model.targets: y_batch},
                    )
                    epoch_loss.append(c)

                # ---- Predict on consecutive full batches of the test set ----
                test_batches = [
                    session.run(
                        model.logits,
                        feed_dict={model.inputs: X_test[start:start + batch_size]},
                    )
                    for start in range(0, len(X_test) - batch_size + 1, batch_size)
                ]
                # predictions[k] is the forecast for scaled_dataset[first_test_target + k].
                predictions = np.concatenate(test_batches, axis=0)

                # ---- Metrics ----
                # When batch_size does not divide the test set, fewer predictions than
                # test samples exist; y_true is cut to the predictions actually made
                # instead of indexing past the end of the prediction list.
                y_pred = predictions[eval_start - first_test_target:eval_stop - first_test_target]
                y_true = np.array(scaled_dataset[eval_start:eval_start + len(y_pred)])
                mape_lstm = calculate_mape(y_true, y_pred)
                rmse_lstm = calculate_rmse(scaler.inverse_transform(y_true), scaler.inverse_transform(y_pred))

                # Wandb
                wandb.log({"loss": sum(epoch_loss)/len(epoch_loss), "epoch": i, "mape": mape_lstm, "rmse": rmse_lstm})
                print('Epoch {}/{}'.format(i, config.epochs), ' Current loss: {}'.format(np.mean(epoch_loss)))

In [None]:
# Launch the sweep agent with train as the run function, limited to 5 runs.
wandb.agent(sweep_id, train, count=5)

### References
https://modelzoo.co/model/tesla-stocks-prediction  
https://github.com/lucko515/tesla-stocks-prediction  