In [477]:
import os
import csv
import sys
import copy
import time
import logging
import datetime
import numpy as np
#from stats import *
import pandas as pd
import seaborn as sns
from scipy import stats
#from Layer import Layer
import tensorflow as tf 
from pandas import DataFrame
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from sklearn.model_selection import train_test_split, KFold
from sklearn.datasets import fetch_california_housing, make_regression
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [478]:
def write_stats(samples, features, loss, epochs, lr, batch_size, cross_k, r2, mse_train, mae_train, residual_train):
    with open('../data/stats.csv', 'a') as f:
        newrow = [samples, features, loss, epochs, lr, batch_size, cross_k, r2, mse_train, mae_train, residual_train]
        writer = csv.writer(f)
        writer.writerow(newrow)

def fill_dataset(dataframe: DataFrame):
    for column in dataframe:
        if dataframe[column].dtype != 'object':
            dataframe[column] = dataframe[column].fillna(dataframe[column].mean())
    return dataframe

def normalize_dataset(X):
    return tf.keras.utils.normalize(X)

def remove_outliers(X, threshold=7):
    z = np.abs(stats.zscore(X))
    return X[(z<threshold).all(axis=1)][:, 0:-1], X[(z<threshold).all(axis=1)][: ,-1]

def make_dataset(X_data,y_data,k):
    X_data, y_data = remove_outliers(np.concatenate([X_data, y_data], axis=1))
    def gen():
        for train_index, test_index in KFold(k).split(X_data):
            X_train, X_test = X_data[train_index], X_data[test_index]
            XN_train, XN_test = normalize_dataset(X_data[train_index]), normalize_dataset(X_data[test_index])
            y_train, y_test = y_data[train_index], y_data[test_index]
            yield X_train,XN_train,y_train,X_test,XN_test,y_test

    return tf.data.Dataset.from_generator(gen, (tf.double,tf.double,tf.double,tf.double,tf.double,tf.double))


In [479]:
total_columns=["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  
k = 2
batch_size = 120

#XR, yR = make_regression(n_samples=1000, n_features=2, n_informative=5, noise=50, random_state=5)
#X_train, X_test, y_train, y_test = train_test_split(XR, yR, test_size=0.20, random_state=2)
#columns = ["{0}".format(total_columns[i]) for i in range(XR.shape[1]+1)]
#dataframe = pd.DataFrame(np.concatenate([XR, np.reshape(yR, [-1, 1])], axis=1), columns=columns)

stocks = pd.read_csv("../data/datasets/all_stocks_5yr.csv")
stocks = pd.DataFrame(stocks)
filter = stocks["Name"]=="AAPL"
stocks = stocks.where(filter).dropna()
stocks.drop(['Name', "date"], axis=1, inplace=True)
stocks['target'] = stocks['open'].fillna(stocks['open'].mean())-stocks['close'].fillna(stocks['close'].mean())
#data['type'] = 1 if dataframe['open']-dataframe['close']>=0 else 0
rnnstocks = stocks.values[0:100, :]
n_features = len(rnnstocks[0])
n_rows = 10
n_ciclics = 10
data = np.array(rnnstocks).reshape(n_ciclics, n_rows, n_features)
X = data[:, 0:-1, :]
Y = data[:, -1, :]

stocks.head()

Unnamed: 0,open,high,low,close,volume,target
1259,67.7142,68.4014,66.8928,67.8542,158168416.0,-0.14
1260,68.0714,69.2771,67.6071,68.5614,129029425.0,-0.49
1261,68.5014,68.9114,66.8205,66.8428,151829363.0,1.6586
1262,66.7442,67.6628,66.1742,66.7156,118721995.0,0.0286
1263,66.3599,67.3771,66.2885,66.6556,88809154.0,-0.2957


In [480]:
vif_data = pd.DataFrame()
vif_data["features"] = stocks.columns
vif_data["VIF"] = [variance_inflation_factor(stocks.values, i) for i in range(len(stocks.columns))]
print(vif_data)

  features           VIF
0     open           inf
1     high  5.060050e+04
2      low  4.511802e+04
3    close           inf
4   volume  2.630125e+00
5   target           inf


  vif = 1. / (1. - r_squared_i)


In [481]:
class RNNLayer(tf.Module):
    def __init__(self, units, input_dim, output_dim):
        #Initialize weights matrices
        self.W_xh = tf.Variable(tf.random.uniform(shape=(units, input_dim)), trainable=True)
        self.W_hh = tf.Variable(tf.random.uniform(shape=(units, units)), trainable=True)
        self.W_hy = tf.Variable(tf.random.uniform(shape=(output_dim, units)), trainable=True)
        self.bias = tf.Variable(tf.ones(units), trainable=True)
        #Initialize hidden state (Memory)
        self.h = tf.zeros([units, 1])
    
    @tf.function(reduce_retracing=True)
    def __call__(self, x):
        print("[#] W_xh: {0}, W_hh: {1}, W_hy: {2}, W_bias: {3}".format(self.W_xh.shape, self.W_hh.shape, self.W_hy.shape, self.bias.shape))
        updated_input = tf.multiply(self.W_xh, x)
        updated_memory = tf.multiply(self.W_hh, self.h)
        print("[#] updated_input: {0}, updated_memory: {1}".format(updated_input.shape, updated_memory.shape))
        self.h = tf.nn.tanh(
            tf.add(tf.transpose(
                tf.matmul(updated_memory, updated_input)), 
                self.bias
            )
        )
        output = tf.multiply(self.W_hy, self.h)
        print("[#] h: {0}, output: {1}".format(self.h.shape, output.shape))
        return output, self.h        

In [482]:
class MarketAI(tf.Module):
    def __init__(self, layers, epochs=100, lr=0.01):
        self.epochs = epochs
        self.layers = layers
        self.Adam = tf.optimizers.Adam(lr)
        self.loss_history = [e for e in range(self.epochs)]

    @tf.function(reduce_retracing=True)
    def loss(self, y, predicted):
        return tf.losses.MSE(y, predicted)
    
    def history(self, e):
        self.loss_history[e] = self.loss

    def train(self, X):
        print("[#] X: {0}, Y: {1}".format(X.shape, Y.shape))
        X = tf.convert_to_tensor(X, dtype=tf.float32)
        self.vars = [self.layers[0].W_xh, self.layers[0].W_hh, self.layers[0].W_hy, self.layers[0].bias]
        for e in range(self.epochs):
            for layer in self.layers:
                with tf.GradientTape(watch_accessed_variables=True, persistent=True) as tape:
                    output, h = layer(X)
            losses = self.loss(X, tf.transpose(output))
            self.loss = tf.reduce_sum(losses)
            print("[#] Loss: {0}".format(self.loss))

            grads = tape.gradient(self.loss, self.vars)  
            self.Adam.apply_gradients(zip(grads, self.vars))
            self.history(e)


In [483]:
n_ciclics, n_rows, n_features = X.shape
epochs = 1
lr = 0.01
model = MarketAI([
    RNNLayer(n_rows, n_features, n_features)
], epochs, lr)
model.train(X[0])

[#] X: (9, 6), Y: (6,)
[#] W_xh: (9, 6), W_hh: (9, 9), W_hy: (6, 9), W_bias: (9,)
[#] updated_input: (9, 6), updated_memory: (9, 9)
[#] h: (6, 9), output: (6, 9)
[#] Loss: 2.245457005982515e+16


ValueError: No gradients provided for any variable: (['Variable:0', 'Variable:0', 'Variable:0', 'Variable:0'],). Provided `grads_and_vars` is ((None, <tf.Variable 'Variable:0' shape=(9, 6) dtype=float32, numpy=
array([[0.88725734, 0.5995213 , 0.1933645 , 0.5238522 , 0.06317687,
        0.3710934 ],
       [0.36416936, 0.00256801, 0.9992962 , 0.70852065, 0.8219024 ,
        0.7320446 ],
       [0.34955585, 0.76306975, 0.4321556 , 0.8407053 , 0.00300968,
        0.60346115],
       [0.559649  , 0.26802206, 0.8240807 , 0.66092885, 0.00311875,
        0.7176231 ],
       [0.07046938, 0.31595325, 0.609781  , 0.40504277, 0.4049238 ,
        0.60967994],
       [0.20363533, 0.4713906 , 0.6631266 , 0.5226294 , 0.9403013 ,
        0.24795556],
       [0.55395293, 0.8198019 , 0.56277215, 0.11933553, 0.12034929,
        0.5626768 ],
       [0.80094254, 0.14054179, 0.38852906, 0.11670983, 0.19844139,
        0.15899241],
       [0.337551  , 0.5983615 , 0.63147414, 0.7051363 , 0.9954115 ,
        0.7256198 ]], dtype=float32)>), (None, <tf.Variable 'Variable:0' shape=(9, 9) dtype=float32, numpy=
array([[0.3307606 , 0.00945663, 0.20713925, 0.33151186, 0.09123445,
        0.19223225, 0.7020471 , 0.9399637 , 0.19870114],
       [0.9222454 , 0.5660691 , 0.2424972 , 0.67506707, 0.30953145,
        0.6818143 , 0.8776636 , 0.03674424, 0.05496967],
       [0.17781079, 0.5663899 , 0.8069583 , 0.22616959, 0.06723011,
        0.6656265 , 0.15640712, 0.6028372 , 0.06410778],
       [0.8404925 , 0.41855812, 0.07419991, 0.3798039 , 0.1709181 ,
        0.13665378, 0.46457303, 0.04188311, 0.8394971 ],
       [0.08059514, 0.9196471 , 0.8023145 , 0.2743758 , 0.4729755 ,
        0.60347533, 0.70600677, 0.483505  , 0.26264012],
       [0.61654353, 0.5198692 , 0.17046595, 0.05148172, 0.20338058,
        0.8321955 , 0.53042686, 0.896618  , 0.58778095],
       [0.54212296, 0.26471233, 0.6563455 , 0.81151223, 0.87764883,
        0.37578297, 0.6802931 , 0.735088  , 0.5409715 ],
       [0.8927363 , 0.74577737, 0.40862393, 0.9608593 , 0.5893332 ,
        0.34054184, 0.07919669, 0.933022  , 0.60402036],
       [0.47236383, 0.43862855, 0.77626085, 0.24009776, 0.00291228,
        0.5353006 , 0.97053874, 0.59289396, 0.8790935 ]], dtype=float32)>), (None, <tf.Variable 'Variable:0' shape=(6, 9) dtype=float32, numpy=
array([[0.70982015, 0.27539325, 0.6011385 , 0.07611561, 0.9435061 ,
        0.5364251 , 0.72496164, 0.38103282, 0.58070695],
       [0.96982956, 0.03967583, 0.5653516 , 0.17442071, 0.7998333 ,
        0.2827803 , 0.5764619 , 0.33283865, 0.6365974 ],
       [0.01781499, 0.68678486, 0.5397886 , 0.9713186 , 0.04312575,
        0.20271134, 0.1596967 , 0.4220227 , 0.4085616 ],
       [0.52900815, 0.821771  , 0.9354985 , 0.7797735 , 0.09302151,
        0.9428786 , 0.8968643 , 0.57245517, 0.51524746],
       [0.01930261, 0.47409034, 0.24466646, 0.19588721, 0.68170893,
        0.08301294, 0.07731783, 0.05730796, 0.7885418 ],
       [0.7355459 , 0.8096719 , 0.18366814, 0.38870835, 0.09822619,
        0.7393323 , 0.6929877 , 0.24453783, 0.74897826]], dtype=float32)>), (None, <tf.Variable 'Variable:0' shape=(9,) dtype=float32, numpy=array([1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)>)).