In [3]:
import tensorflow as tf
import tensorflow.compat.v1 as tf1

import numpy as np
import pandas as pd

import quantutils.dataset.pipeline as ppl
import quantutils.dataset.ml as mlutils
from marketinsights.api.model import MarketInsightsModel
from marketinsights.remote.ml import MIAssembly

In [4]:
DATASET_ID1 = "4234f0f1b6fcc17f6458696a6cdf5101"  # DOW

# Column index/count handed to ppl.splitCol below. It is defined here because
# this cell runs before the model-configuration cell; without it a fresh
# "Restart & Run All" fails with NameError. Keep the value in sync with the
# NUM_FEATURES used to size the model's first weight matrix.
NUM_FEATURES = (2 * 4) + 1

assembly = MIAssembly(secret="marketinsights-k8s-cred")

# Training Set
dataset, descriptor = assembly.get_dataset_by_id(DATASET_ID1, debug=False)
# NOTE(review): presumably splitCol separates the first NUM_FEATURES columns
# (features) from the remaining label column(s) - confirm against quantutils.
train_x, train_y = ppl.splitCol(dataset, NUM_FEATURES)
# Cast to float32 so the inputs match the dtype of the model's weights.
train_x = tf.cast(train_x, tf.float32)
train_y = tf.cast(train_y, tf.float32)


In [17]:
# --- Network dimensions ---
NUM_FEATURES = 2 * 4 + 1  # width of each input row (evaluates to 9)
NUM_LABELS = 1            # width of the model output
HIDDEN_UNITS = 32         # number of units in the hidden layer

# --- Parameter initialisation ---
SEED = 42    # seed passed to the random weight initialisers
STDEV = 0.1  # standard deviation of the randomly initialised weights
BIAS = 0.1   # network bias (initial value)

class MLModel(tf.keras.Model):
  """One-hidden-layer network with a sigmoid hidden activation.

  The forward pass returns raw logits; `loss` applies the sigmoid itself via
  sigmoid cross-entropy and adds an L2 penalty on both weight matrices.
  """

  def __init__(self):
    """Create the weight matrices, the output bias and the L2 coefficient."""
    super().__init__()
    # Weights are drawn from a normal distribution; creation order is kept
    # (Theta1, Theta2, bias) so random draws and variable ordering are stable.
    hidden_weights = tf.random.normal(
        [HIDDEN_UNITS, NUM_FEATURES], stddev=STDEV, seed=SEED)
    self.Theta1 = tf.Variable(hidden_weights)
    output_weights = tf.random.normal(
        [NUM_LABELS, HIDDEN_UNITS], stddev=STDEV, seed=SEED)
    self.Theta2 = tf.Variable(output_weights)
    # Only the output layer carries a bias term.
    self.bias = tf.Variable(tf.constant(BIAS, shape=[NUM_LABELS]))
    # L2 regularisation strength (a constant, so it is never updated).
    self.lam = tf.constant(0.001, tf.float32)

  @tf.function
  def call(self, x):
    """Return the output logits for the input batch `x`."""
    hidden_pre_activation = tf.matmul(x, tf.transpose(self.Theta1))
    hidden = tf.nn.sigmoid(hidden_pre_activation)
    logits = tf.matmul(hidden, tf.transpose(self.Theta2))
    return tf.nn.bias_add(logits, self.bias)

  def loss(self, y_pred, y):
    """Mean sigmoid cross-entropy of logits `y_pred` vs `y`, plus L2 penalty.

    The penalty is `lam / m * (l2_loss(Theta1) + l2_loss(Theta2))`, where `m`
    is the size of the first dimension of `y`.
    """
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_pred)
    data_term = tf.reduce_mean(per_example)

    # L2 penalty over both weight matrices (the bias is not penalised).
    weight_penalty = tf.nn.l2_loss(self.Theta1) + tf.nn.l2_loss(self.Theta2)
    num_rows = tf.cast(tf.shape(y)[0], tf.float32)
    penalty_term = (self.lam / num_rows) * weight_penalty

    return data_term + penalty_term

In [18]:
# Set training parameters
epochs = 1000
learning_rate = 3.0
losses = []

mlmodel = MLModel()

# Full-batch gradient descent: every epoch uses the whole training set and
# applies a plain `v -= learning_rate * grad` update to each model variable.
# NOTE(review): the recorded run plateaus near 0.693 (about ln 2); the cause
# is not established here - the learning rate is one thing worth checking.
for epoch in range(epochs):
    with tf.GradientTape() as tape:
        batch_loss = mlmodel.loss(mlmodel(train_x), train_y)
    # Update parameters with respect to the gradient calculations
    grads = tape.gradient(batch_loss, mlmodel.variables)
    for g, v in zip(grads, mlmodel.variables):
        v.assign_sub(learning_rate * g)

    # Keep track of model loss per epoch. The loss is re-evaluated after the
    # update so the recorded value reflects the new parameters.
    loss = mlmodel.loss(mlmodel(train_x), train_y)
    losses.append(loss)
    if epoch % 100 == 0:
        # The tracked quantity is sigmoid cross-entropy plus the L2 penalty,
        # not a mean squared error, so label it accordingly.
        print(f'Regularized cross-entropy loss for step {epoch}: {loss.numpy():0.3f}')

Mean squared error for step 0: 1.419
Mean squared error for step 100: 0.693
Mean squared error for step 200: 0.692
Mean squared error for step 300: 0.692
Mean squared error for step 400: 0.692
Mean squared error for step 500: 0.692
Mean squared error for step 600: 0.692
Mean squared error for step 700: 0.691
Mean squared error for step 800: 0.691
Mean squared error for step 900: 0.691


In [19]:
# The model emits logits, so apply a sigmoid before scoring against the labels.
predicted_probs = tf.nn.sigmoid(mlmodel(train_x)).numpy()
actual_labels = train_y.numpy()
# Keep the call as the last expression so the cell displays its return value.
mlutils.evaluate(ppl.onehot(predicted_probs), ppl.onehot(actual_labels), threshold=0.5)

Won : 699.0
Lost : 678.0
Total : 1377.0
Diff : 21.0
Edge : 1.5250544662309369%
Information Coefficient : 0.015250563621520996


0.5076253