## Model : marketdirection
### Description :
This model uses a Tensorflow neural network to predict the direction of a market in the next Y periods, based on the values of the previous X periods. 

### Model Attributes :
- FFNN
- Boosting
- Re-training of entire network for each additional period

### USP :
- Normalised market data (between 0 and 1) to highlight common patterns at any time scale.
- Utilises similar markets to increase size of training set


In [2]:
MODEL_ID = "3a491b1a-8af6-416d-aa14-f812cbd660bb"

MARKET1 = "DOW"
MARKET2 = "SPY"

PIPELINE_ID = "marketdirection"

In [7]:
#
# Get dataset from MI API #
#

import pandas
import sys
import gc

!pip install --upgrade git+https://github.com/cwilko/quantutils.git
import quantutils.dataset.pipeline as ppl
from quantutils.api.bluemix import ObjectStore, Metrics
from quantutils.api.marketinsights import MarketInsights

metrics = Metrics('cred/metrics_cred.json')
mi = MarketInsights('cred/MIOapi_cred.json')
objStore = ObjectStore('cred/object_storage_cred.json')

CONFIG = mi.get_model(MODEL_ID)

mkt1 = mi.get_dataset(MARKET1, PIPELINE_ID)
mkt2 = mi.get_dataset(MARKET2, PIPELINE_ID)

# Interleave (part of the "added insight" for this model)
mkt1, mkt2, isect = ppl.intersect(mkt1,mkt2)
dataset = ppl.interleave(mkt1,mkt2)

Collecting git+https://github.com/cwilko/quantutils.git
  Cloning https://github.com/cwilko/quantutils.git to /gpfs/global_fs01/sym_shared/YPProdSpark/user/sc1c-81b7dbb381fb6a-c4b9ad2fa578/notebook/tmp/pip-S6Z3sr-build
Installing collected packages: quantutils
  Found existing installation: quantutils 1.0.0
    Uninstalling quantutils-1.0.0:
      Successfully uninstalled quantutils-1.0.0
  Running setup.py install for quantutils ... [?25ldone
[?25hSuccessfully installed quantutils-1.0.0


In [4]:
testSetLength = 430
training_set = dataset[:-(testSetLength)]
test_set = dataset[-(testSetLength):]

In [5]:
import numpy as np
import os

# TODO : Pull out of pipeline config?
##### Specific to the data ##
NUM_FEATURES = (2 * 4) + 1
NUM_LABELS = 2
#############################

In [8]:
from quanutils.model.ml import Model
import quantutils.mode.utils as mlutils



ImportError: No module named quanutils.model.ml

In [None]:
##
## BOOTSTRAP/BOOSTING TRAINING WITH LOO
##

print "Training",
predictions = np.array([]).reshape(0,2)
bstrapTrainingSet = training_set
threshold = .0
_, test_y = ppl.splitCol(test_set, NUM_FEATURES)
initialTestValue = 0

bstrapTrainingSet = bstrapTrainingSet.append(pandas.DataFrame(test_set.values[:initialTestValue,:]))

try:
    for i in range(initialTestValue,len(test_set),2):

        test_rows = pandas.DataFrame(test_set.values[[i, i+1],:])
        success = False
        retry = 0
        while ((~success) & (retry<TRN_CNF['training_retries'])):
            try:
                ## CHOOSE BOOTSTRAP OR BOOST
                results = boostingTrain(bstrapTrainingSet, test_rows, TRN_CNF['lamda'], TRN_CNF['iterations'], CONFIG['debug'])
                #results = bootstrapTrain(bstrapTrainingSet, test_rows, TRN_CNF['lamda'], TRN_CNF['iterations'], CONFIG['debug'])
                predictions =  np.concatenate([predictions, np.nanmean(results["test_predictions"], axis=0)])    
                success = True
            except ValueError: 
                print "Value error"
                #log.emit_log( {'app_name': 'Experiment2','type': 'error','message': "ValueError - Retrying..."})
                retry = retry + 1
        
        if (~success):
            # TODO : Log this
            print "Failed to train after several retries"
            break
            
        bstrapTrainingSet = bstrapTrainingSet.append(test_rows)
        
        if (TRN_CNF['fixed_training_set_size']):
            # Window
            bstrapTrainingSet = bstrapTrainingSet[-len(training_set):]

        res = evaluate(predictions, test_y[initialTestValue:initialTestValue+len(predictions),:], threshold)
        msg = str("Results after %d iterations, %.2f precision, %.2f recall at %.2f threshold" % (i+2, res[0], res[1], threshold))
        print "."
        print msg

        #log.emit_log( {'app_name': 'Experiment2','type': 'result','message': msg})
        metrics.send([{'name':'MI.precision', 'value':res[0].tolist()},{'name':'MI.recall', 'value':res[1].tolist()}])

        pandas.DataFrame(predictions).to_csv("results.csv", header=False, index=False)
        objStore.put_file('Experiment2', "results2.csv", "results2.csv")

        # Try to free memory
        gc.collect()
except:
    print("Unexpected error: %s" % sys.exc_info()[0])
    #log.emit_log( {'app_name': 'Experiment2','type': 'error','message': str("Unexpected error: %s" % sys.exc_info()[0])})
    raise
    

In [250]:
##
## BOOTSTRAP TRAINING
##

print "Training",
_, test_y = ppl.splitCol(test_set, NUM_FEATURES)
results = bootstrapTrain(training_set, test_set, TRN_CNF['lamda'], TRN_CNF['iterations'], TRN_CNF['threshold'], True)
predictions2 =  np.nanmean(results["test_predictions"], axis=0)
evaluate(predictions2, test_y, .0)

Training . . . Iterations : 1 Lambda : 0.00, Threshold : 0.00
Training loss : 0.21+/-0.00, precision : 0.91+/-0.00, recall : 1.00+/-0.00, F : 0.96+/-0.00
Validation loss : 2.85+/-0.00, precision : 0.58+/-0.00, recall : 1.00+/-0.00, F : 0.73+/-0.00
Test loss : 3.25+/-0.00, precision : 0.56+/-0.00, recall : 1.00+/-0.00, F : 0.72+/-0.00
Iteration : 0 Lambda : 0.00, Threshold : 0.00
Training loss : 0.21+/-0.00, precision : 0.91+/-0.00, recall : 1.00+/-0.00, F : 0.96+/-0.00
Validation loss : 2.85+/-0.00, precision : 0.58+/-0.00, recall : 1.00+/-0.00, F : 0.73+/-0.00
Test loss : 3.25+/-0.00, precision : 0.56+/-0.00, recall : 1.00+/-0.00, F : 0.72+/-0.00


(0.5604651, 1.0, 0.71833083399477315)

In [258]:
##
## BOOTSTRAP TRAINING
##

print "Training",
_, test_y = ppl.splitCol(test_set, NUM_FEATURES)
results = bootstrapTrain(training_set, test_set, TRN_CNF['lamda'], TRN_CNF['iterations'], TRN_CNF['threshold'], True)
predictions2 =  np.nanmean(results["test_predictions"], axis=0)
evaluate(predictions2, test_y, .0)

Training . . . Iterations : 1 Lambda : 0.00, Threshold : 0.00
Training loss : 0.35+/-0.00, precision : 0.85+/-0.00, recall : 1.00+/-0.00, F : 0.92+/-0.00
Validation loss : 1.45+/-0.00, precision : 0.56+/-0.00, recall : 1.00+/-0.00, F : 0.72+/-0.00
Test loss : 1.70+/-0.00, precision : 0.52+/-0.00, recall : 1.00+/-0.00, F : 0.68+/-0.00
Iteration : 0 Lambda : 0.00, Threshold : 0.00
Training loss : 0.35+/-0.00, precision : 0.85+/-0.00, recall : 1.00+/-0.00, F : 0.92+/-0.00
Validation loss : 1.45+/-0.00, precision : 0.56+/-0.00, recall : 1.00+/-0.00, F : 0.72+/-0.00
Test loss : 1.70+/-0.00, precision : 0.52+/-0.00, recall : 1.00+/-0.00, F : 0.68+/-0.00


(0.51860464, 1.0, 0.68300151841054135)

In [194]:
t1 = Theta1.eval()
t2 = Theta2.eval()

In [210]:
range(0, 1)

[0]