## Model : marketdirection
### Description :
This model uses a TensorFlow neural network to predict the direction of a market over the next Y periods, based on the values of the previous X periods.

### Model Attributes :
- FFNN
- Boosting
- Re-training of entire network for each additional period

### USP :
- Normalised market data (between 0 and 1) to highlight common patterns at any time scale.
- Utilises similar markets to increase the size of the training set


In [3]:
# Identifier used to look up the model configuration via the MarketInsights API
MODEL_ID = "a80a294f-12f8-4b27-8eac-9aad9a53ec8b"

# Name of the dataset pipeline both markets are fetched through
PIPELINE_ID = "marketdirection"

# The two markets whose datasets are combined for training
MARKET1, MARKET2 = "DOW", "SPY"

# TODO : Pull out of pipeline config?
# --- Specific to the data ---
# Column counts of a dataset row: NUM_FEATURES is the column at which rows
# are split into features and labels.
# NOTE(review): the 2 * 4 + 1 breakdown is presumably 2 markets x 4 values
# plus one extra feature -- confirm against the pipeline definition.
NUM_LABELS = 2
NUM_FEATURES = 2 * 4 + 1
# ----------------------------

In [7]:
#
# Get dataset from MI API #
#

import pandas
import sys
import gc
import uuid
import numpy as np

#!pip install --upgrade git+https://github.com/cwilko/quantutils.git

import quantutils.dataset.pipeline as ppl
from quantutils.api.bluemix import ObjectStore, Metrics, Logger
from quantutils.api.marketinsights import MarketInsights
import quantutils.model.utils as mlutils
from quantutils.model.ml import Model

# Service clients used throughout the notebook. Each is constructed with the
# path to a credentials file under cred/ (file contents are not visible here).
# metrics  : progress statistics are pushed through metrics.send(...)
# mi       : MarketInsights API, source of the model config and the datasets
# objStore : object storage that result CSV files are uploaded to
# log      : logger registered under the name 'MarketInsights-ML'
metrics = Metrics('cred/metrics_cred.json')
mi = MarketInsights('cred/MIOapi_cred.json')
objStore = ObjectStore('cred/object_storage_cred.json')
log = Logger('MarketInsights-ML','cred/logging_cred.json')

# Logging helper function
def tag(x, y):
    """Return the log-message prefix "(<x>:<n>) ", where n is (y+2)/2.

    x must already be a string; y is a number (the loop's row offset when
    called from the training cell).
    """
    return "(" + x + ":" + str((y + 2) / 2) + ") "

# Model configuration; its 'training' section holds the training parameters
CONFIG = mi.get_model(MODEL_ID)
TRN_CNF = CONFIG['training']

# Fetch both markets through the same pipeline (MARKET1 first, then MARKET2),
# intersect them, then interleave the results into a single dataset
# (part of the "added insight" for this model).
# NOTE(review): isect is presumably the index shared by both markets -- confirm
# against ppl.intersect.
mkt1, mkt2, isect = ppl.intersect(
    mi.get_dataset(MARKET1, PIPELINE_ID),
    mi.get_dataset(MARKET2, PIPELINE_ID))
dataset = ppl.interleave(mkt1, mkt2)

# Hold out the last testSetLength rows for testing; train on everything before
testSetLength = 430
training_set, test_set = dataset[:-testSetLength], dataset[-testSetLength:]

# Create ML model
ffnn = Model(NUM_FEATURES, NUM_LABELS, CONFIG)

print("Done")

In [8]:
##
## BOOTSTRAP/BOOSTING TRAINING WITH LOO
##
# Walks the test set two rows at a time (one row per market, since the dataset
# is interleaved). For each pair: train on everything seen so far, predict the
# pair, record the prediction, then fold the pair into the training set.
# Progress statistics and a CSV backup of all predictions are emitted after
# every pair.

# Offset of the first test row to process; raise it (keeping it even) to resume
# a partially completed run. Must be assigned before its first use below.
initialTestValue = 0

predictions = np.array([]).reshape(0,2)
threshold = .0
_, test_y = ppl.splitCol(test_set, NUM_FEATURES)

# Start from the training set plus any test rows that precede the start offset
bstrapTrainingSet = training_set.append(pandas.DataFrame(test_set.values[:initialTestValue,:]))

# Train thread id
train_id = str(uuid.uuid1())[:8]
#train_id = "0b4045ec"

# One result timestamp per pair of test rows. Floor division keeps these
# integers regardless of interpreter version.
resultIndex = isect[-(testSetLength // 2):]
resultOffset = initialTestValue // 2

log.info("".join(["(", train_id, ")", " Training model: ", CONFIG['model_desc'], "(",MODEL_ID,") , Pipeline: ", PIPELINE_ID]))

# Bind i up front so the error handler below can always tag its message, even
# if the failure happens before the first iteration.
i = initialTestValue

try:

    for i in range(initialTestValue, len(test_set), 2):
        # Written without a newline so the following "." / "Value error" lands
        # on the same line.
        sys.stdout.write("Training ")
        test_rows = pandas.DataFrame(test_set.values[[i, i+1],:])
        success = False
        retry = 0
        while not success and retry < TRN_CNF['training_retries']:
            try:
                ## CHOOSE BOOTSTRAP OR BOOST
                results = mlutils.boostingTrain(ffnn, bstrapTrainingSet, test_rows, TRN_CNF['lamda'], TRN_CNF['iterations'], CONFIG['debug'])
                #results = mlutils.bootstrapTrain(ffnn, bstrapTrainingSet, test_rows, TRN_CNF['lamda'], TRN_CNF['iterations'], TRN_CNF['threshold'], CONFIG['debug'])
                # Average the per-iteration predictions, ignoring NaNs
                prediction = np.nanmean(results["test_predictions"], axis=0)
                predictions = np.concatenate([predictions, prediction])
                success = True
            except ValueError:
                print("Value error")
                log.error("".join([tag(train_id, i), "ValueError - Retrying..."]))
                retry = retry + 1

        if not success:
            log.error("Failed to train after several retries")
            break

        print(".")
        bstrapTrainingSet = bstrapTrainingSet.append(test_rows)

        if TRN_CNF['fixed_training_set_size']:
            # Window: drop the oldest rows so the training set keeps its original size
            bstrapTrainingSet = bstrapTrainingSet[-len(training_set):]

        # Extract predictions and store them
        slot = i // 2
        p1, p2 = [pandas.DataFrame([mkt], index=resultIndex[slot:slot+1]) for mkt in prediction]

        timestamp = resultIndex[slot].isoformat()
        log.debug("".join([tag(train_id, i), timestamp, " ", MARKET1, ": ", str(p1.values[0])]))
        log.debug("".join([tag(train_id, i), timestamp, " ", MARKET2, ": ", str(p2.values[0])]))

        # Progress statistics
        res = mlutils.evaluate(predictions, test_y[initialTestValue:initialTestValue+len(predictions),:], threshold)
        log.info("".join([tag(train_id, i), str("Results after %d iterations, %.2f precision, %.2f recall at %.2f threshold" % (i+2, res[0], res[1], threshold))]))
        metrics.send([{'name':'MI.precision', 'value':res[0].tolist()},{'name':'MI.recall', 'value':res[1].tolist()}])

        # Backup predictions to filestore (deprecated)
        for x, mkt in enumerate(ppl.deinterleave(pandas.DataFrame(predictions)), 1):
            # Slice resultIndex from the front: a negative start with a computed
            # stop becomes [-n:0] (empty) once every result is in, which broke
            # the final iteration. resultOffset lines the rows up with their
            # timestamps when the run starts part-way through the test set.
            mkt.index = resultIndex[resultOffset:resultOffset+len(mkt)]
            mkt.to_csv("results.csv", header=False)
            objStore.put_file('Experiment2', "results.csv", "".join([MODEL_ID, "_", str(x), ".csv"]) )

        # Try to free memory
        gc.collect()
except:
    # Deliberately catches everything (including interrupts): the error is
    # logged against the current iteration and then re-raised unchanged.
    log.error("".join([tag(train_id, i), str("Unexpected error: %s" % sys.exc_info()[0])]))
    raise
    