## Model : marketdirection
### Description :
This model uses a TensorFlow feed-forward neural network to predict the direction of a market over the next Y periods, based on its values over the previous X periods.

### Model Attributes :
- Feed-forward neural network (FFNN)
- Boosting
- Re-training of the entire network for each additional period

### USP :
- Normalises market data (to between 0 and 1) to highlight common patterns at any time scale.
- Utilises similar markets to increase the size of the training set.


In [20]:
#
# Get dataset from MI API #
#

import pandas

!pip install git+https://github.com/cwilko/quantutils.git
import quantutils.dataset.pipeline as ppl
from quantutils.cloud.bluemix import ObjectStore, MarketInsights

# Client for the MI API, constructed from the path of a credentials file.
mi = MarketInsights('cred/MIOapi_cred.json')

# Fetch the "marketdirection" dataset for each market and pass the response
# through jsontocsv.
# NOTE(review): presumably jsontocsv yields a pandas DataFrame indexed by
# period -- confirm against quantutils.
dow = mi.jsontocsv(mi.get_dataset("DOW", "marketdirection"))
spy = mi.jsontocsv(mi.get_dataset("SPY", "marketdirection"))

# Interleave (part of the "added insight" for this model): stack both markets,
# sort the combined rows by index, then move the index into an ordinary column
# (drop=False keeps it rather than discarding it).
# NOTE(review): split() slices the feature columns positionally starting at
# column 0, so the retained index column would be counted as a feature --
# confirm that drop=False is intended.
dataset = pandas.concat([spy,dow]).sort_index().reset_index(drop=False)


In [36]:
# LEGACY - Load from objectstore #
import pandas 
import json
import gc
import sys
from bluemix import *

# Object-store client, constructed from the path of a credentials file.
objStore = ObjectStore('object_storage_cred.json')
# The logging client is disabled here, yet the LOO training cell further down
# still calls log.emit_log(...).
# NOTE(review): `log` must therefore come from somewhere else (possibly
# `from bluemix import *`) -- confirm, otherwise that cell raises NameError.
#logging_cred = json.load(open('logging_cred.json'))
#log = get_logging_client(logging_cred)

# Overwrites any `dataset` loaded by the MI API cell. header=None means the
# CSV has no header row, so the columns are labelled 0..n-1.
dataset = pandas.read_csv(objStore.get_file('Experiment2', 'Experiment2_zero.csv'), header=None)


In [None]:
# Hold out the final `testSetLength` rows (by position) as the test set;
# every row before them is available for training.
testSetLength = 430
test_set = dataset.iloc[-testSetLength:]
training_set = dataset.iloc[:-testSetLength]

In [4]:

import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt

%matplotlib inline

##### Specific to the data ##
# Width of one input row and of one label row; split() uses NUM_FEATURES as
# the column at which features end and labels begin.
# NOTE(review): the (2 * 4) + 1 breakdown is not explained anywhere in this
# notebook -- document what the nine inputs are.
NUM_FEATURES = (2 * 4) + 1
NUM_LABELS = 2
#############################


##### Define the architecture
# Number of units in the single hidden layer.
HIDDEN_UNITS = 32

# Intended as the random seed for initialisation, but nothing in this cell
# passes it to an initialiser, so it currently has no effect.
# NOTE(review): train() re-initialises the variables and retries until the
# training loss is low enough, which relies on every initialisation being
# different -- wiring SEED into the initialisers would need care.
SEED = 42

# Graph inputs: a batch of feature rows, the matching label rows, and the
# regularisation strength. The leading dimension is None, so any batch size
# can be fed.
train_data_node = tf.placeholder(tf.float32, shape=(None, NUM_FEATURES))
train_labels_node = tf.placeholder(tf.float32, shape=(None, NUM_LABELS))
lam = tf.placeholder(tf.float32)

# The variables below hold all the trainable weights. For each, the
# argument defines how the variable will be initialised.
# TODO : These should be pulled from a config file

# Hidden-layer weights, stored as (units, inputs); model() transposes them
# before multiplying. There is no hidden-layer bias variable.
Theta1 = tf.Variable( tf.truncated_normal([HIDDEN_UNITS, (NUM_FEATURES)], stddev=0.1))

# Output-layer weights, stored as (labels, units), plus one bias per label.
Theta2 = tf.Variable( tf.truncated_normal([NUM_LABELS, HIDDEN_UNITS],stddev=0.1))
bias2 = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

print('Done')

Done


In [5]:
def model(X, Theta1, Theta2, bias):
    """Build the forward pass of the one-hidden-layer network.

    The hidden activations are ``sigmoid(X . Theta1^T)`` (no hidden bias is
    added); the returned tensor is ``hidden . Theta2^T + bias``, i.e. the
    output layer *before* any squashing is applied.

    Args:
        X: input tensor whose last dimension matches Theta1's second one.
        Theta1: hidden-layer weights, transposed here before multiplying.
        Theta2: output-layer weights, transposed here before multiplying.
        bias: bias added to the output layer via ``tf.nn.bias_add``.

    Returns:
        The un-activated output tensor (logits).
    """
    hidden_input = tf.matmul(X, tf.transpose(Theta1))
    hidden = tf.nn.sigmoid(hidden_input)

    weighted_output = tf.matmul(hidden, tf.transpose(Theta2))
    return tf.nn.bias_add(weighted_output, bias)

print('Done')

Done


In [6]:
# Logits for whatever batch is fed into train_data_node.
yhat = model(train_data_node, Theta1, Theta2, bias2)

# Data term: element-wise sigmoid cross-entropy between labels and logits,
# averaged over every element.
loss =  tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=train_labels_node, logits=yhat))
# L2 penalty on both weight matrices (bias2 is not penalised), scaled by
# lam / (number of rows in the fed label batch).
regularizer = tf.nn.l2_loss(Theta1) + tf.nn.l2_loss(Theta2)
reg = (lam / tf.to_float(tf.shape(train_labels_node)[0])) * regularizer
loss_reg = loss + reg

# Optimizer:

# SciPy-backed optimiser minimising the regularised loss, capped at 4000
# iterations. No `method` is passed, so the interface's default applies
# (L-BFGS-B according to the tf.contrib.opt documentation -- this is not plain
# gradient descent; the gradient-descent alternative is left commented out).
optimizer = tf.contrib.opt.ScipyOptimizerInterface(loss_reg, options={'maxiter':4000})
#update_weights = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

# Predictions: an independent sigmoid per output, so the two values in a row
# need not sum to 1.
train_prediction = tf.sigmoid(yhat)

print('Done')

Done


In [7]:
# History of whatever the optimiser hands to step_callback, so it can be
# charted later. NOTE(review): per the ScipyOptimizerInterface docs the step
# callback receives the flattened variable values, not the loss -- confirm
# before plotting J as a loss curve.
J = []

def loss_callback(*fetched):
    """Announce that the optimiser has re-evaluated the loss.

    Accepts and ignores any positional arguments: ScipyOptimizerInterface
    calls its ``loss_callback`` with the evaluated ``fetches``, so a
    zero-argument function would fail as soon as it was wired in with a
    non-empty ``fetches`` list (see the commented-out call in ``minimize``).
    """
    print("Recalc...")

def step_callback(params):
    """Append the value passed by the optimiser for this step to ``J``."""
    J.append(params)
    
def split(data, num_features):
    """Cut a frame into (features, labels) at column ``num_features``.

    Args:
        data: object exposing ``.values`` as a 2-D array.
        num_features: number of leading columns that are features.

    Returns:
        Tuple of two arrays: the first ``num_features`` columns and all the
        remaining columns.
    """
    matrix = data.values
    features = matrix[:, :num_features]
    labels = matrix[:, num_features:]
    return features, labels
    
def sample(training_set, method="RANDOM", prop=.9, loo=0, boost=None):
    """Split ``training_set`` into a (train, validation) pair.

    Args:
        training_set: pandas DataFrame of examples, one per row.
        method: one of
            "RANDOM"    - shuffle, then put the first ``prop`` share of rows
                          in train and the remainder in validation;
            "LOO"       - validation is the single row at position ``loo``,
                          train is every other row;
            "BOOTSTRAP" - train is len(training_set) rows drawn uniformly
                          with replacement, validation is the rows never
                          drawn (out-of-bag);
            "BOOSTING"  - as "BOOTSTRAP", but rows are drawn with the
                          probabilities given in ``boost``.
        prop: train share, used by "RANDOM" only.
        loo: held-out row position, used by "LOO" only.
        boost: per-row draw probabilities for "BOOSTING". ``None`` or an
            empty sequence means a uniform draw.

    Returns:
        A two-element sequence (train, validation). For the resampling
        methods the train frame is rebuilt from raw values, so it carries
        default integer column labels.

    Raises:
        ValueError: if ``method`` is not one of the names above (previously
            this silently returned ``None``).
    """
    n_rows = len(training_set)
    positions = np.arange(n_rows)

    if method == "RANDOM":
        shuffled = training_set.sample(frac=1).reset_index(drop=True)
        # Fractional position of each row in the shuffled frame, in [0, 1).
        fraction = positions / float(n_rows)
        return [shuffled[fraction < prop], shuffled[fraction >= prop]]

    if method == "LOO":
        return [training_set[positions != loo], training_set[positions == loo]]

    if method in ("BOOTSTRAP", "BOOSTING"):
        weights = None
        if method == "BOOSTING" and boost is not None and len(boost) > 0:
            weights = boost
        drawn = np.random.choice(positions, n_rows, replace=True, p=weights)
        # Out-of-bag rows are those whose position was never drawn.
        out_of_bag = np.ones(n_rows, dtype=bool)
        out_of_bag[drawn] = False
        return pandas.DataFrame(training_set.values[drawn, :]), training_set[out_of_bag]

    raise ValueError("Unknown sampling method: %r" % (method,))
    
def minimize(feed_dict, train=True):
    """Optionally run the optimiser, then evaluate the network on ``feed_dict``.

    Args:
        feed_dict: placeholder-to-value mapping fed to the graph.
        train: when true, run ``optimizer.minimize`` on ``feed_dict`` first;
            when false, only evaluate.

    Returns:
        Tuple ``(loss, predictions)``: the un-regularised ``loss`` tensor and
        the ``train_prediction`` tensor, both evaluated on ``feed_dict``.
    """
    # Variant that also fetches the regularised loss and reports progress:
    #optimizer.minimize(feed_dict=feed_dict, fetches=[loss_reg], loss_callback=loss_callback)
    if train:
        optimizer.minimize(feed_dict=feed_dict)

    current_loss = loss.eval(feed_dict)
    current_predictions = train_prediction.eval(feed_dict)
    return current_loss, current_predictions

def evaluate(predictions, data_y, threshold):
    """Score class predictions, counting only sufficiently confident rows.

    A row is "selected" when at least one of its predicted values exceeds
    ``threshold``. The predicted and actual classes are the arg-max of the
    respective rows.

    Args:
        predictions: 2-D array of per-class scores, one row per example.
        data_y: 2-D array of labels with the same number of rows.
        threshold: confidence a row must exceed to be selected.

    Returns:
        Tuple ``(precision, recall, F_score, predictions)`` where
        - precision is the share of selected rows classified correctly,
        - recall is the share of all rows that were selected (the "number of
          days traded" measure, not classical recall),
        - F_score is the harmonic mean of the two,
        - predictions is the input array, returned unchanged.
        When no row is selected, precision and F_score are NaN (and recall is
        0, or NaN if there are no rows at all); this is returned directly
        instead of being produced by a 0/0 division that emits a warning.
    """
    # Compute the selection mask once and reuse it for both class vectors.
    confident = (predictions > threshold).any(axis=1)
    predicted_class = np.argmax(predictions, axis=1)[confident]
    actual_class = np.argmax(data_y, axis=1)[confident]

    n_selected = actual_class.shape[0]
    n_total = data_y.shape[0]

    if n_selected == 0:
        undefined = np.float32('nan')
        recall = np.float32(0.0) if n_total > 0 else undefined
        return undefined, recall, undefined, predictions

    n_correct = np.sum(predicted_class == actual_class)
    precision = np.float32(n_correct / np.float32(n_selected))
    recall = np.float32(n_selected) / n_total
    # recall > 0 here, so the denominator cannot be zero.
    F_score = (2.0 * precision * recall) / (precision + recall)
    return precision, recall, F_score, predictions

def predict(data_X, data_y, lam1, threshold):
    """Evaluate the current network on one data set without training it.

    Args:
        data_X: feature rows fed to ``train_data_node``.
        data_y: label rows fed to ``train_labels_node``.
        lam1: value fed to the ``lam`` placeholder.
        threshold: confidence threshold forwarded to ``evaluate``.

    Returns:
        Tuple ``(loss, precision, recall, F_score, predictions)``.
    """
    feed = {train_data_node: data_X, train_labels_node: data_y, lam: lam1}
    batch_loss, raw_predictions = minimize(feed, train=False)
    precision, recall, F_score, scored = evaluate(raw_predictions, data_y, threshold)
    return batch_loss, precision, recall, F_score, scored
    

def train(train_dict, val_dict, test_dict, threshold, iterations=50, debug=True):
    
    tf.logging.set_verbosity(tf.logging.ERROR)
    
    metrics = {
        "train_loss":[],
        "train_precision":[],
        "train_recall":[],
        "train_f":[],
        "val_loss":[],
        "val_precision":[],
        "val_recall":[],
        "val_f":[],
        "test_loss":[],
        "test_precision":[],
        "test_recall":[],
        "test_f":[],
        "test_predictions":[]
    }
    
    for i in range(0,iterations):
        
        for j in range(0, 50):
            
            # Create a new interactive session that we'll use in
            # subsequent code cells.
            s = tf.InteractiveSession()
            s.as_default()

            # Initialize all the variables we defined above.
            tf.initialize_all_variables().run()

            minimize(train_dict)
            train_loss, train_precision, train_recall, train_f, _ = predict(train_dict[train_data_node], train_dict[train_labels_node], train_dict[lam], threshold)

            if (train_loss < .65):
                print ".",
                metrics["train_loss"].append(train_loss)
                metrics["train_precision"].append(train_precision)
                metrics["train_recall"].append(train_recall)
                metrics["train_f"].append(train_f)

                val_loss, val_precision, val_recall, val_f, _= predict(val_dict[train_data_node], val_dict[train_labels_node], val_dict[lam], threshold)

                metrics["val_loss"].append(val_loss)
                metrics["val_precision"].append(val_precision)
                metrics["val_recall"].append(val_recall)
                metrics["val_f"].append(val_f)
                
                test_loss, test_precision, test_recall, test_f, test_predictions = predict(test_dict[train_data_node], test_dict[train_labels_node], test_dict[lam], threshold)

                metrics["test_loss"].append(test_loss)
                metrics["test_precision"].append(test_precision)
                metrics["test_recall"].append(test_recall)
                metrics["test_f"].append(test_f)
                metrics["test_predictions"] = test_predictions # Return the last set of predictions (could return the one with the best val score)
                del s
                break;
            else:
                del s
        
        if (j >= 50):
            print("ERROR : Failed to minimise function")
            
    results = {
        "train_loss": {"mean":np.nanmean(metrics["train_loss"]), "std":np.nanstd(metrics["train_loss"]), "values":metrics["train_loss"]},
        "train_precision": {"mean":np.nanmean(metrics["train_precision"]), "std":np.nanstd(metrics["train_precision"]), "values":metrics["train_precision"]},
        "train_recall": {"mean":np.nanmean(metrics["train_recall"]), "std":np.nanstd(metrics["train_recall"]), "values":metrics["train_recall"]},
        "train_f": {"mean":np.nanmean(metrics["train_f"]), "std":np.nanstd(metrics["train_f"]), "values":metrics["train_f"]},
        "val_loss": {"mean":np.nanmean(metrics["val_loss"]), "std":np.nanstd(metrics["val_loss"]), "values":metrics["val_loss"]},
        "val_precision":{"mean":np.nanmean(metrics["val_precision"]), "std":np.nanstd(metrics["val_precision"]), "values":metrics["val_precision"]},
        "val_recall": {"mean":np.nanmean(metrics["val_recall"]), "std":np.nanstd(metrics["val_recall"]), "values":metrics["val_recall"]},
        "val_f": {"mean":np.nanmean(metrics["val_f"]), "std":np.nanstd(metrics["val_f"]), "values":metrics["val_f"]},
        "test_loss": {"mean":np.nanmean(metrics["test_loss"]), "std":np.nanstd(metrics["test_loss"]), "values":metrics["test_loss"]},
        "test_precision":{"mean":np.nanmean(metrics["test_precision"]), "std":np.nanstd(metrics["test_precision"]), "values":metrics["test_precision"]},
        "test_recall": {"mean":np.nanmean(metrics["test_recall"]), "std":np.nanstd(metrics["test_recall"]), "values":metrics["test_recall"]},
        "test_f": {"mean":np.nanmean(metrics["test_f"]), "std":np.nanstd(metrics["test_f"]), "values":metrics["test_f"]},
        "test_predictions": metrics["test_predictions"],
    }
    
    print ".",
    if debug:
        print("Iterations : %d Lambda : %.2f, Threshold : %.2f" % (iterations, val_dict[lam], threshold))
        print("Training loss : %.2f+/-%.2f, precision : %.2f+/-%.2f, recall : %.2f+/-%.2f, F : %.2f+/-%.2f" % 
              (results["train_loss"]["mean"], results["train_loss"]["std"],
               results["train_precision"]["mean"], results["train_precision"]["std"],
               results["train_recall"]["mean"], results["train_recall"]["std"],
               results["train_f"]["mean"], results["train_f"]["std"]))
        print("Validation loss : %.2f+/-%.2f, precision : %.2f+/-%.2f, recall : %.2f+/-%.2f, F : %.2f+/-%.2f" % 
              (results["val_loss"]["mean"], results["val_loss"]["std"],
               results["val_precision"]["mean"], results["val_precision"]["std"],
               results["val_recall"]["mean"], results["val_recall"]["std"],
               results["val_f"]["mean"], results["val_f"]["std"]))
        print("Test loss : %.2f+/-%.2f, precision : %.2f+/-%.2f, recall : %.2f+/-%.2f, F : %.2f+/-%.2f" % 
              (results["test_loss"]["mean"], results["test_loss"]["std"],
               results["test_precision"]["mean"], results["test_precision"]["std"],
               results["test_recall"]["mean"], results["test_recall"]["std"],
               results["test_f"]["mean"], results["test_f"]["std"]))

    return results

In [8]:
### 
### CROSS-VAL RANDOM SAMPLING
###

def bootstrapTrain(training_set, test_set, lamda, iterations, threshold, debug=False):

    metrics = {
        "train_loss":[],
        "train_precision":[],
        "train_recall":[],
        "train_f":[],
        "val_loss":[],
        "val_precision":[],
        "val_recall":[],
        "val_f":[],
        "test_loss":[],
        "test_precision":[],
        "test_recall":[],
        "test_f":[],
        "test_predictions":[]
    }
    
    test_X, test_y = split(test_set, NUM_FEATURES)

    for i in range(0, iterations):
        
        print ".",

        train_sample, val_sample = sample(training_set, method="BOOTSTRAP", loo=i)

        train_sample_X, train_sample_y = split(train_sample, NUM_FEATURES)
        val_sample_X, val_sample_y = split(val_sample, NUM_FEATURES)        

        results = train({train_data_node: train_sample_X, train_labels_node: train_sample_y, lam: lamda}, {train_data_node: val_sample_X, train_labels_node: val_sample_y, lam: lamda}, {train_data_node: test_X, train_labels_node: test_y, lam: lamda}, threshold, 1, False)

        metrics["train_loss"].append(results["train_loss"]["mean"])
        metrics["train_precision"].append(results["train_precision"]["mean"])
        metrics["train_recall"].append(results["train_recall"]["mean"])
        metrics["train_f"].append(results["train_f"]["mean"])
        metrics["val_loss"].append(results["val_loss"]["mean"])
        metrics["val_precision"].append(results["val_precision"]["mean"])
        metrics["val_recall"].append(results["val_recall"]["mean"])
        metrics["val_f"].append(results["val_f"]["mean"])
        metrics["test_loss"].append(results["test_loss"]["mean"])
        metrics["test_precision"].append(results["test_precision"]["mean"])
        metrics["test_recall"].append(results["test_recall"]["mean"])
        metrics["test_f"].append(results["test_f"]["mean"])
        metrics["test_predictions"].append(results["test_predictions"])  


    results = {
        "train_loss": {"mean":np.nanmean(metrics["train_loss"]), "std":np.nanstd(metrics["train_loss"]), "values":metrics["train_loss"]},
        "train_precision": {"mean":np.nanmean(metrics["train_precision"]), "std":np.nanstd(metrics["train_precision"]), "values":metrics["train_precision"]},
        "train_recall": {"mean":np.nanmean(metrics["train_recall"]), "std":np.nanstd(metrics["train_recall"]), "values":metrics["train_recall"]},
        "train_f": {"mean":np.nanmean(metrics["train_f"]), "std":np.nanstd(metrics["train_f"]), "values":metrics["train_f"]},
        "val_loss": {"mean":np.nanmean(metrics["val_loss"]), "std":np.nanstd(metrics["val_loss"]), "values":metrics["val_loss"]},
        "val_precision":{"mean":np.nanmean(metrics["val_precision"]), "std":np.nanstd(metrics["val_precision"]), "values":metrics["val_precision"]},
        "val_recall": {"mean":np.nanmean(metrics["val_recall"]), "std":np.nanstd(metrics["val_recall"]), "values":metrics["val_recall"]},
        "val_f": {"mean":np.nanmean(metrics["val_f"]), "std":np.nanstd(metrics["val_f"]), "values":metrics["val_f"]},
        "test_loss": {"mean":np.nanmean(metrics["test_loss"]), "std":np.nanstd(metrics["test_loss"]), "values":metrics["test_loss"]},
        "test_precision":{"mean":np.nanmean(metrics["test_precision"]), "std":np.nanstd(metrics["test_precision"]), "values":metrics["test_precision"]},
        "test_recall": {"mean":np.nanmean(metrics["test_recall"]), "std":np.nanstd(metrics["test_recall"]), "values":metrics["test_recall"]},
        "test_f": {"mean":np.nanmean(metrics["test_f"]), "std":np.nanstd(metrics["test_f"]), "values":metrics["test_f"]},
        "test_predictions": metrics["test_predictions"],
    }

    if debug:
        print("Iteration : %d Lambda : %.2f, Threshold : %.2f" % (i, lamda, threshold))
        print("Training loss : %.2f+/-%.2f, precision : %.2f+/-%.2f, recall : %.2f+/-%.2f, F : %.2f+/-%.2f" % 
              (results["train_loss"]["mean"], results["train_loss"]["std"],
               results["train_precision"]["mean"], results["train_precision"]["std"],
               results["train_recall"]["mean"], results["train_recall"]["std"],
               results["train_f"]["mean"], results["train_f"]["std"]))
        print("Validation loss : %.2f+/-%.2f, precision : %.2f+/-%.2f, recall : %.2f+/-%.2f, F : %.2f+/-%.2f" % 
              (results["val_loss"]["mean"], results["val_loss"]["std"],
               results["val_precision"]["mean"], results["val_precision"]["std"],
               results["val_recall"]["mean"], results["val_recall"]["std"],
               results["val_f"]["mean"], results["val_f"]["std"]))
        print("Test loss : %.2f+/-%.2f, precision : %.2f+/-%.2f, recall : %.2f+/-%.2f, F : %.2f+/-%.2f" % 
              (results["test_loss"]["mean"], results["test_loss"]["std"],
               results["test_precision"]["mean"], results["test_precision"]["std"],
               results["test_recall"]["mean"], results["test_recall"]["std"],
               results["test_f"]["mean"], results["test_f"]["std"]))

    return results


In [9]:
### 
### BOOSTING
###

def boostingTrain(training_set, test_set, lamda, iterations, debug=False):

    metrics = {
        "train_loss":[],
        "train_precision":[],
        "train_recall":[],
        "train_f":[],
        "val_loss":[],
        "val_precision":[],
        "val_recall":[],
        "val_f":[],
        "test_loss":[],
        "test_precision":[],
        "test_recall":[],
        "test_f":[],
        "test_predictions":[]
    }
    
    test_X, test_y = split(test_set, NUM_FEATURES)
    train_X, train_y = split(training_set, NUM_FEATURES)
    threshold = 0 # For boosting to work this must be 0
    boost = np.array([1.0/len(training_set)] * len(training_set))

    for i in range(0, iterations):
        
        print ".",

        train_sample, val_sample = sample(training_set, method="BOOSTING", boost=boost)

        train_sample_X, train_sample_y = split(train_sample, NUM_FEATURES)
        val_sample_X, val_sample_y = split(val_sample, NUM_FEATURES)        

        results = train({train_data_node: train_sample_X, train_labels_node: train_sample_y, lam: lamda}, {train_data_node: val_sample_X, train_labels_node: val_sample_y, lam: lamda}, {train_data_node: test_X, train_labels_node: test_y, lam: lamda}, threshold, 1, False)

        #Evaluate the results and calculate the odds of misclassification
        _, _, _, _, train_predictions = predict(train_X, train_y, lamda, threshold)
        precision = np.argmax(train_predictions,axis=1) == np.argmax(train_y,axis=1)
        epsilon = sum(boost[~precision]) 
        delta = epsilon / (1.0 - epsilon)
        boost[precision] = boost[precision] * delta
        boost = boost / sum(boost)
        
        
        metrics["train_loss"].append(results["train_loss"]["mean"])
        metrics["train_precision"].append(results["train_precision"]["mean"])
        metrics["train_recall"].append(results["train_recall"]["mean"])
        metrics["train_f"].append(results["train_f"]["mean"])
        metrics["val_loss"].append(results["val_loss"]["mean"])
        metrics["val_precision"].append(results["val_precision"]["mean"])
        metrics["val_recall"].append(results["val_recall"]["mean"])
        metrics["val_f"].append(results["val_f"]["mean"])
        metrics["test_loss"].append(results["test_loss"]["mean"])
        metrics["test_precision"].append(results["test_precision"]["mean"])
        metrics["test_recall"].append(results["test_recall"]["mean"])
        metrics["test_f"].append(results["test_f"]["mean"])
        metrics["test_predictions"].append(results["test_predictions"])
        


    results = {
        "train_loss": {"mean":np.nanmean(metrics["train_loss"]), "std":np.nanstd(metrics["train_loss"]), "values":metrics["train_loss"]},
        "train_precision": {"mean":np.nanmean(metrics["train_precision"]), "std":np.nanstd(metrics["train_precision"]), "values":metrics["train_precision"]},
        "train_recall": {"mean":np.nanmean(metrics["train_recall"]), "std":np.nanstd(metrics["train_recall"]), "values":metrics["train_recall"]},
        "train_f": {"mean":np.nanmean(metrics["train_f"]), "std":np.nanstd(metrics["train_f"]), "values":metrics["train_f"]},
        "val_loss": {"mean":np.nanmean(metrics["val_loss"]), "std":np.nanstd(metrics["val_loss"]), "values":metrics["val_loss"]},
        "val_precision":{"mean":np.nanmean(metrics["val_precision"]), "std":np.nanstd(metrics["val_precision"]), "values":metrics["val_precision"]},
        "val_recall": {"mean":np.nanmean(metrics["val_recall"]), "std":np.nanstd(metrics["val_recall"]), "values":metrics["val_recall"]},
        "val_f": {"mean":np.nanmean(metrics["val_f"]), "std":np.nanstd(metrics["val_f"]), "values":metrics["val_f"]},
        "test_loss": {"mean":np.nanmean(metrics["test_loss"]), "std":np.nanstd(metrics["test_loss"]), "values":metrics["test_loss"]},
        "test_precision":{"mean":np.nanmean(metrics["test_precision"]), "std":np.nanstd(metrics["test_precision"]), "values":metrics["test_precision"]},
        "test_recall": {"mean":np.nanmean(metrics["test_recall"]), "std":np.nanstd(metrics["test_recall"]), "values":metrics["test_recall"]},
        "test_f": {"mean":np.nanmean(metrics["test_f"]), "std":np.nanstd(metrics["test_f"]), "values":metrics["test_f"]},
        "test_predictions": metrics["test_predictions"],
        "weights":boost
    }

    if debug:
        print("Iteration : %d Lambda : %.2f, Threshold : %.2f" % (i, lamda, threshold))
        print("Training loss : %.2f+/-%.2f, precision : %.2f+/-%.2f, recall : %.2f+/-%.2f, F : %.2f+/-%.2f" % 
              (results["train_loss"]["mean"], results["train_loss"]["std"],
               results["train_precision"]["mean"], results["train_precision"]["std"],
               results["train_recall"]["mean"], results["train_recall"]["std"],
               results["train_f"]["mean"], results["train_f"]["std"]))
        print("Validation loss : %.2f+/-%.2f, precision : %.2f+/-%.2f, recall : %.2f+/-%.2f, F : %.2f+/-%.2f" % 
              (results["val_loss"]["mean"], results["val_loss"]["std"],
               results["val_precision"]["mean"], results["val_precision"]["std"],
               results["val_recall"]["mean"], results["val_recall"]["std"],
               results["val_f"]["mean"], results["val_f"]["std"]))
        print("Test loss : %.2f+/-%.2f, precision : %.2f+/-%.2f, recall : %.2f+/-%.2f, F : %.2f+/-%.2f" % 
              (results["test_loss"]["mean"], results["test_loss"]["std"],
               results["test_precision"]["mean"], results["test_precision"]["std"],
               results["test_recall"]["mean"], results["test_recall"]["std"],
               results["test_f"]["mean"], results["test_f"]["std"]))

    return results


In [None]:
##
## BOOTSTRAP/BOOSTING TRAINING WITH LOO
##

print "Training",
predictions = np.array([]).reshape(0,2)
bstrapTrainingSet = training_set
threshold = .0
_, test_y = split(test_set, NUM_FEATURES)
initialTestValue = 394

bstrapTrainingSet = bstrapTrainingSet.append(pandas.DataFrame(test_set.values[:initialTestValue,:]))
#print bstrapTrainingSet

try:
    for i in range(initialTestValue,len(test_set),2):

        test_rows = pandas.DataFrame(test_set.values[[i, i+1],:])
        success = False
        retry = 0
        while ((~success) & (retry<5)):
            try:
                ## CHOOSE BOOTSTRAP OR BOOST
                results = boostingTrain(bstrapTrainingSet, test_rows, .01, 20, False)
                #results = bootstrapTrain(bstrapTrainingSet, test_rows, .01, 20, threshold, False)
                predictions =  np.concatenate([predictions, np.nanmean(results["test_predictions"], axis=0)])    
                success = True
            except ValueError:  
                log.emit_log( {'app_name': 'Experiment2','type': 'error','message': "ValueError - Retrying..."})
                retry = retry + 1
                
            
        bstrapTrainingSet = bstrapTrainingSet.append(test_rows)
        # Window
        bstrapTrainingSet = bstrapTrainingSet[-len(training_set):]

        res = evaluate(predictions, test_y[initialTestValue:initialTestValue+len(predictions),:], threshold)
        msg = str("Results after %d iterations, %.2f precision, %.2f recall at %.2f threshold" % (i+2, res[0], res[1], threshold))
        print "."
        print msg

        log.emit_log( {'app_name': 'Experiment2','type': 'result','message': msg})
        # 15/02/18 - Bluemix no longer using logmet for metrics
        #metrics.emit_metric(name='Experiment2.precision', value=res[0])
        #metrics.emit_metric(name='Experiment2.recall', value=res[1])

        pandas.DataFrame(predictions).to_csv("results_new2.csv", header=False, index=False)
        put_file('Experiment2', "results_new2.csv")

        # Try to free memory
        gc.collect()
except:
    print("Unexpected error: %s" % sys.exc_info()[0])
    log.emit_log( {'app_name': 'Experiment2','type': 'error','message': str("Unexpected error: %s" % sys.exc_info()[0])})
    raise
    

Training . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
Results after 396 iterations, 0.00 precision, 1.00 recall at 0.00 threshold

. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
Results after 398 iterations, 0.50 precision, 1.00 recall at 0.00 threshold

. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
Results after 400 iterations, 0.67 precision, 1.00 recall at 0.00 threshold

. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
Results after 402 iterations, 0.75 precision, 1.00 recall at 0.00 threshold

. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
Results after 404 iterations, 0.60 precision, 1.00 recall at 0.00 thresho

In [11]:
##
## BOOTSTRAP TRAINING
##

print "Training",
_, test_y = split(test_set, NUM_FEATURES)
results = bootstrapTrain(training_set, test_set, .1, 20, .0, True)
predictions2 =  np.nanmean(results["test_predictions"], axis=0)
evaluate(predictions2, test_y, .5)

Training . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . Iteration : 19 Lambda : 0.10, Threshold : 0.00
Training loss : 0.61+/-0.02, precision : 0.66+/-0.02, recall : 1.00+/-0.00, F : 0.79+/-0.02
Validation loss : 0.72+/-0.02, precision : 0.54+/-0.02, recall : 1.00+/-0.00, F : 0.70+/-0.01
Test loss : 0.73+/-0.02, precision : 0.54+/-0.02, recall : 1.00+/-0.00, F : 0.70+/-0.02


(0.54883718, 1.0, 0.70870868836909162, array([[ 0.56349051,  0.43598217],
        [ 0.55803794,  0.44130236],
        [ 0.5493713 ,  0.45034656],
        [ 0.55059552,  0.4489843 ],
        [ 0.49326783,  0.50540805],
        [ 0.45126668,  0.54793656],
        [ 0.65567821,  0.34172443],
        [ 0.60610175,  0.39114138],
        [ 0.48215455,  0.51353896],
        [ 0.46498594,  0.53102314],
        [ 0.37359807,  0.62640458],
        [ 0.42384404,  0.57626975],
        [ 0.5093075 ,  0.49005389],
        [ 0.55047226,  0.44859108],
        [ 0.49225932,  0.50419605],
        [ 0.51646715,  0.47996092],
        [ 0.79734296,  0.20354548],
        [ 0.62736219,  0.3730047 ],
        [ 0.6468395 ,  0.35361773],
        [ 0.62354386,  0.37692541],
        [ 0.49336094,  0.50491095],
        [ 0.56665027,  0.43155614],
        [ 0.59447235,  0.40554476],
        [ 0.68440908,  0.31626263],
        [ 0.60287625,  0.39587271],
        [ 0.35190681,  0.64677334],
        [ 0.37363994,  0.6