In [None]:
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib.tensor_forest.python import tensor_forest

# Ignore all GPUs, tf random forest does not benefit from it.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

from create_train_test_val_maps import *

In [None]:
# ATA6 codes to keep: get_data skips every code that is not in this list, and a
# code's position in the list is the row index passed to sub2ind when the class
# label is built, so reordering this list changes the label assignment.
selected_codes = [0, 45021, 44004, 43004, 45008, 45002, 45007]

In [None]:
# get global index from (row, col) index
def sub2ind(array_shape, row, col):
    """Convert a (row, col) pair into a flat row-major index.

    Args:
        array_shape: Indexable whose first two entries are the number of rows
            and the number of columns.
        row: Row subscript.
        col: Column subscript.

    Returns:
        ``row * n_cols + col`` when both subscripts lie inside the array,
        otherwise ``-1``.
    """
    n_rows, n_cols = array_shape[0], array_shape[1]
    row_outside = row < 0 or row >= n_rows
    col_outside = col < 0 or col >= n_cols
    if row_outside or col_outside:
        return -1
    return row * n_cols + col

# get (row, col) index from global index
def ind2sub(array_shape, ind):
    """Convert a flat row-major index back into a (row, col) pair.

    Args:
        array_shape: Indexable whose first two entries are the number of rows
            and the number of columns.
        ind: Flat index to convert.

    Returns:
        ``(row, col)`` for an index inside the array, or ``(-1, -1)`` when
        ``ind`` is negative or not smaller than ``n_rows * n_cols``.
    """
    n_rows, n_cols = array_shape[0], array_shape[1]
    # Range check first: it also keeps an empty shape away from the division.
    if ind < 0 or ind >= n_rows * n_cols:
        return (-1, -1)
    # Floor division keeps the row an integer on Python 3 as well; a plain "/"
    # would return a float such as 1.25 there.
    row = int(ind) // n_cols
    col = ind % n_cols
    return (row, col)

def one_hot_labels(labels, num_classes):
    """Build a one-hot encoding of a vector of class ids.

    Args:
        labels: numpy array of class ids; values are cast to int before being
            used as column indices.
        num_classes: Number of columns in the encoded matrix.

    Returns:
        Float array of shape ``(labels.size, num_classes)`` that is zero
        everywhere except for a 1 in each row at that row's class id.
    """
    n_samples = labels.size
    encoded = np.zeros((n_samples, num_classes))
    row_idx = np.arange(n_samples)
    col_idx = labels.astype(int)
    encoded[row_idx, col_idx] = 1
    return encoded

def get_data(selected_codes, data_map, num_windows, window_size):
    """Flatten a nested data map into a feature matrix and a label vector.

    ``data_map`` is walked as ``data_map[vehicleID][ATA6code][window]``, where
    the innermost value is an iterable of sequences. Codes that are not in
    ``selected_codes`` are skipped.

    Args:
        selected_codes: List of codes to keep; a code's position in the list
            is the row used for its class label.
        data_map: Nested mapping vehicle -> code -> window -> sequences.
            Each sequence must expose ``.values`` (presumably a pandas
            DataFrame with one row per time step -- confirm against the
            loader).
        num_windows: Number of windows per code, i.e. the column count of the
            (code, window) label grid.
        window_size: Number of label entries emitted per sequence. This is
            assumed to equal the number of rows of each sequence; X and Y only
            stay aligned if it does.

    Returns:
        Tuple ``(X, Y)``: ``X`` is a float array of the stacked sequence rows
        and ``Y`` an int array holding, for each sequence, ``window_size``
        copies of ``sub2ind((len(selected_codes), num_windows), code_row,
        window)``.
    """
    label_grid = (len(selected_codes), num_windows)
    X = []
    Y = []
    for per_vehicle in data_map.values():
        for ATA6code, per_code in per_vehicle.items():
            if ATA6code not in selected_codes:
                continue
            # Position of the code in the list; looked up once per code.
            code_row = selected_codes.index(ATA6code)
            for window, sequences in per_code.items():
                label = sub2ind(label_grid, code_row, window)
                for sequence in sequences:
                    Y.extend([label] * window_size)
                    # .values replaces DataFrame.as_matrix(), which newer
                    # pandas releases no longer provide.
                    X.extend(sequence.values)
    X = np.array(X).astype(float)
    Y = np.array(Y).astype(int)
    return X, Y

In [None]:
# ---- Experiment configuration ----
num_epochs = 2
batch_size = 100
# 70 == len(selected_codes) * num_windows for the (10, 10) configuration below.
num_classes = 70
num_features = 61
# Initial value only: the loop over `trees` below rebinds num_trees.
num_trees = 70
max_nodes = 10000

# (num_windows, window_size) pairs to evaluate.
# Alternatives left disabled: (5, 20), (5, 50)
data_sizes = [(10, 10)]
# Forest sizes to compare for every data configuration.
trees = [10, 50, 70, 100, 200]

# Results, keyed first by (num_windows, window_size) and then by num_trees.
train_acc_all = {}
val_acc_all = {}
train_loss_all = {}

# Most recently opened session. Earlier sessions are closed before the next
# forest is built so they do not pile up; the final one is left open so code
# that runs after this loop can still evaluate the last forest.
sess = None

for num_windows, window_size in data_sizes:
    train_combined, val_combined, test_combined = load_maps(num_windows, window_size)

    train_acc_all[(num_windows,window_size)] = {}
    val_acc_all[(num_windows,window_size)] = {}
    train_loss_all[(num_windows,window_size)] = {}

    # Short aliases for the per-configuration result dictionaries.
    train_acc_trees = train_acc_all[(num_windows,window_size)]
    train_loss_trees = train_loss_all[(num_windows,window_size)]
    val_acc_trees = val_acc_all[(num_windows,window_size)]

    X_train, labels_train = get_data(selected_codes, train_combined, num_windows, window_size)
    X_val, labels_val = get_data(selected_codes, val_combined, num_windows, window_size)
    X_test, labels_test = get_data(selected_codes, test_combined, num_windows, window_size)

    # Integer division drops any final partial batch; max(..., 1) guarantees
    # at least one training step per epoch.
    num_iters = max(X_train.shape[0]//batch_size,1)

    for num_trees in trees:
        print("Training tree {} with num {} and size {}".format(num_trees, num_windows, window_size))

        train_acc_trees[num_trees] = []
        train_loss_trees[num_trees] = []

        train_acc = train_acc_trees[num_trees]
        train_loss = train_loss_trees[num_trees]

        # Release the session of the previous forest before discarding its graph.
        if sess is not None:
            sess.close()

        tf.reset_default_graph()

        # Input and Target data
        X = tf.placeholder(tf.float32, shape=[None, num_features])
        # For random forest, labels must be integers (the class id)
        Y = tf.placeholder(tf.int32, shape=[None])

        # Random Forest Parameters
        hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                              num_features=num_features,
                                              num_trees=num_trees,
                                              max_nodes=max_nodes).fill()

        # Build the Random Forest
        forest_graph = tensor_forest.RandomForestGraphs(hparams)
        # Get training graph and loss
        train_op = forest_graph.training_graph(X, Y)
        loss_op = forest_graph.training_loss(X, Y)

        # Measure the accuracy
        infer_op = forest_graph.inference_graph(X)
        correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(Y, tf.int64))
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Initialize the variables (i.e. assign their default value)
        init_vars = tf.global_variables_initializer()

        # Start TensorFlow session
        sess = tf.Session()

        # Run the initializer
        sess.run(init_vars)

        # Training: batches are taken in order, without shuffling.
        for e in range(num_epochs):
            for i in range(num_iters):
                start = i*batch_size
                end = (i+1)*batch_size
                batch_x = X_train[start:end]
                batch_y = labels_train[start:end]
                _, l = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
                # Log every 100th batch. `i` restarts at 0 each epoch, so the
                # recorded step indices repeat across epochs.
                if i % 100 == 0:
                    acc = sess.run(accuracy_op, feed_dict={X: batch_x, Y: batch_y})
                    train_acc.append([i,acc])
                    train_loss.append([i,l])

        # Validation accuracy of the trained forest on the full validation set.
        val_acc_trees[num_trees] = sess.run(accuracy_op, feed_dict={X: X_val, Y: labels_val})