## Import TensorFlow and supporting libraries

In [None]:
from __future__ import print_function
import numpy as np
import sklearn
import pandas as pd
import tensorflow as tf
from tensorflow.contrib.tensor_forest.python import tensor_forest
from tensorflow.python.ops import resources
from IPython.display import clear_output
from matplotlib import pyplot as plt



## Import & examine the data

In [None]:
# Read the sample CSV (path is relative to the notebook's working directory).
data = pd.read_csv(filepath_or_buffer='../sample10k.csv')

# Last expression of the cell: summary statistics rendered as the cell output.
data.describe()

In [None]:
# Histogram of the `f1` column using 20 bins.
data['f1'].hist(bins=20)
plt.show()

# Row count per distinct `target` value, drawn as horizontal bars.
target_counts = data['target'].value_counts()
target_counts.plot.barh()
plt.show()

## Split the Data

In [None]:
from sklearn.model_selection import train_test_split

# Predictors are every column from position 2 onward; labels are the column at position 1.
# NOTE(review): assumes column 1 is the field plotted earlier as `data.target` -- confirm against the CSV layout.
input_x = data.iloc[:, 2:].values
input_y = data.iloc[:, 1].values

# Hold out 40% of the rows for evaluation; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    input_x, input_y, test_size=0.40, random_state=0)

print("Size of training:   ", len(X_train))
print("Size of evaluation: ", len(X_test))
print("\nFirst row of training predictors (numpy) array:")
# Index 0 so the printed row really is the first one announced by the label above.
print(X_train[0])

## Set up the TensorFlow random forest graph

In [None]:
import os

# An empty CUDA_VISIBLE_DEVICES exposes no GPUs to CUDA, so the graph below runs on CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Parameters
num_steps = 50  # Total steps to train
num_classes = 2
# Taken from the training matrix rather than hard-coded, so the placeholder
# width always matches the predictor columns selected when the data was split.
num_features = X_train.shape[1]
num_trees = 500
max_nodes = 20

# Input and Target placeholders (batch dimension left open)
X = tf.placeholder(tf.float32, shape=[None, num_features])
Y = tf.placeholder(tf.int64, shape=[None])

# Random Forest Parameters
hparams = tensor_forest.ForestHParams(
    num_classes=num_classes,
    num_features=num_features,
    num_trees=num_trees,
    max_nodes=max_nodes).fill()

# Build the Random Forest
forest_graph = tensor_forest.RandomForestGraphs(hparams)

# Get training graph and loss
train_op = forest_graph.training_graph(X, Y)
loss_op = forest_graph.training_loss(X, Y)



In [None]:
# Accuracy: fraction of fed rows whose arg-max prediction equals the label in Y.
# inference_graph returns three values; only the first is needed here.
infer_op, _, _ = forest_graph.inference_graph(X)

# Arg-max over axis 1 of the inference output picks one class index per row.
predicted_labels = tf.argmax(infer_op, 1)
true_labels = tf.cast(Y, tf.int64)

correct_prediction = tf.equal(predicted_labels, true_labels)
# Cast the boolean matches to float so their mean is the hit rate.
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


## Training the Model

In [None]:
# One grouped op that initializes the global variables together with the
# forest's shared resources.
init_vars = tf.group(
    tf.global_variables_initializer(),
    resources.initialize_resources(resources.shared_resources()),
)

# The session is intentionally left open: the hold-out evaluation cell reuses `sess`.
sess = tf.Session()
sess.run(init_vars)

# Every step feeds the complete training split (no mini-batching).
train_feed = {X: X_train, Y: y_train}

for step in range(1, num_steps + 1):
    _, loss_value = sess.run([train_op, loss_op], feed_dict=train_feed)

    # Report only on the first step and on every 10th step.
    if step != 1 and step % 10 != 0:
        continue

    acc = sess.run(accuracy_op, feed_dict=train_feed)
    print('Step %i, Loss: %f, Acc: %f' % (step, loss_value, acc))


## Evaluating Accuracy on holdout set

In [None]:
# Evaluate the accuracy op on the held-out split using the still-open training session.
holdout_feed = {X: X_test, Y: y_test}
test_accuracy = sess.run(accuracy_op, feed_dict=holdout_feed)
print("Test Accuracy:", test_accuracy)
