In [6]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys
import tempfile
import tensorflow as tf
import numpy as np

# pylint: disable=g-backslash-continuation
from tensorflow.contrib.learn.python.learn\
        import metric_spec
from tensorflow.contrib.learn.python.learn.estimators\
        import estimator
from tensorflow.contrib.tensor_forest.client\
        import eval_metrics
from tensorflow.contrib.tensor_forest.client\
        import random_forest
from tensorflow.contrib.tensor_forest.python\
        import tensor_forest

# Cleaned data sets (CSV files with a header row).
BCW_TRAINING = "breast_cancer_wisconsin_orig_train_filt.csv"
BCW_TEST = "breast_cancer_wisconsin_orig_test.csv"

# Filesystem locations.
model_dir = "/tmp/bcwRF_model"
data_dir = "/tmp/data"

# Training and forest settings.
train_steps = 1000
batch_size = 100
num_trees = 50
max_nodes = 100
use_training_loss = False

# Session used by the cells below to evaluate the input tensors.
sess = tf.Session()


def build_rf(model_dir):
    """Build the random-forest estimator used by this notebook.

    The forest is configured for 2 classes and 9 features; its size is taken
    from the module-level ``num_trees`` and ``max_nodes`` settings, and
    ``use_training_loss`` selects which graph builder class is used.

    Args:
        model_dir: directory passed to ``TensorForestEstimator`` as its
            ``model_dir``.

    Returns:
        An ``estimator.SKCompat`` wrapping a ``TensorForestEstimator``.
    """
    hparams = tensor_forest.ForestHParams(
        num_classes=2,
        num_features=9,
        num_trees=num_trees,
        max_nodes=max_nodes)

    if use_training_loss:
        graph_builder_class = tensor_forest.TrainingLossForest
    else:
        graph_builder_class = tensor_forest.RandomForestGraphs

    forest = random_forest.TensorForestEstimator(
        hparams,
        graph_builder_class=graph_builder_class,
        model_dir=model_dir)

    # The SKCompat wrapper lets callers hand in-memory arrays to fit/score
    # together with a batch_size.
    return estimator.SKCompat(forest)

def get_train_inputs():
    """Return the training data as a ``(features, targets)`` pair of constants.

    Reads the module-level ``training_set``, so that variable must be bound
    before this function is called.
    """
    features = tf.constant(training_set.data)
    targets = tf.constant(training_set.target)
    return features, targets
def get_test_inputs():
    """Return the test data as a ``(features, targets)`` pair of constants.

    Reads the module-level ``test_set``, so that variable must be bound
    before this function is called.
    """
    features = tf.constant(test_set.data)
    targets = tf.constant(test_set.target)
    return features, targets

In [7]:
# Create the model and load the data (training happens in the next cell).

# Fall back to a fresh temporary directory when no model_dir is configured.
model_dir = model_dir or tempfile.mkdtemp()
print('model directory = %s' % model_dir)

# build_rf is the estimator factory defined in the first cell; the previous
# call to `build_estimator` referred to a name that is not defined in this
# notebook and raised NameError on a fresh kernel.
est = build_rf(model_dir)

# Load datasets.
# The builtin `int` is used for the target dtype: `np.int` was only an alias
# for it and is deprecated/removed in newer NumPy releases.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=BCW_TRAINING,
    target_dtype=int,
    features_dtype=np.float32)

test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=BCW_TEST,
    target_dtype=int,
    features_dtype=np.float32)

# Specify that all features have real-value data
feature_columns = [tf.contrib.layers.real_valued_column("", dimension=9)]

# Evaluate the constant tensors so that x and y hold the concrete values
# that are passed to est.fit in the next cell.
x_tensor, y_tensor = get_train_inputs()
x, y = sess.run([x_tensor, y_tensor])

model directory = /tmp/bcwRF_model
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': None, '_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_task_type': None, '_environment': 'local', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x10fce7a10>, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_evaluation_master': '', '_keep_checkpoint_every_n_hours': 10000, '_master': ''}


In [8]:
# Train the forest on the in-memory training arrays.
# NOTE(review): train_steps is defined in the config but is not passed
# here -- confirm whether steps=train_steps was intended.
est.fit(x=x, y=y, batch_size=batch_size)

INFO:tensorflow:Constructing forest with params = 
INFO:tensorflow:{'valid_leaf_threshold': 1, 'split_after_samples': 250, 'num_output_columns': 3, 'feature_bagging_fraction': 1.0, 'split_initializations_per_input': 1, 'bagged_features': None, 'min_split_samples': 5, 'max_nodes': 100, 'num_features': 9, 'num_trees': 50, 'num_splits_to_consider': 9, 'base_random_seed': 0, 'num_outputs': 1, 'dominate_fraction': 0.99, 'max_fertile_nodes': 50, 'bagged_num_features': 9, 'dominate_method': 'bootstrap', 'bagging_fraction': 1.0, 'regression': False, 'num_classes': 2}
INFO:tensorflow:training graph for tree: 0
INFO:tensorflow:training graph for tree: 1
INFO:tensorflow:training graph for tree: 2
INFO:tensorflow:training graph for tree: 3
INFO:tensorflow:training graph for tree: 4
INFO:tensorflow:training graph for tree: 5
INFO:tensorflow:training graph for tree: 6
INFO:tensorflow:training graph for tree: 7
INFO:tensorflow:training graph for tree: 8
INFO:tensorflow:training graph for tree: 9
INFO

SKCompat()

In [9]:
# Evaluate the test-set tensors so that vx and vy hold concrete values.
vx, vy = sess.run(list(get_test_inputs()))

# Score the trained estimator with a single metric, looked up by name.
metric_name = 'accuracy'
accuracy_spec = metric_spec.MetricSpec(
    eval_metrics.get_metric(metric_name),
    prediction_key=eval_metrics.get_prediction_key(metric_name))
metric = {metric_name: accuracy_spec}

results = est.score(
    x=vx,
    y=vy,
    batch_size=batch_size,
    metrics=metric)

# Print every reported value in key order.
for key in sorted(results):
    print('%s: %s' % (key, results[key]))

# The session is not needed after this point.
sess.close()

INFO:tensorflow:Constructing forest with params = 
INFO:tensorflow:{'valid_leaf_threshold': 1, 'split_after_samples': 250, 'num_output_columns': 3, 'feature_bagging_fraction': 1.0, 'split_initializations_per_input': 1, 'bagged_features': None, 'min_split_samples': 5, 'max_nodes': 100, 'num_features': 9, 'num_trees': 50, 'num_splits_to_consider': 9, 'base_random_seed': 0, 'num_outputs': 1, 'dominate_fraction': 0.99, 'max_fertile_nodes': 50, 'bagged_num_features': 9, 'dominate_method': 'bootstrap', 'bagging_fraction': 1.0, 'regression': False, 'num_classes': 2}
INFO:tensorflow:Starting evaluation at 2017-06-06-15:51:46
INFO:tensorflow:Restoring parameters from /tmp/bcwRF_model/model.ckpt-281
INFO:tensorflow:Finished evaluation at 2017-06-06-15:51:55
INFO:tensorflow:Saving dict for global step 281: accuracy = 0.979592, global_step = 281, loss = -98.0
accuracy: 0.979592
global_step: 281
loss: -98.0
