In [1]:
import tensorflow as tf
import numpy as np

from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
from tensorflow.contrib.boosted_trees.proto.learner_pb2 import LearnerConfig
from tensorflow.contrib.learn import learn_runner

tf.logging.set_verbosity(tf.logging.INFO)

tf.VERSION

'1.4.1'

In [2]:
help(GradientBoostedDecisionTreeClassifier)

Help on class GradientBoostedDecisionTreeClassifier in module tensorflow.contrib.boosted_trees.estimator_batch.estimator:

class GradientBoostedDecisionTreeClassifier(tensorflow.contrib.learn.python.learn.estimators.estimator.Estimator)
 |  An estimator using gradient boosted decision trees.
 |  
 |  Method resolution order:
 |      GradientBoostedDecisionTreeClassifier
 |      tensorflow.contrib.learn.python.learn.estimators.estimator.Estimator
 |      tensorflow.contrib.learn.python.learn.estimators.estimator.BaseEstimator
 |      tensorflow.contrib.learn.python.learn.estimators._sklearn._BaseEstimator
 |      tensorflow.contrib.learn.python.learn.evaluable.Evaluable
 |      tensorflow.contrib.learn.python.learn.trainable.Trainable
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, learner_config, examples_per_layer, n_classes=2, num_trees=None, feature_columns=None, weight_column_name=None, model_dir=None, config=None, label_keys=None, feature_engineering

In [3]:
def input_fn(dataset_split,
             batch_size,
             capacity=10000,
             min_after_dequeue=3000):

    images_batch, labels_batch = tf.train.shuffle_batch(
        tensors=[dataset_split.images,
                 dataset_split.labels.astype(np.int32)],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        enqueue_many=True,
        num_threads=4)
    features_map = {"images": images_batch}
    return features_map, labels_batch

def gbt_classifier(output_dir,
                   learning_rate,
                   num_classes,
                   regularization_l1,
                   regularization_l2,
                   max_tree_depth,
                   examples_per_layer,
                   num_trees):

    learner_config = LearnerConfig()

    learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
    learner_config.num_classes = num_classes
    learner_config.regularization.l1 = regularization_l1
    learner_config.regularization.l2 = regularization_l2 / examples_per_layer
    learner_config.constraints.max_tree_depth = max_tree_depth

    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

    # Create a TF Boosted trees estimator that can take in custom loss.
    return GradientBoostedDecisionTreeClassifier(
        learner_config=learner_config,
        n_classes=num_classes,
        examples_per_layer=examples_per_layer,
        model_dir=output_dir,
        num_trees=num_trees,
        center_bias=False,
        config=run_config)


def gbt_mnist(output_dir,
              train_batch_size,
              eval_batch_size,
              num_eval_steps,
              gbt_params):

    data = tf.contrib.learn.datasets.mnist.load_mnist()
    train_input_fn = lambda: input_fn(data.train, train_batch_size)
    eval_input_fn = lambda: input_fn(data.validation, eval_batch_size)

    return tf.contrib.learn.Experiment(
        estimator=gbt_classifier(output_dir, **gbt_params),
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=None,
        eval_steps=num_eval_steps,
        eval_metrics=None)

In [4]:
gbt_params = dict(
    learning_rate=0.1,
    num_classes=10,
    regularization_l1=0.,
    regularization_l2=1.,
    max_tree_depth=4,
    examples_per_layer=1000,
    num_trees=10,
)

experiment_fn = lambda output_dir: gbt_mnist(output_dir,
                                             train_batch_size=1000,
                                             eval_batch_size=1000,
                                             num_eval_steps=1,
                                             gbt_params=gbt_params)

learn_runner.run(
    experiment_fn=experiment_fn,
    output_dir='gbt_minist',
    schedule="train_and_evaluate")

Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fa2d48d4e48>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 300, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'gbt_minist'}
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.


NameError: name 'xrange' is not defined