In [1]:
import six
from tensorflow.python.framework import dtypes

import tensorflow as tf
import tensorflow_ranking as tfr

tf.enable_eager_execution()
tf.executing_eagerly()

True

In [2]:
from tensorflow.python.lib.io.tf_record import TFRecordCompressionType, TFRecordOptions

_ZLIB_COMPRESSION_TYPE = TFRecordOptions.get_compression_type_string(TFRecordOptions(TFRecordCompressionType.ZLIB))

_N_FEATURES = 7836 # this is probably wrong

_LOSS = "pairwise_logistic_loss"

_LABEL_KEY = "labels"

_BATCH_SIZE=1
_LIST_SIZE=4
_HIDDEN_LAYER_DIMS=["20", "10"]

_INPUT_PATH = '/Users/benl/dev/ranking/tensorflow_ranking/examples/data/features-v16-sequence-train.tfrecords.deflate'

In [3]:
context_feature_spec={
    'context_indices': tf.io.VarLenFeature(dtype=dtypes.int64),
    'context_values': tf.io.VarLenFeature(dtype=dtypes.float32)
}
sequence_feature_spec={
    'indices':tf.io.VarLenFeature(dtype=dtypes.int64),
    'values':tf.io.VarLenFeature(dtype=dtypes.float32),
    'labels':tf.io.VarLenFeature(dtype=dtypes.int64),
    'weights':tf.io.VarLenFeature(dtype=dtypes.float32)
}


In [1]:
def make_input_fn(path):
    
    def _split_labels_and_features(features_dict):
        labels = tf.sparse.to_dense(features_dict['labels'])
        flat_labels = tf.to_float(tf.layers.flatten(labels))
        del features_dict['labels']
        print('here is the feature dict',features_dict)
        return features_dict, flat_labels
    
    def _input_fn():
        batched_dataset = tfr.data.read_batched_sequence_example_dataset(
            path, 
            _BATCH_SIZE, 
            _LIST_SIZE, 
            context_feature_spec, 
            sequence_feature_spec,
            reader_args=[_ZLIB_COMPRESSION_TYPE]
        )
    
        new_dataset = batched_dataset.map(_split_labels_and_features)
        return new_dataset
    
    return _input_fn

In [None]:
# copied from tf_ranking_libsvm example
# TODO: get dense shape somehow

def make_score_fn():
    
    def _score_fn(context_features, group_features, mode, params, config):        
        # ok this is the challenging part
        # this might be right
        example_input = [
            tf.sparse_to_dense(
                group_features["indices"], 
                [_LIST_SIZE, _N_FEATURES], 
                group_features["values"]
            )
        ]
        input_layer = tf.concat(example_input, 1)
        
        cur_layer = input_layer
        for i, layer_width in enumerate(int(d) for d in _HIDDEN_LAYER_DIMS):
            cur_layer = tf.layers.dense(
                cur_layer,
                units=layer_width,
                activation="tanh"
            )

        logits = tf.layers.dense(cur_layer, units=4)
        return logits

    return _score_fn

In [4]:
def eval_metric_fns():
  """Returns a dict from name to metric functions.

  This can be customized as follows. Care must be taken when handling padded
  lists.

  def _auc(labels, predictions, features):
    is_label_valid = tf_reshape(tf.greater_equal(labels, 0.), [-1, 1])
    clean_labels = tf.boolean_mask(tf.reshape(labels, [-1, 1], is_label_valid)
    clean_pred = tf.boolean_maks(tf.reshape(predictions, [-1, 1], is_label_valid)
    return tf.metrics.auc(clean_labels, tf.sigmoid(clean_pred), ...)
  metric_fns["auc"] = _auc

  Returns:
    A dict mapping from metric name to a metric function with above signature.
  """
  metric_fns = {}
  metric_fns.update({
      "metric/ndcg@%d" % topn: tfr.metrics.make_ranking_metric_fn(
          tfr.metrics.RankingMetricKey.NDCG, topn=topn)
      for topn in [1, 3, 5, 10]
  })

  return metric_fns

In [33]:
def get_simple_transform_fn():
    separate_features = tfr.feature.make_identity_transform_fn(context_feature_spec.keys())
    
    def _transform_fn(features, mode):
        context_features, sequence_features = separate_features(features, mode)

        dense_context_features = {}
        dense_sequence_features = {}
        for name, values in six.iteritems(context_features):
            dense_context_features[name] = tf.sparse.to_dense(values)

        for name, values in six.iteritems(sequence_features):
            dense_sequence_features[name] = tf.sparse.to_dense(values)
        
        return dense_context_features, dense_sequence_features

    return _transform_fn


def get_ambitious_transform_fn():
    separate_features = tfr.feature.make_identity_transform_fn(context_feature_spec.keys())
    
    def _transform_fn(features, mode):
        context_features, sequence_features = separate_features(features, mode)

        context_indices = tf.sparse.to_dense(context_features['context_indices'])
        context_values = tf.slice(
            tf.sparse.to_dense(context_features['context_values']),
            [0,0],
            [1,-1]
        )[0]
        
        
        
        print('context_indices',context_indices)
        print('context_values',context_values)
        
        ctx_sparse_tensor = tf.sparse.SparseTensor(
            indices = context_indices,
            values = context_values,
            dense_shape = [_N_FEATURES]
        )
        
        new_context_features = {
            "context_features": ctx_sparse_tensor
        }
        
        sequence_indices = tf.sparse.to_dense(sequence_features['indices'])
        sequence_values = tf.sparse.to_dense(sequence_features['values'])
        sequence_weights = tf.sparse.to_dense(sequence_features['weights'])
        
        print('sequence_indices',sequence_indices)
        print('sequence_values',sequence_values)
        
        sequence_sparse_tensor = tf.sparse.SparseTensor(
            indices = sequence_indices,
            values = sequence_values,
            dense_shape = [_LIST_SIZE, _N_FEATURES]
        )
        new_sequence_features = {
            "sequence_features": sequence_sparse_tensor,
            "sequence_weights": sequence_weights
        }
        
        return new_context_features, new_sequence_features
    
    return _transform_fn

In [22]:
def get_estimator(hparams):
  """Create a ranking estimator.

  Args:
    hparams: (tf.contrib.training.HParams) a hyperparameters object.

  Returns:
    tf.learn `Estimator`.
  """
  def _train_op_fn(loss):
    """Defines train op used in ranking head."""
    return tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.train.get_global_step(),
        learning_rate=hparams.learning_rate,
        optimizer="Adagrad")

  ranking_head = tfr.head.create_ranking_head(
      loss_fn=tfr.losses.make_loss_fn(_LOSS),
      eval_metric_fns=eval_metric_fns(),
      train_op_fn=_train_op_fn)

  return tf.estimator.Estimator(
      model_fn=tfr.model.make_groupwise_ranking_fn(
          group_score_fn=make_score_fn(),
          group_size=4,
          transform_fn=get_ambitious_transform_fn(),
          ranking_head=ranking_head),
      params=hparams)

In [9]:
# copied from tf_ranking_libsvm example

# TODO: get dense shape somehow

def make_score_fn():
    
    def _score_fn(context_features, group_features, mode, params, config):
        {'indices': Tensor, 'values': Tensor}
        {'indices': []}
        # ok this is the challenging part
        example_input = [
            tf.sparse_to_dense(
                group_features["indices"], 
                [_LIST_SIZE, _N_FEATURES], 
                group_features["values"]
            )
        ]
        input_layer = tf.concat(example_input, 1)
        
        cur_layer = input_layer
        for i, layer_width in enumerate(int(d) for d in _HIDDEN_LAYER_DIMS):
            cur_layer = tf.layers.dense(
                cur_layer,
                units=layer_width,
                activation="tanh"
            )

        logits = tf.layers.dense(cur_layer, units=4)
        return logits

    return _score_fn

In [36]:
hparams = tf.contrib.training.HParams(learning_rate=0.05)
ranker = get_estimator(hparams)
ranker.train(input_fn=make_input_fn(_INPUT_PATH), steps=100)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/95/g4r2021d7hxbhwbhhpmhc9th0000gp/T/tmpvzz7s2z7', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12461df28>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
here is the feature dict {'context_indices': <tensorflow.python.framework.sparse_tensor.SparseTensor

ValueError: Shape (?, ?, ?) must have rank 2