# Learn-to-Rank Keras Example

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_ranking as tfr
import glob

 The versions of TensorFlow you are currently using is 2.5.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
tf.__version__

'2.5.0'

In [3]:
tfr.__version__

'0.4.0.dev'

In [4]:
# !pip install tensorflow_ranking --upgrade

### Keras example

https://github.com/tensorflow/ranking/blob/master/tensorflow_ranking/examples/keras/keras_dnn_tfrecord.py

In [5]:
from typing import Dict, Tuple
from absl import app
from absl import flags
import tensorflow as tf
import tensorflow_ranking as tfr

In [6]:
# Arguments
num_features = 136

# The document relevance label.
_LABEL_FEATURE = "utility"

# Padding labels are set negative so that the corresponding examples can be
# ignored in loss and metrics.
_PADDING_LABEL = -1.
_MASK = "example_list_mask"

def _create_feature_spec() -> Tuple[Dict[str, tf.io.FixedLenFeature], Dict[
    str, tf.io.FixedLenFeature], Tuple[str, tf.io.FixedLenFeature]]:
    """Create context and example feature spec for data parsing.
    Returns:
    (context feature specs, example feature specs, label spec).
    """
    context_feature_spec = {}
    example_feature_spec = {
      "custom_features_{}".format(i + 1):
      tf.io.FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=0.0)
      for i in range(0, num_features)
    }
    label_spec = (_LABEL_FEATURE,
                tf.io.FixedLenFeature(
                    shape=(1,), dtype=tf.float32, default_value=_PADDING_LABEL))
    return context_feature_spec, example_feature_spec, label_spec

In [7]:
context_feature_spec, example_feature_spec, label_spec = _create_feature_spec()

In [8]:
context_feature_spec

{}

In [9]:
# example_feature_spec

In [10]:
example_feature_spec['custom_features_1']

FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=0.0)

In [11]:
label_spec

('utility', FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=-1.0))

In [12]:
from tensorflow_ranking.python.keras.pipeline import DatasetHparams

In [13]:
# Arguments
train_input_pattern = "/data/train_numerical_elwc.tfrecord"
valid_input_pattern = "/data/vali_numerical_elwc.tfrecord"
train_batch_size = 32
valid_batch_size = 32
list_size = None
convert_labels_to_binary = False

# Get dataset hyperparams
dataset_hparams = tfr.python.keras.pipeline.DatasetHparams(
    train_input_pattern=train_input_pattern,
    valid_input_pattern=valid_input_pattern,
    train_batch_size=train_batch_size,
    valid_batch_size=valid_batch_size,
    list_size=list_size,
    dataset_reader=tf.data.TFRecordDataset,
    convert_labels_to_binary=convert_labels_to_binary)

In [14]:
dataset_hparams

DatasetHparams(train_input_pattern='/data/train_numerical_elwc.tfrecord', valid_input_pattern='/data/vali_numerical_elwc.tfrecord', train_batch_size=32, valid_batch_size=32, list_size=None, valid_list_size=None, dataset_reader=<class 'tensorflow.python.data.ops.readers.TFRecordDatasetV2'>, convert_labels_to_binary=False)

In [15]:
# dir(dataset_hparams)

In [16]:
# Arguments
model_dir = "/tmp/output" # f"/outputs/tf-ranking-{datetime.now().strftime('%d-%m-%Y-%H-%M-%S')}"
num_epochs = 3
num_train_steps = 1000
num_valid_steps = 100
loss = "approx_ndcg_loss"
optimizer = "adagrad"
learning_rate = 0.005
steps_per_execution = 10
export_best_model = False
strategy = "MirroredStrategy"

pipeline_hparams = tfr.keras.pipeline.PipelineHparams(
      model_dir=model_dir,
      num_epochs=num_epochs,
      steps_per_epoch=(num_train_steps // num_epochs),
      validation_steps=num_valid_steps,
      loss=loss,
      loss_reduction=tf.losses.Reduction.AUTO,
      optimizer=optimizer,
      learning_rate=learning_rate,
      steps_per_execution=steps_per_execution,
      export_best_model=export_best_model,
      strategy=strategy)

In [17]:
pipeline_hparams

PipelineHparams(model_dir='/tmp/output', num_epochs=3, steps_per_epoch=333, validation_steps=100, learning_rate=0.005, loss='approx_ndcg_loss', loss_reduction='auto', optimizer='adagrad', loss_weights=None, steps_per_execution=10, automatic_reduce_lr=False, use_weighted_metrics=False, export_best_model=False, best_exporter_metric_higher_better=False, best_exporter_metric='loss', strategy='MirroredStrategy', tpu='')

In [18]:
# Arguments
use_log1p = False

preprocess_dict = {}
if use_log1p:
    preprocess_dict = {
        fname: lambda t: tf.math.log1p(t * tf.sign(t)) * tf.sign(t)
        for fname in example_feature_spec.keys()
    }

In [19]:
hidden_layer_dims = "64,32,16"
use_batch_norm = True
batch_norm_moment = 0.99
dropout = 0.4

dnn_scorer = tfr.keras.model.DNNScorer(
    hidden_layer_dims=map(int, hidden_layer_dims.split(",")),
    output_units=1,
    activation=tf.nn.relu,
    input_batch_norm=use_batch_norm,
    use_batch_norm=use_batch_norm,
    batch_norm_moment=batch_norm_moment,
    dropout=dropout)

In [20]:
dnn_scorer

<tensorflow_ranking.python.keras.model.DNNScorer at 0x7f3b12d39940>

In [21]:
model_builder = tfr.keras.model.ModelBuilder(
    input_creator=tfr.keras.model.FeatureSpecInputCreator(
      context_feature_spec, 
        example_feature_spec),
    preprocessor=tfr.keras.model.PreprocessorWithSpec(preprocess_dict),
    scorer=dnn_scorer,
    mask_feature_name=_MASK,
    name="keras_dnn_model")

In [22]:
model_builder

<tensorflow_ranking.python.keras.model.ModelBuilder at 0x7f3b12d28190>

In [23]:
# dir(model_builder)

In [24]:
ranking_pipeline = tfr.keras.pipeline.SimplePipeline(
    model_builder=model_builder,
    dataset_builder=tfr.keras.pipeline.SimpleDatasetBuilder(
        context_feature_spec=context_feature_spec,
        example_feature_spec=example_feature_spec,
        mask_feature_name=_MASK,
        label_spec=label_spec,
        hparams=dataset_hparams),
    hparams=pipeline_hparams)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


In [25]:
ranking_pipeline.train_and_validate(verbose=1)

Epoch 1/3
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


InvalidArgumentError:  Name: <unknown>, Key: utility, Index: 0.  Data types don't match. Data type: int64 but expected type: float
	 [[{{node ParseExample/ParseExampleV2}}]]
	 [[MultiDeviceIteratorGetNextFromShard]]
	 [[RemoteCall]]
	 [[while/body/_1/while/IteratorGetNext]] [Op:__inference_train_function_24023]

Function call stack:
train_function
