In [2]:
import tensorflow as tf
import tensorflow_ranking as tfr
from dataset_builder import ZlibDatasetBuilder

In [3]:
# Name of the label feature; used as the key of the label spec in train_and_validate.
_LABEL_FEATURE = "label"
# Name handed to the model and dataset builders as `mask_feature_name`.
_MASK = "_mask"
# Default value of the label feature when an example carries no label.
_PADDING_LABEL = -1.0

# Feature id -> feature name, ids 1..136 in ascending insertion order.
# NOTE(review): the ids presumably mirror the MSLR-WEB feature numbering —
# confirm against the code that wrote the TFRecord files.
#
# Ids 1..125 form a regular grid: 25 statistics, each computed over the same
# five document streams, so id = 5 * statistic_index + stream_position.
# Ids 126..136 are stand-alone document/click features.
_FEATURE_NAMES = {
    **{
        5 * group + slot: f"{statistic}_{stream}"
        for group, statistic in enumerate((
            "covered_query_term_number",
            "covered_query_term_ratio",
            "stream_length",
            "idf",
            "sum_of_term_frequency",
            "min_of_term_frequency",
            "max_of_term_frequency",
            "mean_of_term_frequency",
            "variance_of_term_frequency",
            "sum_of_stream_length_normalized_term_frequency",
            "min_of_stream_length_normalized_term_frequency",
            "max_of_stream_length_normalized_term_frequency",
            "mean_of_stream_length_normalized_term_frequency",
            "variance_of_stream_length_normalized_term_frequency",
            "sum_of_tf_idf",
            "min_of_tf_idf",
            "max_of_tf_idf",
            "mean_of_tf_idf",
            "variance_of_tf_idf",
            "boolean_model",
            "vector_space_model",
            "bm25",
            "lmir_abs",
            "lmir_dir",
            "lmir_jm",
        ))
        for slot, stream in enumerate(
            ("body", "anchor", "title", "url", "whole_document"), start=1)
    },
    **{
        feature_id: feature_name
        for feature_id, feature_name in enumerate((
            "number_of_slash_in_url",
            "length_of_url",
            "inlink_number",
            "outlink_number",
            "page_rank",
            "site_rank",
            "quality_score",
            "quality_score_2",
            "query_url_click_count",
            "url_click_count",
            "url_dwell_time",
        ), start=126)
    },
}

In [5]:
def train_and_validate(config):
    """Assemble a DNN ranking pipeline from ``config`` and run train + validation.

    Every value of ``_FEATURE_NAMES`` is declared as a per-example float
    feature of shape ``(1,)`` defaulting to ``0.0``; no context features are
    declared. The label is read from the ``_LABEL_FEATURE`` feature and
    defaults to ``_PADDING_LABEL``.

    Args:
        config: Mapping that must provide the keys ``train_input``,
            ``vali_input``, ``train_batch_size``, ``valid_batch_size``,
            ``list_size``, ``model_dir``, ``num_epochs``, ``steps_per_epoch``,
            ``validation_steps``, ``loss``, ``optimizer``, ``learning_rate``,
            ``strategy``, ``hidden_layer_dims``, ``dropout`` and ``name``.
            Any other key is ignored.

    Returns:
        None. The function only calls the pipeline's ``train_and_validate``
        and discards whatever it returns.

    Raises:
        KeyError: If ``config`` is a dict and one of the keys above is missing.
    """
    keras_model = tfr.keras.model
    keras_pipeline = tfr.keras.pipeline

    # One fixed-length float feature per named ranking feature.
    example_feature_spec = {
        feature_name: tf.io.FixedLenFeature(
            shape=(1,), dtype=tf.float32, default_value=0.0)
        for feature_name in _FEATURE_NAMES.values()
    }
    # No query-level (context) features are declared.
    context_feature_spec = {}
    # (feature name, spec) pair; a missing label falls back to _PADDING_LABEL.
    label_spec = (
        _LABEL_FEATURE,
        tf.io.FixedLenFeature(
            shape=(1,), dtype=tf.float32, default_value=_PADDING_LABEL),
    )

    data_hparams = keras_pipeline.DatasetHparams(
        train_input_pattern=config['train_input'],
        valid_input_pattern=config['vali_input'],
        train_batch_size=config['train_batch_size'],
        valid_batch_size=config['valid_batch_size'],
        list_size=config['list_size'],
    )
    run_hparams = keras_pipeline.PipelineHparams(
        model_dir=config['model_dir'],
        num_epochs=config['num_epochs'],
        steps_per_epoch=config['steps_per_epoch'],
        validation_steps=config['validation_steps'],
        loss=config['loss'],
        optimizer=config['optimizer'],
        learning_rate=config['learning_rate'],
        strategy=config['strategy'],
    )

    # Feed-forward scorer emitting a single score unit.
    scorer = keras_model.DNNScorer(
        hidden_layer_dims=config['hidden_layer_dims'],
        output_units=1,
        activation=tf.nn.relu,
        input_batch_norm=True,
        dropout=config['dropout'],
    )
    model_builder = keras_model.ModelBuilder(
        input_creator=keras_model.FeatureSpecInputCreator(
            context_feature_spec, example_feature_spec),
        preprocessor=keras_model.PreprocessorWithSpec(),
        scorer=scorer,
        mask_feature_name=_MASK,
        name=config['name'],
    )

    # Project-local dataset builder (imported from dataset_builder).
    dataset_builder = ZlibDatasetBuilder(
        context_feature_spec=context_feature_spec,
        example_feature_spec=example_feature_spec,
        mask_feature_name=_MASK,
        label_spec=label_spec,
        hparams=data_hparams,
    )

    pipeline = keras_pipeline.SimplePipeline(
        model_builder=model_builder,
        dataset_builder=dataset_builder,
        hparams=run_hparams,
    )
    pipeline.train_and_validate(verbose=1)

In [4]:
# Baseline model; hyperparameters have not been tuned.
config = dict(
    # Input TFRecord files (MSLR Fold1). `test_input` is kept for reference;
    # train_and_validate does not read it.
    train_input='/home/guhangsong/Data/MSLR/Fold1/train.tfrecord',
    vali_input='/home/guhangsong/Data/MSLR/Fold1/vali.tfrecord',
    test_input='/home/guhangsong/Data/MSLR/Fold1/test.tfrecord',
    # Batching.
    train_batch_size=32,
    valid_batch_size=32,
    list_size=None,
    # Training schedule and output location.
    model_dir='mslr_keras/base_model',
    num_epochs=3,
    steps_per_epoch=1000,
    validation_steps=100,
    # Optimisation.
    learning_rate=0.05,
    optimizer='adam',
    loss='approx_ndcg_loss',
    strategy='MirroredStrategy',
    # Scorer network.
    hidden_layer_dims=[64, 32, 16],
    dropout=0.5,
    name='mslr_dnn_base_model',
)
train_and_validate(config)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


2022-03-20 14:02:26.694245: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-03-20 14:02:26.752900: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-03-20 14:02:26.753215: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-03-20 14:02:26.754007: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow wi

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


Epoch 1/3


2022-03-20 14:02:41.101033: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/3
Epoch 3/3


2022-03-20 14:04:52.077768: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: mslr_keras/base_model/export/latest_model/assets


INFO:tensorflow:Assets written to: mslr_keras/base_model/export/latest_model/assets


In [33]:
# Loss function: pairwise_hinge.
config = dict(
    # Input TFRecord files (MSLR Fold1). `test_input` is kept for reference;
    # train_and_validate does not read it.
    train_input='/home/guhangsong/Data/MSLR/Fold1/train.tfrecord',
    vali_input='/home/guhangsong/Data/MSLR/Fold1/vali.tfrecord',
    test_input='/home/guhangsong/Data/MSLR/Fold1/test.tfrecord',
    # Batching.
    train_batch_size=32,
    valid_batch_size=32,
    list_size=None,
    # Training schedule and output location.
    model_dir='mslr_keras/pairwise_hinge_model',
    num_epochs=3,
    steps_per_epoch=1000,
    validation_steps=100,
    # Optimisation.
    learning_rate=0.05,
    optimizer='adam',
    loss='pairwise_hinge_loss',
    strategy='MirroredStrategy',
    # Scorer network.
    hidden_layer_dims=[64, 32, 16],
    dropout=0.5,
    name='mslr_dnn_pairwise_hinge_model',
)
train_and_validate(config)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


Epoch 1/3
Epoch 2/3
Epoch 3/3
INFO:tensorflow:Assets written to: mslr_keras/pairwise_hinge_model/export/latest_model/assets


INFO:tensorflow:Assets written to: mslr_keras/pairwise_hinge_model/export/latest_model/assets


In [6]:
# Loss function: pairwise_logistic.
config = dict(
    # Input TFRecord files (MSLR Fold1). `test_input` is kept for reference;
    # train_and_validate does not read it.
    train_input='/home/guhangsong/Data/MSLR/Fold1/train.tfrecord',
    vali_input='/home/guhangsong/Data/MSLR/Fold1/vali.tfrecord',
    test_input='/home/guhangsong/Data/MSLR/Fold1/test.tfrecord',
    # Batching.
    train_batch_size=32,
    valid_batch_size=32,
    list_size=None,
    # Training schedule and output location.
    model_dir='mslr_keras/pairwise_logistic_model',
    num_epochs=3,
    steps_per_epoch=1000,
    validation_steps=100,
    # Optimisation.
    learning_rate=0.05,
    optimizer='adam',
    loss='pairwise_logistic_loss',
    strategy='MirroredStrategy',
    # Scorer network.
    hidden_layer_dims=[64, 32, 16],
    dropout=0.5,
    name='mslr_dnn_pairwise_logistic_model',
)
train_and_validate(config)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


2022-03-20 14:34:07.064035: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-03-20 14:34:07.083340: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-03-20 14:34:07.083660: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-03-20 14:34:07.084198: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow wi

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


Epoch 1/3


2022-03-20 14:34:22.889835: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/3
Epoch 3/3


2022-03-20 14:36:33.606922: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: mslr_keras/pairwise_logistic_model/export/latest_model/assets


INFO:tensorflow:Assets written to: mslr_keras/pairwise_logistic_model/export/latest_model/assets


In [35]:
# Loss function: list_mle_loss.
config = dict(
    # Input TFRecord files (MSLR Fold1). `test_input` is kept for reference;
    # train_and_validate does not read it.
    train_input='/home/guhangsong/Data/MSLR/Fold1/train.tfrecord',
    vali_input='/home/guhangsong/Data/MSLR/Fold1/vali.tfrecord',
    test_input='/home/guhangsong/Data/MSLR/Fold1/test.tfrecord',
    # Batching.
    train_batch_size=32,
    valid_batch_size=32,
    list_size=None,
    # Training schedule and output location.
    model_dir='mslr_keras/list_mle_model',
    num_epochs=3,
    steps_per_epoch=1000,
    validation_steps=100,
    # Optimisation.
    learning_rate=0.05,
    optimizer='adam',
    loss='list_mle_loss',
    strategy='MirroredStrategy',
    # Scorer network.
    hidden_layer_dims=[64, 32, 16],
    dropout=0.5,
    name='mslr_dnn_list_mle_model',
)
train_and_validate(config)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


Epoch 1/3
Epoch 2/3
Epoch 3/3
INFO:tensorflow:Assets written to: mslr_keras/list_mle_model/export/latest_model/assets


INFO:tensorflow:Assets written to: mslr_keras/list_mle_model/export/latest_model/assets


In [36]:
# Loss function: gumbel_approx_ndcg_loss.
config = dict(
    # Input TFRecord files (MSLR Fold1). `test_input` is kept for reference;
    # train_and_validate does not read it.
    train_input='/home/guhangsong/Data/MSLR/Fold1/train.tfrecord',
    vali_input='/home/guhangsong/Data/MSLR/Fold1/vali.tfrecord',
    test_input='/home/guhangsong/Data/MSLR/Fold1/test.tfrecord',
    # Batching.
    train_batch_size=32,
    valid_batch_size=32,
    list_size=None,
    # Training schedule and output location.
    model_dir='mslr_keras/gumbel_approx_ndcg_model',
    num_epochs=3,
    steps_per_epoch=1000,
    validation_steps=100,
    # Optimisation.
    learning_rate=0.05,
    optimizer='adam',
    loss='gumbel_approx_ndcg_loss',
    strategy='MirroredStrategy',
    # Scorer network.
    hidden_layer_dims=[64, 32, 16],
    dropout=0.5,
    name='mslr_dnn_gumbel_approx_ndcg_model',
)
train_and_validate(config)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


Epoch 1/3
Epoch 2/3
Epoch 3/3
INFO:tensorflow:Assets written to: mslr_keras/gumbel_approx_ndcg_model/export/latest_model/assets


INFO:tensorflow:Assets written to: mslr_keras/gumbel_approx_ndcg_model/export/latest_model/assets


In [6]:
# Validation run on a larger amount of data: all five folds instead of Fold1 only.
config = dict(
    # One TFRecord path per fold (Fold1..Fold5). `test_input` is kept for
    # reference; train_and_validate does not read it.
    train_input=[f'/home/guhangsong/Data/MSLR/Fold{i+1}/train.tfrecord' for i in range(5)],
    vali_input=[f'/home/guhangsong/Data/MSLR/Fold{i+1}/vali.tfrecord' for i in range(5)],
    test_input=[f'/home/guhangsong/Data/MSLR/Fold{i+1}/test.tfrecord' for i in range(5)],
    # Batching.
    train_batch_size=32,
    valid_batch_size=32,
    list_size=None,
    # Training schedule and output location.
    model_dir='mslr_keras/bigdata_model',
    num_epochs=2,
    steps_per_epoch=1000,
    validation_steps=100,
    # Optimisation.
    learning_rate=0.05,
    optimizer='adam',
    loss='approx_ndcg_loss',
    strategy='MirroredStrategy',
    # Scorer network.
    hidden_layer_dims=[64, 32, 16],
    dropout=0.5,
    name='mslr_dnn_bigdata_model',
)
train_and_validate(config)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


























































































































































Epoch 1/2


2022-03-20 14:16:53.992630: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/2
INFO:tensorflow:Assets written to: mslr_keras/bigdata_model/export/latest_model/assets


INFO:tensorflow:Assets written to: mslr_keras/bigdata_model/export/latest_model/assets


In [7]:
# Validation run on a larger amount of data (all five folds) combined with
# the gumbel approx NDCG loss.
config = {
    # One TFRecord path per fold (Fold1..Fold5). 'test_input' is kept for
    # reference; train_and_validate does not read it.
    'train_input': [f'/home/guhangsong/Data/MSLR/Fold{i+1}/train.tfrecord' for i in range(5)],
    'vali_input': [f'/home/guhangsong/Data/MSLR/Fold{i+1}/vali.tfrecord' for i in range(5)],
    'test_input': [f'/home/guhangsong/Data/MSLR/Fold{i+1}/test.tfrecord' for i in range(5)],
    'train_batch_size': 32,
    'valid_batch_size': 32,
    'list_size': None,
    'model_dir': 'mslr_keras/gumbel_bigdata_model',
    'num_epochs': 2,
    'steps_per_epoch': 1000,
    'validation_steps': 100,
    'learning_rate': 0.05,
    'optimizer': 'adam',
    'loss': 'gumbel_approx_ndcg_loss',
    'strategy': 'MirroredStrategy',
    'hidden_layer_dims': [64, 32, 16],
    'dropout': 0.5,
    # Spelling fixed ("gumber" -> "gumbel") so the model name agrees with the
    # loss name and with model_dir.
    'name': 'mslr_dnn_gumbel_bigdata_model',
}
train_and_validate(config)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


Epoch 1/2
Epoch 2/2