In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_ranking as tfr

from utils import read_data, get_ndcg

from tqdm import tqdm

from tensorflow_serving.apis import input_pb2

In [None]:
# Load all three splits of Fold1. The train and test results are used by later
# cells (df_train display and the create_records calls), so they must be
# defined for the notebook to run top to bottom.
# read_data comes from the project's utils module; presumably it returns
# (feature frame, labels, group sizes, query ids) -- confirm against utils.
df_train, y_train, group_size_train, query_id_train = read_data('Fold1', 'train')
df_vali, y_vali, group_size_vali, query_id_vali = read_data('Fold1', 'vali')
df_test, y_test, group_size_test, query_id_test = read_data('Fold1', 'test')

# X_columns = df_train.columns

In [None]:
# Show the training feature frame (df_train must already be loaded).
df_train

In [None]:
def _float_feature(value):
    """Wrap one float / double in a tf.train.Feature holding a single-element float_list."""
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
    """Wrap one bool / enum / int / uint in a tf.train.Feature holding a single-element int64_list."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def create_records(df, y, query_id, labels, name):
    """Write ranking data to ``./{name}.record`` as ExampleListWithContext protos.

    Rows are consumed in order, and consecutive rows that share a query id are
    collected into one ``ExampleListWithContext`` message. Rows of the same
    query therefore have to be adjacent in the input.

    Args:
        df: pandas DataFrame of feature values, one row per document.
        y: iterable of relevance labels aligned with the rows of ``df``; each
            value is converted with ``int()``.
        query_id: iterable of query ids aligned with the rows of ``df``.
        labels: iterable of feature names aligned with the columns of ``df``;
            each value is stored under the key ``f"f{feat_name}"``.
        name: base name of the output file (``./{name}.record``).

    Returns:
        None. The only effect is the record file written to disk; nothing is
        written into it when the input has no rows.
    """
    current_list = None
    last_query_id = None

    # The context manager closes (and therefore flushes) the writer even if
    # serialization fails part-way through. No compression options are passed,
    # so the file is written uncompressed, as before.
    with tf.io.TFRecordWriter(f"./{name}.record") as writer:
        # df.to_numpy() yields the same per-row value arrays as iterrows()
        # without building a Series for every row.
        rows = zip(query_id, y, df.to_numpy())
        for qid, relevance, feature_values in tqdm(rows, total=df.shape[0]):
            example_dict = {
                f'f{feat_name}': _float_feature(feat_val)
                for feat_name, feat_val in zip(labels, feature_values)
            }
            example_dict['relevance_label'] = _int64_feature(int(relevance))
            example_ = tf.train.Example(features=tf.train.Features(feature=example_dict))

            # A change of query id closes the current list and opens a new one.
            if current_list is None or qid != last_query_id:
                if current_list is not None:
                    writer.write(current_list.SerializeToString())
                current_list = input_pb2.ExampleListWithContext()
                last_query_id = qid
            current_list.examples.append(example_)

        # Flush the final query's list; skipped entirely for empty input.
        if current_list is not None:
            writer.write(current_list.SerializeToString())

In [None]:
# Column names of the validation frame; later cells use them as feature names.
labels = df_vali.columns

In [None]:
# df_vali is deliberately kept alive here: the later
# create_records(df_vali, ..., "vali") cell still needs it, and deleting it at
# this point made that cell fail with a NameError.

In [None]:
# Write the training split to ./train.record.
create_records(
    df=df_train,
    y=y_train,
    query_id=query_id_train,
    labels=labels,
    name="train",
)

In [None]:
# Write the test split to ./test.record.
create_records(
    df=df_test,
    y=y_test,
    query_id=query_id_test,
    labels=labels,
    name="test",
)

In [None]:
# Write the validation split to ./vali.record.
create_records(
    df=df_vali,
    y=y_vali,
    query_id=query_id_vali,
    labels=labels,
    name="vali",
)

In [None]:
# No context (query-level) features are declared.
context_spec_ = {}

# One float32 feature of shape (1,) per name in labels, keyed "f<name>";
# the parsing default is 0.0.
example_spec_ = {
    f'f{feat}': tf.io.FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=0.0)
    for feat in labels
}

# (feature name, parsing spec) pair for the label; the parsing default is -1.
label_spec_ = (
    'relevance_label',
    tf.io.FixedLenFeature(shape=(1,), dtype=tf.int64, default_value=-1),
)

# Define Input Creator
input_creator = tfr.keras.model.FeatureSpecInputCreator(
    context_feature_spec={},
    example_feature_spec=example_spec_,
)

In [None]:
def _signed_log1p(t):
    """Return sign(t) * log1p(t * sign(t)), i.e. log1p of the magnitude with the sign restored."""
    return tf.math.log1p(t * tf.sign(t)) * tf.sign(t)


# Every example feature is mapped to the same transform.
preprocess_spec = {name: _signed_log1p for name in example_spec_.keys()}

In [None]:
# Preprocessor built from preprocess_spec (feature name -> transform function).
preprocessor=tfr.keras.model.PreprocessorWithSpec(preprocess_spec)
# Alternative kept for reference: construct the preprocessor without a spec.
# preprocessor=tfr.keras.model.PreprocessorWithSpec()

In [None]:
# Feed-forward scorer: three hidden layers and a single output unit, with
# batch normalization and dropout enabled.
scorer = tfr.keras.model.DNNScorer(
    hidden_layer_dims=[1024, 512, 256],
    output_units=1,
    activation=tf.nn.relu,
    use_batch_norm=True,
    batch_norm_moment=0.999,
    dropout=0.5,
)

In [None]:
# Combine input creation, preprocessing and scoring into one model builder.
model_builder = tfr.keras.model.ModelBuilder(
    input_creator=input_creator,
    preprocessor=preprocessor,
    scorer=scorer,
    # TODO: clarify what this is; the dataset builder is given the same name.
    mask_feature_name="example_list_mask",
    name="model_builder",
)

In [None]:
# Input files, batch sizes and list size handed to the dataset builder.
dataset_hparams = tfr.keras.pipeline.DatasetHparams(
    train_input_pattern="./train.record",
    valid_input_pattern="./vali.record",
    train_batch_size=128,
    valid_batch_size=128,
    list_size=200,
    # Reads the dataset_reader attribute straight off the DatasetHparams class.
    dataset_reader=tfr.keras.pipeline.DatasetHparams.dataset_reader,
)

In [None]:
    # Dataset builder wired to the example/label specs and dataset hparams;
    # the first positional argument is an empty spec dict.
    dataset_builder = tfr.keras.pipeline.SimpleDatasetBuilder(
        {},
        example_spec_,
        mask_feature_name="example_list_mask",
        label_spec=label_spec_,
        hparams=dataset_hparams,
        sample_weight_spec=None,
    )

In [None]:
    # The model_dir argument is the path where TFR will save the model and
    # training data, including Tensorboard files for training visualization.
    pipeline_hparams = tfr.keras.pipeline.PipelineHparams(
        model_dir="./model/",
        num_epochs=2,
        steps_per_epoch=5000,
        validation_steps=125,
        learning_rate=0.05,
        loss='approx_ndcg_loss',
        optimizer='adagrad',
    )

In [None]:

# Tie the model builder, dataset builder and pipeline hparams together.
ranking_pipeline = tfr.keras.pipeline.SimplePipeline(
    model_builder=model_builder,
    dataset_builder=dataset_builder,
    hparams=pipeline_hparams,
)

In [None]:
# Run the pipeline's training with validation; verbose=1 is passed through.
ranking_pipeline.train_and_validate(verbose=1)

In [None]:
# Display the pipeline object (the bare trailing '.' here was a SyntaxError).
ranking_pipeline

In [None]:
# Load the saved model from ./model/export/latest_model, a path under the
# pipeline's model_dir ("./model/"); presumably written by train_and_validate
# -- confirm.
loaded_model = tf.keras.models.load_model("./model/export/latest_model")

In [None]:
# Display the loaded model object.
loaded_model

In [None]:
# NOTE(review): despite the name, this comes from build_valid_dataset(), i.e.
# the validation input configured in the dataset builder's hparams.
ds_test = dataset_builder.build_valid_dataset()

In [None]:
# Pull one (x, y) batch for inspection. next() fails loudly on an empty
# dataset instead of silently leaving x and y undefined.
x, y = next(iter(ds_test.take(1)))

In [None]:
# Display the first element of the batch taken from ds_test.
x

In [None]:
# Score the batch x with the loaded model.
scores = loaded_model.predict(x)

In [None]:
# Display the raw predictions.
scores

In [None]:
# Smallest value over all of scores.
min_score = tf.reduce_min(scores)

In [None]:
# Display the minimum score.
min_score

In [None]:
# Where y is negative, replace the score with a value just below the minimum;
# elsewhere keep the predicted score. Presumably negative labels mark padded
# list entries (the label spec defaults to -1) -- confirm.
scores = tf.where(tf.greater_equal(y, 0.), scores, min_score - 1e-5)

In [None]:
# Display the adjusted scores.
scores

In [None]:
# Prep data
# Load the "train" split of the tfds dataset "mslr_web/10k_fold1".
ds = tfds.load("mslr_web/10k_fold1", split="train")

In [None]:
# Display the dataset object.
ds

In [None]:
# Peek at a single example without reassigning ds, so the full training split
# is still available to the cells that follow.
for example in ds.take(1):
    print(list(example.keys()))
    print(example)


In [None]:
def _with_list_mask(feature_map):
    """Return the feature dict plus an all-True boolean "_mask" shaped like the label tensor."""
    return {
        "_mask": tf.ones_like(feature_map["label"], dtype=tf.bool),
        **feature_map,
    }


def _pop_labels(feature_map):
    """Split a batch into (features, label); the label is -1 wherever "_mask" is False."""
    mask = feature_map["_mask"]
    # pop() removes "label" from the dict that is returned as the features.
    label = tf.where(mask, feature_map.pop("label"), -1.)
    return feature_map, label


ds = ds.map(_with_list_mask)
ds = ds.shuffle(buffer_size=1000).padded_batch(batch_size=32)
ds = ds.map(_pop_labels)

In [None]:
# Peek at one batch without reassigning ds; overwriting ds with ds.take(1) here
# would leave only a single batch for model.fit.
for example in ds.take(1):
    print(example)


In [None]:
# Create a model
# One Keras input per entry in the features part of the dataset's element
# spec, skipping the "_mask" entry.
feature_names = [key for key in ds.element_spec[0] if key != "_mask"]
inputs = {
    feature_name: tf.keras.Input(shape=(None, 1), dtype=tf.float32, name=feature_name)
    for feature_name in feature_names
}


In [None]:
# Display the dict of Keras input tensors.
inputs

In [None]:
# Batch-normalize each input separately, then join them along the last axis.
norm_inputs = [
    tf.keras.layers.BatchNormalization()(feature_input)
    for feature_input in inputs.values()
]
x = tf.concat(norm_inputs, axis=-1)

# Three dense layers of decreasing width, each followed by a ReLU activation.
for layer_width in (128, 64, 32):
    hidden = tf.keras.layers.Dense(units=layer_width)(x)
    x = tf.keras.layers.Activation(activation=tf.nn.relu)(hidden)

# Single-unit output layer; the trailing size-1 axis is squeezed away.
logits = tf.keras.layers.Dense(units=1)(x)
scores = tf.squeeze(logits, axis=-1)

In [None]:
# Assemble the model, compile it with a softmax ranking loss and an NDCG
# metric (topn=5), then train for three epochs.
model = tf.keras.Model(inputs=inputs, outputs=scores)
ndcg_at_5 = tfr.keras.metrics.get("ndcg", topn=5, name="NDCG@5")
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss=tfr.keras.losses.SoftmaxLoss(),
    metrics=ndcg_at_5,
)
model.fit(ds, epochs=3)