In [1]:
import os
import glob
import torch 
import numpy as np
import matplotlib.pyplot as plt
import types

from transformers4rec import torch as tr
from transformers4rec.torch.ranking_metric import NDCGAt, AvgPrecisionAt, RecallAt
from transformers4rec.torch.utils.examples_utils import wipe_memory
from transformers4rec.config.trainer import T4RecTrainingArguments
from transformers4rec.torch import Trainer
from merlin_standard_lib import Schema

from mlflow_databricks import MLFlowWrapper
from transformers.integrations import MLflowCallback, TrainerCallback

import pandas as pd

In [2]:
def get_test_dataloader(self, test_path=None):
        from transformers4rec.torch.utils.data_utils import T4RecDataLoader
        """
        Set the test dataloader to use by Trainer.
        It supports user defined data-loader set as an attribute in the constructor.
        When the attribute is None, The data-loader is defined using eval_dataset
        and the `data_loader_engine` specified in Training Arguments.
        
        JN: needed to hotfix this into the trainer, the original one is missing this method and it breaks the predict function. 
        """

        assert self.schema is not None, "schema is required to generate Test Dataloader"
        return T4RecDataLoader.parse(self.args.data_loader_engine).from_schema(
            self.schema,
            test_path,
            self.args.per_device_eval_batch_size,
            max_sequence_length=self.args.max_sequence_length,
            drop_last=False,
            shuffle=False,
            shuffle_buffer_size=self.args.shuffle_buffer_size,
        )

In [3]:
SCHEMA_PATH = "sample_schema2.pb"
OVERWRITE_SCHEMA = False
mapping_path="maps"
category_cols = ["user_session","category_code","brand","user_id","product_id"]
group_cols = ["category_code","brand","product_id","rel_time"]

In [4]:
schema = Schema().from_proto_text(SCHEMA_PATH)

In [5]:
inputs = tr.TabularSequenceFeatures.from_schema(
        schema,
        max_sequence_length=20,
        continuous_projection=64,
        d_output=128,
        masking="mlm",
        embedding_dims={"category_code":32,"product_id":64, "brand":32}
)

In [7]:

# Define XLNetConfig class and set default parameters for HF XLNet config  
transformer_config = tr.XLNetConfig.build(
    d_model=64, n_head=4, n_layer=2, total_seq_length=20
)
# Define the model block including: inputs, masking, projection and transformer block.
body = tr.SequentialBlock(
    inputs, tr.MLPBlock([64]), tr.TransformerBlock(transformer_config, masking=inputs.masking)
)

# Defines the evaluation top-N metrics and the cut-offs
metrics = [NDCGAt(top_ks=[20, 40], labels_onehot=True),  
           RecallAt(top_ks=[20, 40], labels_onehot=True)]

# Define a head related to next item prediction task 
head = tr.Head(
    body,
    tr.NextItemPredictionTask(weight_tying=True, hf_format=True, 
                              metrics=metrics),
    inputs=inputs,
)

# Get the end-to-end Model class 
model = tr.Model(head)

In [8]:
# Set hyperparameters for training 
train_args = T4RecTrainingArguments(data_loader_engine='pyarrow', 
                                    dataloader_drop_last = True,
                                    report_to = ["mlflow"], 
                                    gradient_accumulation_steps = 1,
                                    per_device_train_batch_size = 256, 
                                    per_device_eval_batch_size = 32,
                                    output_dir = "./tmp", 
                                    learning_rate=0.0005,
                                    lr_scheduler_type='cosine', 
                                    learning_rate_num_cosine_cycles_by_epoch=1.5,
                                    num_train_epochs=2,
                                    max_sequence_length=40, 
                                    no_cuda=False,
                                    compute_metrics_each_n_steps=1000,
                                    # validate_every=100, code docstr makes it seem like this should exist but it doesnt....
                                    save_steps=2000)

In [9]:
mlf = MLFlowWrapper("scratch",server="databricks",run_name="transformer4rec")
mlf.pre_trainer(model_type="huggingface")

Getting into vault authentication --->
VAULT_ROLE_ENV :=dev
VAULT_ROLE :=pdo-digpersistent
Vault URL :https://vault.vaultenterprisedev.aws.gartner.com
[INFO|mlflow_wrapper.py:93] 2022-01-13 15:51:40,598 >> Starting up mlflow


In [11]:
trainer = Trainer(
    model=model,
    schema=schema,
    args=train_args,
    train_dataset_or_path="sample_train.parquet",
    eval_dataset_or_path="sample_val.parquet",
    compute_metrics=True,
    #callbacks=[MLflowCallback],
    #incremental_logging=True
)

In [12]:
#trainer works mainly like huggingface, but this method is missing for the prediction step
trainer.get_test_dataloader =  types.MethodType( get_test_dataloader, trainer )

In [13]:
trainer.train()

***** Running training *****
  Num examples = 500224
  Num Epochs = 2
  Instantaneous batch size per device = 256
  Total train batch size (w. parallel, distributed & accumulation) = 256
  Gradient Accumulation steps = 1
  Total optimization steps = 3908


Step,Training Loss
500,10.1098
1000,9.5353
1500,9.2868
2000,9.2085
2500,9.744
3000,9.359
3500,9.1827


Saving model checkpoint to ./tmp/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=3908, training_loss=9.439782539922115, metrics={'train_runtime': 461.9858, 'train_samples_per_second': 0.004, 'train_steps_per_second': 8.459, 'total_flos': 0.0, 'train_loss': 9.439782539922115})

In [14]:
eval_metrics = trainer.evaluate(metric_key_prefix='eval')



	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /opt/conda/conda-bld/pytorch_1603729009598/work/torch/csrc/utils/python_arg_parser.cpp:882.)
  rel_indices = (num_relevant != 0).nonzero()


In [15]:
eval_metrics

{'eval/next-item/ndcg_at_20': 0.02318193018436432,
 'eval/next-item/ndcg_at_40': 0.02807333879172802,
 'eval/next-item/recall_at_20': 0.05078125,
 'eval/next-item/recall_at_40': 0.07421875,
 'eval/loss': 9.721333503723145,
 'eval_runtime': 287.2914,
 'eval_samples_per_second': 1741.841,
 'eval_steps_per_second': 54.433}

In [46]:
mapping_files = [os.path.join(mapping_path,x) for x in os.listdir(mapping_path)]


In [47]:
mlf.post_trainer(model=trainer.model,model_type='huggingface',metrics=eval_metrics,
                artifacts=[SCHEMA_PATH]+mapping_files)

[INFO|mlflow_wrapper.py:154] 2021-12-20 21:04:31,692 >> MLFLOW: begining to log model
[INFO|mlflow_wrapper.py:171] 2021-12-20 21:04:42,685 >> MLFLOW: begining to log metrics
[INFO|mlflow_wrapper.py:180] 2021-12-20 21:04:43,982 >> MLFLOW: begining to log artifacts
[INFO|mlflow_wrapper.py:183] 2021-12-20 21:04:43,983 >> MLFLOW: Logging file sample_schema2.pb
[INFO|mlflow_wrapper.py:183] 2021-12-20 21:04:44,350 >> MLFLOW: Logging file maps/user_session.csv
[INFO|mlflow_wrapper.py:183] 2021-12-20 21:05:16,126 >> MLFLOW: Logging file maps/category_code.csv
[INFO|mlflow_wrapper.py:183] 2021-12-20 21:05:47,108 >> MLFLOW: Logging file maps/brand.csv
[INFO|mlflow_wrapper.py:183] 2021-12-20 21:06:15,178 >> MLFLOW: Logging file maps/user_id.csv
[INFO|mlflow_wrapper.py:183] 2021-12-20 21:06:47,452 >> MLFLOW: Logging file maps/product_id.csv
[INFO|mlflow_wrapper.py:193] 2021-12-20 21:07:18,230 >> Setting experiment permissions for: /Users/7fa96ba7-8869-429e-b213-e2bd1c6082e3/scratch


In [48]:
import mlflow
mlflow.end_run()

# Predictions
Predict outputs  the top n items and scores. The n is set in the training args. 

In [16]:
pred = trainer.predict("sample_train.parquet")

In [17]:
#predicted items
pred.predictions[0]

array([[ 89,  55,  72, ...,  19, 105, 100],
       [ 89,  55,  72, ...,  19, 105, 100],
       [ 89,  55, 114, ...,  19, 105, 100],
       ...,
       [ 89,  55,  72, ...,  19, 105, 100],
       [ 89,  55,  72, ...,  19, 105, 100],
       [ 89,  55,  72, ...,  19, 105, 100]])

In [20]:
pred.predictions[0][0]

array([ 89,  55,  72, 114,  43,  51,  69,  19, 105, 100])

In [29]:
pred.predictions[0][5]

array([ 55, 114,  89, 100, 105, 532,  75,  51,  43,  72])

In [23]:
target = pd.read_parquet('sample_val.parquet')

In [24]:
target.head()

Unnamed: 0,user_session,event_type,product_id,category_id,category_code,brand,price,user_id,event_time_ts,prod_first_event_time_ts,rel_time,len
0,1,"[1, 1]","[35597, 17799]","[2053013553559896355, 2053013553559896355]","[2, 2]","[678, 537]","[69.89, 72.59]","[513605798, 513605798]","[1570452448, 1570452484]","[1569914121, 1569902730]","[538327.0, 549754.0]",2
1,6,"[1, 1]","[16211, 1245]","[2172371436436455782, 2172371436436455782]","[35, 35]","[11, 11]","[477.40999999999997, 238.48000000000002]","[491844619, 491844619]","[1570337853, 1570337872]","[1569902010, 1569896513]","[435843.0, 441359.0]",2
2,17,"[1, 1]","[734, 23602]","[2053013554415534427, 2053013554415534427]","[11, 11]","[41, 41]","[486.24, 447.37]","[513068111, 513068111]","[1570140667, 1570140681]","[1569896288, 1569905833]","[244379.0, 234848.0]",2
3,22,"[1, 1]","[14844, 11442]","[2053013553945772349, 2053013553945772349]","[41, 41]","[144, 144]","[49.54, 25.74]","[522539566, 522539566]","[1570019649, 1570019663]","[1569901408, 1569900026]","[118241.0, 119637.0]",2
4,26,"[1, 1]","[43759, 89957]","[2053013563651392361, 2053013563651392361]","[2, 2]","[3, 33]","[359.68, 287.52]","[512805595, 512805595]","[1570295859, 1570295926]","[1569921644, 1570090595]","[374215.0, 205331.0]",2


In [53]:
#predicted scores
pred.predictions[1]

array([[-6.3711824, -6.508836 , -6.549979 , ..., -6.9555006, -7.0562887,
        -7.0823975],
       [-6.3711824, -6.508836 , -6.549979 , ..., -6.9555006, -7.0562887,
        -7.0823975],
       [-6.3711824, -6.508836 , -6.549979 , ..., -6.9555006, -7.0562887,
        -7.0823975],
       ...,
       [-6.371183 , -6.5088367, -6.54998  , ..., -6.9555016, -7.0562897,
        -7.082398 ],
       [-6.371183 , -6.5088367, -6.54998  , ..., -6.9555016, -7.0562897,
        -7.082398 ],
       [-6.371183 , -6.5088367, -6.54998  , ..., -6.9555016, -7.0562897,
        -7.082398 ]], dtype=float32)

In [54]:
pred.predictions[0].shape

(95754, 10)

In [55]:
df_test = pd.read_parquet("sample_test.parquet")

In [56]:
df_test.shape

(95754, 6)

In [58]:
df_test.head()

Unnamed: 0,user_id,category_code,brand,product_id,rel_time,item_count
0,861784,[17],[87],[1220],[277043.0],1
1,861785,[1],[9],[17365],[175049.0],1
2,861786,[66],[574],[5919],[368361.0],1
3,861787,[4],[16],[4258],[367198.0],1
4,861788,[4],[149],[2585],[396595.0],1


# Scratch and stuff in previous notebook

In [29]:
OUTPUT_DIR = "."#os.environ.get("OUTPUT_DIR", "/workspace/data/sessions_by_day")

In [16]:
start_time_window_index = 1
final_time_window_index = 7
#Iterating over days of one week
for time_index in range(start_time_window_index, final_time_window_index):
    # Set data 
    time_index_train = time_index
    time_index_eval = time_index + 1
    train_paths = glob.glob(os.path.join(OUTPUT_DIR, f"{time_index_train}/train.parquet"))
    eval_paths = glob.glob(os.path.join(OUTPUT_DIR, f"{time_index_eval}/valid.parquet"))
    print(train_paths)
    
    # Train on day related to time_index 
    print('*'*20)
    print("Launch training for day %s are:" %time_index)
    print('*'*20 + '\n')
    trainer.train_dataset_or_path = train_paths
    trainer.reset_lr_scheduler()
    trainer.train()
    trainer.state.global_step +=1
    print('finished')
    
    # Evaluate on the following day
    trainer.eval_dataset_or_path = eval_paths
    train_metrics = trainer.evaluate(metric_key_prefix='eval')
    print('*'*20)
    print("Eval results for day %s are:\t" %time_index_eval)
    print('\n' + '*'*20 + '\n')
    for key in sorted(train_metrics.keys()):
        print(" %s = %s" % (key, str(train_metrics[key]))) 
    wipe_memory()

['/workspace/data/sessions_by_day/1/train.parquet']
********************
Launch training for day 1 are:
********************



***** Running training *****
  Num examples = 768
  Num Epochs = 5
  Instantaneous batch size per device = 256
  Total train batch size (w. parallel, distributed & accumulation) = 256
  Gradient Accumulation steps = 1
  Total optimization steps = 15


Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)




finished


********************
Eval results for day 2 are:	

********************

 eval/loss = 10.3350191116333
 eval/next-item/ndcg_at_20 = 0.011259923689067364
 eval/next-item/ndcg_at_40 = 0.011259923689067364
 eval/next-item/recall_at_20 = 0.0416666679084301
 eval/next-item/recall_at_40 = 0.0416666679084301
 eval_runtime = 0.1209
 eval_samples_per_second = 794.045
 eval_steps_per_second = 24.814


***** Running training *****
  Num examples = 768
  Num Epochs = 5
  Instantaneous batch size per device = 256
  Total train batch size (w. parallel, distributed & accumulation) = 256
  Gradient Accumulation steps = 1
  Total optimization steps = 15


['/workspace/data/sessions_by_day/2/train.parquet']
********************
Launch training for day 2 are:
********************



Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 3 are:	

********************

 eval/loss = 9.907649040222168
 eval/next-item/ndcg_at_20 = 0.07261061668395996
 eval/next-item/ndcg_at_40 = 0.09025609493255615
 eval/next-item/recall_at_20 = 0.21875
 eval/next-item/recall_at_40 = 0.3020833432674408
 eval_runtime = 0.1604
 eval_samples_per_second = 598.353
 eval_steps_per_second = 18.699


***** Running training *****
  Num examples = 768
  Num Epochs = 5
  Instantaneous batch size per device = 256
  Total train batch size (w. parallel, distributed & accumulation) = 256
  Gradient Accumulation steps = 1
  Total optimization steps = 15


['/workspace/data/sessions_by_day/3/train.parquet']
********************
Launch training for day 3 are:
********************



Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 4 are:	

********************

 eval/loss = 9.442419052124023
 eval/next-item/ndcg_at_20 = 0.10495926439762115
 eval/next-item/ndcg_at_40 = 0.13046541810035706
 eval/next-item/recall_at_20 = 0.3020833432674408
 eval/next-item/recall_at_40 = 0.4270833432674408
 eval_runtime = 0.1071
 eval_samples_per_second = 896.554
 eval_steps_per_second = 28.017


***** Running training *****
  Num examples = 768
  Num Epochs = 5
  Instantaneous batch size per device = 256
  Total train batch size (w. parallel, distributed & accumulation) = 256
  Gradient Accumulation steps = 1
  Total optimization steps = 15


['/workspace/data/sessions_by_day/4/train.parquet']
********************
Launch training for day 4 are:
********************



Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 5 are:	

********************

 eval/loss = 8.860241889953613
 eval/next-item/ndcg_at_20 = 0.11112610995769501
 eval/next-item/ndcg_at_40 = 0.1652408242225647
 eval/next-item/recall_at_20 = 0.3333333432674408
 eval/next-item/recall_at_40 = 0.59375
 eval_runtime = 0.1023
 eval_samples_per_second = 938.1
 eval_steps_per_second = 29.316


***** Running training *****
  Num examples = 768
  Num Epochs = 5
  Instantaneous batch size per device = 256
  Total train batch size (w. parallel, distributed & accumulation) = 256
  Gradient Accumulation steps = 1
  Total optimization steps = 15


['/workspace/data/sessions_by_day/5/train.parquet']
********************
Launch training for day 5 are:
********************



Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 6 are:	

********************

 eval/loss = 8.344633102416992
 eval/next-item/ndcg_at_20 = 0.10326863825321198
 eval/next-item/ndcg_at_40 = 0.17447516322135925
 eval/next-item/recall_at_20 = 0.2395833432674408
 eval/next-item/recall_at_40 = 0.59375
 eval_runtime = 0.1037
 eval_samples_per_second = 925.533
 eval_steps_per_second = 28.923


***** Running training *****
  Num examples = 768
  Num Epochs = 5
  Instantaneous batch size per device = 256
  Total train batch size (w. parallel, distributed & accumulation) = 256
  Gradient Accumulation steps = 1
  Total optimization steps = 15


['/workspace/data/sessions_by_day/6/train.parquet']
********************
Launch training for day 6 are:
********************



Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)




finished
********************
Eval results for day 7 are:	

********************

 eval/loss = 7.80819845199585
 eval/next-item/ndcg_at_20 = 0.15623269975185394
 eval/next-item/ndcg_at_40 = 0.1892910897731781
 eval/next-item/recall_at_20 = 0.3854166865348816
 eval/next-item/recall_at_40 = 0.5520833730697632
 eval_runtime = 0.101
 eval_samples_per_second = 950.741
 eval_steps_per_second = 29.711


### Saves the model

In [17]:
trainer._save_model_and_checkpoint(save_model_class=True)

Saving model checkpoint to ./tmp/checkpoint-16
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


### Reloads the model

In [18]:
trainer.load_model_trainer_states_from_checkpoint('./tmp/checkpoint-%s'%trainer.state.global_step)

### Re-compute eval metrics of validation data

In [19]:
eval_data_paths = glob.glob(os.path.join(OUTPUT_DIR, f"{time_index_eval}/valid.parquet"))

In [20]:
# set new data from day 7
eval_metrics = trainer.evaluate(eval_dataset=eval_data_paths, metric_key_prefix='eval')
for key in sorted(eval_metrics.keys()):
    print("  %s = %s" % (key, str(eval_metrics[key])))

  eval/loss = 7.80819845199585
  eval/next-item/ndcg_at_20 = 0.15623269975185394
  eval/next-item/ndcg_at_40 = 0.1892910897731781
  eval/next-item/recall_at_20 = 0.3854166865348816
  eval/next-item/recall_at_40 = 0.5520833730697632
  eval_runtime = 0.1228
  eval_samples_per_second = 781.569
  eval_steps_per_second = 24.424


That's it!  
You have just trained your session-based recommendation model using Transformers4Rec.

Tip: We can easily log and visualize model training and evaluation on [Weights & Biases (W&B)](https://wandb.ai/home), [Tensorboard](https://www.tensorflow.org/tensorboard) and [NVIDIA DLLogger](https://github.com/NVIDIA/dllogger). By default, the HuggingFace transformers `Trainer` (which we extend) uses Weights & Biases (W&B) to log training and evaluation metrics, which provides nice results visualization and comparison between different runs.