# Example of implementing LLMRank in our framework to run evaluation

This notebook is intended to be run in Google Colaboratory: it mounts Google Drive and loads the repository from there.

In [1]:
# Mount Google Drive at /content/drive; the repository path used by the
# following cells lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys
import os

# Location of the llm-for-rec checkout on the mounted Google Drive.
repo_path = '/content/drive/MyDrive/Colab Notebooks/thesis_work/llm-for-rec'

# Put the checkout on the import path so the project's modules can be imported.
# The membership check keeps this cell idempotent: re-running it does not pile
# up duplicate sys.path entries.
if repo_path not in sys.path:
    sys.path.append(repo_path)

## Install requirements

In [3]:
!pip install -q -r '{repo_path}/requirements/requirements.txt'

## Add config

In [4]:
import os

# Settings handed to RecBole's Config (as `config_dict`) when the dataset and
# the pipeline are built further down.
# NOTE(review): the exact meaning of most keys is defined by RecBole / llm4rec
# and is not visible in this notebook — confirm against their documentation.
config_dict = {
    "csv_args": {"delimiter": "\t"},
    "source_column": "item_id:token",
    # k must be >= max(topk); the next cell asserts this.
    "search_kwargs": {"k": 20},
    # Datasets are read from the `datasets` folder inside the repository checkout.
    "data_path": os.path.join(repo_path, "datasets"),
    "load_col": {
        "inter": ["user_id", "item_id", "rating", "timestamp"],
        "item": ["item_id", "movie_title"],
    },
    "title_col":"movie_title",
    # These three fields appear in the item descriptions of the LLM prompt
    # shown in the debug output of the single-user test cell.
    "text_col": ["movie_title", "release_year", "class"],
    # Also used by the single-user test cell to cap the history length.
    "MAX_ITEM_LIST_LENGTH": 10,
    "eval_args": {"split": {"LS": "valid_and_test"}, "order": "TO", "mode": "full"},
    "repeatable": True,
    "loss_type": "CE",
    "train_batch_size": 100,
    "eval_batch_size": 8,
    "valid_metric": "NDCG@10",
    "metrics": ["Recall", "NDCG"],
    # Cutoffs at which the metrics above are reported — presumably Recall@k / NDCG@k; confirm.
    "topk": [1, 5, 20],
    "train_neg_sample_args": None,
}

In [5]:
# Sanity check on the configuration: `search_kwargs["k"]` (presumably the number
# of candidates produced for ranking — confirm) must not be smaller than the
# largest evaluation cutoff in `topk`. The message reports both values so a
# failing configuration can be fixed without re-inspecting the dict.
assert config_dict['search_kwargs']['k'] >= max(config_dict['topk']), (
    f"search_kwargs['k']={config_dict['search_kwargs']['k']} must be >= "
    f"max(topk)={max(config_dict['topk'])}"
)

## Get dataset and config

In [6]:
# preprocessing for ml-100k
def ml100k_preprocess(text: str) -> str:
    """Move a trailing English article back to the front of a title.

    Titles may be stored with the leading article moved to the end after a
    comma (e.g. ``"Empire Strikes Back, The"``). This restores the natural
    word order. The articles ``The``, ``An`` and ``A`` are handled; any other
    text is returned unchanged.

    Args:
        text: Title string to normalise.

    Returns:
        The title with a trailing ``", The"``, ``", An"`` or ``", A"`` moved
        to the front, or ``text`` itself if it has no such suffix.
    """
    for article in ("The", "An", "A"):
        suffix = f", {article}"
        if text.endswith(suffix):
            # Strip the ", <article>" suffix and prepend the article instead.
            return f"{article} {text[:-len(suffix)]}"
    return text

In [25]:
from llm4rec.pipelines import RecBolePipelineRecommender
from llm4rec.dataset import RecboleSeqDataset
from llm4rec.evaluation.trainer import PipelineTrainer
from recbole.data.utils import data_preparation
from recbole.config import Config
import os
import torch

# Which dataset to load and which model class the RecBole config is built for.
dataset_name = 'ml-100k'
model_cls = RecBolePipelineRecommender

# Build the RecBole configuration from the settings dict defined above.
config = Config(
    model=model_cls,
    dataset=dataset_name,
    config_dict=config_dict,
)

# Sequential dataset whose item texts are passed through the ml-100k title fix.
dataset = RecboleSeqDataset(config, preprocess_text_fn=ml100k_preprocess)

# data_preparation yields three splits; the middle one is not used here.
train_data, _, eval_data = data_preparation(config, dataset)



## Initialize models

Create an instance of the traditional sequential recommender model SASRec, using the RecBole implementation.

In [8]:
from recbole.model.sequential_recommender import SASRec
from llm4rec.tasks import SequentialRecBoleModelWrapper

# First pipeline stage: RecBole's SASRec wrapped as a pipeline task and
# initialised from a pretrained checkpoint.
# NOTE(review): the checkpoint file is named "SASRec-ml-1m.pth" while the
# dataset configured in this notebook is ml-100k — confirm this is the
# intended checkpoint.
# NOTE(review): top_k=20 equals config_dict["search_kwargs"]["k"]; presumably
# the two should be kept in sync — confirm.
sas_model = SequentialRecBoleModelWrapper(
    SASRec,
    config.final_config_dict,
    train_data.dataset,
    n_layers=2,
    n_heads=2,
    embedding_size=64,
    hidden_size=64,
    inner_size=256,
    hidden_dropout_prob=0.5,
    attn_dropout_prob=0.5,
    hidden_act='gelu',
    layer_norm_eps=1e-12,
    initializer_range=0.02,
    loss_type='CE',
    pretrained_file=os.path.join(repo_path, 'examples', 'SASRec-ml-1m.pth'),
    top_k=20,
)

Create an instance of the LLM and of the LLM ranker model.

In [9]:
from langchain import HuggingFaceHub
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from llm4rec.tasks import RankerRecommender
import os

# Load environment variables from the repository's api_keys.env file.
# NOTE(review): the variable name mentions "openai", but the LLM created below
# is a ChatGroq model — presumably this file supplies the Groq API key; confirm.
path_to_openai_env = os.path.join(repo_path, "api_keys.env")
load_dotenv(path_to_openai_env)

# LLM used by the ranker; temperature is fixed at 0.
llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
# Alternative LLM backend, kept for reference:
#llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2", model_kwargs={"temperature":0.1, "max_length":512})
# The ranker receives the LLM plus the dataset's `item_token2text` (judging by
# its name, a mapping from item token to its text description — confirm).
ranker = RankerRecommender(llm=llm, item2text=dataset.item_token2text)

Combine the steps into a pipeline

In [14]:
from llm4rec.pipelines import RecBolePipelineRecommender

# Assemble the recommender from the two tasks created above: the SASRec wrapper
# followed by the LLM ranker (presumably executed in list order — confirm).
# Verbose output starts disabled.
model = RecBolePipelineRecommender(
    config=config,
    dataset=dataset,
    tasks=[sas_model, ranker],
    verbose=False,
)

## Test on one user data

Enable debug mode for testing

In [11]:
from langchain.globals import set_debug

# Turn on LangChain's global debug output and the pipeline's own verbose flag
# for the single-user run that follows.
set_debug(True)
model.verbose = True

In [13]:
# Smoke test: run the pipeline for the first user of the first evaluation batch
# only. Both loops are left via `break` right after that one recommendation.
# NOTE(review): `history_index`, `positive_u`, `positive_i` and `history_names`
# are assigned but never used in this cell.
for batched_data in eval_data:
    interaction, history_index, positive_u, positive_i = batched_data
    batch_size = len(interaction["user_id"])

    for inter_idx in range(batch_size):
        user_id = interaction[inter_idx]["user_id"]
        history_ids = interaction[inter_idx]["item_id_list"]
        # Use at most MAX_ITEM_LIST_LENGTH items of the user's history.
        history_length = min(
            config["MAX_ITEM_LIST_LENGTH"],
            interaction[inter_idx]["item_length"],
        )
        history_names = [eval_data.dataset.item_id2text(hist_id) for hist_id in history_ids[:history_length].tolist()]
        # Convert internal ids to tokens before calling the pipeline.
        history_item_ids = eval_data.dataset.id2token("item_id", history_ids[:history_length])
        user_token_id  = eval_data.dataset.id2token('user_id', user_id)

        reco = model.recommend(user_token=user_token_id, prev_interactions=history_item_ids)
        print(reco)
        break
    break

Task 1 outputs:  ['39', '1613', '290', '20', '668', '452', '1094', '1682', '603', '1487', '1271', '740', '164', '57', '1376', '982', '281', '225', '1057', '1100']
[32;1m[1;3m[llm/start][0m [1m[llm:ChatGroq] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: I've been interested in the following items in the past in order:\nmovie_title:The Empire Strikes Back; release_year:1980; class:Action Adventure Drama Romance Sci-Fi War,\nmovie_title:Beautiful Girls; release_year:1996; class:Drama,\nmovie_title:Mars Attacks!; release_year:1996; class:Action Comedy Sci-Fi War,\nmovie_title:Broken Arrow; release_year:1996; class:Action Thriller,\nmovie_title:Amistad; release_year:1997; class:Drama,\nmovie_title:The Long Kiss Goodnight; release_year:1996; class:Action Thriller,\nmovie_title:French Kiss; release_year:1995; class:Comedy Romance,\nmovie_title:The Maltese Falcon; release_year:1941; class:Film-Noir Mystery,\nmovie_title:Dazed and Confused; release_year:1993; class:Comedy,\n

In [None]:
# Switch the debug/verbose output back off before running the full evaluation.
set_debug(False)
model.verbose = False

## Evaluate pipeline

In [None]:
# Evaluate the whole pipeline on the evaluation split.
# NOTE: in the recorded run below this progressed at roughly 200 s per batch
# (8 of 118 batches after ~27 minutes) and was interrupted manually, so a full
# evaluation takes several hours.
trainer = PipelineTrainer(config, model)
test_result = trainer.evaluate(eval_data, show_progress=config['show_progress'])

[1;35mEvaluate   [0m:   7%|███▏                                           | 8/118 [27:18<6:15:27, 204.80s/it][0m


KeyboardInterrupt: 

In [None]:
# Display the evaluation result. `test_result` only exists if the evaluation
# cell above ran to completion.
test_result