# Example of using the explanation module

Run in Google Colaboratory

In [1]:
# Mount Google Drive so that files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import sys
import os

# Location of the llm-for-rec checkout on the mounted Google Drive.
repo_path = '/content/drive/MyDrive/thesis/llm4rec/llm-for-rec'

# Make the repository importable. The membership check keeps this cell
# idempotent: re-running it does not append duplicate entries to sys.path.
if repo_path not in sys.path:
    sys.path.append(repo_path)

Reading the Hugging Face API key

In [22]:
from dotenv import load_dotenv

# Load the variables stored in huggingface.env inside the repository.
# The call is kept as the last expression so its result is shown as the cell output.
env_file = os.path.join(repo_path, "huggingface.env")
load_dotenv(env_file)

True

## Install requirements

In [3]:
!pip install -q -r '{repo_path}/requirements/requirements.txt'

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m812.8/812.8 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m38.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m266.9/266.9 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m72.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m73.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.3/163.3 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m67.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━

## Add config

In [41]:
import os

# Settings consumed by the retrieval step: how the item file is parsed,
# the source column handed to the CSV loader, and the number of candidates.
retrieval_settings = {
    "csv_args": {"delimiter": "\t"},
    "source_column": "item_id:token",
    "search_kwargs": {"k": 20},
}

# Dataset location, the columns to load, and the columns used as item text.
dataset_settings = {
    "data_path": os.path.join(repo_path, "datasets"),
    "load_col": {
        "inter": ["user_id", "item_id", "rating", "timestamp"],
        "item": ["item_id", "movie_title"],
    },
    "text_col": ["movie_title", "release_year", "class"],
    "MAX_ITEM_LIST_LENGTH": 10,
}

# Training / evaluation options passed through to the RecBole Config.
train_eval_settings = {
    "eval_args": {"split": {"LS": "valid_and_test"}, "order": "TO", "mode": "full"},
    "repeatable": True,
    "loss_type": "CE",
    "train_batch_size": 100,
    "eval_batch_size": 8,
    "valid_metric": "NDCG@10",
    "metrics": ["Recall", "NDCG"],
    "topk": [1, 20, 50, 100, 200],
    "train_neg_sample_args": None,
    "threshold_rating": 0.5,
}

# Single flat dictionary handed to Config; the groups are merged in their
# declaration order, so the key order matches a flat literal.
config_dict = {**retrieval_settings, **dataset_settings, **train_eval_settings}

## Get dataset and config

In [42]:
from llm4rec.tasks import RecBoleRetrievalRecommender
from llm4rec.dataset import RecboleSeqDataset
from recbole.data.utils import data_preparation
from recbole.config import Config
from recbole.model.abstract_recommender import AbstractRecommender
import os
import torch

# Recommender class and dataset name used to build the RecBole configuration.
model_cls = RecBoleRetrievalRecommender
dataset_name = 'ml-100k'

config = Config(model=model_cls, dataset=dataset_name, config_dict=config_dict)

dataset = RecboleSeqDataset(config)

# Only the test split is used in this notebook. The unused splits get explicit
# names instead of `_`, because assigning to `_` shadows IPython's built-in
# "last output" variable for the rest of the session.
_train_data, _valid_data, test_data = data_preparation(config, dataset)



## Let's take the first user and show how the explanation works in their case

Get a batch and take one interaction from it

In [43]:
# Position of the interaction to inspect within the first test batch.
i = 0
batch = next(iter(test_data))
user = batch[0][i]

# Keep only the first `item_length` entries of the user's item list
# (presumably the remainder is padding — TODO confirm).
history_length = user['item_length']
history_ids = user['item_id_list'][:history_length]

# Resolve the same ids both to readable text and to item tokens.
history_names = test_data.dataset.id2text(history_ids)
history_item_ids = test_data.dataset.id2token("item_id", history_ids)

# Map each item token to its text description.
prev_interactions = dict(zip(history_item_ids, history_names))

Initializing the retrieval model

In [26]:
# Item metadata file of the chosen dataset, located inside the data directory.
items_info_path = os.path.join(config['data_path'], f'{dataset_name}.item')

# Options for the CSV loader, taken from the shared configuration.
csv_loader_args = dict(csv_args=config['csv_args'],
                       source_column=config['source_column'])

# Run the embedding model on the first GPU when CUDA is available, else on CPU.
emb_device = "cuda:0" if torch.cuda.is_available() else "cpu"

retrieval_model = RecBoleRetrievalRecommender(
    config=config,
    dataset=dataset,
    items_info_path=items_info_path,
    csv_loader_args=csv_loader_args,
    search_kwargs=config['search_kwargs'],
    search_type='similarity',
    emb_model_kwargs={"device": emb_device},
)

Getting candidates

In [32]:
# Request as many candidates as the configured search size.
num_candidates = config['search_kwargs']['k']
candidates = retrieval_model.recommend(
    user_profile="",
    prev_interactions=prev_interactions,
    top_k=num_candidates,
)

# Convert the returned item tokens to ids, then to readable text.
candidate_ids = test_data.dataset.token2id("item_id", candidates)
candidate_texts = test_data.dataset.id2text(candidate_ids)

Let's recall what were the names of the items the user interacted with

In [33]:
# Display the text of the items in the selected user's history.
history_names

array(['Empire Strikes Back, The 1980 Action Adventure Drama Romance Sci-Fi War',
       'Beautiful Girls 1996 Drama',
       'Mars Attacks! 1996 Action Comedy Sci-Fi War',
       'Broken Arrow 1996 Action Thriller', 'Amistad 1997 Drama',
       'Long Kiss Goodnight, The 1996 Action Thriller',
       'French Kiss 1995 Comedy Romance',
       'Maltese Falcon, The 1941 Film-Noir Mystery',
       'Dazed and Confused 1993 Comedy',
       'Strange Days 1995 Action Crime Sci-Fi'], dtype=object)

And now we have suggested candidate items.

In [34]:
# Display the text of the retrieved candidate items.
candidate_texts

array(['Abyss, The 1989 Action Adventure Sci-Fi Thriller',
       "E.T. the Extra-Terrestrial 1982 Children's Drama Fantasy Sci-Fi",
       '2001: A Space Odyssey 1968 Drama Mystery Sci-Fi Thriller',
       'Event Horizon 1997 Action Mystery Sci-Fi Thriller',
       'Terminator, The 1984 Action Sci-Fi Thriller',
       'Escape from New York 1981 Action Adventure Sci-Fi Thriller',
       'Arrival, The 1996 Action Sci-Fi Thriller',
       'Mimic 1997 Sci-Fi Thriller',
       'Nemesis 2: Nebula 1995 Action Sci-Fi Thriller',
       'Escape from L.A. 1996 Action Adventure Sci-Fi Thriller',
       'Alien 1979 Action Horror Sci-Fi Thriller',
       'Face/Off 1997 Action Sci-Fi Thriller',
       'Con Air 1997 Action Adventure Thriller',
       'Lost in Space 1998 Action Sci-Fi Thriller',
       'Net, The 1995 Sci-Fi Thriller',
       'Spawn 1997 Action Adventure Sci-Fi Thriller',
       'Alien 3 1992 Action Horror Sci-Fi Thriller',
       'Conspiracy Theory 1997 Action Mystery Romance Thriller

Let's take the first item from the candidates and ask the model to explain why this item may be interesting to the user

## Create an ExplainableRecommender

In [None]:
# The LLM that will write the explanations is accessed through a Hugging Face endpoint.
from langchain_community.llms import HuggingFaceEndpoint

# Generation settings forwarded to the endpoint.
generation_kwargs = dict(
    max_new_tokens=50,
    temperature=0.2,
    repetition_penalty=1.05,
)

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    task="text-generation",
    **generation_kwargs,
)

In [45]:
from llm4rec.tasks import ExplainableRecommender

# Recommender that uses the LLM to explain a candidate item.
exp_recommender = ExplainableRecommender(config=config, llm=llm)

# Explain the first retrieved candidate for the selected user.
first_candidate = candidate_texts[0]
explanation = exp_recommender.explain_recommendations(
    user_interaction=user,
    history_names=history_names,
    candidate_item=first_candidate,
)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe person has a list of liked items: Empire Strikes Back, The 1980 Action Adventure Drama Romance Sci-Fi War, Broken Arrow 1996 Action Thriller, Amistad 1997 Drama, Long Kiss Goodnight, The 1996 Action Thriller, French Kiss 1995 Comedy Romance, Maltese Falcon, The 1941 Film-Noir Mystery, Dazed and Confused 1993 Comedy, Strange Days 1995 Action Crime Sci-Fi. The person has a list of disliked items: Beautiful Girls 1996 Drama, Mars Attacks! 1996 Action Comedy Sci-Fi War. Generate ONE SENTENCE explanation why the following candidate item Abyss, The 1989 Action Adventure Sci-Fi Thriller can be liked by the user.
                     Answer should be ONLY ONE sentence in the following way: You might like the movie because...
                     .
                  [0m

[1m> Finished chain.[0m


### The explanation is the following

In [46]:
# Display the explanation text returned by the recommender.
explanation

' You might like the movie because it is an action adventure sci-fi thriller with a deep and thought-provoking storyline about the exploration of the deep sea and the discovery of strange and mysterious creatures.'