# Dataset

In [1]:
from models.csv_loader import CSVLoader
from models.products.product_registry import ProductRegistry
from models.products.product_mapping_row import ProductMappingRow
from models.products.product_row import ProductRow

# Read the product rows and the product mapping rows, then build the registry from both.
product_rows = CSVLoader(ProductRow).read()
product_mapping_rows = CSVLoader(ProductMappingRow).read()
product_registry = ProductRegistry(product_rows, product_mapping_rows)

In [2]:
from models.users.user_registry import UserRegistry
from models.users.user_mapping_row import UserMappingRow
from models.users.user_row import UserRow

# Read the user rows and the user mapping rows, then build the registry from both.
user_rows = CSVLoader(UserRow).read()
user_mapping_rows = CSVLoader(UserMappingRow).read()
user_registry = UserRegistry(user_rows, user_mapping_rows)

In [3]:
from models.ratings.rating_registry import RatingRegistry
from models.ratings.rating_row import RatingRow

# Read the rating rows; the rating registry is built on top of the user and product registries.
rating_rows = CSVLoader(RatingRow).read()
rating_registry = RatingRegistry(rating_rows, user_registry, product_registry)

# Recommendation method

In [4]:
from models.reco.reco_factory import RecoFactory
import os 
from paths import PATHS

    
# Map each user id to the recommendations parsed from that user's file.
# The id is taken from the file name: the last "_"-separated token, with
# everything from the first "." onwards stripped.
user_recos = dict()
# Sorted so the loading order does not depend on the file system.
for json_file_name in sorted(os.listdir(PATHS["recommendations"])):
    user_reco_path = os.path.join(PATHS["recommendations"], json_file_name)
    # os.listdir also returns hidden entries and sub-directories (for example
    # ".ipynb_checkpoints" or ".DS_Store"); their names carry no user id and
    # would make the int() parse below raise ValueError.
    if json_file_name.startswith(".") or not os.path.isfile(user_reco_path):
        continue
    user_id = int(json_file_name.split("_")[-1].split(".")[0])
    user_recos[user_id] = RecoFactory.from_file(user_reco_path)

In [5]:
# Display the first recommendation path loaded for user id 33.
user_recos[33][0]

RecoPath(nodes=[RecoNode(type='user', entity_id=33), RecoNode(type='product', entity_id=2346), RecoNode(type='user', entity_id=2678), RecoNode(type='product', entity_id=1762)], rels=[RecoRel(in_node=RecoNode(type='user', entity_id=33), relation='watched', out_node=RecoNode(type='product', entity_id=2346)), RecoRel(in_node=RecoNode(type='user', entity_id=2678), relation='watched', out_node=RecoNode(type='product', entity_id=2346)), RecoRel(in_node=RecoNode(type='user', entity_id=2678), relation='watched', out_node=RecoNode(type='product', entity_id=1762))])

# Explanation

In [6]:
from typing import List

from models.reco.reco_path import RecoPath


def generate_facts(path: RecoPath) -> str:
    """Render a recommendation path as a newline-terminated listing of facts.

    The listing has two sections:

    * ``% Path:`` - for every relation in ``path.rels``, the relation's own
      facts followed by the facts of the rating found for the relation's
      in-node (looked up as a user) and out-node (looked up as a product).
    * ``% Background Knowledge:`` - the facts of every ``"user"`` and
      ``"product"`` node in ``path.nodes``; nodes of any other type are skipped.

    Lookups go through the module-level ``user_registry``,
    ``product_registry`` and ``rating_registry``.

    Args:
        path: The recommendation path to describe.

    Returns:
        The concatenated facts text, one fact entry per line.
    """
    chunks = ["% Path: \n"]

    for relation in path.rels:
        chunks.append(relation.to_facts() + "\n")
        # Every relation is treated as user -> product when fetching the rating.
        rel_user = user_registry.find_by_eid(relation.in_node.entity_id)
        rel_product = product_registry.find_by_eid(relation.out_node.entity_id)
        rating = rating_registry.find_user_product_rating(rel_user.uid, rel_product.pid)
        chunks.append(rating.to_facts() + "\n")

    chunks.append("% Background Knowledge: \n")

    for node in path.nodes:
        if node.type == "user":
            entity = user_registry.find_by_eid(node.entity_id)
        elif node.type == "product":
            entity = product_registry.find_by_eid(node.entity_id)
        else:
            # Node types other than user/product contribute no background facts.
            continue
        chunks.append(entity.to_facts() + "\n")

    return "".join(chunks)
            

In [None]:
# Print the facts text generated for the first recommendation path of user id 33.
print(generate_facts(user_recos[33][0]))

In [7]:
import dotenv

# Read key/value pairs from a .env file into the process environment.
dotenv.load_dotenv()

True

In [8]:
import os

# Indexing os.environ raises KeyError when the variable is not set, so a missing token fails here.
HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

In [9]:
from huggingface_hub import login

# Log in to the Hugging Face Hub with the token read from the environment
login(token=HUGGINGFACEHUB_API_TOKEN)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /gpfs/home/acad/umons-smit/mmanderl/.cache/huggingface/token
Login successful


In [None]:
# # answering_chain.input_schema.schema()
# from langchain_core.prompts import PromptTemplate
# from langchain_core.output_parsers import StrOutputParser
# from langchain_community.llms import HuggingFaceEndpoint
# from langchain.chains import LLMChain
# from langchain_core.runnables import RunnablePassthrough, RunnableLambda
# from langchain.prompts import PromptTemplate
# import json
# # from langchain_core.llms import OpenAI

# reasoning_prompt_template = """You are an expert in graph-based recommender systems.
# You try to follow the following explanation goal:
# 1. **Transparency:** Clearly explain how the recommendation algorithm made the decision.
# 2. **Scrutability:** Allow the user to provide feedback if the recommendation seems incorrect.
# 3. **Trust:** Build user’s confidence in the recommender system.
# 4. **Effectiveness:** Help user make informed decisions about the recommendation.
# 5. **Efficiency:** Provide a quick explanation to facilitate faster decision-making.
# 6. **Persuasiveness:** Convince user of the relevance of the recommendation.
# 7. **Satisfaction:** Enhance the ease of use and overall experience of the system for the user.
# Given the background knowledge: {background_knowledge},
# explain why the movie {movie} was recommended to the user {user}.
# """

# reasoning_prompt = PromptTemplate.from_template(reasoning_prompt_template)

# reasoning_llm = HuggingFaceEndpoint(
#     repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
#     **{
#         "max_new_tokens": 1500,
#         "temperature": 0.1,
#         "top_k": 50,
#         "top_p": 0.9,
#         "repetition_penalty": 1.1,
#         "length_penalty": 0.9,
#         "frequency_penalty": 1.0,
#     },
# )

# answering_llm = HuggingFaceEndpoint(
#     repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
#     **{
#         "max_new_tokens": 200,
#         "temperature": 

In [None]:
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model

# # Load the tokenizer
# # model_name = "./Mixtral-8x7B-Instruct-v0.1"
# model_name = "./Mistral-7B-Instruct-v0.2"
# # tokenizer = AutoTokenizer.from_pretrained(model_name)

# model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
# tokenizer = AutoTokenizer.from_pretrained(model_name, device_map="auto")

# # Set up the text generation pipeline
# pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     # device=0,  # Use device 0 for initial input; the model is already distributed
#     max_new_tokens=1500,
#     temperature=0.1,
#     top_k=50,
#     top_p=0.9,
#     repetition_penalty=1.1,
# )

In [None]:
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model

# # Load the tokenizer
# # model_name = "./Mixtral-8x7B-Instruct-v0.1"
# model_name = "./Mistral-7B-Instruct-v0.2"
# tokenizer = AutoTokenizer.from_pretrained(model_name)

# # Initialize the model with empty weights to save memory during setup
# with init_empty_weights():
#     model = AutoModelForCausalLM.from_pretrained(model_name, low_cpu_mem_usage=True)

# # Infer the device map to distribute the model across 4 GPUs, each with 38GiB memory
# device_map = infer_auto_device_map(
#     model,
#     # max_memory={0: "38GiB", 1: "38GiB", 2: "38GiB", 3: "38GiB"},
#     max_memory={0: "8GiB", 1: "8GiB", 2: "8GiB", 3: "8GiB"},
#     no_split_module_classes=["GPTJBlock"]
# )

# # Offload any parts that don't fit in GPU memory to the specified directory
# offload_dir = "/gpfs/scratch/acad/trail/tmp/offload_dir"
# model = dispatch_model(model, device_map=device_map, offload_dir=offload_dir)

# # Set up the text generation pipeline
# pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     # device=0,  # Use device 0 for initial input; the model is already distributed
#     max_new_tokens=1500,
#     temperature=0.1,
#     top_k=50,
#     top_p=0.9,
#     repetition_penalty=1.1,
# )

# # answering_llm = reasoning_llm

In [None]:
# from langchain.llms.base import LLM
# from typing import Optional, List, Any
# from transformers import AutoModelForCausalLM, AutoTokenizer
# from pydantic import Field

# class MultiGPUTransformersLLM(LLM):
#     model: Any = Field(...)
#     tokenizer: Any = Field(...)
#     device: int = Field(default=0)

#     def __init__(self, model: Any, tokenizer: Any, device: int = 0):
#         """
#         Initialize the multi-GPU transformers LLM.

#         Args:
#         - model: The distributed model across multiple GPUs.
#         - tokenizer: The tokenizer associated with the model.
#         - device: GPU device ID for tokenization (handled by model's device map).
#         """
#         # Set attributes using Pydantic's __init__
#         super().__init__(model=model, tokenizer=tokenizer, device=device)

#     @property
#     def _llm_type(self) -> str:
#         """Return type of LLM."""
#         return "multi_gpu_transformers_llm"

#     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
#         """
#         Run the multi-GPU model on the given prompt and return the generated text.

#         Args:
#         - prompt: The input prompt string.
#         - stop: Optional list of stop tokens.

#         Returns:
#         - The generated text response.
#         """
#         # Tokenize the input (do not move it to a specific device)
#         inputs = self.tokenizer(prompt, return_tensors="pt")

#         # Generate text with the distributed model (no need to move inputs to a specific GPU)
#         output_ids = self.model.generate(inputs["input_ids"], max_new_tokens=150, do_sample=True)

#         # Decode the generated output
#         generated_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)

#         # Optionally apply stop tokens
#         if stop:
#             for stop_token in stop:
#                 generated_text = generated_text.split(stop_token)[0]

#         return generated_text

In [None]:
# multi_gpu_llm = MultiGPUTransformersLLM(model=model, tokenizer=tokenizer, device=0)

In [None]:
# multi_gpu_llm

In [10]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

# Load the tokenizer and model locally
# model_name = "mistralai/Mixtral-8x22B-Instruct-v0.1"  # Replace with your local model path if you have downloaded it locally
# model_name = "./Mixtral-8x22B-Instruct-v0.1"
# model_name = "./Mixtral-8x7B-Instruct-v0.1"
# model_name = "./Mistral-7B-Instruct-v0.2"
model_name = "/gpfs/scratch/acad/trail/Meta-Llama-3.1-70B-Instruct"

# Precision switches; at most one may be True. With both False the model is
# loaded with the library's default settings.
quantized_8bit = False
fp16 = True

assert not (quantized_8bit and fp16), "Cannot use both 8-bit quantization and FP16 at the same time"

# Loading options for the model weights. Device placement and precision do not
# apply to a tokenizer, so the tokenizer is loaded without them.
model_load_kwargs = {"device_map": "auto"}
if fp16:
    model_load_kwargs["torch_dtype"] = torch.float16
elif quantized_8bit:
    model_load_kwargs["load_in_8bit"] = True

model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set up the text generation pipeline
# NOTE(review): the saved output of the answer cell shows the completion
# starting with the prompt text. If that echo is unwanted, passing
# `return_full_text=False` here may be the fix - confirm first that
# COTExplainer does not rely on the prompt being present in the completion.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # device=0,  # Use device 0 for initial input; the model is already distributed
    max_new_tokens=1500,
    temperature=0.1,
    top_k=50,
    top_p=0.9,
    repetition_penalty=1.1,
)

Loading checkpoint shards:   0%|          | 0/30 [00:00<?, ?it/s]

In [11]:
from langchain_huggingface.llms import HuggingFacePipeline
# Wrap the transformers pipeline so it can be used as a LangChain LLM.
hf = HuggingFacePipeline(pipeline=pipe)

# The reasoning and answering stages share the same wrapped pipeline.
reasoning_llm = hf
answering_llm = hf

In [None]:
# hf

In [None]:
# from langchain.llms.base import LLM
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model
# from typing import Optional, List

# class CustomTransformersLLM(LLM):
#     def __init__(self, model_name: str, device: int = -1):
#         """
#         Initialize the LLM with a pre-trained transformers model.
        
#         Args:
#         - model_name: Path to the pre-trained model (local or from Hugging Face Hub).
#         - device: GPU device id. Default is -1 for CPU, 0 for the first GPU, etc.
#         """

#         # Load tokenizer
#         tokenizer = AutoTokenizer.from_pretrained(model_name)

#         # Initialize the model with empty weights to save memory during setup
#         with init_empty_weights():
#             model = AutoModelForCausalLM.from_pretrained(model_name, low_cpu_mem_usage=True)

#         # Infer the device map to distribute the model across GPUs
#         device_map = infer_auto_device_map(
#             model,
#             max_memory={0: "8GiB", 1: "8GiB", 2: "8GiB", 3: "8GiB"},
#             no_split_module_classes=["GPTJBlock"]
#         )

#         # Offload parts of the model to disk if necessary
#         offload_dir = "/gpfs/scratch/acad/trail/tmp/offload_dir"
#         model = dispatch_model(model, device_map=device_map, offload_dir=offload_dir)

#         # Set up the text generation pipeline using the distributed model
#         self.generator = pipeline(
#             "text-generation",
#             model=model,
#             tokenizer=tokenizer,
#             max_new_tokens=1500,
#             temperature=0.1,
#             top_k=50,
#             top_p=0.9,
#             repetition_penalty=1.1,
#         )

#     @property
#     def _llm_type(self) -> str:
#         """Return the type of LLM."""
#         return "custom_transformers_llm"

#     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
#         """
#         Generate text from the model based on the prompt.

#         Args:
#         - prompt: The input prompt string.
#         - stop: Optional list of stop tokens to stop generation.

#         Returns:
#         - The generated text response.
#         """
#         if not isinstance(prompt, str):
#             raise ValueError("Prompt must be a string")

#         # Generate text using the transformers model pipeline
#         generated_outputs = self.generator(prompt, max_new_tokens=150, num_return_sequences=1)

#         # Extract the generated text
#         generated_text = generated_outputs[0]['generated_text']

#         # Optionally apply stop tokens
#         if stop:
#             for stop_token in stop:
#                 generated_text = generated_text.split(stop_token)[0]

#         return generated_text

In [None]:
# # model_name = "./Mixtral-8x7B-Instruct-v0.1"
# model_name = "./Mistral-7B-Instruct-v0.2"
# local_llm = CustomTransformersLLM(model_name=model_name, device=0)  # Specify device 0 for GPU, or -1 for CPU

In [12]:
from langchain_community.llms import HuggingFaceEndpoint
from langchain.prompts import PromptTemplate

# reasoning_llm = HuggingFaceEndpoint(
#     repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
#     **{
#         "max_new_tokens": 1500,
#         "temperature": 0.1,
#         "top_k": 50,
#         "top_p": 0.9,
#         "repetition_penalty": 1.1
#     },
# )

# answering_llm = HuggingFaceEndpoint(
#     repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
#     **{
#         "max_new_tokens": 200,
#         "temperature": 0.4,
#         "top_k": 30,
#         "top_p": 0.8,
#         "repetition_penalty": 1.05
#     },
# )

# Prompt for the reasoning stage. Placeholders: {background_knowledge},
# {product_name} and {user}.
reasoning_template = """You are an expert in graph-based recommender systems.
You try to follow the following explanation goal:
1. **Transparency:** Clearly explain how the recommendation algorithm made the decision.
2. **Scrutability:** Allow the user to provide feedback if the recommendation seems incorrect.
3. **Trust:** Build user’s confidence in the recommender system.
4. **Effectiveness:** Help user make informed decisions about the recommendation.
5. **Efficiency:** Provide a quick explanation to facilitate faster decision-making.
6. **Persuasiveness:** Convince user of the relevance of the recommendation.
7. **Satisfaction:** Enhance the ease of use and overall experience of the system for the user.
Given the background knowledge: {background_knowledge},
explain why the movie "{product_name}" was recommended to the user {user}.
"""

reasoning_prompt = PromptTemplate.from_template(reasoning_template)

# Prompt for the answering stage. Placeholders: {reasoning}, {product_name}
# and {user}.
answer_template = """Based on the reasoning provided: {reasoning},
give a concise and helpful response about why the movie "{product_name}" was recommended to the user {user} 
without repeating the explanation goals.
"""
answer_prompt = PromptTemplate.from_template(answer_template)


In [13]:
# Pipe each prompt template into its LLM to form the two chains.
reasoning_chain = reasoning_prompt | reasoning_llm
answering_chain = answer_prompt | answering_llm

In [None]:
# reasoning_chain = reasoning_prompt | multi_gpu_llm
# answering_chain = answer_prompt | multi_gpu_llm

In [14]:
from recommendation.explainers.cot_explainer import COTExplainer

# Build the explainer from the three registries and the reasoning/answering chains.
cot_explainer = COTExplainer(product_registry, user_registry, rating_registry, reasoning_chain, answering_chain)

In [15]:
# Explain the first recommendation path of user id 33.
path = user_recos[33][0]
# `explain` returns a pair, unpacked here into the continuation and a trace object.
continuation, trace = cot_explainer.explain(path)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [16]:
# Only the answering-stage completion is printed. Uncomment the lines below to
# also show the reasoning prompt/completion and the answering prompt.
# print("reasoning prompt:")
# print(trace.reasoning_trace.prompt)
# print("reasoning completion:")
# print(trace.reasoning_trace.completion)
# print("answer prompt:")
# print(trace.answering_trace.prompt)
print("answer completion:")
print(trace.answering_trace.completion)

answer completion:
Based on the reasoning provided: You are an expert in graph-based recommender systems.
You try to follow the following explanation goal:
1. **Transparency:** Clearly explain how the recommendation algorithm made the decision.
2. **Scrutability:** Allow the user to provide feedback if the recommendation seems incorrect.
3. **Trust:** Build user’s confidence in the recommender system.
4. **Effectiveness:** Help user make informed decisions about the recommendation.
5. **Efficiency:** Provide a quick explanation to facilitate faster decision-making.
6. **Persuasiveness:** Convince user of the relevance of the recommendation.
7. **Satisfaction:** Enhance the ease of use and overall experience of the system for the user.
Given the background knowledge: % Path: 
watched(User33, Product2346)
rated(User33, Product2346, 4)
watched(User2678, Product2346)
rated(User2678, Product2346, 4)
watched(User2678, Product1762)
rated(User2678, Product1762, 3)
% Background Knowledge: 
gend

In [None]:
# # for i, path in enumerate(user_recos[33][0]):
# # path = user_recos[0][0]
# def explain(path):
#     bk = generate_facts(path)
#     product_eid = path.recommendation[1].entity_id
#     product = product_registry.find_by_eid(product_eid)
#     user_eid = path.recommendation[0].entity_id
#     user = user_registry.find_by_eid(user_eid)
#     result =   cot_chain.invoke(
#         {"background_knowledge": bk, "product_name": product.name, "user": str(user)},
#     )
#     print("-----------------")
#     print(result)
#     # print(result)
#     # with open(f"explanations/exp_{i}.txt", "w") as f:
#     #     f.write(llm_chain.run(explain(reco)))

In [None]:
# path = user_recos[33][0]
# explain(path)
# # await explain(path)

In [None]:
# # import logging
# # logging.basicConfig(filename='./error.log', level=logging.DEBUG, 
# #                     format='%(asctime)s %(levelname)s %(name)s %(message)s')
# # logger=logging.getLogger(__name__)

# for user_reco in user_recos.values():
#     for reco in user_reco:
#         # try:
#         #     # raise Exception("test")
#         #     explain(reco)
#         # except Exception as e:
#         #     print(e)
#         #     logger.error(e)


# # FIXME: some generate exception
#     # print(user_reco)
# # for path in user_recos[0]:
#     # explain(path)
#     # break