# Example of Using Agent System

In [1]:
%cd "/Users/ainura/studies/thesis/llm-for-rec"
repo_path = "/Users/ainura/studies/thesis/llm-for-rec"

/Users/ainura/studies/thesis/llm-for-rec


Reading API keys

In [2]:
from dotenv import load_dotenv

# Load the variables stored in api_keys.env at the repository root; the call's
# return value is shown as the cell output.
load_dotenv(f"{repo_path}/api_keys.env")

True

## Install requirements

In [None]:
!pip install -q -r '{repo_path}/requirements/requirements.txt'

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m812.8/812.8 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m38.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m266.9/266.9 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m72.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m73.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.3/163.3 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m67.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━

## Add config

In [4]:
import numpy as np
import os.path as osp
from recbole.data.dataset import SequentialDataset
import typing as tp


class RecboleSeqDataset(SequentialDataset):
    """
    Sequential RecBole dataset that also exposes user and item text features.

    On top of what ``SequentialDataset`` provides, the constructor reads
    ``<dataset>.user`` (skipped when the file is absent) and ``<dataset>.item``
    from ``data_path`` and keeps their contents addressable by internal id.

    Attributes:
        data_path (str): Value of ``config["data_path"]``; directory with the dataset files.
        dataset_name (str): Value of ``config["dataset"]``.
        item_id_token: ``field2id_token["item_id"]``; position ``i`` holds the
            dataset token of internal item id ``i``.
        user_id_token: ``field2id_token["user_id"]``; same layout for users.
        preprocess_text_fn (Optional[Callable]): Stored as given; the loaders
            currently do not apply it.
        user_text (np.ndarray): Object array mapping internal user id to a text description.
        item_attr (Dict[int, Dict[str, str]]): Mapping from internal item id to
            the ``{column: value}`` pairs selected by ``config["text_col"]``.
    """
    def __init__(
        self, config: tp.Dict[str, tp.Any], preprocess_text_fn: tp.Callable = None
    ):
        """
        Initializes the RecboleSeqDataset.

        Args:
            config: The RecBole config; must provide ``data_path``, ``dataset``
                and ``text_col``.
            preprocess_text_fn (Optional[Callable]): Function intended for the
                text features of an item; it is only stored on the instance.
        """
        super().__init__(config)
        self.data_path = config["data_path"]
        self.dataset_name = config["dataset"]
        self.item_id_token = self.field2id_token["item_id"]
        self.user_id_token = self.field2id_token["user_id"]
        self.preprocess_text_fn = preprocess_text_fn
        self.user_text = self.load_user_text()
        self.item_attr = self.load_item_text()

    @staticmethod
    def _read_feature_file(path: str) -> tp.Tuple[tp.List[str], tp.List[tp.List[str]]]:
        """
        Reads a tab-separated feature file.

        Args:
            path: Path of the file to read (UTF-8).

        Returns:
            A ``(col_names, rows)`` pair. ``col_names`` are the header cells with
            everything after the first ``:`` removed (``item_id:token`` ->
            ``item_id``). Every row has at least ``len(col_names)`` fields.
        """
        with open(path, "r", encoding="utf-8") as file:
            header = file.readline().strip().split("\t")
            col_names = [col.split(":")[0] for col in header]

            rows = []
            for line in file:
                stripped = line.strip()
                if not stripped:
                    # Blank lines carry no record.
                    continue
                fields = stripped.split("\t")
                # strip() also removes trailing tabs, i.e. trailing empty
                # fields; pad them back so column lookups cannot go out of range.
                fields.extend([""] * (len(col_names) - len(fields)))
                rows.append(fields)
        return col_names, rows

    def load_user_text(self) -> np.ndarray:
        """
        Builds the internal-user-id -> text array from ``<dataset>.user``.

        Every column after the first is rendered as ``"name: value"`` and the
        pieces are joined with ``"; "``. Index 0 holds ``"[PAD]"``.

        Returns:
            Object array of length ``len(self.user_id_token)``. When the user
            file does not exist, all entries except index 0 are empty strings.

        Raises:
            KeyError: If a token of ``user_id_token`` is missing from the file.
        """
        user_text = np.full(len(self.user_id_token), "", dtype=object)
        user_text[0] = "[PAD]"
        user_file_path = osp.join(self.data_path, f"{self.dataset_name}.user")

        if not osp.exists(user_file_path):
            self.logger.info(
                "Dataset seem to have no information about users."
            )
            return user_text

        # token id to text mapping
        col_names, rows = self._read_feature_file(user_file_path)
        text_col_idx = range(1, len(col_names))
        token_text = {
            row[0]: "; ".join(f"{col_names[idx]}: {row[idx]}" for idx in text_col_idx)
            for row in rows
        }

        # internal id to text mapping
        for i, token in enumerate(self.user_id_token):
            if token == "[PAD]":
                continue
            user_text[i] = token_text[token]
        return user_text

    def load_item_text(self) -> tp.Dict[int, tp.Dict[str, str]]:
        """
        Builds the internal-item-id -> attributes mapping from ``<dataset>.item``.

        ``config["text_col"]`` selects the columns to keep and may be either a
        single column name or a sequence of column names.

        Returns:
            Dict keyed by internal item id; each value maps column name to the
            raw string from the file. Id 0 maps to an empty dict.

        Raises:
            ValueError: If a requested column is not present in the file header.
            KeyError: If a token of ``item_id_token`` is missing from the file.
        """
        item_attr = {0: {}}
        item_file_path = osp.join(self.data_path, f"{self.dataset_name}.item")

        # token id to attribute mapping
        col_names, rows = self._read_feature_file(item_file_path)

        text_col = self.config["text_col"]
        if isinstance(text_col, str):
            # A bare string is one column name, not an iterable of names.
            text_col = [text_col]
        text_col_idx = [col_names.index(col_name) for col_name in text_col]

        token_attr = {
            row[0]: {col_names[idx]: row[idx] for idx in text_col_idx}
            for row in rows
        }

        # internal id to attribute mapping
        for i, token in enumerate(self.item_id_token):
            if token == "[PAD]":
                continue
            item_attr[i] = token_attr[token]
        return item_attr

    def user_id2text(self, id: int) -> str:
        """Returns the text description stored for an internal user id."""
        return self.user_text[id]

    def user_token2text(self, token: str) -> str:
        """Returns the text description for a user given its dataset token."""
        internal_id = self.token2id('user_id', token)
        return self.user_id2text(internal_id)

    def item_id2text(self, id: int) -> str:
        """
        Renders the attributes of an internal item id as one string.

        Each attribute becomes ``"name:value"`` (no space after the colon) and
        the pieces are joined with ``"; "``.
        """
        attr = self.item_attr[id]
        return "; ".join(f"{attr_key}:{attr_value}" for attr_key, attr_value in attr.items())

    def item_token2text(self, token: str) -> str:
        """Returns the rendered attribute string for an item given its dataset token."""
        internal_id = self.token2id('item_id', token)
        return self.item_id2text(internal_id)

    def item_token2attr(self, token: str) -> tp.Dict[str, str]:
        """Returns the attribute dict for an item given its dataset token."""
        internal_id = self.token2id('item_id', token)
        return self.item_attr[internal_id]

In [3]:
import os

# Settings passed to the RecBole Config and reused by the retrieval setup.
config_dict = {
    # Arguments for loading the tab-separated item file.
    "csv_args": {"delimiter": "\t"},
    "source_column": "item_id:token",
    # "k" is also used as top_k when the agents are asked for recommendations.
    "search_kwargs": {"k": 20},
    # Resolved against the current working directory, which the first cell sets
    # to the repository root with %cd, so no user-specific path is embedded here.
    "data_path": os.path.abspath("datasets"),
    "load_col": {
        "inter": ["user_id", "item_id", "rating", "timestamp"],
        "item": ["item_id", "movie_title"],
    },
    # Item-file columns used to build the item text.
    "text_col": ["movie_title", "release_year", "class"],
    "MAX_ITEM_LIST_LENGTH": 10,
    "eval_args": {"split": {"LS": "valid_and_test"}, "order": "TO", "mode": "full"},
    "repeatable": True,
    "loss_type": "CE",
    "train_batch_size": 100,
    "eval_batch_size": 8,
    "valid_metric": "NDCG@10",
    "metrics": ["Recall", "NDCG"],
    "topk": [1, 5, 20],
    "train_neg_sample_args": None,
}

## Get dataset and config

In [5]:
from llm4rec.pipelines import RecBolePipelineRecommender
from llm4rec.dataset import RecboleSeqDataset
from recbole.data.utils import data_preparation
from recbole.config import Config
import torch

model_cls = RecBolePipelineRecommender
dataset_name = "ml-100k"

# Combine the model class, the dataset name and the notebook-level settings.
config = Config(
    model=model_cls,
    dataset=dataset_name,
    config_dict=config_dict,
)

# NOTE(review): this cell imports RecboleSeqDataset from llm4rec.dataset, which
# rebinds the class of the same name defined earlier in the notebook — confirm
# which implementation is meant to be used here.
dataset = RecboleSeqDataset(config)

# Only the third value returned by data_preparation is kept.
_, _, test_data = data_preparation(config, dataset)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[field].fillna(value="", inplace=True)
  split_point = np.cumsum(feat[field].agg(len))[:-1]
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermed

## Let's take the first user

Get the first test batch and take one interaction from it

In [7]:
# Position of the interaction to pick inside the first test batch.
i = 0
batch = next(iter(test_data))
user = batch[0][i]

# Use at most MAX_ITEM_LIST_LENGTH entries of the stored history.
history_length = min(config["MAX_ITEM_LIST_LENGTH"], user["item_length"])
history_ids = user["item_id_list"]

# Map the truncated id list to item tokens via the dataset's id2token.
history_item_ids = test_data.dataset.id2token("item_id", history_ids[:history_length])
prev_interactions = history_item_ids

Initializing the retrieval model

In [10]:
from llm4rec.tasks import RetrievalRecommender

# The item file path, CSV loader arguments and search arguments all come from
# the config; the embedding model uses cuda:0 when torch reports CUDA, else cpu.
retrieval = RetrievalRecommender(
    embeddings=None,
    item2text=dataset.item_token2text,
    items_info_path=os.path.join(config["data_path"], f"{config['dataset']}.item"),
    csv_loader_args={
        "csv_args": config["csv_args"],
        "source_column": config["source_column"],
    },
    text_splitter_args={"chunk_size": 1000, "chunk_overlap": 0},
    search_type="similarity",
    search_kwargs=config["search_kwargs"],
    emb_model_kwargs={"device": "cuda:0" if torch.cuda.is_available() else "cpu"},
)

### Initializing the models for Ranker tool

In [11]:
from langchain_groq import ChatGroq


# Chat model handed to the ranker; temperature is fixed at 0.
ranker_model = ChatGroq(
    model_name="llama3-70b-8192",
    temperature=0,
)

### Creation of tools from the tasks

In [13]:
from llm4rec.tasks import RankerRecommender
from llm4rec.tools import create_retrieval_tool, create_ranking_tool


# Wrap the retrieval task as a tool.
retrieval_tool = create_retrieval_tool(retrieval=retrieval)

# Build the ranking task on top of the ranker model, then wrap it as a tool too.
ranker = RankerRecommender(
    llm=ranker_model,
    item2text=dataset.item_token2text,
)
ranker_tool = create_ranking_tool(ranker=ranker)

### Initializing models for agents

In [14]:
from langchain_openai import ChatOpenAI


# One chat model per agent role; all of them use temperature 0.
agent_executor_model = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
)
planning_model = ChatGroq(
    model_name="llama3-70b-8192",
    temperature=0,
)
reflection_model = ChatGroq(
    model_name="llama3-70b-8192",
    temperature=0,
)

# Simple Agent

In [15]:
from llm4rec.agents import SimpleAgent


# Simple agent: the executor model with the retrieval and ranking tools.
agent = SimpleAgent(
    tools=[retrieval_tool, ranker_tool],
    llm_executor=agent_executor_model,
)

In [16]:
# Request search_kwargs["k"] recommendations for the selected history with an
# empty user profile; the returned value is displayed as the cell output.
agent.recommend(
    user_profile="",
    prev_interactions=prev_interactions,
    top_k=config["search_kwargs"]["k"],
)



[1m> Entering new AgentExecutor chain...[0m


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[32;1m[1;3m
Invoking: `retrieval_recommender` with `{'user_profile': 'User', 'prev_interactions': ['172', '762', '235', '546', '750', '147', '781', '484', '959', '39'], 'top_k': 20}`


[0m[36;1m[1;3m['1378', '1649', '852', '265', '344', '1241', '643', '269', '775', '1101', '741', '1115', '748', '264', '1320', '257', '1611', '212', '152', '480'][0m

  warn_deprecated(


[32;1m[1;3mHere are 20 candidate items recommended for you:

1. 1378
2. 1649
3. 852
4. 265
5. 344
6. 1241
7. 643
8. 269
9. 775
10. 1101
11. 741
12. 1115
13. 748
14. 264
15. 1320
16. 257
17. 1611
18. 212
19. 152
20. 480[0m

[1m> Finished chain.[0m


{'input': "\nTask: User . This User has previous interactions with these items: ['172' '762' '235' '546' '750' '147' '781' '484' '959' '39']. \nPlease give 20 candidate items recommendations for this user considering his preferences.\n",
 'output': 'Here are 20 candidate items recommended for you:\n\n1. 1378\n2. 1649\n3. 852\n4. 265\n5. 344\n6. 1241\n7. 643\n8. 269\n9. 775\n10. 1101\n11. 741\n12. 1115\n13. 748\n14. 264\n15. 1320\n16. 257\n17. 1611\n18. 212\n19. 152\n20. 480',
 'intermediate_steps': [(ToolAgentAction(tool='retrieval_recommender', tool_input={'user_profile': 'User', 'prev_interactions': ['172', '762', '235', '546', '750', '147', '781', '484', '959', '39'], 'top_k': 20}, log="\nInvoking: `retrieval_recommender` with `{'user_profile': 'User', 'prev_interactions': ['172', '762', '235', '546', '750', '147', '781', '484', '959', '39'], 'top_k': 20}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_mTSZU9CSlknS6zRfoVhJp

# Plan-and-Execute Agent

In [17]:
from llm4rec.agents import PlanExecuteAgent


# Plan-and-execute agent: same tools and executor model as the simple agent,
# plus separate models for planning and for reflection.
agent_plan_execute = PlanExecuteAgent(
    tools=[retrieval_tool, ranker_tool],
    llm_executor=agent_executor_model,
    llm_for_planning=planning_model,
    llm_for_reflection=reflection_model,
)

In [19]:
# Same request as for the simple agent; the result is kept for inspection below.
response = agent_plan_execute.recommend(
    user_profile="",
    prev_interactions=prev_interactions,
    top_k=config["search_kwargs"]["k"],
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `retrieval_recommender` with `{'user_profile': 'user', 'prev_interactions': ['172', '762', '235', '546', '750', '147', '781', '484', '959', '39'], 'top_k': 100}`


[0m[36;1m[1;3m['1378', '1649', '852', '265', '344', '1241', '643', '269', '775', '1101', '741', '1115', '748', '264', '1320', '257', '1611', '212', '152', '480', '8', '1197', '1184', '1044', '1631', '345', '1644', '194', '328', '5', '831', '129', '43', '1174', '1237', '356', '323', '939', '196', '1085', '1074', '1016', '2', '1055', '932', '94', '1183', '197', '295', '63', '1094', '1324', '68', '412', '1627', '1661', '1511', '917', '930', '619', '337', '204', '47', '331', '125', '298', '827', '117', '260', '7', '1556', '164', '225', '1465', '539', '1548', '426', '1559', '1374', '1596', '636', '1058', '245', '355', '894', '1137', '1312', '38', '1143', '1052', '1077', '336', '572', '1114', '1061', '10', '925', '350', '1428', '1141'][0m[32;1m[1;3m
Invo

In [20]:
# Show only the "output" entry of the plan-and-execute response.
response["output"]

'Here are 20 recommended items for you:\n\n1. 257\n2. 298\n3. 328\n4. 827\n5. 1115\n6. 264\n7. 323\n8. 295\n9. 775\n10. 1378\n11. 1649\n12. 852\n13. 265\n14. 344\n15. 1241\n16. 643\n17. 269\n18. 1101\n19. 741\n20. 748'