In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there are reachable from this Colab runtime.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys
import os

# Location of the llm-for-rec checkout on the mounted Google Drive.
repo_path = '/content/drive/MyDrive/Colab Notebooks/thesis_work/llm-for-rec'

# Make the repository importable. The membership check keeps the cell
# idempotent: re-running it no longer piles duplicate entries onto sys.path.
if repo_path not in sys.path:
    sys.path.append(repo_path)

In [3]:
!pip install -q -r '{repo_path}/requirements/requirements.txt'

In [4]:
import os

# Settings handed to recbole's Config(config_dict=...) when the dataset is built.
# Columns to read from each atomic file.
_load_columns = {
    "inter": ["user_id", "item_id", "rating", "timestamp"],
    "item": ["item_id", "movie_title"],
    "user": ["user_id", "age", "gender"],
}
# Evaluation protocol.
_evaluation_args = {"split": {"LS": "valid_and_test"}, "order": "TO", "mode": "full"}

config_dict = dict(
    csv_args={"delimiter": "\t"},
    source_column="item_id:token",
    search_kwargs={"k": 20},
    # Datasets live inside the repository checkout.
    data_path=os.path.join(repo_path, "datasets"),
    load_col=_load_columns,
    # NOTE(review): "release_year" and "class" are listed here but are not part
    # of load_col["item"] above -- confirm whether they should be loaded too.
    text_col=["movie_title", "release_year", "class"],
    MAX_ITEM_LIST_LENGTH=10,
    eval_args=_evaluation_args,
    repeatable=True,
    loss_type="CE",
    train_batch_size=100,
    eval_batch_size=8,
    valid_metric="NDCG@10",
    metrics=["Recall", "NDCG"],
    topk=[1, 5, 20],
    train_neg_sample_args=None,
)

In [5]:
from llm4rec.pipelines import RecBolePipelineRecommender
from llm4rec.dataset import RecboleSeqDataset
from llm4rec.trainer import PipelineTrainer
from recbole.data.utils import data_preparation
from recbole.config import Config
from recbole.model.abstract_recommender import AbstractRecommender
import os
import torch

# Build the RecBole configuration for the pipeline recommender on ml-100k.
dataset_name = 'ml-100k'
model_cls = RecBolePipelineRecommender
config = Config(
    model=model_cls,
    dataset=dataset_name,
    config_dict=config_dict,
)

# Load the dataset and split it. Only the first and third of the three values
# returned by data_preparation are kept; the middle one is discarded.
dataset = RecboleSeqDataset(config)
train_data, _, eval_data = data_preparation(config, dataset)



In [6]:
from dotenv import load_dotenv
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain import HuggingFaceHub
import torch

# Embedding model; embedding_size is expected to match the vector width
# produced by all-MiniLM-L6-v2.
embedding_size = 384
# Run the embedder on the first GPU when one is available, otherwise on CPU.
embedding_device = "cuda:0" if torch.cuda.is_available() else "cpu"
embedding_fn = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={"device": embedding_device},
)

# Load environment variables from huggingface.env in the repository root
# (presumably the Hugging Face API token -- confirm). The variable name is kept
# as-is even though the file is not OpenAI-related.
path_to_openai_env = os.path.join(repo_path, "huggingface.env")
load_dotenv(path_to_openai_env)

# Generation settings forwarded to the hosted Mistral instruct model.
llm_generation_kwargs = {
    "temperature": 0.1,
    "max_new_tokens": 512,
    "return_full_text": False,
}
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    model_kwargs=llm_generation_kwargs,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  warn_deprecated(


## Initialize item and user memory

In [8]:
from llm4rec.memory import ItemMemory

# Fill the item memory: one entry per value in dataset.item_id_token, storing
# the text that dataset.item_token2text produces for it.
item_memory = ItemMemory()
describe_item = dataset.item_token2text
for item_token in dataset.item_id_token:
    item_memory.update(item_token, describe_item(item_token))

In [9]:
from llm4rec.memory import UserMemory

# Per-user memory wired to the LLM, the embedding function and the item memory
# created earlier in the notebook.
user_memory = UserMemory(
    # Models.
    llm=llm,
    embeddings=embedding_fn,
    emb_size=embedding_size,
    # Data sources.
    item_memory=item_memory,
    user_attributes=dataset.user_token2text,
    # Size limit for the short-term store.
    short_term_limit=20,
)

In [None]:
import numpy as np

# Seed the user memory with the training interactions of a few users.
history_matrix, _, history_lens = train_data.dataset.history_item_matrix()
inter_matrix = train_data.dataset.inter_matrix('csr', value_field='rating')


def user_id_mapping(user_ids):
    """Translate internal user ids into their original tokens."""
    return train_data.dataset.id2token('user_id', user_ids)


def item_id_mapping(item_ids):
    """Translate internal item ids into their original tokens."""
    return train_data.dataset.id2token('item_id', item_ids)


# Rating scale used to map the stored rating values back to integers.
# assumes the 'rating' field is stored normalised to [0, 1] -- TODO confirm
min_rating = 1
max_rating = 5
num_inter = 25

n_users = 5

# NOTE(review): range(1, n_users) visits internal ids 1..n_users-1, i.e.
# n_users - 1 users -- confirm that this is the intended count.
for user_id in range(1, n_users):
    # Internal item ids this user interacted with, without the padding tail.
    user_history = history_matrix[user_id][:history_lens[user_id]].tolist()
    # Dense rating row for this user; position i holds the rating of item id i.
    ratings = inter_matrix[user_id, :].toarray() * (max_rating-min_rating) + min_rating
    ratings = ratings.astype('int')[0]

    # Look each rating up by item id. Zipping the history with the dense row
    # paired the k-th history item with the rating stored at column k, which
    # belongs to a different item.
    for item in user_history:
        # NOTE(review): internal ids are passed here while user_id_mapping /
        # item_id_mapping are never used -- confirm whether user_memory
        # expects original tokens instead.
        user_memory.update(str(user_id), {'rating': ratings[item], 'item_id': str(item)})



In [None]:
# Display the short-term memory store to inspect what the updates above recorded.
user_memory.short_term_memory.memory_store

In [None]:
# Display the long-term memory store to inspect what the updates above recorded.
user_memory.long_term_memory.memory_store