# Perguntas sobre RecSys-RL

## 1. Lendo a chave

In [1]:
from dotenv import load_dotenv

import os

# Load variables from a local .env file into the process environment so the
# os.getenv lookups in the next cell can find the API keys.
load_dotenv()

True

In [2]:
# Fetch the three service credentials from the environment in one pass.
# Each name ends up as None when its variable is not set.
OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_API_ENV = (
    os.environ.get(variable_name)
    for variable_name in ("OPENAI_API_KEY", "PINECONE_API_KEY", "PINECONE_API_ENV")
)

In [3]:
# Confirm the key was loaded WITHOUT echoing the secret into the saved output.
# NOTE: an earlier revision of this cell displayed the raw key, so that key
# should be treated as compromised and rotated.
PINECONE_API_KEY is not None

True

## 2. Carregando o paper sobre a implementação de RL em RS com feedback de usuário explícito

### 2.1. Paper

In [4]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [9]:
# Build a PDF loader pointed at the RecSys-RL paper (relative path, so it is
# resolved against the current working directory).
loader_paper = PyPDFLoader('../PDF/recsys-rl.pdf')

In [14]:
# Run the loader; the result is an indexable sequence of Document objects.
recsys_paper = loader_paper.load()

In [15]:
# Peek at the second loaded Document (index 1) to sanity-check the text
# extraction — presumably this corresponds to page 2 of the PDF.
recsys_paper[1]

Document(page_content='Fig. 1. Analysis on sequential patterns on user’s behavior in MovieLens and\nYahoo!Music datasets\nclick and read the two pieces of news with equal probability,\nwhere one is about a thunderstorm alert and the other is about\na basketball player Kobe Bryant. In this example, after reading\nthe news about thunderstorm, the user probably is not willing\nto read news about this issue anymore; while on the other hand,\nthe user will possibly read more about NBA or basketball\nafter reading the news about Kobe. The fact suggests that\nrecommending the news about Kobe will introduce more long-\nterm rewards. Hence, when recommending items to users, both\nthe immediate and long-term rewards should be taken into\nconsideration.\nRecently, Reinforcement Learning (RL) [20], which has\nshown great potential in various challenging scenarios that\nrequire both dynamic modeling and long term planning, such\nas game playing [21], [22], real-time ads bidding [23], [24],\nneural 

### 2.2. Implementação

In [8]:
from langchain_community.document_loaders import NotebookLoader

In [30]:
# Loader for the companion implementation notebook. Cell outputs are included;
# presumably each output is truncated to max_output_length characters and
# newlines are stripped — confirm against the NotebookLoader documentation.
loader_jupyter = NotebookLoader(
    "../Others/recsys_rl.ipynb",
    include_outputs=True,
    max_output_length=200,
    remove_newline=True,
)

In [31]:
# Load the notebook's cells into a list of Document objects.
recsys_jupyter = loader_jupyter.load()

  filtered_data = filtered_data.applymap(remove_newlines)


In [32]:
# Display the loaded notebook Documents.
# NOTE(review): this dumps the whole list into the output; a slice or len()
# would keep the saved notebook smaller.
recsys_jupyter

[Document(page_content='\'code\' cell: \'[\'from collections import defaultdict\', \'import os\', \'import pickle\', \'import random\', \'import requests\', \'import time\', \'import tqdm\', \'\', \'from IPython.core.debugger import set_trace\', \'import numpy as np\', \'import pandas as pd\', \'from pytorch_ranger import Ranger\', \'import torch\', \'import torch.nn as nn\', \'import torch.nn.functional as F \', \'import torch.utils.data as td\', \'from torch.utils.tensorboard import SummaryWriter\', \'\', \'from utils import (EvalDataset, OUNoise, Prioritized_Buffer, get_beta, \', \'                   preprocess_data, to_np, hit_metric, dcg_metric)\']\'\n\n \'code\' cell: \'[\'data_dir = "data"\', \'rating = "ml-1m.train.rating"\', \'\', \'params = {\', "    \'batch_size\': 512,", "    \'embedding_dim\': 8,", "    \'hidden_dim\': 16,", "    \'N\': 5, # memory size for state_repr", "    \'ou_noise\':False,", \'    \', "    \'value_lr\': 1e-5,", "    \'value_decay\': 1e-4,", "    \'pol

### 2.3. Juntando os documentos gerados

In [33]:
# Concatenate the paper Documents and the notebook Documents into one corpus.
recsys = recsys_paper + recsys_jupyter

## 3. Quebrando em mais pedaços (`Chunks`)

In [34]:
# Chunking settings
text_splitter = RecursiveCharacterTextSplitter(
    # Sizes are measured with length_function (len, i.e. characters):
    # chunks of up to 6000 with an overlap of 200 between neighbours.
    chunk_size = 6000,
    chunk_overlap  = 200,
    length_function = len,
)

In [35]:
# Split every Document of the combined corpus into chunks.
texts = text_splitter.split_documents(recsys)

In [36]:
# Number of chunks produced by the splitter.
len(texts)

17

## 4. Embeddings de cada documento + busca semântica

In [37]:
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

  from tqdm.autonotebook import tqdm


In [38]:
# Embedding client, authenticated with the OpenAI key read from the environment.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [39]:
# Initialise the Pinecone client and name the index used by this notebook.
# NOTE(review): pinecone.init(...) appears to be the legacy (pre-v3) client
# API; newer pinecone-client releases use a Pinecone(...) object instead —
# confirm the installed version before re-running.
pinecone.init(api_key=PINECONE_API_KEY,
              environment=PINECONE_API_ENV)
index_name = 'rl4rs'

In [40]:
# Embed every chunk and store the vectors in the Pinecone index named above.
# Only page_content is passed, so the Documents' metadata is not uploaded.
# NOTE(review): presumably each re-run uploads the same chunks again — verify
# whether the index ends up with duplicates.
docsearch = Pinecone.from_texts([t.page_content for t in texts], embeddings, index_name=index_name)

In [41]:
# Run a sample query and fetch the most similar chunks from the index.
query = 'Quais foram as métricas utilizadas para validação do framework?'
docs = docsearch.similarity_search(query)


In [42]:
# Display the Documents returned for the sample query.
docs

[Document(page_content='C. Evaluation\nIn this subsection, we discuss how to evaluate the models\nwith a environment simulator. The most straightforward way to\nevaluate the RL based models is to conduct online experiments\non recommender systems where the recommender directly\ninteracts with users. However, the underlying commercial risk\nand the costly deployment on the platform make it impracti-\ncal. Therefore, throughout the testing phase, we conduct the\nevaluation of the proposed models on public ofﬂine datasets\nand propose two ways to evaluate the models, which are the\nofﬂine evaluation and the online evaluation.\n1) Ofﬂine evaluation: Intuitively, the ofﬂine evaluation of\nthe trained models is to test the recommendation performance\nwith the learned policy, which is described in Algorithm 2.\nSpeciﬁcally, for a given session Sj, the recommender only\nrecommends the items that appear in this session, denoted as\nI(Sj), rather than the ones in the whole item space. The reason

## 5. Chat implementando o modelo

In [43]:
from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)

In [44]:
from langchain.chat_models import ChatOpenAI

# Chat model that answers the questions.
# NOTE(review): temperature=1.0 looks high for context-grounded Q&A — confirm
# this is intended.
chat = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model='gpt-4-1106-preview',
    temperature=1.0
)

In [45]:
def augment_prompt(query: str, k: int = 3, vectorstore=None) -> str:
    """Build a retrieval-augmented prompt for ``query``.

    Args:
        query: The user's question; used both for retrieval and in the prompt.
        k: How many similar chunks to retrieve. Defaults to 3, the value that
            used to be hard-coded.
        vectorstore: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute. When omitted, the
            notebook-level ``docsearch`` is used.

    Returns:
        The prompt text: the instructions, the retrieved chunks joined by
        newlines, and the original query.
    """
    # Resolve the store lazily: the global is only looked up when no store is given.
    store = docsearch if vectorstore is None else vectorstore
    # Fetch the k most similar chunks from the knowledge base.
    results = store.similarity_search(query, k=k)
    # Keep only the text of each hit.
    source_knowledge = "\n".join(doc.page_content for doc in results)
    # The template below is sent to the model verbatim, so it is left untouched.
    augmented_prompt = f"""Usando o contexto abaixo, responda a pergunta abaixo. Se a pergunta não estiver relacionada ao contexto, esqueça o contexto dado e responda como você normalmente responderia, pois você ainda é um assistente prestativo. E se apesar da pergunta não estiver relacionada você não saber a resposta, apenas diga que não sabe como normalmente você faria.

    Contextos:
    {source_knowledge}

    Query: {query}"""
    return augmented_prompt

In [47]:
import gradio as gr

def predict(message, history):
    """Answer ``message`` for the Gradio chat UI, replaying earlier turns.

    Args:
        message: Latest user message. It is wrapped by ``augment_prompt``
            before being sent to the model; earlier turns are sent as-is.
        history: Iterable of ``(human, ai)`` pairs for the previous turns,
            replayed in order after the system message.

    Returns:
        The ``content`` of the chat model's reply.
    """
    conversation = [SystemMessage(content='Você é um assistente gentil e prestativo. Você ajudará nos estudos acerca do artigo de aprendizado por reforço para sistemas de recomendação. Busque responder as perguntas quando conseguir!')]
    for human, ai in history:
        conversation.append(HumanMessage(content=human))
        conversation.append(AIMessage(content=ai))
    # Only the newest message gets the retrieved context attached.
    conversation.append(HumanMessage(content=augment_prompt(message)))
    # Use the Runnable entry point; calling the model object directly is
    # deprecated in LangChain.
    gpt_response = chat.invoke(conversation)
    return gpt_response.content

In [48]:
# Start the chat UI backed by predict. With debug=True the cell appears to
# stay running until interrupted (the saved output ends with a keyboard
# interruption message).
gr.ChatInterface(predict).launch(debug=True)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.


