In [1]:
# https://docs.cohere.com/docs/chat-on-langchain
# https://www.pinecone.io/learn/refine-with-rerank/

import os
import openai
import getpass  # platform.openai.com
from dotenv import load_dotenv

# Load the variables declared in the local .env file, then read the Cohere
# API key from the environment (None when the variable is not set).
load_dotenv(dotenv_path=".env")
cohere_api_key = os.environ.get("COHERE_API_KEY")

In [2]:
import cohere

# Client for the Cohere v2 API, authenticated with the key loaded earlier.
co = cohere.ClientV2(api_key=cohere_api_key)

# Candidate passages: all of them mention "capital" in some sense, but only
# one states the capital of the United States.
docs = [
    "Carson City is the capital city of the American state of Nevada.",
    (
        "The Commonwealth of the Northern Mariana Islands is a group of islands "
        "in the Pacific Ocean. Its capital is Saipan."
    ),
    (
        "Capitalization or capitalisation in English grammar is the use of a "
        "capital letter at the start of a word. English usage varies from "
        "capitalization in other languages."
    ),
    (
        "Washington, D.C. (also known as simply Washington or D.C., and "
        "officially as the District of Columbia) is the capital of the United "
        "States. It is a federal district."
    ),
    (
        "Capital punishment (the death penalty) has existed in the United States "
        "since beforethe United States was a country. As of 2017, capital "
        "punishment is legal in 30 of the 50 states."
    ),
]

# Ask the reranker for the three best matches and have it echo the document
# text back alongside each index and relevance score.
response = co.rerank(
    query="What is the capital of the United States?",
    documents=docs,
    model="rerank-english-v3.0",
    top_n=3,
    return_documents=True,
)

print(response)




In [3]:
# Show every reranked hit on its own line, in the order the response lists them.
for ranked_hit in response.results:
    print(ranked_hit)

document=V2RerankResponseResultsItemDocument(text='Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.') index=3 relevance_score=0.999071
document=V2RerankResponseResultsItemDocument(text='Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.') index=4 relevance_score=0.7867867
document=V2RerankResponseResultsItemDocument(text='Carson City is the capital city of the American state of Nevada.') index=0 relevance_score=0.32713068


In [4]:
# One summary line per hit. A hit whose document payload is missing gets a
# "not found" line instead of its text.
for result in response.results:
    if result.document is None:
        print(f"Document not found for result {result.index}. Relevance score: {result.relevance_score} \n")
        continue
    print(f"Document {result.index}, Relevance score: {result.relevance_score}: {result.document.text} \n")


Document 3, Relevance score: 0.999071: Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. 

Document 4, Relevance score: 0.7867867: Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. 

Document 0, Relevance score: 0.32713068: Carson City is the capital city of the American state of Nevada. 



In [5]:
# https://docs.cohere.com/v2/docs/overview
import cohere
import os
from dotenv import load_dotenv

# Read the API key from the local .env file and build the v2 client.
load_dotenv(dotenv_path=".env")
cohere_api_key = os.environ.get("COHERE_API_KEY")
co = cohere.ClientV2(api_key=cohere_api_key)

query = "What is the capital of the United States?"

# Structured documents: each entry carries a Title and a Content field, built
# here from (title, content) pairs.
docs = [
    {"Title": title, "Content": content}
    for title, content in (
        (
            "Facts about Carson City",
            "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.",
        ),
        (
            "The Commonwealth of Northern Mariana Islands",
            "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.",
        ),
        (
            "The Capital of United States Virgin Islands",
            "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.",
        ),
        (
            "Washington D.C.",
            "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.",
        ),
        (
            "Capital Punishment in the US",
            "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.",
        ),
    )
]

# Rank on both fields and keep only the two best matches, with the documents
# echoed back in the response.
results = co.rerank(
    query=query,
    documents=docs,
    model="rerank-english-v3.0",
    rank_fields=["Title", "Content"],
    top_n=2,
    return_documents=True,
)

# Response id, the raw result list, then a blank-line gap before the summary.
print(results.id, results.results, "\n\n", sep="\n")

# Per hit: relevance score, the document's Content field, and a separator line.
for item in results.results:
    print(item.relevance_score, item.document.Content, "*****************", sep="\n")

bbdc198e-c927-4f08-a33a-fea5bbe6c27e
[V2RerankResponseResultsItem(document=V2RerankResponseResultsItemDocument(text=None, Content='Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.', Title='Washington D.C.'), index=3, relevance_score=0.9987405), V2RerankResponseResultsItem(document=V2RerankResponseResultsItemDocument(text=None, Content='Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.', Title='Capital Punishment in the US'), index=4, relevance_score=0.5011778)]



0.9987405
Washington, D.C. (also

In [8]:
from langchain_cohere import CohereRagRetriever
from langchain_cohere import ChatCohere
from langchain_core.documents import Document

# Chat model plus a RAG retriever configured with no connectors; the candidate
# documents are passed explicitly when the retriever is invoked.
cohere_chat_model = ChatCohere(cohere_api_key=os.environ.get("COHERE_API_KEY"))
rag = CohereRagRetriever(llm=cohere_chat_model, connectors=[])

query = "Qual a cor da mesa?"

# Three short documents numbered from 1; the number drives both metadata fields.
input_docs = [
    Document(
        page_content=text,
        metadata={"id": f"id-{n}", "citations": f"citation{n}"},
    )
    for n, text in enumerate(
        (
            "A cor da mesa é vermelha",
            "A cor da bola é amarela",
            "A cor do mesa é violeta",
        ),
        start=1,
    )
]

# Alternative kept for reference (call without 'source_documents'):
# docs = rag.get_relevant_documents(query)
docs = rag.invoke(query, documents=input_docs)

# Every entry except the last is printed as a source document:
# metadata first, then the text, then a dashed separator.
for doc in docs[:-1]:
    print(doc.metadata)
    print(f"\n\n{doc.page_content}")
    print("\n\n", "-" * 30, "\n\n", sep="")

# The last entry is printed as the final generation, followed by the
# citations stored in its metadata.
answer = docs[-1].page_content
print(answer)
citations = docs[-1].metadata['citations']
print(citations)

{'id': 'id-1', 'text': 'A cor da mesa é vermelha'}


A cor da mesa é vermelha


------------------------------


A cor da mesa é vermelha.
[ChatCitation(start=16, end=25, text='vermelha.', document_ids=['id-1'])]


In [9]:
# Number of entries currently bound to `docs` (presumably the list returned by
# the preceding rag.invoke call; this depends on cell execution order).
len(docs)

2

In [10]:
import numpy as np

### STEP 1: Embed the documents

# The documents and the query in this cell are written in Portuguese, so they
# are embedded with Cohere's multilingual model rather than the English-only
# "embed-english-v3.0". One shared name keeps the document and query calls on
# the same model, which is required for their vectors to be comparable.
embed_model = "embed-multilingual-v3.0"

documents = [
    "Reembolso de despesas de viagem: gerencie facilmente suas despesas de viagem enviando-as por meio de nossa ferramenta financeira. ",
    "Trabalhar no exterior: É possível trabalhar remotamente de outro país. Basta coordenar com seu gerente e garantir que você esteja disponível.",
    "Benefícios de saúde e bem-estar: nos preocupamos com o seu bem-estar e oferecemos inscrições em academias, aulas de ioga no local e seguro saúde",
    "Frequência das avaliações de desempenho: realizamos avaliações informais trimestralmente e revisões formais duas vezes por ano.",
]

# Embed the documents (input_type marks them as the corpus being searched).
doc_emb = co.embed(
    texts=documents,
    model=embed_model,
    input_type="search_document",
    embedding_types=["float"],
).embeddings.float

### STEP 2: Embed the query

# Add the user query
query = "Qual sua política de trabalho remoto?"

# Embed the query (input_type marks it as the search query).
query_emb = co.embed(
    texts=[query],
    model=embed_model,
    input_type="search_query",
    embedding_types=["float"],
).embeddings.float

### STEP 3: Return the most similar documents

# Dot product of the single query vector with every document vector;
# [0] selects the score row that belongs to that one query.
scores = np.dot(query_emb, np.transpose(doc_emb))[0]

# Sort and filter documents based on scores: argsort on the negated scores
# yields the indices in descending score order.
top_n = 2
top_doc_idxs = np.argsort(-scores)[:top_n]

# Display search results, with ranks starting at 1.
for rank, doc_idx in enumerate(top_doc_idxs, start=1):
    print(f"Rank: {rank}")
    print(f"Document: {documents[doc_idx]}\n")



Rank: 1
Document: Trabalhar no exterior: É possível trabalhar remotamente de outro país. Basta coordenar com seu gerente e garantir que você esteja disponível.

Rank: 2
Document: Reembolso de despesas de viagem: gerencie facilmente suas despesas de viagem enviando-as por meio de nossa ferramenta financeira. 

