## Evaluation Cases
- RAGAS: https://www.youtube.com/watch?v=Anr1br0lLz8

In [1]:
import os
import openai
from dotenv import load_dotenv

load_dotenv('.env')
openai.api_base = os.getenv('OPENAI_ENDPOINT')
openai.api_key = os.getenv('OPENAI_API_KEY')
openai.api_version = "2023-09-15-preview"
llm_model = 'gpt-35-turbo-jdrios'
emb_model = 'text-embedding-ada-002-jdrios'

In [2]:
# Documents
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader([
    "https://www.leal.co/usuarios",
    "https://www.leal.co/puntos",
    "https://www.leal.co/leal-coins"
])
documents = loader.load()

In [3]:
# Embeddings (with OpenAI)
from langchain_openai import AzureOpenAIEmbeddings

embeddings = AzureOpenAIEmbeddings(
    azure_deployment=emb_model,
    azure_endpoint=os.getenv('OPENAI_ENDPOINT'),
)

In [5]:
# Embeddings (with HuggingFace)
from langchain.embeddings import HuggingFaceHubEmbeddings

# emb_model = "sentence-transformers/all-mpnet-base-v2"
emb_model = "BAAI/bge-small-en-v1.5"
embeddings = HuggingFaceHubEmbeddings(repo_id=emb_model,
                                        huggingfacehub_api_token=os.getenv('HUGGINGFACEHUB_API_TOKEN'))

In [4]:
# Creating Retriever
from langchain_pinecone import Pinecone
index_name = 'thesis-model-1'

vector_store = Pinecone.from_existing_index(index_name, embeddings)
retriever = vector_store.as_retriever()

In [6]:
# Creatin Prompt
from langchain.prompts import ChatPromptTemplate

template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'Redirigir...':
Context:
{context}
Question:
{question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [6]:
from operator import itemgetter

from langchain_openai import AzureChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

primary_qa_llm = AzureChatOpenAI(model_name=llm_model, temperature=0, api_version="2023-09-15-preview", azure_endpoint=os.getenv('OPENAI_ENDPOINT'))

retrieval_augmented_qa_chain = (
    # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
    # "question" : populated by getting the value of the "question" key
    # "context"  : populated by getting the value of the "question" key and chaining it into the base_retriever
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    # "context"  : is assigned to a RunnablePassthrough object (will not be called or considered in the next step)
    #              by getting the value of the "context" key from the previous step
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # "response" : the "context" and "question" values are used to format our prompt object and then piped
    #              into the LLM and stored in a key called "response"
    # "context"  : populated by getting the value of the "context" key from the previous step
    | {"response": prompt | primary_qa_llm, "context": itemgetter("context")}
)

In [28]:
question = "Que son los Leal coins"

result = retrieval_augmented_qa_chain.invoke({"question" : question})

print(result["response"].content)

Tener Leal Coins es como tener efectivo, con ellos puedes adquirir lo que quieras en cualquiera de las marcas aliadas de Leal.


# HF Model

In [21]:
import nest_asyncio
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms.huggingface import HuggingFaceInferenceAPI

from llama_index.embeddings import HuggingFaceInferenceAPIEmbedding
import pandas as pd

nest_asyncio.apply()


def build_query_engine(llm):
    vector_index = VectorStoreIndex.from_documents(
        documents, service_context=ServiceContext.from_defaults(chunk_size=512, llm=llm),
        embed_model=HuggingFaceInferenceAPIEmbedding,
    )

    query_engine = vector_index.as_query_engine(similarity_top_k=2)
    return query_engine

# Function to evaluate as Llama index does not support async evaluation for HFInference API
def generate_responses(query_engine, test_questions, test_answers):
  responses = [query_engine.query(q) for q in test_questions]

  answers = []
  contexts = []
  for r in responses:
    answers.append(r.response)
    contexts.append([c.node.get_content() for c in r.source_nodes])
  dataset_dict = {
        "question": test_questions,
        "answer": answers,
        "contexts": contexts,
  }
  if test_answers is not None:
    dataset_dict["ground_truth"] = test_answers
  ds = Dataset.from_dict(dataset_dict)
  return ds

ModuleNotFoundError: No module named 'llama_index.llms.huggingface'

In [13]:
from langchain import HuggingFaceHub, PromptTemplate, LLMChain
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

primary_qa_llm=HuggingFaceHub(repo_id='HuggingFaceH4/zephyr-7b-beta', # 'tiiuae/falcon-7b-instruct',
                               model_kwargs={"temperature": 0.1, "max_new_tokens": 100})

retrieval_augmented_qa_chain = (
    # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
    # "question" : populated by getting the value of the "question" key
    # "context"  : populated by getting the value of the "question" key and chaining it into the base_retriever
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    # "context"  : is assigned to a RunnablePassthrough object (will not be called or considered in the next step)
    #              by getting the value of the "context" key from the previous step
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # "response" : the "context" and "question" values are used to format our prompt object and then piped
    #              into the LLM and stored in a key called "response"
    # "context"  : populated by getting the value of the "context" key from the previous step
    | {"response": prompt | primary_qa_llm, "context": itemgetter("context")}
)
question = "Hola, soy Julian"

result = retrieval_augmented_qa_chain.invoke({"question" : question})

print(result["response"])

Human: Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'Redirigir...':
Context:
[Document(page_content='Los Leal Coins, son como el efectivo', metadata={'description': 'Encuentra en la app Leal una forma fácil de disfrutar lo que más te gusta.', 'language': 'No language found.', 'source': 'https://www.leal.co/leal-coins', 'title': 'Los Leal Coins, son como el efectivo'}), Document(page_content='Usuarios Leal, puntos y Leal Coins', metadata={'description': 'Los puntos puedes redimirlos por premios exclusivos en la marca en la que acumulas, los Leal Coins son universales y puedes usarlos en miles de marcas físicas u online que tenemos para ti.', 'language': 'No language found.', 'source': 'https://www.leal.co/usuarios', 'title': 'Usuarios Leal, puntos y Leal Coins'}), Document(page_content='Tus puntos se convierten en premios', metadata={'description': 'Tenemos cientos de comercios donde ganas premios por ha

In [None]:
question = "Que son los Leal coins"

result = retrieval_augmented_qa_chain.invoke({"question" : question})

print(result["response"].content)

## Evaluating process

In [14]:
# Create Test Data
from langchain.document_loaders import DirectoryLoader
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

generator = TestsetGenerator.from_langchain(generator_llm=primary_qa_llm,critic_llm=primary_qa_llm,embeddings=embeddings)
testset = generator.generate_with_langchain_docs(documents, test_size=30, 
                                                 raise_exceptions=False, with_debugging_logs=False,
                                                 distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})   

embedding nodes:   0%|          | 0/6 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


ValueError: a cannot be empty unless no samples are taken

In [None]:
test_df = testset.to_pandas()
test_df

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What are Leal Coins and how can they be used?,"[Usuarios Leal, puntos y Leal Coins\n\n\n\n\nI...",Leal Coins are a universal currency that can b...,simple,"[{'source': 'https://www.leal.co/usuarios', 't...",True
1,What kind of rewards can be redeemed with Leal...,"[Usuarios Leal, puntos y Leal Coins\n\n\n\n\nI...",The answer to given question is present in con...,simple,"[{'source': 'https://www.leal.co/usuarios', 't...",True
2,What is Leal and how does it help users earn r...,"[Usuarios Leal, puntos y Leal Coins\n\n\n\n\nI...",Leal is an app that allows users to earn rewar...,simple,"[{'source': 'https://www.leal.co/usuarios', 't...",True
3,What is Leal and how does it help users earn r...,"[Usuarios Leal, puntos y Leal Coins\n\n\n\n\nI...",Leal is an app that allows users to earn rewar...,simple,"[{'source': 'https://www.leal.co/usuarios', 't...",True
4,What are Leal Coins and how can they be used?,"[Usuarios Leal, puntos y Leal Coins\n\n\n\n\nI...",Leal Coins are a universal currency that can b...,simple,"[{'source': 'https://www.leal.co/usuarios', 't...",True
5,What are the rewards and differences between L...,"[Usuarios Leal, puntos y Leal Coins\n\n\n\n\nI...",The points earned through purchases on Leal ca...,reasoning,"[{'source': 'https://www.leal.co/usuarios', 't...",True
6,What are the rewards and differences between L...,"[Usuarios Leal, puntos y Leal Coins\n\n\n\n\nI...",The points earned through purchases on Leal ca...,reasoning,"[{'source': 'https://www.leal.co/usuarios', 't...",True
7,What are the uses and ways to earn Leal Coins?,"[Usuarios Leal, puntos y Leal Coins\n\n\n\n\nI...",Leal Coins can be earned through various ways ...,multi_context,"[{'source': 'https://www.leal.co/usuarios', 't...",True
8,What is the process for earning rewards with L...,"[Usuarios Leal, puntos y Leal Coins\n\n\n\n\nI...",,multi_context,"[{'source': 'https://www.leal.co/usuarios', 't...",True


In [87]:
test_questions = test_df["question"].values.tolist()
test_groundtruths = test_df["ground_truth"].values.tolist()

In [88]:
# Add answers to the dataframe
answers = []
contexts = []

for question in test_questions:
  response = retrieval_augmented_qa_chain.invoke({"question" : question})
  answers.append(response["response"].content)
  contexts.append([context.page_content for context in response["context"]])

In [89]:
from datasets import Dataset

response_dataset = Dataset.from_dict({
    "question" : test_questions,
    "answer" : answers,
    "contexts" : contexts,
    "ground_truth" : test_groundtruths
})

In [90]:
# Import needed variables
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

metrics = [
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    answer_correctness,
]

In [91]:
# 3. Select Embeddings Model
results = evaluate(response_dataset, metrics, llm=primary_qa_llm,embeddings=embeddings)

Evaluating:   0%|          | 0/45 [00:00<?, ?it/s]

In [92]:
results

{'faithfulness': 0.8889, 'answer_relevancy': 0.8628, 'context_recall': 0.7778, 'context_precision': 0.7593, 'answer_correctness': 0.5882}

In [26]:
results_df = results.to_pandas()
results_df

Unnamed: 0,question,answer,contexts,ground_truth,faithfulness,answer_relevancy,context_recall,context_precision,answer_correctness
0,What can you do with the points you accumulate...,You can redeem the points for exclusive prizes...,[crédito Leal y gana cashback hasta de 5% con ...,You can redeem the points you accumulate in th...,1.0,0.865079,1.0,1.0,0.731761
1,What can you use Leal Coins for in the Leal app?,You can use Leal Coins in thousands of physica...,[crédito Leal y gana cashback hasta de 5% con ...,Leal Coins can be used to acquire bonuses from...,1.0,0.954475,1.0,1.0,0.899607
2,What can you do with the points you accumulate...,You can redeem the points for exclusive prizes...,[crédito Leal y gana cashback hasta de 5% con ...,You can redeem the points you accumulate in th...,1.0,0.865079,1.0,1.0,0.731761
3,What can you use Leal Coins for in the Leal app?,You can use Leal Coins in thousands of physica...,[crédito Leal y gana cashback hasta de 5% con ...,Leal Coins can be used to acquire bonuses from...,1.0,0.954475,1.0,1.0,0.899607
4,What can you use Leal Coins for in the Leal app?,You can use Leal Coins in thousands of physica...,[crédito Leal y gana cashback hasta de 5% con ...,Leal Coins can be used to acquire bonuses from...,1.0,0.954475,1.0,1.0,0.899607
5,What distinguishes points from Leal Coins in t...,Points can be redeemed for exclusive rewards i...,[tu app Leal y entérate cuantos puntos te falt...,Points can be redeemed for exclusive rewards i...,0.666667,0.926413,1.0,1.0,0.581555
6,What distinguishes points from Leal Coins in t...,Points can be redeemed for exclusive rewards i...,[tu app Leal y entérate cuantos puntos te falt...,Points can be redeemed for exclusive rewards i...,0.666667,0.926413,1.0,1.0,0.581555
7,What's the difference between Puntos and Leal ...,The points can be redeemed for exclusive rewar...,[tu app Leal y entérate cuantos puntos te falt...,Puntos can be redeemed for exclusive rewards f...,1.0,0.0,1.0,0.805556,0.575817
8,What's the difference between Puntos and Leal ...,The points can be redeemed for exclusive rewar...,[tu app Leal y entérate cuantos puntos te falt...,Puntos can be redeemed for exclusive rewards f...,1.0,0.0,0.666667,0.805556,0.575817
9,What can you use Leal Coins for in the Leal app?,You can use Leal Coins in thousands of physica...,[crédito Leal y gana cashback hasta de 5% con ...,Leal Coins can be used to acquire bonuses from...,1.0,0.954475,1.0,1.0,0.899607


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluating: 100%|██████████| 50/50 [04:40<00:00,  5.61s/it]


## Better implementation

In [39]:
from langchain import hub
retrieval_qa_prompt = hub.pull("rlm/rag-prompt", api_url="https://api.hub.langchain.com")

In [33]:
print(retrieval_qa_prompt.messages[0].prompt.template)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:


In [41]:
# Multi Query Retriever
from langchain.retrievers import MultiQueryRetriever
advanced_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=primary_qa_llm)

# Document 
from langchain.chains.combine_documents import create_stuff_documents_chain
document_chain = create_stuff_documents_chain(primary_qa_llm, prompt)

# Retrieval Chain
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(advanced_retriever, document_chain)

In [44]:
response = retrieval_chain.invoke({"input": "Que son los Leal Coins"})

KeyError: "Input to ChatPromptTemplate is missing variables {'question'}.  Expected: ['context', 'question'] Received: ['input', 'context']"

In [None]:
response

In [44]:
import random
def chatbot_response(user_query, chatbot_name="Le A.I", company_name="Leal"):
  """
  This function simulates a friendly customer service chatbot conversation.

  Args:
      user_query: The user's question or input.
      chatbot_name: The name of the chatbot
      company_name: The name of your company

  Returns:
      A string representing the chatbot's response.
  """
  # Define conversation flow
  prompts = [
      {
          "START_SEQ": True,
          "USER_QUERY": None,
          "RESPONSE A": f"Hi there! I'm {chatbot_name}, your friendly customer service assistant for {company_name}. How can I help you today?",
          "RESPONSE B": f"Great to see you! Is there anything I can assist you with on this day?"
      },
      {
          "USER_QUERY": None,
          "RESPONSE A": f"I understand you're having trouble with {user_query}. Let's see what we can do to fix that.",
          "RESPONSE B": f"It sounds like you're looking for information about {user_query}. I'm happy to help you find what you need."
      },
      {
          "USER_QUERY": None,
          "RESPONSE A": "Here are a few things you can try: [List your solutions here]. Let me know if any of these work!",
          "RESPONSE B": f"I can definitely walk you through the steps for {user_query}. Would you like me to do that?",
          "RESPONSE C": f"No problem! I've found some helpful resources on {user_query} that you might find useful: [List your resources here]."
      },
      {
          "USER_QUERY": None,
          "RESPONSE A": "No worries at all! We'll get this figured out together.",
          "RESPONSE B": "That's a great question! Let me see if I can find an answer for you.",
          "RESPONSE C": f"I apologize for any inconvenience this may have caused. Is there anything else I can do to assist you today?"
      },
      {
          "USER_QUERY": None,
          "RESPONSE A": "If the issue seems complex, you can offer to connect the user to a human agent.",
          "RESPONSE B": f"It seems like your situation might require a bit more personalized attention. Would you like me to connect you with one of our customer service representatives?"
      },
      {
          "USER_QUERY": None,
          "RESPONSE A": "I hope this information was helpful! Is there anything else I can help you with today?",
          "RESPONSE B": "Glad I could be of assistance! Have a wonderful {day.name}!"
      },
      {
          "END_SEQ": True,
          "USER_QUERY": None,
          "RESPONSE A": None,
          "RESPONSE B": None
      }
  ]

  # Loop through conversation prompts
  current_prompt = 0
  while current_prompt < len(prompts):
    prompt = prompts[current_prompt]
    if prompt.get("USER_QUERY") is None or prompt.get("USER_QUERY") == user_query:
      response = random.choice([prompt.get(f"RESPONSE {char}") for char in "ABC" if prompt.get(f"RESPONSE {char}")==""])
      if response:
        print(response)
      if prompt.get("END_SEQ"):
        break
    current_prompt += 1

# Example usage
chatbot_response("I'm having trouble logging in.")

IndexError: Cannot choose from an empty sequence

In [12]:
from pysentimiento import create_analyzer
analyzer = create_analyzer(task="sentiment", lang="es")
analyzer.predict("Qué gran jugador es Messi")


config.json:   0%|          | 0.00/925 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/435M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/384 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/167 [00:00<?, ?B/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


AnalyzerOutput(output=POS, probas={POS: 0.946, NEU: 0.037, NEG: 0.017})

In [25]:
sentiment, probas = analyzer.predict("Qué gran jugador es Messi")


TypeError: cannot unpack non-iterable AnalyzerOutput object

In [28]:
sentiment.output

'POS'