In [None]:
# Execute the RAG notebook before anything else. Later cells use names that are
# not defined in this notebook (build_system_prompt, SystemMessage, HumanMessage,
# queryAIModel, embedding_model_name) — presumably they come from RAG.ipynb; verify.
%run RAG.ipynb

In [None]:
import mlflow
import pandas as pd

from mlflow.metrics.genai import faithfulness, relevance, EvaluationExample
from langchain.chains import RetrievalQA
from langchain import hub
from langchain.prompts.chat import ChatPromptTemplate

In [None]:
# E-commerce customer-support questions that make up the evaluation set.
eval_questions = [
    "How can I track my order status on Rakuten?",
    "What is Rakuten's return policy for electronics?",
    "Can I change the shipping address after placing my order?",
    "What payment methods are accepted on Rakuten?",
    "Is it possible to cancel my order after it has been shipped?",
    "How do I apply a promo code to my purchase on Rakuten?",
    "What should I do if I receive a damaged product?",
    "Are there any warranties on the products sold through Rakuten?",
    "How can I contact Rakuten customer service for order issues?",
    "What are Rakuten Points and how do I use them?",
    "Is international shipping available for orders placed on Rakuten?",
    "How do I create a Rakuten account?",
    "What is Rakuten's policy on product exchanges?",
    "How can I leave a review for a product I purchased on Rakuten?",
    "Are there any member-exclusive discounts on Rakuten?",
    "What to do if my payment is declined on Rakuten?",
    "How do I update my payment information on Rakuten?",
    "Can I shop on Rakuten without creating an account?",
    "What is the estimated delivery time for orders within the US?",
    "How does Rakuten handle privacy and data protection?",
]

# Single-column frame; the "questions" column is what `model` reads and what
# the evaluation maps to the judge's "inputs".
eval_df = pd.DataFrame({"questions": eval_questions})

In [None]:
# Create good and bad examples for faithfulness in the context of e-commerce questions.
#
# Faithfulness grades whether the claims made in the output are supported by the
# grading context; it does not penalise an output for leaving context out. The
# low-scoring example therefore has to contain unsupported/contradicted claims
# (an answer that is merely incomplete is still faithful).
_return_policy_question = "What is Rakuten's return policy for electronics?"
_return_policy_context = {
    "context": "Rakuten's return policy for electronics states that items can be returned within 30 days of purchase. However, to qualify for a return, the item must be unopened and in its original condition. A restocking fee may apply, and returns for defective items are handled differently."
}

faithfulness_examples = [
    # Bad example: only the 30-day window is grounded; the rest contradicts or
    # goes beyond the context.
    EvaluationExample(
        input=_return_policy_question,
        output="You can return electronics within 30 days of purchase, even if the item has been opened and used. Rakuten never charges a restocking fee, and defective items are refunded instantly with no questions asked.",
        score=2,
        justification="Only the 30-day return window can be derived from the provided context. The claims that opened items can be returned and that no restocking fee is ever charged contradict the context, and the claim about instant refunds for defective items is not supported by it, so the majority of the output is unfaithful.",
        grading_context=_return_policy_context,
    ),
    # Good example: every claim can be traced back to the context.
    EvaluationExample(
        input=_return_policy_question,
        output="Rakuten allows returns on electronics within 30 days of purchase, provided the items are unopened and in their original condition. A restocking fee may apply, and defective items have a separate return process.",
        score=5,
        justification="The output accurately reflects the provided context by detailing the conditions under which electronics can be returned, including the time frame, item condition, potential restocking fees, and the process for defective items.",
        grading_context=_return_policy_context,
    ),
]

# LLM-judged metric; GPT-4 (via the OpenAI provider URI) acts as the grader.
faithfulness_metric = faithfulness(model="openai:/gpt-4", examples=faithfulness_examples)

In [None]:
# Few-shot examples (one weak, one strong) that calibrate the relevance judge
# on an e-commerce question. Both share the same question and grading context.
_promo_question = "How do I apply a promo code to my purchase on Rakuten?"
_promo_context = "On Rakuten, to apply a promo code to your purchase, navigate to the checkout page. There, you will find a field labeled 'Promo Code' or 'Discount Code' where you can enter your code before completing the purchase."

# Weak answer: talks about promo codes in general rather than the Rakuten flow.
_relevance_bad = EvaluationExample(
    input=_promo_question,
    output="Promo codes can save you money on your purchases. To use a promo code, you should find the code, make sure it's valid, and then apply it at checkout. Make sure to check the expiration date.",
    score=2,
    justification="The output provides general information about how promo codes work, but it includes irrelevant details about finding and validating the code instead of directly explaining how to apply it on Rakuten.",
    grading_context={"context": _promo_context},
)

# Strong answer: addresses exactly what was asked, nothing more.
_relevance_good = EvaluationExample(
    input=_promo_question,
    output="On Rakuten, navigate to the checkout page and look for a field labeled 'Promo Code' or 'Discount Code'. Enter your code in this field before completing your purchase to apply the discount.",
    score=5,
    justification="The output is highly relevant to the question, providing a clear and concise explanation on exactly how to apply a promo code on Rakuten, directly addressing the user's inquiry without unnecessary information.",
    grading_context={"context": _promo_context},
)

relevance_examples = [_relevance_bad, _relevance_good]

# LLM-judged metric; GPT-4 (via the OpenAI provider URI) acts as the grader.
relevance_metric = relevance(model="openai:/gpt-4", examples=relevance_examples)

In [None]:
# Function that returns the response from the RAG for the evaluation dataset
def model(input_df):
    """Answer every question in ``input_df`` with the currently selected chain.

    Reads the module-level ``qa`` chain, so ``qa`` must be bound to the chain
    under test before this function is called.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must contain a ``"questions"`` column holding the user questions.

    Returns
    -------
    list[dict]
        One dict per input row, in order, with keys ``"result"`` (the chain's
        answer) and ``"source_documents"`` (the contexts returned by
        ``build_system_prompt`` for that question).
    """
    answers = []
    for question in input_df["questions"]:
        # The URLs returned alongside the prompt are not needed for evaluation.
        system_prompt, _urls, contexts = build_system_prompt(question)
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=question),
        ]
        # Query the chain exactly once per question. Calling predict a second
        # time (as the previous version did) doubles LLM cost and the time
        # spent in this function, and may return a different answer.
        response = qa.predict(input=messages)
        answers.append({"result": response, "source_documents": contexts})

    return answers

In [None]:
# Let's run the evaluation for each LLM, all sharing the same embedding model.

# `model` reads `qa` as a global, so it is rebound to the chain under test
# before every mlflow.evaluate call.
qa = None

llms = ('OpenAI', 'Anthropic')
# llms = ('OpenAI',)  # single-LLM run; the trailing comma keeps this a tuple

# One Series of aggregate metrics per LLM (named after the LLM); they are
# combined into a single frame once, after the loop, instead of growing a
# DataFrame with pd.concat on every iteration.
metrics_per_llm = []

# Iterate through each LLM and execute the evaluation
for llm_name in llms:
    # NOTE: run_name is only printed here; it is not passed to MLflow.
    run_name = f"{llm_name}_{embedding_model_name}_run"
    print(f'run_name={run_name}')
    # Log parameters
    print(f"model : {llm_name}")
    print(f"embedding : {embedding_model_name}")
    qa = queryAIModel(llm_name, return_chain=True)
    # Run the evaluation
    results = mlflow.evaluate(
        model,
        eval_df,
        model_type="question-answering",
        evaluators="default",
        # `model` returns dicts; "result" is the key holding the answer text.
        predictions="result",
        extra_metrics=[faithfulness_metric, relevance_metric, mlflow.metrics.latency()],
        evaluator_config={
            # Map our column names onto the names the judge metrics expect.
            "col_mapping": {
                "inputs": "questions",
                "context": "source_documents",
            }
        },
    )
    metrics_per_llm.append(pd.Series(results.metrics, name=llm_name))

# Rows are metric names, columns are LLM names.
df_metrics = pd.concat(metrics_per_llm, axis=1) if metrics_per_llm else pd.DataFrame()

In [None]:
# Render the metrics frame built by the evaluation cell as this cell's output.
df_metrics

In [None]:
# Let's now log these metrics in Domino.
# Every column of df_metrics becomes its own MLflow run under one experiment.
experiment_name = 'RAG eval'
mlflow.set_experiment(experiment_name)
for column, column_metrics in df_metrics.items():
    # The run is named after the column label.
    with mlflow.start_run(run_name=column):
        for metric_name, metric_value in column_metrics.items():
            # Log the metric
            mlflow.log_metric(metric_name, metric_value)

In [None]:
# Looks good, let's push the prompt to a prompt hub.
# Set LANGCHAIN_HUB_API_KEY as an environment variable before running this.
# hub.push("subirmansukhani/rakuten-qa-rag", ChatPromptTemplate.from_template(template), new_repo_is_public=False)

In [None]:
# Let's take a look at the prompt hub

# from IPython.display import Javascript, display

# # Define the URL you want to open
# url = 'https://smith.langchain.com/hub/my-prompts?organizationId=6ac11f6f-c332-4bac-b45b-28a8a96410b4'

# # JavaScript code to open a new tab with the specified URL and display it in the cell's output area
# js_code = f'''
# var newWindow = window.open("{url}");
# element.append(newWindow.document.body);
# '''

# # Display the JavaScript output in the cell's output area
# display(Javascript(js_code))