## Simple RAG playground

In [83]:
import instructor
from openai import AsyncOpenAI
import os
from pydantic import BaseModel
from dataclasses import dataclass

In [84]:
# Credentials are read from the environment when already set; the placeholder
# literals are only fallbacks, so a real key exported in the shell is never
# overwritten by running this cell. Do not commit real secrets here.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key")
# Notion root page id, passed to Project(notion_root_page_id=...).
PROJECT_ID = "1b35d9bf94ff801792bfd1824fac0c96"
NOTION_TOKEN = os.environ.get("NOTION_TOKEN", 'your-notion-token')

In [85]:
# Shared async OpenAI client wrapped by instructor; the completion calls in this
# notebook go through it and pass pydantic classes via `response_model=`.
openai_client = instructor.from_openai(AsyncOpenAI())

## Simple RAG app

In [6]:
# Download the dataset repository; Myllmapp reads the .md files from ./ragas-airline-dataset.
! git clone https://huggingface.co/datasets/explodinggradients/ragas-airline-dataset

Cloning into 'ragas-airline-dataset'...
remote: Enumerating objects: 14, done.
remote: Total 14 (delta 0), reused 0 (delta 0), pack-reused 14 (from 1)
Unpacking objects: 100% (14/14), 16.16 KiB | 719.00 KiB/s, done.


In [86]:
# Structured-output schema for the RAG answer: it is passed as `response_model`
# in Myllmapp.ask, which returns its `response` field.
class Ragoutput(BaseModel):
    response:str



@dataclass
class Myllmapp:
    """Minimal RAG app that puts every markdown document into the prompt.

    On construction, each ``.md`` file directly inside ``data_dir`` is read and
    the texts are concatenated into one context string (``self.content``) that
    is sent along with every query.

    Attributes:
        prompt: System prompt sent with every request.
        data_dir: Directory scanned (non-recursively) for ``.md`` documents.
        model: Chat model name passed to the completions call.
        client: Object exposing ``chat.completions.create(...)`` that accepts a
            ``response_model`` argument. When ``None``, the notebook-level
            ``openai_client`` is used.
    """

    # NOTE(review): the default prompt contains a typo ("Your are"); it is kept
    # verbatim here because it is a public default value.
    prompt: str = "Your are expert document chatbot. Answer from following documents"
    data_dir: str = "ragas-airline-dataset"
    model: str = "gpt-4o-mini"
    client: object = None

    def _read_markdown_file(self, file_path):
        """Return the full text of ``file_path`` decoded as UTF-8."""
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()

    def __post_init__(self):
        """Load all markdown documents into ``self.content`` and bind the client."""
        # sorted(): os.listdir returns entries in arbitrary order, which would
        # make the prompt context differ between machines and runs.
        files = [
            os.path.join(self.data_dir, name)
            for name in sorted(os.listdir(self.data_dir))
            if name.endswith(".md")
        ]
        # Each document's text is followed by "---\n\n--<path>"; documents are
        # then joined with a blank line.
        sections = [f"{self._read_markdown_file(path)}---\n\n--{path}" for path in files]
        self.content = '\n\n'.join(sections)
        if self.client is None:
            # Fall back to the notebook-level client when none was injected.
            self.client = openai_client

    async def ask(self, query: str) -> str:
        """Answer ``query`` using the loaded documents as context.

        The system prompt, the concatenated documents and the query are sent as
        three messages (the last two with the ``user`` role).

        Returns:
            The ``response`` field of the structured ``Ragoutput`` result.
        """
        result = await self.client.chat.completions.create(
            model=self.model,
            response_model=Ragoutput,
            messages=[
                {"role": "system", "content": self.prompt},
                {"role": "user", "content": self.content},
                {"role": "user", "content": query},
            ],
        )
        return result.response
        
    

    

In [87]:
# Build the app; construction reads the markdown files from the ragas-airline-dataset folder.
my_rag = Myllmapp()

In [89]:
# Smoke test: ask a single question and display the returned answer string.
await my_rag.ask("Can i get a refund for my missed flight?")

'If you miss your flight, whether you can receive a refund depends on the fare conditions of your ticket:\n\n1. **Refundable Tickets (Flexible Fares)**:\n   - If you have a refundable ticket, you can cancel your flight and request a full refund, even if you missed the flight.\n\n2. **Non-Refundable Tickets (Standard Fares)**:\n   - Typically, non-refundable tickets do not allow for refunds if you miss your flight. However, you may be eligible for a partial refund or travel credit, but cancellation fees may apply.\n\n3. **Basic Economy & Promotional Fares**:\n   - These tickets usually do not allow any refunds or modifications if you miss your flight.\n\nTo proceed with a refund request:\n- Check your ticket conditions via "Manage My Booking." \n- If eligible, submit a refund request through the same platform.'

## Set up the SDK

In [90]:
from ragas_annotator.project.core import Project

In [91]:
# Create the project handle from the Notion token and root page id defined in the config cell.
project = Project(
    name="Customer support RAG", 
    notion_api_key=NOTION_TOKEN, 
    notion_root_page_id=PROJECT_ID,
)
# Bare expression so the notebook displays the project's repr.
project

Project(name='Customer support RAG', root_page_id=1b35d9bf94ff801792bfd1824fac0c96)

## Read dataset

In [92]:
from ragas_annotator.model.notion_model import NotionModel
from ragas_annotator import nmt

# Row schema used for the "RAG Dataset"; each field is declared with an `nmt` column type.
class Dataset(NotionModel):
    id: str = nmt.ID()
    query: str = nmt.Title()  # question that is sent to the RAG app
    expected_answer: str = nmt.Text()  # reference answer handed to the judge

In [94]:
# Look up the dataset named "RAG Dataset" with the Dataset row model, then load it.
dataset = project.get_dataset(
    name="RAG Dataset",
    model=Dataset,
)
dataset.load()

In [95]:
# Number of rows available after loading.
len(dataset)

15

In [96]:
# Inspect the first row to check the query / expected_answer fields.
dataset[0]

Dataset(id=15 query='Are there any special policies for traveling with infants?' expected_answer='Infants under 2 years can travel on an adult’s lap for free or with a discounted seat. Free checked baggage allowance may apply depending on the airline.')

## LLM as judge

In [107]:
import typing as t
# LLM-as-judge for correctness: asks the model whether response `x` matches the
# expected answer `y` and returns the verdict string ("pass" or "fail").
# NOTE(review): `ragas` is not imported in any visible cell — confirm where
# `ragas.metrics` comes from before running this on a fresh kernel.
@ragas.metrics
async def judge(x, y):
    # Structured-output schema: restricts the verdict to the two allowed labels.
    class Judge(BaseModel):
        verdict: t.Literal["pass", "fail"]

    system_msg = {"role": "system", "content": "check if response and expected answer are the same."}
    user_msg = {"role": "user", "content": f"response:{x}, expected {y}"}
    result = await openai_client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=Judge,
        messages=[system_msg, user_msg],
    )
    return result.verdict

    

In [112]:
# Sanity check of the judge on a near-match pair of statements.
await judge("aftab is designer","aftab is UX designer")

'fail'

In [None]:
# NOTE(review): `train` and `save` are not defined in any visible cell; presumably
# they are provided by the `@ragas.metrics` wrapper around `judge` — confirm.
# This cell has no execution count, so it was not run in the saved session.
judge.train("fourth-experiment")
judge.save('my-name')

In [119]:
import typing as t
async def tone_judge(x, y):
    """Ask the model to rate how arrogant a response sounds.

    Args:
        x: The response text to assess.
        y: The expected answer; it is included in the user message next to ``x``.

    Returns:
        The integer ``verdict`` returned by the model (the system prompt asks
        for a score from 0 to 10).
    """
    # Structured-output schema: a single integer score.
    class Judge(BaseModel):
        verdict: int

    conversation = [
        {"role": "system", "content": "check if the tone is arrogant. and give score from 0 to 10 depending on the arrogance level."},
        {"role": "user", "content": f"response:{x}, expected {y}"},
    ]
    scored = await openai_client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=Judge,
        messages=conversation,
    )
    return scored.verdict

    

## Exp - 1

In [122]:
# Experiment row schema: the Dataset columns plus the app's response and a tone score.
class Experiment(Dataset):
    response: str = nmt.Text()
    # correctness: str = nmt.Select(options=["pass","fail"])
    tone_score: str = nmt.Text()  # declared as text, so numeric scores must be stringified

In [123]:
@project.langfuse_experiment(Experiment, name_prefix="Workshop")
async def run_experiment(row: Dataset):
    """Run the RAG app on one dataset row and score the tone of its answer.

    Args:
        row: A dataset row providing ``id``, ``query`` and ``expected_answer``.

    Returns:
        An ``Experiment`` row that copies the input fields and adds the
        generated ``response`` together with its ``tone_score``.
    """
    response = await my_rag.ask(row.query)
    # Use the tone judge here: the pass/fail `judge` checks correctness, not tone.
    tone_score = await tone_judge(response, row.expected_answer)
    return Experiment(
        id=row.id,
        query=row.query,
        expected_answer=row.expected_answer,
        response=response,
        # Experiment.tone_score is declared as str while tone_judge returns an int.
        tone_score=str(tone_score),
    )

In [124]:
# Prompt variant for the next run; this mutates the shared `my_rag` instance in place.
my_rag.prompt = "You are a specialized document chatbot. Provide answers based on the following documents."

In [126]:
# Run the experiment over `dataset` with the prompt currently set on `my_rag`.
await run_experiment.run_async(
    name="fifth-experiment",
    dataset=dataset
)

100%|███████████████████████████████████████████████████| 15/15 [00:06<00:00,  2.39it/s]


Experiment(name=fifth-experiment, model=Experiment)

## Exp - 2

In [110]:
# Second prompt variant; again overwrites the prompt on the shared `my_rag` instance.
my_rag.prompt = "You are a specialized document chatbot, answer shortly like a customer support guy."

In [111]:
# Run the experiment over `dataset` with the prompt currently set on `my_rag`.
await run_experiment.run_async(
    name="third-experiment",
    dataset=dataset
)

100%|███████████████████████████████████████████████████| 15/15 [00:05<00:00,  2.84it/s]


Experiment(name=third-experiment, model=Experiment)

## Compare

In [81]:
# NOTE(review): 'first-experiment' and 'second-experiment' are not created by any
# visible cell (the runs above are named "fifth-experiment" and "third-experiment")
# — confirm these are the experiments that should be compared.
exp_one = project.get_experiment('first-experiment',Experiment)
exp_two =  project.get_experiment('second-experiment',Experiment)

In [82]:
# Compare the two experiments; in the saved output the call uploads to Notion and returns a page URL.
project.compare_experiments(exp_one,exp_two)

Experiments have different models: <class '__main__.Experiment'> and <class '__main__.Experiment'>
Experiments have different models: <class '__main__.Experiment'> and <class '__main__.Experiment'>
Uploading to Notion: 100%|██████████████████████████████| 15/15 [00:43<00:00,  2.88s/it]


'https://www.notion.so/1ba5d9bf94ff81e2ab94c2586a1ed315'