# Notebook
- create dataset
- create LLM as judge
- run experiments

### Create your first project and upload the test dataset

In [1]:
from json import load

# Read the raw test records; `data` is iterated later, where each item is
# indexed by "question", "citations" and "grading_notes".
# The encoding is passed explicitly so the read does not depend on the
# platform's default locale encoding (JSON text is UTF-8).
with open('yann-lecun-wisdom/yann_test.json', 'r', encoding='utf-8') as f:
    data = load(f)

In [10]:
from ragas_experimental import BaseModel

# Schema for one row of the test dataset used throughout this notebook.
class TestDataset(BaseModel):
    question: str  # question text; sent to the assistant and to the judge as `query`
    citations: list[str]  # list of citation strings, copied unchanged into experiment rows
    grading_notes: str  # notes passed to the LLM judge as `grading_notes`

In [None]:
import os

# Create an app token at dev.app.ragas.io and paste it below.
RAGAS_APP_TOKEN = "your-app-token"
RAGAS_API_BASE_URL = "https://api.dev.app.ragas.io"

# Export the settings as process environment variables in a single call.
# NOTE(review): presumably read by ragas_experimental and the OpenAI client —
# confirm against those libraries.
os.environ.update(
    {
        "RAGAS_APP_TOKEN": RAGAS_APP_TOKEN,
        "RAGAS_API_BASE_URL": RAGAS_API_BASE_URL,
        "OPENAI_API_KEY": "your-openai-key",
    }
)

In [None]:
! pip install git+https://github.com/explodinggradients/ragas_annotator.git@ragas-experimental

In [2]:
from ragas_experimental import Project



In [None]:
# Create the project named "yann-lecun-wisdom"; `p` is the project handle
# used by the dataset and experiment cells that follow.
p = Project.create(
    name="yann-lecun-wisdom",
    description="Yann LeCun Wisdom",
)


In [8]:
# Look the project up by name and rebind `p` to it.
p = Project.get(name="yann-lecun-wisdom")
p  # bare expression: the notebook shows the project's repr as cell output

Project(name='yann-lecun-wisdom')

In [None]:
# Register a dataset typed by TestDataset, then append one row per raw record.
test_dataset = p.create_dataset(name="test-yann-lecun", model=TestDataset)

# Keys copied from each raw record into the matching TestDataset field.
row_fields = ("question", "citations", "grading_notes")
for item in data:
    test_dataset.append(TestDataset(**{key: item[key] for key in row_fields}))

test_dataset  # bare expression: display the dataset summary as cell output

Dataset(name=test-yann-lecun, model=TestDataset, len=0)

In [14]:
# Get a handle to the "test-yann-lecun" dataset from the project, typed with TestDataset.
test_dataset = p.get_dataset(dataset_name="test-yann-lecun", model=TestDataset)


In [15]:
# Load the dataset's rows into `test_dataset`.
# NOTE(review): presumably fetched from the Ragas backend — confirm against the library.
test_dataset.load()

### Create LLM as judge

In [16]:
# define the LLM judge
from ragas_experimental.llm import ragas_llm
from ragas_experimental.metric import DiscreteMetric
from openai import AsyncOpenAI

# Judge model: OpenAI "gpt-4o" driven through an async client.
llm = ragas_llm(provider="openai", model="gpt-4o", client=AsyncOpenAI())

# Prompt template; its placeholders ({query}, {response}, {grading_notes})
# match the keyword arguments supplied when the metric is scored.
judge_prompt = "Given the Question: {query} \n Evaluate if given answer {response} \n based on the Grading notes\n: {grading_notes}."

# Discrete metric named "correctness" whose allowed values are "pass" and "fail".
my_metric = DiscreteMetric(
    name='correctness',
    llm=llm,
    prompt=judge_prompt,
    values=["pass", "fail"],
)

# test LLM as judge
smoke_inputs = {
    "query": "what is your response",
    "response": "this is my response",
    "grading_notes": "- response should not contains word response",
}
result = my_metric.score(**smoke_inputs)
result

'fail'

In [17]:
from linkedin_ai import LinkedinAI

In [18]:
# Build the assistant under test from the LinkedIn posts JSON file via the
# `from_bm25` constructor (top-level `await` works because this runs in a notebook cell).
my_ai = await LinkedinAI.from_bm25('yann-lecun-wisdom/yann-lecun_posts.json')

Loaded 437 LinkedIn posts
BM25 index initialized


In [19]:
# Smoke-test the assistant with a sample question; the awaited answer is shown as cell output.
await my_ai.ask("what is your response")



"My response is centered around the importance of open access and open-source models in AI development. I believe that AI assistants, which will mediate all our interactions with the digital world, should be open and open-source, similar to the software infrastructure of the Internet. This openness ensures that these AI systems, which will contain all human culture and knowledge, are accessible and can be trusted by everyone. This is why Meta made Llama-2 open and free. \n\nIn a discussion at the Paris Peace Forum, I highlighted this vision, and it was met with a curious response from Microsoft President Brad Smith, who seemed to misunderstand the concept of open access/open source. It's crucial to move beyond outdated anti-open source stances and recognize the value of open models for economic, national security, and foreign policy interests, as emphasized in Meta's response to the NTIA. \n\nFurthermore, in the realm of science and technology discussions, it's essential to maintain ob

### Run experiments

In [20]:
# Row schema for experiment results: the inherited TestDataset fields plus
# the assistant's answer and the judge's verdict.
class ExperimentModel(TestDataset):
    response: str  # answer returned by `my_ai.ask` for the question
    score: str  # judge verdict, taken from the metric score's `.result`
    score_reason: str  # judge explanation, taken from the metric score's `.reason`

@p.experiment(ExperimentModel)
async def experiment_func(item: TestDataset):
    """Answer one dataset row and grade the answer with the LLM judge.

    Args:
        item: A test row providing the question, citations and grading notes.

    Returns:
        An ExperimentModel holding the row's original fields together with
        the assistant's response, the judge's result and the judge's reason.
    """
    answer = await my_ai.ask(item.question)
    verdict = await my_metric.ascore(
        query=item.question,
        response=answer,
        grading_notes=item.grading_notes,
    )
    # Carry the input fields over unchanged so each result row is self-contained.
    carried = {
        "question": item.question,
        "citations": item.citations,
        "grading_notes": item.grading_notes,
    }
    return ExperimentModel(
        **carried,
        response=answer,
        score=verdict.result,
        score_reason=verdict.reason,
    )

In [21]:
# Run the decorated experiment function over `test_dataset`.
await experiment_func.run_async(test_dataset)

Running experiment: 100%|██████████| 60/60 [00:22<00:00,  2.67it/s]


Experiment(name=nostalgic_naur, model=ExperimentModel)

### Compare and plot experiments

In [None]:
# p.compare_and_plot(experiment_names=[], model=ExperimentModel, metric_names=[])