In [2]:
import json
import textwrap
import random
import pandas as pd
import random


from openevals.llm import create_llm_as_judge
from openevals.prompts import CORRECTNESS_PROMPT

In [3]:
PROMPT = "provide a one paragraph summary of artificial intelligence"

REPSPONSE_TEXT = textwrap.dedent(
    """
    Artificial Intelligence (AI) is a branch of computer science focused on creating machines that can perform tasks requiring human-like intelligence, \nsuch as learning, reasoning, problem-solving, perception, and language understanding. It encompasses a wide range of techniques, including machine \nlearning, neural networks, and deep learning, which enable systems to recognize patterns, make decisions, and improve over time. AI is applied across \nvarious fields, from healthcare and finance to robotics and entertainment, driving innovation and efficiency. While AI presents significant \nopportunities, it also raises ethical concerns related to bias, privacy, and job displacement, necessitating careful regulation and responsible \ndevelopment.
    """
)

REFERENCE_TEXT = textwrap.dedent(
    """
    Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks typically requiring human intelligence, such as \nlearning, reasoning, problem-solving, perception, and natural language understanding. AI is powered by techniques like machine learning, deep \nlearning, and neural networks, enabling it to analyze vast amounts of data, recognize patterns, and make autonomous decisions. While AI has \ntransformative applications in fields like healthcare, finance, and automation, it also raises major considerations. Ethical concerns include bias in \nalgorithms, data privacy, and the potential for job displacement. Additionally, ensuring AI systems are transparent, explainable, and aligned with \nhuman values is crucial for responsible development and deployment.
    """
)

## Helper Functions

In [4]:
# function to read in a paragraph and scrambe the words in each sentence
def scramble_paragraph(paragraph):
    sentences = paragraph.split(".")
    scrambled_sentences = []
    for sentence in sentences:
        words = sentence.split(" ")
        random.shuffle(words)
        scrambled_sentence = " ".join(words)
        scrambled_sentences.append(scrambled_sentence)
    scrambled_paragraph = ". ".join(scrambled_sentences)
    return scrambled_paragraph


# function to print a paragraph with out line breaks
# into lines of about 80 characters make sure to break at a white space
def print_paragraph(paragraph):
    lines = textwrap.wrap(paragraph, width=100)
    for line in lines:
        print(line)

# function that takes in a evaluator objecct and paragraph and
# runs the evaluator on the paragraph and prints the results
# then runs the evaluaton on the scambled paragraph and prints the results
def evaluate_paragraph(evaluator, prompt, paragraph, reference=None):
    print(f"prompt: {prompt}")
    print("Evaluating paragraph:")
    print_paragraph(paragraph)
    print()

    if reference:
        print("Evaluating with reference:")
        print_paragraph(reference)
        print()
        results = evaluator(
            inputs=prompt, 
            outputs=paragraph, 
            reference_outputs=reference
        )
    else:
        print("Evaluating without reference")
        print()
        results = evaluator(
            inputs=prompt, 
            outputs=paragraph
        )
        
    print(f"KEY: {results['key']}")
    print(f"SCORE: {results['score']}")
    print(f"FEEDBACK:")
    print_paragraph(results['comment'])
    


In [5]:
print(REPSPONSE_TEXT)


    Artificial Intelligence (AI) is a branch of computer science focused on creating machines that can perform tasks requiring human-like intelligence, 
such as learning, reasoning, problem-solving, perception, and language understanding. It encompasses a wide range of techniques, including machine 
learning, neural networks, and deep learning, which enable systems to recognize patterns, make decisions, and improve over time. AI is applied across 
various fields, from healthcare and finance to robotics and entertainment, driving innovation and efficiency. While AI presents significant 
opportunities, it also raises ethical concerns related to bias, privacy, and job displacement, necessitating careful regulation and responsible 
development.



In [6]:
print(scramble_paragraph(REPSPONSE_TEXT))

branch  perception, 
such computer  (AI) as is Intelligence understanding learning, creating perform science intelligence, and human-like tasks that requiring machines on reasoning,  a 
 Artificial focused of language problem-solving, can. improve and to over encompasses deep patterns, a machine enable range learning, and decisions, It neural which wide including systems techniques, recognize of make 
learning,  time networks,. AI innovation from fields, robotics across is and and 
various finance applied  efficiency and to entertainment, healthcare driving. 
development also necessitating related and While privacy, job 
opportunities, it raises responsible careful concerns  and to presents AI ethical significant regulation bias, displacement,. 



## RESPONSE CORRECTNESS

In [7]:
judge = correctness_evaluator = create_llm_as_judge(
    prompt=CORRECTNESS_PROMPT,
    feedback_key="correctness",
    # model="openai:o3-mini",
    model="openai:gpt-4o",
)
evaluate_paragraph(judge, PROMPT, REPSPONSE_TEXT, reference=REFERENCE_TEXT)
print("\n--------------------------------------------------\n")
evaluate_paragraph(judge, PROMPT, scramble_paragraph(REPSPONSE_TEXT), reference=REFERENCE_TEXT)


prompt: provide a one paragraph summary of artificial intelligence
Evaluating paragraph:
     Artificial Intelligence (AI) is a branch of computer science focused on creating machines that
can perform tasks requiring human-like intelligence,  such as learning, reasoning, problem-solving,
perception, and language understanding. It encompasses a wide range of techniques, including machine
learning, neural networks, and deep learning, which enable systems to recognize patterns, make
decisions, and improve over time. AI is applied across  various fields, from healthcare and finance
to robotics and entertainment, driving innovation and efficiency. While AI presents significant
opportunities, it also raises ethical concerns related to bias, privacy, and job displacement,
necessitating careful regulation and responsible  development.

Evaluating with reference:
     Artificial Intelligence (AI) refers to the development of computer systems that can perform
tasks typically requiring human inte

In [8]:
judge = correctness_evaluator = create_llm_as_judge(
    prompt=CORRECTNESS_PROMPT,
    feedback_key="correctness",
    model="ollama:mistral-nemo"
)
evaluate_paragraph(judge, PROMPT, REPSPONSE_TEXT, reference=REFERENCE_TEXT)
print("\n--------------------------------------------------\n")
evaluate_paragraph(judge, PROMPT, scramble_paragraph(REPSPONSE_TEXT), reference=REFERENCE_TEXT)

prompt: provide a one paragraph summary of artificial intelligence
Evaluating paragraph:
     Artificial Intelligence (AI) is a branch of computer science focused on creating machines that
can perform tasks requiring human-like intelligence,  such as learning, reasoning, problem-solving,
perception, and language understanding. It encompasses a wide range of techniques, including machine
learning, neural networks, and deep learning, which enable systems to recognize patterns, make
decisions, and improve over time. AI is applied across  various fields, from healthcare and finance
to robotics and entertainment, driving innovation and efficiency. While AI presents significant
opportunities, it also raises ethical concerns related to bias, privacy, and job displacement,
necessitating careful regulation and responsible  development.

Evaluating with reference:
     Artificial Intelligence (AI) refers to the development of computer systems that can perform
tasks typically requiring human inte