## Imports

In [15]:
import marvin
from typing import List, Dict, Tuple
from itertools import groupby
import ast
import pandas as pd
# Pick the LLM used by Marvin.
# NOTE(review): presumably a process-wide setting that applies to every
# `@marvin.fn` function below — confirm against the installed Marvin version.
marvin.settings.llm_model='openai/gpt-4'

## Functions

In [34]:
# This function has no Python body: the `marvin.fn` decorator is what makes it callable.
# NOTE(review): Marvin presumably builds the LLM prompt from the signature and the
# docstring, so the docstring below is effectively runtime text — edit it with care.
@marvin.fn
def replace_pronouns(text: str) -> str:
    """Replaces pronouns in the text with the subject they refer to."""

# This function has no Python body: the `marvin.fn` decorator is what makes it callable.
# NOTE(review): Marvin presumably builds the LLM prompt from the signature and the
# docstring, so the docstring below is effectively runtime text — edit it with care.
@marvin.fn
def parse_into_statements(text: str) -> list[str]:
    """Parse the `text` into a list of strings containing each fact as a separate sentence."""

# This function has no Python body: the `marvin.fn` decorator is what makes it callable.
# NOTE(review): Marvin presumably builds the LLM prompt from the signature and the
# docstring, so the docstring is effectively runtime text. The result is consumed
# positionally (index 0 = grounded, 1 = ungrounded, 2 = missing) by
# `calculate_groundedness_thoroughness`, hence the explicit ordering in the prompt.
@marvin.fn
def classify_answer_correctness(context_list: list[str], answer_list: list[str]) -> list[list[str]]:
    """
    Returns a list containing exactly three lists of strings, in this order:
    Grounded Statements: List of statements from the `answer_list`, the content of which is supported by or contained within the `context_list`.
    Ungrounded Statements: List of ungrounded statements from the `answer_list`, the content of which is not supported by or contained within the `context_list`.
    Missing Statements: List of statements from the `context_list`, the content of which is not supported by or contained within the `answer_list`.
    """

def calculate_groundedness_thoroughness(statement_list: list[list[str]]) -> tuple[float, float]:
    """Compute groundedness and thoroughness percentages from classified statements.

    Args:
        statement_list: Positional classification result. Item 0 holds the
            grounded statements, item 1 the ungrounded statements and item 2
            the missing statements (the layout described in the
            `classify_answer_correctness` docstring). Only the number of
            statements in each item is used; items beyond the third are
            ignored.

    Returns:
        ``(groundedness, thoroughness)`` as float percentages, where
        groundedness = grounded / (grounded + ungrounded) and
        thoroughness = grounded / (grounded + missing). A ratio whose
        denominator is zero is reported as ``0.0``.

    Raises:
        IndexError: If fewer than three categories are supplied.
    """
    # Fail with an explanatory message instead of a bare "list index out of
    # range"; IndexError is kept so existing handlers continue to work.
    if len(statement_list) < 3:
        raise IndexError(
            "statement_list must contain the grounded, ungrounded and missing "
            f"statement lists; got {len(statement_list)} item(s)"
        )

    grounded = len(statement_list[0])
    ungrounded = len(statement_list[1])
    missing = len(statement_list[2])

    def _percentage(part: int, rest: int) -> float:
        """Return part / (part + rest) as a percentage, or 0.0 if both are zero."""
        total = part + rest
        # 0.0 (not int 0) keeps the return type a float on every path.
        return part / total * 100 if total else 0.0

    return _percentage(grounded, ungrounded), _percentage(grounded, missing)

def process_data(data_list):
    """Run the groundedness pipeline over many context/answer pairs.

    Each item of `data_list` is read through its 'context' and 'answer' keys.
    For every item the pronoun replacement, statement parsing, classification
    and scoring steps are executed in that order, and every intermediate
    value is kept alongside the final scores.

    Returns a pandas DataFrame with one row per item and the columns
    context, answer, context_replace_pronouns, answer_replace_pronouns,
    context_list, answer_list, classifications, groundedness, thoroughness.
    """
    rows = []

    for record in data_list:
        # Read both inputs up front so a missing key fails before any helper runs.
        row = {'context': record['context'], 'answer': record['answer']}

        # Keys are added in pipeline order, which is also the column order.
        row['context_replace_pronouns'] = replace_pronouns(row['context'])
        row['answer_replace_pronouns'] = replace_pronouns(row['answer'])

        row['context_list'] = parse_into_statements(row['context_replace_pronouns'])
        row['answer_list'] = parse_into_statements(row['answer_replace_pronouns'])

        row['classifications'] = classify_answer_correctness(
            row['context_list'], row['answer_list']
        )

        row['groundedness'], row['thoroughness'] = calculate_groundedness_thoroughness(
            row['classifications']
        )

        rows.append(row)

    return pd.DataFrame(rows)

## Run on one pair of statements

In [35]:
# Small example: a context that relies on pronouns and an answer that paraphrases it.
context = "The quick brown fox jumps over the rock because he's happy. He was born in 2005. The hedgehog was born in 2010, but she's even happier than him."
answer = "The quick brown fox was born in 2005, and the hedgehog in 2010. The quick brown fox is not as happy as the hedgehog"

# Step 1: replace pronouns with the subject they refer to.
context_replace_pronouns = replace_pronouns(context)
answer_replace_pronouns = replace_pronouns(answer)

# Step 2: split each text into one statement per fact.
context_list = parse_into_statements(context_replace_pronouns)
answer_list = parse_into_statements(answer_replace_pronouns)

# Step 3: sort statements into grounded / ungrounded / missing lists.
classifications = classify_answer_correctness(context_list, answer_list)

# Step 4: turn the three list lengths into percentage scores.
groundedness, thoroughness = calculate_groundedness_thoroughness(classifications)

print("Groundedness: {:.2f}%".format(groundedness))
print("Thoroughness: {:.2f}%".format(thoroughness))


Groundedness: 66.67%
Thoroughness: 50.00%


In [36]:
# Longer example: two differently worded descriptions of how to boil pasta.
context = "To boil pasta, first bring a large pot of salted water to a rolling boil over high heat. The general rule is about 4 to 6 quarts of water and 1 to 2 tablespoons of salt per pound of pasta. Add the pasta to the boiling water and stir occasionally to prevent sticking. Cook the pasta according to the package instructions for the desired doneness, usually between 8 to 12 minutes depending on the type and shape of the pasta. Taste a piece to ensure it's cooked to your preference. Once done, drain the pasta in a colander and, if desired, rinse under hot water to remove excess starch. Serve immediately with your choice of sauce or toppings."
answer = "To boil pasta, begin by filling a large pot with water, making sure there's enough to fully submerge the pasta. Bring the water to a rolling boil over high heat, then add salt to enhance the pasta's flavor. Once the water is boiling, carefully add the pasta, stirring gently to prevent sticking. Cook the pasta according to the package instructions or until it reaches your desired level of tenderness, usually around 8-12 minutes. To check for doneness, taste a piece of pasta—it should be tender but still slightly firm (al dente). Once cooked, drain the pasta in a colander and rinse it briefly with hot water to remove excess starch. Now your pasta is ready to be served with your favorite sauce or toppings."

# Step 1: replace pronouns with the subject they refer to.
context_replace_pronouns = replace_pronouns(context)
answer_replace_pronouns = replace_pronouns(answer)

# Step 2: split each text into one statement per fact.
context_list = parse_into_statements(context_replace_pronouns)
answer_list = parse_into_statements(answer_replace_pronouns)

# Step 3: sort statements into grounded / ungrounded / missing lists.
classifications = classify_answer_correctness(context_list, answer_list)

# Step 4: turn the three list lengths into percentage scores.
groundedness, thoroughness = calculate_groundedness_thoroughness(classifications)

print("Groundedness: {:.2f}%".format(groundedness))
print("Thoroughness: {:.2f}%".format(thoroughness))
# Also dump the raw [grounded, ungrounded, missing] lists to inspect what drove the scores.
print(classifications)


Groundedness: 100.00%
Thoroughness: 81.82%
[['Fill a large pot with water, ensuring there is enough to fully submerge the pasta.', 'Bring the water to a rolling boil over high heat.', "Add salt to enhance the pasta's flavor.", 'Once the water is boiling, carefully add the pasta, stirring gently to prevent sticking.', 'Cook the pasta according to the package instructions or until it reaches your desired level of tenderness, usually around 8-12 minutes.', 'To check for doneness, taste a piece of pasta—it should be tender but still slightly firm (al dente).', 'Once cooked, drain the pasta in a colander.', 'Rinse the pasta briefly with hot water to remove excess starch.', 'Now your pasta is ready to be served with your favorite sauce or toppings.'], [], ['The general rule is about 4 to 6 quarts of water and 1 to 2 tablespoons of salt per pound of pasta.', 'Serve immediately with your choice of sauce or toppings.']]


## Run on a list of dictionaries and return a DataFrame

In [28]:
# Five context/answer pairs in the shape `process_data` reads: dicts with
# 'context' and 'answer' keys.
data_list = [
    {
        'context': 'The quick brown fox jumps over the rock because he\'s happy. He was born in 2005. The hedgehog was born in 2010, but she\'s even happier than him.',
        'answer': 'The quick brown fox was born in 2005, and the hedgehog in 2010. The quick brown fox is not as happy as the hedgehog'
    },
    {
        'context': 'The sun is a star at the center of our solar system. It is about 93 million miles away from Earth. The sun is a hot ball of glowing gases that provides light and warmth to Earth.',
        'answer': 'The sun is a star located approximately 93 million miles from Earth. It is the source of light and heat for our planet. The sun is not a solid object, but rather a sphere of hot glowing gases.'
    },
    {
        'context': 'Birds are warm-blooded vertebrates that lay eggs and have feathers, wings, and beaks. There are over 10,000 species of birds worldwide. Some common bird species include sparrows, pigeons, and parrots.',
        'answer': 'Birds are a diverse group of animals with feathers and wings. They are warm-blooded egg-laying vertebrates. The number of bird species globally exceeds 10,000. Pigeons, parrots, and sparrows are among the most familiar bird types.'
    },
    {
        'context': 'The Eiffel Tower is a wrought-iron lattice tower located on the Champ de Mars in Paris, France. It was constructed from 1887 to 1889 and stands at a height of 324 meters. The tower is named after Gustave Eiffel, whose company designed and built it.',
        'answer': 'The Eiffel Tower, found in Paris, France, is a lattice tower made of wrought iron. Built between 1887 and 1889, it reaches a height of 324 meters. Gustave Eiffel\'s company was responsible for the tower\'s design and construction, hence its name.'
    },
    {
        'context': 'The Great Wall of China is a series of fortifications and walls built across the historical northern borders of ancient Chinese states and Imperial China. The most well-known sections were built during the Ming dynasty, which ruled from 1368 to 1644.',
        'answer': 'The Great Wall of China, a series of walls and fortifications, was constructed along the northern borders of ancient Chinese states and Imperial China. The Ming dynasty, which lasted from 1368 to 1644, is responsible for the construction of the most famous sections of the wall.'
    }
]

# Runs the whole pipeline for every pair: five Marvin-backed function calls per pair
# (two pronoun replacements, two statement parses, one classification).
result_df = process_data(data_list)

[2, 1, 3]
[4, 1, 2]
[8, 1, 3]
[5, 8, 1]
[4, 0, 0]


In [29]:
# Bare expression as the last line of the cell so the notebook displays the DataFrame.
result_df

Unnamed: 0,context,answer,context_replace_pronouns,answer_replace_pronouns,context_list,answer_list,classifications,groundedness,thoroughness
0,The quick brown fox jumps over the rock becaus...,"The quick brown fox was born in 2005, and the ...",The quick brown fox jumps over the rock becaus...,"The quick brown fox was born in 2005, and the ...","[The quick brown fox jumps over the rock., The...","[The quick brown fox was born in 2005., The he...","[[The quick brown fox was born in 2005., The h...",66.666667,40.0
1,The sun is a star at the center of our solar s...,The sun is a star located approximately 93 mil...,The sun is a star at the center of our solar s...,The sun is a star located approximately 93 mil...,[The sun is a star at the center of our solar ...,"[The sun is a star., The sun is located approx...","[[The sun is a star., The sun is located appro...",80.0,66.666667
2,Birds are warm-blooded vertebrates that lay eg...,Birds are a diverse group of animals with feat...,Birds are warm-blooded vertebrates that lay eg...,Birds are a diverse group of animals with feat...,"[Birds are warm-blooded vertebrates., Birds la...",[Birds are a diverse group of animals with fea...,"[[Birds are warm-blooded vertebrates., Birds l...",88.888889,72.727273
3,The Eiffel Tower is a wrought-iron lattice tow...,"The Eiffel Tower, found in Paris, France, is a...",The Eiffel Tower is a wrought-iron lattice tow...,"The Eiffel Tower, found in Paris, France, is a...",[The Eiffel Tower is a wrought-iron lattice to...,"[The Eiffel Tower is found in Paris, France., ...",[[The Eiffel Tower is located on the Champ de ...,38.461538,83.333333
4,The Great Wall of China is a series of fortifi...,"The Great Wall of China, a series of walls and...",The Great Wall of China is a series of fortifi...,"The Great Wall of China, a series of walls and...",[The Great Wall of China is a series of fortif...,[The Great Wall of China is a series of walls ...,[[The Great Wall of China is a series of walls...,100.0,100.0
