In [1]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain import PromptTemplate

import yaml
import csv 

In [2]:
def load_yaml_file(filepath):
    """Parse the YAML document stored at ``filepath``.

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        The Python object that ``yaml.safe_load`` builds from the file
        content.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8. Without an encoding, open() falls back to
    # the platform locale (for example cp1252 on Windows), which garbles or
    # rejects non-ASCII text in the document.
    with open(filepath, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)

In [3]:
def write_to_csv(rows, filename):
    """Write row dictionaries to ``filename`` as a CSV file with a header.

    The header is the union of the keys of all rows, in first-seen order, so
    rows do not need identical keys; a key missing from a row is written as
    an empty cell. When ``rows`` is empty, an empty file is created and no
    header is written.

    Args:
        rows: Iterable of dictionaries, one per CSV row.
        filename: Path of the CSV file to create or overwrite.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    rows = list(rows)  # allow any iterable; it is traversed twice below
    # dict.fromkeys de-duplicates while keeping first-seen order.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))
    # newline='' lets the csv module control line endings; UTF-8 keeps
    # non-ASCII text writable regardless of the platform locale.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        if not rows:
            return
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(rows)

In [4]:
class RAIScore(BaseModel):
    """Scores and critiques for one questionnaire response, judged against a numbered list of evaluation criteria."""

    # NOTE(review): the docstring above and the Field descriptions below are
    # expected to be sent to the LLM as part of the structured-output schema,
    # so they are written as instructions rather than developer notes --
    # confirm for the installed LangChain version.
    #
    # Both fields are lists: the evaluation prompt asks for one score and one
    # critique per criterion plus an overall verdict, so the descriptions pin
    # down the order of the entries and where the overall verdict goes.
    scores: list[int] = Field(description="Integer scores from 1 (poor) to 5 (good): one score per criterion, in the order the criteria are listed, followed by the overall score as the final element")
    critiques: list[str] = Field(description="Critiques of the response: one critique per criterion, in the order the criteria are listed, followed by a summary of the key reasons for the overall score as the final element")

In [5]:
# Chat model used as the evaluator; temperature=0 keeps sampling randomness
# to a minimum so repeated runs are as comparable as possible.
model = ChatOpenAI(
    temperature=0,
    model="gpt-3.5-turbo-0125",
)

# Wrapper whose results are shaped by the RAIScore schema (the evaluation
# loop reads .scores and .critiques from what it returns).
structured_llm = model.with_structured_output(RAIScore)

In [6]:
eval_template = '''Question: {question}

Response: {response}

Criteria for Evaluation:
{criteria}

For each criterion listed above, please provide a score from 1 to 5, where 1 indicates a poor response and 5 indicates a good response, based on how well the Response to the Question meets the criterion. Additionally, offer a critique of the response for each of the criteria, highlighting its strengths and areas for improvement.

Finally, provide an overall score from 1 to 5 for the response, considering all criteria, and summarize the key reasons for this score.'''

In [7]:
# Bind the template text to its three placeholders so it can be rendered with
# eval_prompt.format(question=..., response=..., criteria=...).
eval_prompt = PromptTemplate(
    template=eval_template,
    input_variables=["question", "response", "criteria"],
)

In [9]:
# Load the YAML data
data = load_yaml_file('questionnaire.yaml')
rows = []

# Layout as accessed below: sections -> subsections -> questions, where each
# question carries 'text', 'response' and a list of 'criteria'.
for section in data['sections']:
    for subsection in section['subsections']:
        for question in subsection['questions']:
            question_text = question['text']
            question_response = question['response']
            # Number the criteria from 1 so each one can be addressed
            # individually in the evaluation.
            criteria_list = "\n".join(
                f"{number}. {criterion}"
                for number, criterion in enumerate(question['criteria'], start=1)
            )
            rai_score = structured_llm.invoke(
                eval_prompt.format(
                    question=question_text,
                    response=question_response,
                    criteria=criteria_list,
                )
            )
            rows.append({
                'Section': section['title'],
                'Subsection': subsection['title'],
                'Question': question_text,
                'Response': question_response,
                'Scores': ', '.join(map(str, rai_score.scores)),
                'Critiques': ' | '.join(rai_score.critiques),  # Using pipe to separate critiques in one cell
            })

# Write data to CSV. If the questionnaire contained no questions there is
# nothing to report, so skip writing instead of producing a file without a
# header.
if rows:
    write_to_csv(rows, 'report.csv')
else:
    print("No questions found in questionnaire.yaml; report.csv was not written.")