# CSV Eval

Runs the Docugami agent against every row of an input CSV that contains a "Question" column (the name is case-sensitive), then writes out a second CSV with the same contents plus an added "Answer" column.


In [None]:
# Remove any earlier checkout in ./temp so the clone below starts from an empty target directory.
!rm -rf temp
# Clone the KG-RAG datasets repository into ./temp; the input CSV path used by this notebook points inside it.
!git clone https://github.com/docugami/KG-RAG-datasets.git temp

In [2]:
from datetime import datetime
import os
import pandas as pd
from pathlib import Path

# Question set inside the cloned dataset checkout, resolved against the current working directory.
INPUT_CSV_PATH = Path.cwd().joinpath("temp/sec-10-q/data/raw_questions/questions_mini.csv")

# Name of this eval run: "<input file stem>_<YYYY-MM-DD>", using today's local date.
EVAL_NAME = f"{INPUT_CSV_PATH.stem}_{datetime.now():%Y-%m-%d}"

# Answers are written next to the input file as "<EVAL_NAME>_answers<input suffix>".
OUTPUT_CSV_PATH = INPUT_CSV_PATH.with_name(f"{EVAL_NAME}_answers{INPUT_CSV_PATH.suffix}")

## Create Docugami Agent

In [6]:
from docugami_kg_rag.agent import agent as docugami_agent
from langchain_core.messages import HumanMessage


def predict_docugami_agent(question: str) -> str:
    """Ask the Docugami agent a single question.

    The question is wrapped in one ``HumanMessage`` and passed to the agent
    under the ``"messages"`` key; whatever ``invoke`` returns is handed back
    to the caller unchanged.

    Args:
        question: The question text to send to the agent.

    Returns:
        The unmodified result of ``docugami_agent.invoke``.

    NOTE(review): the annotation says ``str``, but no conversion happens
    here. Confirm that the agent really returns a plain string; if it returns
    a richer object, that object is what ends up in the "Answer" column.
    """
    agent_input = {"messages": [HumanMessage(content=question)]}
    return docugami_agent.invoke(agent_input)

In [7]:
# Name the LangChain project after this eval run (presumably used to group tracing output; confirm against the tracing setup).
os.environ["LANGCHAIN_PROJECT"] = EVAL_NAME

# Eval the CSV: load the questions, answer each row, and save the combined table.
df = pd.read_csv(INPUT_CSV_PATH)
has_question_column = "Question" in df.columns
if not has_question_column:
    # Nothing to evaluate without the expected column; report it and write no output file.
    print("Error: The 'Question' column does not exist in the input CSV.")
else:
    # One agent call per row; the results become the new "Answer" column.
    answers = df["Question"].apply(predict_docugami_agent)
    df["Answer"] = answers
    # Save questions and answers side by side, leaving out the DataFrame index.
    df.to_csv(OUTPUT_CSV_PATH, index=False)
    print(f"Output CSV created at: {OUTPUT_CSV_PATH}")

Output CSV created at: /root/Source/github/langchain-template-docugami-kg-rag/notebooks/temp/sec-10-q/data/raw_questions/questions_mini_2024-03-28_answers.csv
