# Semi-structured eval: baseline retriever

We will test retrieval of table information from the `Semi-structured Reports` dataset using various methods.

## Pre-requisites

In [None]:
%pip install -U langchain langsmith langchain_benchmarks
%pip install --quiet chromadb openai

## Dataset

In [7]:
import os

from langchain_benchmarks import registry
from langchain_benchmarks.rag.tasks.semi_structured_reports import get_file_names

# Task
task = registry["Semi-structured Reports"]

# Files used
paths = list(get_file_names())
files = [str(p) for p in paths]

### TODO: Replace when dataset is updated
# Local copy of the PDFs. Set the SEMI_STRUCTURED_REPORTS_DIR environment variable
# to point somewhere else instead of editing this cell. The name `dir` shadows the
# builtin, but later cells read it and build paths as `dir + name`, so the name is
# kept and a trailing path separator is guaranteed here.
dir = os.path.join(
    os.environ.get(
        "SEMI_STRUCTURED_REPORTS_DIR",
        "/Users/rlm/Desktop/Eval_Sets/semi_structured_reports/",
    ),
    "",
)
# Sorted so the load order (and the printed chunk counts) is the same on every run;
# os.listdir returns entries in arbitrary order.
files = sorted(f for f in os.listdir(dir) if f.endswith(".pdf"))

## Load

In [8]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


def load_and_split(file, chunk_size=100, chunk_overlap=50):
    """
    Load a PDF file and split it into text chunks.

    Args:
        file: Path of the PDF, passed to PyPDFLoader.
        chunk_size: Chunk size handed to the tiktoken-based splitter.
            Defaults to 100, the baseline setting of this notebook.
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter. Defaults to 50.

    Returns:
        A list with the `page_content` string of every chunk.
    """

    # Load the PDF into documents
    pdf_pages = PyPDFLoader(file).load()

    # Splitter configured from the arguments so other chunkings can reuse this function
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )

    # Get chunks and keep only their text
    docs = text_splitter.split_documents(pdf_pages)
    texts = [d.page_content for d in docs]
    print(f"There are {len(texts)} text elements")
    return texts


# Accumulate the chunks of every PDF, in `files` order, into one flat list
texts = []
for fi in files:
    texts += load_and_split(dir + fi)

There are 59 text elements
There are 77 text elements
There are 67 text elements
There are 285 text elements
There are 146 text elements
There are 130 text elements


## Index

In [9]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Build a Chroma collection from the chunk texts, using OpenAI embeddings
vectorstore_baseline = Chroma.from_texts(
    collection_name="baseline-100-token",
    embedding=OpenAIEmbeddings(),
    texts=texts,
)

# Retriever over that collection; as_retriever is called with no arguments
retriever_baseline = vectorstore_baseline.as_retriever()

## RAG

In [10]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough


def rag_chain(retriever):
    """
    Build a question-answering chain on top of `retriever`.

    The chain input is the question itself. It is piped through the retriever,
    the `page_content` of the returned documents is joined with blank lines to
    form the context, and context plus question are fed through the prompt,
    the chat model and StrOutputParser.
    """

    # Prompt text (kept verbatim, including its indentation)
    prompt_text = """Answer the question based only on the following context, which can include text and tables:
    {context}
    Question: {question}
    """
    qa_prompt = ChatPromptTemplate.from_template(prompt_text)

    # Chat model
    llm = ChatOpenAI(temperature=0, model="gpt-4")

    # Inputs of the prompt: retrieved context and the untouched question
    prompt_inputs = {
        "context": retriever
        | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": RunnablePassthrough(),
    }

    # Assemble the pipeline left to right
    return prompt_inputs | qa_prompt | llm | StrOutputParser()


# Create RAG chain
chain = rag_chain(retriever_baseline)

## Eval

In [None]:
### TODO: Replace with public dataset

import uuid

import pandas as pd
from langsmith import Client

# Read the question/answer pairs
df = pd.read_csv(dir + "semi_structured_reports.csv")

# Dataset
client = Client()
# Same name that `run_eval` evaluates against; the previous "-v3" suffix meant
# this cell created a dataset the evaluation never used.
dataset_name = "Semi-Structured-Eval"
dataset = client.create_dataset(dataset_name=dataset_name)

# Populate dataset: one example per CSV row
for q, a in zip(df["Question"], df["Answer"]):
    client.create_example(
        inputs={"question": q}, outputs={"answer": a}, dataset_id=dataset.id
    )

In [12]:
from langsmith.client import Client

from langchain_benchmarks.rag import get_eval_config


def run_eval(chain, eval_run_name, dataset_name="Semi-Structured-Eval"):
    """
    Run `chain` over a LangSmith dataset and return the result.

    Args:
        chain: Chain to evaluate. Each dataset example is reduced to its
            "question" value before being piped into the chain.
        eval_run_name: Passed to `run_on_dataset` as `project_name`.
        dataset_name: Name of the dataset to evaluate on. Defaults to the
            dataset this notebook was originally run against.

    Returns:
        Whatever `Client.run_on_dataset` returns (previously discarded).
    """
    client = Client()
    return client.run_on_dataset(
        ### TODO: Replace with public dataset
        dataset_name=dataset_name,
        # Factory so a chain is produced on each call; extracts the question from the example inputs
        llm_or_chain_factory=lambda: (lambda x: x["question"]) | chain,
        evaluation=get_eval_config(),
        verbose=True,
        project_name=eval_run_name,
    )


# Imported here because the only other `import uuid` lives in the one-off
# dataset-creation cell, which is skipped on normal runs.
import uuid

# Experiments: project-name prefix -> chain to evaluate
chain_map = {
    "baseline-100tok": chain,
}

# One shared suffix so all projects of this run can be told apart from earlier runs
run_id = str(uuid.uuid4())
# Loop variable deliberately not named `chain`, so the notebook-level `chain` is not rebound
for project_name, experiment_chain in chain_map.items():
    run_eval(experiment_chain, project_name + "_" + run_id)

View the evaluation results for project 'baseline-100tok_96c7a1d1-727a-4d14-bd80-acfa56ce5384' at:
https://smith.langchain.com/o/1fa8b1f4-fcb9-4072-9aa9-983e35ad61b8/projects/p/354d539a-0d15-4937-b782-018934936207?eval=true

View all tests for Dataset Semi-Structured-Eval at:
https://smith.langchain.com/o/1fa8b1f4-fcb9-4072-9aa9-983e35ad61b8/datasets/95f61109-029d-43a9-ae7d-ec1d53c6f723
[------------------------------------------------->] 20/20
 Eval quantiles:
                               0.25       0.5      0.75      mean  \
embedding_cosine_distance  0.031645  0.066415  0.113165  0.070251   
score_string:accuracy      0.100000  0.500000  1.000000  0.540000   
faithfulness               1.000000  1.000000  1.000000  0.860000   
execution_time             7.493521  7.493521  7.493521  7.493521   

                                   mode  
embedding_cosine_distance  8.370961e-07  
score_string:accuracy      1.000000e-01  
faithfulness               1.000000e+00  
execution_time      