In [1]:
#!pip install langsmith==0.2.16 langchain-core==0.2.40 langchain-community==0.2.17 langchain-openai==0.1.25 langchain-qdrant==0.1.4

In [2]:
#!pip install -r requirements.txt

In [3]:
# resolves issue with import
#!pip install langchain-openai==0.1.25

In [4]:
#!pip install -qU pymupdf

In [5]:
#!pip install -qU pymupdf ragas

In [11]:
import re
from langchain_experimental.text_splitter import SemanticChunker
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings

In [12]:
import os
import getpass

# Prompt for the key only when it is not already set, so that
# "Restart Kernel -> Run All" (or a run with the variable already exported)
# neither blocks on input nor overwrites a valid key.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API Key:")

from uuid import uuid4

# Give each session its own project name by appending 8 random hex characters.
os.environ["LANGCHAIN_PROJECT"] = f"AIM - SDG - {uuid4().hex[0:8]}"

## Load documents

In [19]:
import fitz  # PyMuPDF
import os

def extract_images_and_tables_from_pdf(pdf_path, output_dir):
    """
    Save every embedded image (and any extracted table) of a PDF to disk.

    Images are written to ``<output_dir>/images`` and tables to
    ``<output_dir>/tables``; both directories are created when missing.

    Parameters:
    - pdf_path: Location of the PDF, passed straight to ``fitz.open``.
    - output_dir: Directory under which ``images/`` and ``tables/`` are created.

    Returns:
    - extracted_content: List of metadata dictionaries, one per saved image or
      table, with the keys ``content_id``, ``content_type``,
      ``hierarchy_level``, ``page_number`` (1-based), ``bbox``, ``text``,
      ``image_path``, ``table_path`` and ``associated_text_id``.
    """
    # Ensure the output directories for images and tables exist
    image_dir = os.path.join(output_dir, 'images')
    table_dir = os.path.join(output_dir, 'tables')
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(table_dir, exist_ok=True)

    extracted_content = []

    # Open the PDF
    doc = fitz.open(pdf_path)
    try:
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)

            # Extract images
            for img_index, img in enumerate(page.get_images(full=True)):
                xref = img[0]
                base_image = doc.extract_image(xref)
                image_bytes = base_image["image"]
                image_ext = base_image["ext"]
                image_path = f"{image_dir}/image_{page_num + 1}_{img_index}.{image_ext}"

                # Save the image
                with open(image_path, "wb") as img_file:
                    img_file.write(image_bytes)

                # Store extracted image metadata
                extracted_content.append({
                    'content_id': f'regulation_{xref}_image_{img_index}',
                    'content_type': 'Image',
                    'hierarchy_level': 'Figure',
                    'page_number': page_num + 1,
                    # NOTE(review): PyMuPDF appears to document items 2-5 of a
                    # get_images() entry as width, height, bpc and colorspace,
                    # not a rectangle - confirm whether a real bounding box
                    # (e.g. page.get_image_bbox) is what is wanted here.
                    'bbox': img[2:6],
                    'text': None,
                    'image_path': image_path,
                    'table_path': None,
                    'associated_text_id': None  # Modify if you have a way to link this with text
                })

            # Extract tables if applicable
            tables = extract_tables_from_page(page)  # This function would extract tables if needed
            for table_index, table in enumerate(tables):
                table_path = f"{table_dir}/table_{page_num + 1}_{table_index}.txt"
                # Explicit encoding keeps the output independent of the platform default.
                with open(table_path, "w", encoding="utf-8") as table_file:
                    table_file.write(table)  # Save the table as text

                # Store extracted table metadata
                extracted_content.append({
                    'content_id': f'regulation_{page_num + 1}_table_{table_index}',
                    'content_type': 'Table',
                    'hierarchy_level': 'Table',
                    'page_number': page_num + 1,
                    'bbox': None,
                    'text': None,
                    'image_path': None,
                    'table_path': table_path,
                    'associated_text_id': None
                })
    finally:
        # Release the PDF handle even when extraction or a file write fails part-way.
        doc.close()

    return extracted_content

def extract_tables_from_page(page):
    """
    Placeholder table extractor: reports no tables for any page.

    Parameters:
    - page: The page to inspect (not read by this stub).

    Returns:
    - A new empty list. A real implementation (regex or a table extraction
      tool) should return one entry per table found on the page.
    """
    # TODO: add table extraction logic here
    return []

## multiple at once?
from langchain_community.document_loaders import PyMuPDFLoader

# NOTE(review): `documents` (the pmarca blog archive) is not referenced by any
# later cell visible in this notebook - it looks like a template leftover;
# confirm before removing.
PDF_LINK = "https://d1lamhf6l6yk6d.cloudfront.net/uploads/2021/08/The-pmarca-Blog-Archives.pdf"

loader = PyMuPDFLoader(
    PDF_LINK
)
# Load the PDF behind PDF_LINK into `documents`.
documents = loader.load()

In [17]:
# URLs of the two source PDFs: the White House "Blueprint for an AI Bill of
# Rights" and the NIST AI 600-1 publication.
wh_link = 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf'
nist_link = 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf'

In [18]:
from langchain_community.document_loaders import PyMuPDFLoader

# Load both PDFs with PyMuPDFLoader; later cells index the results by
# position and read `.page_content` from each item.
wh_documents = PyMuPDFLoader(file_path=wh_link).load()
# NOTE: `nist_docments` (sic) is misspelled, but later cells reference this
# exact name, so it must stay until every use is renamed together.
nist_docments = PyMuPDFLoader(file_path=nist_link).load()

White House document: sections and page chunking
USING THIS TECHNICAL COMPANION wh_documents[13].page_content 14
 SAFE AND EFFECTIVE SYSTEMS  15 
wh_documents[14].page_content
wh_documents[21].page_content

 ALGORITHMIC DISCRIMINATION PROTECTIONS 23
wh_documents[22].page_content 
wh_documents[28].page_content

 DATA PRIVACY  30
wh_documents[29].page_content
wh_documents[38].page_content

 NOTICE AND EXPLANATION 40
wh_documents[39].page_content
wh_documents[44].page_content

 HUMAN ALTERNATIVES, CONSIDERATION, AND FALLBACK 46
wh_documents[45].page_content
wh_documents[51].page_content

APPENDIX
 EXAMPLES OF AUTOMATED SYSTEMS 53
wh_documents[52].page_content
wh_documents[53].page_content
 LISTENING TO THE AMERICAN PEOPLE 55
wh_documents[54].page_content
wh_documents[61].page_content
ENDNOTES 63
wh_documents[62].page_content
wh_documents[72].page_content

### Baseline Chunking

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Baseline: fixed-size splitting. Chunk size and overlap are measured with
# `len`, i.e. in characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 100,
    length_function = len,
)

# NOTE(review): `wh_split_chunks` and `nist_split_chunks` are not referenced by
# any later cell visible in this notebook; the pipelines below are built from
# the section-based chunks instead.
wh_split_chunks = text_splitter.split_documents(wh_documents)

# Spot-check one chunk.
wh_split_chunks[2].page_content

nist_split_chunks = text_splitter.split_documents(nist_docments)

### Section-based chunking

In [20]:

def chunk_pdf_with_sections(doc, sections):
    """
    Build one chunk per listed page, prefixing the page text with its section name.

    Parameters:
    - doc: Indexable sequence of loaded pages; each item must expose `page_content`.
    - sections: Dictionary mapping section names to iterables of indices into `doc`.

    Returns:
    - section_chunks: List of dictionaries, in section order and then page
      order, each with the keys "section" and "content" (the page text
      preceded by a "SECTION: <name>" header line).
    """
    return [
        {
            "section": name,
            "content": f"SECTION: {name}\n{doc[index].page_content}",
        }
        for name, page_indices in sections.items()
        for index in page_indices
    ]

In [21]:
# Join the text of all pages from index `start_page` (0-based) onward
def extract_full_text_from_pages(doc, start_page=4):
    """
    Concatenate the text of every page from index `start_page` to the end.

    Parameters:
    - doc: Indexable, sized sequence of pages; each item must expose `page_content`.
    - start_page: 0-based index of the first page to include (default 4,
      i.e. the fifth item of `doc`).

    Returns:
    - full_text: The page texts joined with no separator; an empty string when
      `start_page` is at or past the end of `doc`.
    """
    # str.join builds the result in one pass instead of re-copying the
    # accumulated text on every `+=`.
    return "".join(
        doc[page_num].page_content for page_num in range(start_page, len(doc))
    )

In [22]:
# Section headings of the NIST document, written the way they are expected to
# appear in the extracted page text (including embedded newlines). A later cell
# regex-escapes these strings and uses them verbatim as split markers, so every
# character must match the extracted text exactly. The page notes are the
# author's and have not been verified here.
nist_sections = [
    "\n1. \nIntroduction",  # Starts at page 4. NOTE(review): the original note gave conflicting page/index values - confirm the range
    "\n2. \nOverview of Risks Unique to or Exacerbated by GAI",  # Starts at page 5, ends before page 8
    "\n2.1. CBRN Information or Capabilities",  # Page 8 only
    "\n2.2. Confabulation",  # Page 9 only
    "\n2.3. Dangerous, Violent, or Hateful Content",  # Page 9 & 10 only
    "\n2.4. Data Privacy",  # Page 10 & 11 only
    "\n2.5. Environmental Impacts",  # Page 11 only
    "\n2.6. Harmful Bias and Homogenization",  # Page 11 & 12
    "\n2.7. Human-Al Configuration",  # Page 12 only. NOTE(review): "Al" appears to be A + lowercase L, not "AI" - confirm it matches the extracted text
    "\n2.8. Information Integrity",  # Page 12 & 13 only
    "\n2.9. Information Security",  # Page 13 & 14 only
    "\n2.10. \nIntellectual Property",  # Page 14 only
    "\n2.11. \nObscene, Degrading, and/or Abusive Content",  # Page 14 & 15 only
    "\n2.12. \nValue Chain and Component Integration",  # Page 15 only
    "\n3. Suggested Actions to Manage GAI Risks",  # Pages 15 to 49. NOTE(review): unlike "1." and "2." there is no "\n" after the number - confirm this heading actually matches
    "Appendix A. Primary GAI Considerations",  # Pages 50 to 56
    "Appendix B. References" # Pages 57 to 63
]

In [23]:
# Section name -> 0-based indices into `wh_documents` that belong to it.
# Range ends are exclusive, so e.g. range(14, 22) covers indices 14..21.
wh_sections = {
    "USING THIS TECHNICAL COMPANION": [13],
    "SAFE AND EFFECTIVE SYSTEMS": [*range(14, 22)],
    "ALGORITHMIC DISCRIMINATION PROTECTIONS": [*range(22, 29)],
    "DATA PRIVACY": [*range(29, 39)],
    "NOTICE AND EXPLANATION": [*range(39, 45)],
    "HUMAN ALTERNATIVES, CONSIDERATION, AND FALLBACK": [*range(45, 52)],
    "APPENDIX: EXAMPLES OF AUTOMATED SYSTEMS": [52, 53],
    "LISTENING TO THE AMERICAN PEOPLE": [*range(54, 62)],
    "ENDNOTES": [*range(62, 73)],
}

In [24]:
full_text = extract_full_text_from_pages(nist_docments, start_page=4)
chunks = {}  # NOTE(review): not referenced by any later cell visible here
# One alternation of all (escaped) section headings.
pattern = "|".join(map(re.escape, nist_sections))
# The capture group makes re.split keep the matched headings:
# [text before first heading, heading, body, heading, body, ...]
sections = re.split(f'({pattern})', full_text)
# Pair each heading with the text that follows it; the text before the first
# heading is dropped. A heading that fails to match leaves its text attached
# to the preceding section.
nist_chunks = dict(zip(sections[1::2], sections[2::2]))

In [25]:
wh_chunks = chunk_pdf_with_sections(wh_documents, wh_sections)

In [26]:
wh_chunks[0]

{'section': 'USING THIS TECHNICAL COMPANION',
 'content': 'SECTION: USING THIS TECHNICAL COMPANION\n \n \n \n \n \n  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n-    \nUSING THIS TECHNICAL COMPANION\nThe Blueprint for an AI Bill of Rights is a set of five principles and associated practices to help guide the design, \nuse, and deployment of automated systems to protect the rights of the American public in the age of artificial \nintelligence. This technical companion considers each principle in the Blueprint for an AI Bill of Rights and \nprovides examples and concrete steps for communities, industry, governments, and others to take in order to \nbuild these protections into policy, practice, or the technological design process. \nTaken together, the technical protections and practices laid out in the Blueprint for an AI Bill of Rights can help \nguard the American public against many of the potential and actual harms identified by researchers, technolo\xad\ngists, ad

### Task 1: Review the two PDFs and decide how best to chunk up the data with a single strategy to optimally answer the variety of questions you expect to receive from people.

#### Deliverables

* Describe the default chunking strategy that you will use. 

     * After reviewing the PDFs, I wanted to preserve the sections (headers) of each document. To do this, I located the sections through each PDF's table of contents, then used functions that match each section heading (via a regex pattern) to associate the section with its content.

* Articulate a chunking strategy that you would also like to test out.

     * The alternative strategy will be section-based chunks plus semantic chunking. 

* Describe how and why you made these decisions

     * To preserve the sections (headers) of the documents, I located the sections through each PDF's table of contents and encoded them as Python structures (a dict or a list). I then used functions that rely on a regex pattern (NIST document) or page numbers (White House document) to associate each section with its content. Because there are only two source documents, static rules (regex or page numbers) work for this sample. However, these approaches will be difficult to replicate if new documents are added. In the future, chunking on detected headers may be a more general way to keep section context.

### Format into Document object

In [27]:
from langchain_core.documents import Document

In [28]:
def process_nist_chunks(nist_chunks):
    """
    Wrap each NIST section in a Document.

    Parameters:
    - nist_chunks: Dictionary mapping a section heading to that section's text.

    Returns:
    - List of Document objects, in dictionary order, whose page_content is the
      section text and whose metadata is {'section': <heading>}.
    """
    return [
        Document(page_content=text, metadata={'section': heading})
        for heading, text in nist_chunks.items()
    ]

def process_wh_chunks(wh_chunks):
    """
    Wrap each White House page chunk in a Document.

    Parameters:
    - wh_chunks: List of dictionaries, each with the keys 'section' and 'content'.

    Returns:
    - List of Document objects, in input order, whose page_content is the
      chunk's 'content' and whose metadata is {'section': <chunk 'section'>}.
    """
    return [
        Document(page_content=entry['content'], metadata={'section': entry['section']})
        for entry in wh_chunks
    ]

### Section-based chunks

In [29]:
nist_docs = process_nist_chunks(nist_chunks)

In [30]:
wh_docs= process_wh_chunks(wh_chunks)

In [31]:
nist_docs[0]

Document(metadata={'section': '\n1. \nIntroduction'}, page_content=' \nThis document is a cross-sectoral proﬁle of and companion resource for the AI Risk Management \nFramework (AI RMF 1.0) for Generative AI,1 pursuant to President Biden’s Executive Order (EO) 14110 on \nSafe, Secure, and Trustworthy Artiﬁcial Intelligence.2 The AI RMF was released in January 2023, and is \nintended for voluntary use and to improve the ability of organizations to incorporate trustworthiness \nconsiderations into the design, development, use, and evaluation of AI products, services, and systems.  \nA proﬁle is an implementation of the AI RMF functions, categories, and subcategories for a speciﬁc \nsetting, application, or technology – in this case, Generative AI (GAI) – based on the requirements, risk \ntolerance, and resources of the Framework user. AI RMF proﬁles assist organizations in deciding how to \nbest manage AI risks in a manner that is well-aligned with their goals, considers legal/regulatory

In [32]:
wh_docs[0]

Document(metadata={'section': 'USING THIS TECHNICAL COMPANION'}, page_content='SECTION: USING THIS TECHNICAL COMPANION\n \n \n \n \n \n  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n-    \nUSING THIS TECHNICAL COMPANION\nThe Blueprint for an AI Bill of Rights is a set of five principles and associated practices to help guide the design, \nuse, and deployment of automated systems to protect the rights of the American public in the age of artificial \nintelligence. This technical companion considers each principle in the Blueprint for an AI Bill of Rights and \nprovides examples and concrete steps for communities, industry, governments, and others to take in order to \nbuild these protections into policy, practice, or the technological design process. \nTaken together, the technical protections and practices laid out in the Blueprint for an AI Bill of Rights can help \nguard the American public against many of the potential and actual harms identified by researchers, tec

#### Combine wh_docs and nist_docs document objects into one

In [33]:
full_docs = wh_docs + nist_docs

In [34]:
# Preview only the first few documents; dumping the whole list floods the
# notebook output.
full_docs[:3]

[Document(metadata={'section': 'USING THIS TECHNICAL COMPANION'}, page_content='SECTION: USING THIS TECHNICAL COMPANION\n \n \n \n \n \n  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n-    \nUSING THIS TECHNICAL COMPANION\nThe Blueprint for an AI Bill of Rights is a set of five principles and associated practices to help guide the design, \nuse, and deployment of automated systems to protect the rights of the American public in the age of artificial \nintelligence. This technical companion considers each principle in the Blueprint for an AI Bill of Rights and \nprovides examples and concrete steps for communities, industry, governments, and others to take in order to \nbuild these protections into policy, practice, or the technological design process. \nTaken together, the technical protections and practices laid out in the Blueprint for an AI Bill of Rights can help \nguard the American public against many of the potential and actual harms identified by researchers, te

### semantic Chunking via langchain_experimental

In [44]:
# Alternative strategy: re-split the section-based documents with
# SemanticChunker. OpenAIEmbeddings() is constructed without an explicit
# model, so the library default is used for the splitting step.
base_text_splitter = SemanticChunker(OpenAIEmbeddings())
sem_documents = base_text_splitter.split_documents(full_docs)  # takes about 2 minutes (author's note)

## Embedding chunks

In [45]:
from langchain_openai import OpenAIEmbeddings

# Embedding model for the baseline (section-based) vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Embedding model for the alternative (semantically chunked) vector store.
embeddings_lg = OpenAIEmbeddings(model="text-embedding-3-large")

In [46]:
from langchain_community.vectorstores import Qdrant

# Baseline store: section-based documents embedded with `embeddings`
# (text-embedding-3-small), kept in a ":memory:" Qdrant collection.
vectorstore = Qdrant.from_documents(
    documents=full_docs,
    embedding=embeddings,
    location=":memory:",
    collection_name="ai_policy"
)
# Retriever with the library's default search settings.
retriever = vectorstore.as_retriever()

In [47]:
# Alternative store: semantically chunked documents embedded with
# `embeddings_lg` (text-embedding-3-large).
# NOTE(review): this store differs from the baseline `vectorstore` in BOTH the
# chunking and the embedding model, so any difference between the two
# pipelines cannot be attributed to chunking alone.
vectorstore_1 = Qdrant.from_documents(
    documents=sem_documents,
    embedding=embeddings_lg,
    location=":memory:",
    collection_name="alt_ai_policy"
)
alt_retriever = vectorstore_1.as_retriever()

### RAG_chain

In [48]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

## Generation LLM
llm = ChatOpenAI(model="gpt-4o")

In [49]:
from langchain.prompts import ChatPromptTemplate

# Prompt shared by all chains below. It has two placeholders, {context} and
# {question}; the leading backslash suppresses the newline after the opening
# triple quote.
RAG_PROMPT = """\
Given a provided context and question, you must answer the question based only on context.

Context: {context}
Question: {question}
"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

### LCEL chain for each different method

* rag_chain

* alt_rag_chain

In [50]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain.schema import StrOutputParser

# Baseline pipeline. Invoke with {"question": "<text>"}: the question is sent
# to `retriever` to fill "context", both values format the prompt, and the LLM
# reply is parsed to a plain string.
rag_chain = (
    RunnableParallel(
        context=itemgetter("question") | retriever,
        question=itemgetter("question"),
    )
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [51]:
# Alternative pipeline. Same shape as the baseline chain, but the question is
# sent to `alt_retriever` to fill "context".
alt_rag_chain = (
    RunnableParallel(
        context=itemgetter("question") | alt_retriever,
        question=itemgetter("question"),
    )
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [55]:
# Evaluation variant of the alternative pipeline: it returns the raw LLM
# message together with the retrieved documents, so that both the answer and
# its contexts can be collected afterwards.
retrieval_augmented_qa_chain = (
    # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
    # "context"  : the value of the "question" key piped into alt_retriever
    # "question" : the value of the "question" key, passed along unchanged
    {"context": itemgetter("question") | alt_retriever, "question": itemgetter("question")}
    # (The former RunnablePassthrough.assign(context=itemgetter("context")) step
    #  only re-assigned "context" to itself, so it was dropped.)
    # "response" : "context" and "question" format the prompt, which is piped
    #              into the LLM; the message object is returned unparsed
    # "context"  : the retrieved documents from the previous step
    | {"response": rag_prompt | llm, "context": itemgetter("context")}
)

In [None]:
print(rag_chain.invoke({"question" : "What is the AI framework all about?"}))

The AI Risk Management Framework (AI RMF) is intended to help organizations incorporate trustworthiness considerations into the design, development, use, and evaluation of AI products, services, and systems. It is developed through a consensus-driven, open, transparent, and collaborative process. The framework aims to manage risks posed to individuals, organizations, and society by AI, and to foster the development of innovative approaches to address characteristics of trustworthiness, which include accuracy, explainability, interpretability, reliability, privacy, robustness, safety, security (resilience), and mitigation of unintended and/or harmful bias, as well as harmful uses. The framework is voluntary and is designed to be adaptable to various stages of the AI lifecycle and different AI technologies, including generative AI.


In [52]:
print(alt_rag_chain.invoke({"question" : "What is the AI framework all about?"}))

The AI Risk Management Framework (AI RMF 1.0) for Generative AI is designed to help organizations incorporate trustworthiness considerations into the design, development, use, and evaluation of AI products, services, and systems. It is intended for voluntary use and provides a structured approach to managing AI risks, specifically those associated with Generative AI (GAI). The framework offers insights into managing risks across various stages of the AI lifecycle and is informed by public feedback and consultations with diverse stakeholders. It aims to address primary considerations such as Governance, Content Provenance, Pre-deployment Testing, and Incident Disclosure.


In [56]:
## adds context and response keys to result object
print(retrieval_augmented_qa_chain.invoke({"question" : "What is the AI framework all about?"}))

{'response': AIMessage(content='The AI Risk Management Framework (AI RMF 1.0) for Generative AI, as described in the provided context, is a resource developed to help organizations incorporate trustworthiness considerations into the design, development, use, and evaluation of AI products, services, and systems. It aims to manage risks associated with generative AI, which includes models that generate synthetic content such as images, videos, audio, and text. The framework offers a cross-sectoral profile that assists organizations in managing AI risks across various stages of the AI lifecycle, considering legal and regulatory requirements, best practices, and risk management priorities. It was informed by public feedback and consultations with diverse stakeholders and addresses primary considerations relevant to generative AI, such as governance, content provenance, pre-deployment testing, and incident disclosure.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': 

# Task 2

### Build an end-to-end RAG application using an industry-standard open-source stack and your choice of commercial off-the-shelf models

Build a prototype, deploy it to a Hugging Face Space, and include the public URL of your Space. Create a short (< 2 min) Loom video demonstrating some initial testing inputs and outputs.

 * https://huggingface.co/spaces/aaromosshf2424/midterm_aie4

* Loom video link https://www.loom.com/share/701153f9d7234883aad5df9a5621a6ca?sid=d3b24367-41d1-4155-a06c-f73376801ab2

How did you choose your stack, and why did you select each tool the way you did?

* Docker to containerize the model & app. Why: Self-sustained version of the model and requirements that can be published.

* Chainlit was used to allow for interaction between user, their query, and model functionality.

* Qdrant is the vectorDb to store embeddings that are created from the doc chunks

* Embedding model: OpenAI's "text-embedding-3-large" model is used to create the embeddings.

* HuggingFace was used to host model (Open API)

* OpenAI GPT-4o is the generation LLM; it answers the user query using the retrieved context chunks.

* Langchain was used for orchestration across OpenAI API, Qdrant, etc

## Task 3: Creating a Golden Test Data Set

Generate a synthetic test data set and baseline an initial evaluation

#### pip install ragas

In [36]:
! pip install ragas

Collecting ragas
  Using cached ragas-0.1.20-py3-none-any.whl.metadata (5.5 kB)
Collecting datasets (from ragas)
  Using cached datasets-3.0.0-py3-none-any.whl.metadata (19 kB)
Collecting pysbd>=0.3.4 (from ragas)
  Using cached pysbd-0.3.4-py3-none-any.whl.metadata (6.1 kB)
Collecting appdirs (from ragas)
  Using cached appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting filelock (from datasets->ragas)
  Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)
Collecting pyarrow>=15.0.0 (from datasets->ragas)
  Using cached pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets->ragas)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting pandas (from datasets->ragas)
  Using cached pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting xxhash (from datasets->ragas)
  Using cached xxhash-3.5.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (12 kB)
Collecting multiprocess (from 

In [37]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Models used to synthesise the test set: one writes candidate questions, the
# other acts as the critic.
generator_llm = ChatOpenAI(model="gpt-3.5-turbo")
critic_llm = ChatOpenAI(model="gpt-4o-mini")
# Dedicated name: rebinding `embeddings` here would replace the
# text-embedding-3-small instance created for the baseline vector store and
# make the notebook's state depend on cell execution order.
generator_embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    generator_embeddings
)

# Share of each question type in the generated test set.
distributions = {
    simple: 0.5,
    multi_context: 0.4,
    reasoning: 0.1
}

  from .autonotebook import tqdm as notebook_tqdm


In [38]:
testset = generator.generate_with_langchain_docs(full_docs, 20, distributions, with_debugging_logs=True)

Filename and doc_id are the same for all nodes.                   
Generating:   0%|          | 0/20 [00:00<?, ?it/s][ragas.testset.filters.DEBUG] context scoring: {'clarity': 2, 'depth': 3, 'structure': 2, 'relevance': 3, 'score': 2.5}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['AI Actor Tasks', 'AI Development', 'AI Deployment', 'AI Impact Assessment', 'Operation and Monitoring']
[ragas.testset.filters.DEBUG] context scoring: {'clarity': 1, 'depth': 3, 'structure': 2, 'relevance': 3, 'score': 2.25}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Privacy risk', 'AI red-teaming', 'Fairness and bias', 'Systemic bias', 'Harmful bias']
[ragas.testset.filters.DEBUG] context scoring: {'clarity': 1, 'depth': 2, 'structure': 1, 'relevance': 2, 'score': 1.5}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Large language models', 'AI', 'Algorithmic systems', 'Generative AI', 'Information integrity']
[ragas.testset.filters.DEBUG] context scoring: {'c

In [40]:
test_df = testset.to_pandas()

In [41]:
test_df

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What was President Biden's response to the Sup...,[SECTION: ENDNOTES\n \n \n \n \nENDNOTES\n1.Th...,Remarks by President Biden on the Supreme Cour...,simple,[{'section': 'ENDNOTES'}],True
1,What information should be included in regular...,[SECTION: SAFE AND EFFECTIVE SYSTEMS\n \n \n \...,Regularly-updated reports for entities respons...,simple,[{'section': 'SAFE AND EFFECTIVE SYSTEMS'}],True
2,How is systemic bias evaluated and documented ...,[ etc.); Optimization objectives; Training alg...,Systemic bias in GAI systems is evaluated and ...,simple,[{'section': ' 2.12. Value Chain and Componen...,True
3,How do text-to-image models make it easy to cr...,[ \nGAI systems can produce content that is in...,Text-to-image models make it easy to create im...,simple,"[{'section': ' 2.3. Dangerous, Violent, or Hat...",True
4,How is systemic bias evaluated and documented ...,[ etc.); Optimization objectives; Training alg...,Systemic bias in GAI systems is evaluated and ...,simple,[{'section': ' 2.12. Value Chain and Componen...,True
5,What metrics can be used to reflect the effect...,[ identify \nconcerns and/or current literacy ...,Identify metrics that reflect the effectivenes...,simple,[{'section': ' 2.12. Value Chain and Componen...,True
6,How do intellectual property risks arise from ...,[ \nIntellectual property risks from GAI syste...,Intellectual property risks from GAI systems m...,simple,[{'section': ' 2.10. Intellectual Property'}],True
7,What organizations were involved in the meetin...,[SECTION: LISTENING TO THE AMERICAN PEOPLE\nAP...,"Adobe, American Civil Liberties Union (ACLU), ...",simple,[{'section': 'LISTENING TO THE AMERICAN PEOPLE'}],True
8,How can vendor contracts be reviewed to avoid ...,[ GAI \nincident response functions; Rehearse ...,Review vendor contracts to avoid arbitrary or ...,simple,[{'section': ' 2.12. Value Chain and Componen...,True
9,What are some examples of automated systems th...,[SECTION: APPENDIX: EXAMPLES OF AUTOMATED SYST...,Automated systems that impact the safety of co...,simple,[{'section': 'APPENDIX: EXAMPLES OF AUTOMATED ...,True


In [42]:
# Pull the generated questions and their reference answers out of the test
# set as plain Python lists.
test_questions = test_df["question"].tolist()
test_groundtruths = test_df["ground_truth"].tolist()

In [57]:
# Run every test question through the evaluation chain, keeping the answer
# text and the text of each retrieved document.
answers, contexts = [], []

for question in test_questions:
    response = retrieval_augmented_qa_chain.invoke({"question": question})
    answers.append(response["response"].content)
    contexts.append([doc.page_content for doc in response["context"]])

In [58]:
from datasets import Dataset

# Assemble the evaluation dataset: one row per test question, with the
# pipeline's answer, the retrieved context texts, and the reference answer.
# All four lists are expected to have the same length (one entry per question).
response_dataset = Dataset.from_dict({
    "question" : test_questions,
    "answer" : answers,
    "contexts" : contexts,
    "ground_truth" : test_groundtruths
})

In [59]:
response_dataset[0]

{'question': "What was President Biden's response to the Supreme Court decision to overturn Roe v. Wade?",
 'answer': "President Biden's response to the Supreme Court decision to overturn Roe v. Wade can be found in his remarks on June 24, 2022. You can read the full response at the following link: [Remarks by President Biden on the Supreme Court Decision to Overturn Roe v. Wade](https://www.whitehouse.gov/briefing-room/speeches-remarks/2022/06/24/remarks-by-president-biden-on-the-supreme-court-decision-to-overturn-roe-v-wade/).",
 'contexts': ["SECTION: ENDNOTES\n \n \n \n \nENDNOTES\n1.The Executive Order On Advancing Racial Equity and Support for Underserved Communities Through the\nFederal\xa0Government. https://www.whitehouse.gov/briefing-room/presidential-actions/2021/01/20/executive\norder-advancing-racial-equity-and-support-for-underserved-communities-through-the-federal-government/\n2. The White House. Remarks by President Biden on the Supreme Court Decision to Overturn Roe v.

### Evaluate our Pipeline with Ragas

In [60]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

# Ragas metrics handed to `evaluate` in the next cell.
metrics = [
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    answer_correctness,
]

In [61]:
results = evaluate(response_dataset, metrics)

Evaluating:  40%|████      | 40/100 [00:17<00:19,  3.02it/s]No statements were generated from the answer.
Evaluating: 100%|██████████| 100/100 [01:20<00:00,  1.24it/s]


In [62]:
results_df = results.to_pandas()
results_df

Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_recall,context_precision,answer_correctness
0,What was President Biden's response to the Sup...,[SECTION: ENDNOTES\n \n \n \n \nENDNOTES\n1.Th...,President Biden's response to the Supreme Cour...,Remarks by President Biden on the Supreme Cour...,1.0,0.941003,1.0,1.0,0.986177
1,What information should be included in regular...,[Reporting. When members of the public wish to...,Regularly-updated reports for entities respons...,Regularly-updated reports for entities respons...,1.0,0.978649,1.0,1.0,0.994118
2,How is systemic bias evaluated and documented ...,[ \nBias exists in many forms and can become i...,Systemic bias in the context of GAI systems is...,Systemic bias in GAI systems is evaluated and ...,1.0,0.992335,1.0,0.916667,0.537858
3,How do text-to-image models make it easy to cr...,[Text-to-image models also make it easy to cre...,Text-to-image models make it easy to create im...,Text-to-image models make it easy to create im...,1.0,0.875886,1.0,1.0,0.768552
4,How is systemic bias evaluated and documented ...,[ \nBias exists in many forms and can become i...,Systemic bias in GAI systems is evaluated and ...,Systemic bias in GAI systems is evaluated and ...,1.0,0.989375,1.0,1.0,0.680668
5,What metrics can be used to reflect the effect...,[Action ID \nSuggested Action \nGAI Risks \nMS...,The effectiveness of security measures in term...,Identify metrics that reflect the effectivenes...,0.764706,0.997419,0.666667,1.0,0.361112
6,How do intellectual property risks arise from ...,[ \nIntellectual property risks from GAI syste...,Intellectual property risks from GAI systems i...,Intellectual property risks from GAI systems m...,1.0,0.987392,0.75,0.75,0.879481
7,What organizations were involved in the meetin...,[SECTION: LISTENING TO THE AMERICAN PEOPLE\nAP...,The organizations involved in the meetings con...,"Adobe, American Civil Liberties Union (ACLU), ...",,0.999274,1.0,1.0,0.982893
8,How can vendor contracts be reviewed to avoid ...,[Address general risks associated with a lack ...,Vendor contracts can be reviewed by avoiding a...,Review vendor contracts to avoid arbitrary or ...,1.0,0.949382,0.25,1.0,0.759448
9,What are some examples of automated systems th...,[SECTION: APPENDIX: EXAMPLES OF AUTOMATED SYST...,Examples of automated systems that impact the ...,Automated systems that impact the safety of co...,1.0,1.0,1.0,1.0,0.967447


### Assess your pipeline using the RAGAS framework including key metrics faithfulness, answer relevancy, context precision, and context recall.  Provide a table of your output results.

### What conclusions can you draw about performance and effectiveness of your pipeline with this information?