In [3]:
!pip install -q langchain-core==0.2.40 langchain-openai==0.1.25 langchain-huggingface==0.0.3 

In [4]:
!pip install -qU pymupdf ragas

In [5]:
import os
import getpass

# LangSmith tracing is configured through environment variables.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# getpass prompts without echoing, so the key is not written into the notebook source.
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangChain API Key:")

In [6]:
from uuid import uuid4

# Give each run its own project name by appending the first 8 hex characters
# of a freshly generated UUID.
run_suffix = uuid4().hex[:8]
os.environ["LANGCHAIN_PROJECT"] = "AIM - SDG - " + run_suffix

In [7]:
# Prompt for the OpenAI key. The ChatOpenAI / OpenAIEmbeddings objects created later in
# this notebook are not passed a key explicitly — presumably they read this variable.
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

# Loading Source Documents

In [8]:
# Show the working directory: the PDF file names loaded in the next cell are relative paths.
os.getcwd()

'c:\\Users\\andre\\OneDrive\\Documents\\AIE4\\AIE4\\Midterm'

In [12]:
from langchain_community.document_loaders import PyMuPDFLoader
# Load the two source PDFs with PyMuPDF, one loader per file.
bill_loader = PyMuPDFLoader('Blueprint-for-an-AI-Bill-of-Rights.pdf')
bill_docs = bill_loader.load()
nist_loader = PyMuPDFLoader('NIST_report.pdf')
nist_docs = nist_loader.load()

In [13]:
# Single corpus: the Blueprint documents followed by the NIST documents.
documents = [*bill_docs, *nist_docs]

# Generate Synthetic Data

In [15]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Models handed to the Ragas test-set generator: a generator LLM, a critic LLM,
# and an embeddings model.
generator_llm = ChatOpenAI(model="gpt-4o-mini-2024-07-18")  # changed from gpt-3.5-turbo
critic_llm = ChatOpenAI(model="gpt-4o-mini")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm=generator_llm,
    critic_llm=critic_llm,
    embeddings=embeddings,
)

# Weight given to each evolution type when generating questions (weights sum to 1.0).
distributions = dict(
    [
        (simple, 0.5),
        (multi_context, 0.4),
        (reasoning, 0.1),
    ]
)

In [16]:
# Ask for 20 synthetic samples drawn from the loaded documents, with debug logging on.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=20,
    distributions=distributions,
    with_debugging_logs=True,
)

embedding nodes:   0%|          | 0/284 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/20 [00:00<?, ?it/s]

[ragas.testset.filters.DEBUG] context scoring: {'clarity': 2, 'depth': 3, 'structure': 2, 'relevance': 3, 'score': 2.5}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['GAI system incidents', 'Organizational risk management', 'Incident response processes', 'Third-party GAI resources', 'Data privacy and localization compliance']
[ragas.testset.filters.DEBUG] context scoring: {'clarity': 2, 'depth': 3, 'structure': 2, 'relevance': 3, 'score': 2.5}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Human subject protection', 'Content provenance', 'Data privacy', 'AI system performance', 'Privacy-enhancing technologies']
[ragas.testset.filters.DEBUG] context scoring: {'clarity': 2, 'depth': 3, 'structure': 2, 'relevance': 3, 'score': 2.5}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Prompt injection', 'Indirect prompt injection attacks', 'Data poisoning', 'Intellectual property risks', 'Obscene and degrading content']
[ragas.testset.filters.DEBUG] c

In [17]:
# Display the generated test set as a DataFrame for inspection.
testset.to_pandas()

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What criteria are used to measure AI system pe...,[ \n30 \nMEASURE 2.2: Evaluations involving hu...,AI system performance or assurance criteria ar...,simple,"[{'source': 'NIST_report.pdf', 'file_path': 'N...",True
1,What applications can GAI technologies be leve...,[ \n51 \ngeneral public participants. For exam...,GAI technologies can be leveraged for many app...,simple,"[{'source': 'NIST_report.pdf', 'file_path': 'N...",True
2,What was the purpose of the meetings conducted...,[APPENDIX\n• OSTP conducted meetings with a va...,The purpose of the meetings conducted by OSTP ...,simple,[{'source': 'Blueprint-for-an-AI-Bill-of-Right...,True
3,What are the intellectual property risks assoc...,"[ \n11 \nvalue chain (e.g., data inputs, proce...",Intellectual property risks from GAI systems m...,simple,"[{'source': 'NIST_report.pdf', 'file_path': 'N...",True
4,What measures are suggested for ensuring the a...,[ \n25 \nMP-2.3-002 Review and document accura...,The suggested measures for ensuring the accura...,simple,"[{'source': 'NIST_report.pdf', 'file_path': 'N...",True
5,What measures are suggested to manage GAI risk...,[ \n42 \nMG-2.4-002 \nEstablish and maintain p...,The suggested measures to manage GAI risks ass...,simple,"[{'source': 'NIST_report.pdf', 'file_path': 'N...",True
6,What measures are suggested to ensure informat...,[ \n28 \nMAP 5.2: Practices and personnel for ...,Suggested measures to ensure information integ...,simple,"[{'source': 'NIST_report.pdf', 'file_path': 'N...",True
7,What are the key considerations regarding data...,[TABLE OF CONTENTS\nFROM PRINCIPLES TO PRACTIC...,The answer to given question is not present in...,simple,[{'source': 'Blueprint-for-an-AI-Bill-of-Right...,True
8,What are some examples of how data privacy pri...,[ \n \n \n \nDATA PRIVACY \nWHY THIS PRINCIPL...,Some examples of how data privacy principles a...,simple,[{'source': 'Blueprint-for-an-AI-Bill-of-Right...,True
9,What organizational practices are necessary fo...,"[ \n19 \nGV-4.1-003 \nEstablish policies, proc...",Organizational practices necessary for enablin...,simple,"[{'source': 'NIST_report.pdf', 'file_path': 'N...",True


# LangSmith Dataset

In [18]:
from langsmith import Client

# LangSmith client built with no explicit arguments (presumably configured from the
# environment variables set earlier in this notebook).
client = Client()

# Create the dataset that will hold the generated question/answer pairs.
dataset_name = "AI Questions"
dataset_description = "Questions about responsibly managing AI"
dataset = client.create_dataset(dataset_name=dataset_name, description=dataset_description)

In [19]:
# Upload every generated sample to the LangSmith dataset as one example:
#   inputs   -> the question
#   outputs  -> the ground-truth answer
#   metadata -> the sample's `contexts` column (not the DataFrame row index)
for _, row in testset.to_pandas().iterrows():
  client.create_example(
      inputs={
          "question": row["question"]
      },
      outputs={
          "answer": row["ground_truth"]
      },
      metadata={
          # list(...) hands the client a plain list even if pandas returns an array-like.
          "context": list(row["contexts"])
      },
      dataset_id=dataset.id
  )

# Basic RAG Chain

In [20]:
# The RAG corpus starts from the same loaded documents used for test-set generation.
rag_documents = documents

In [21]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter used to chunk the corpus for retrieval (chunk_size=500, chunk_overlap=50).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50
)

# Split from `documents` rather than from `rag_documents` itself, so re-running this
# cell always chunks the originally loaded documents instead of already-split chunks.
rag_documents = text_splitter.split_documents(documents)

In [22]:
from langchain_openai import OpenAIEmbeddings

# Rebinds `embeddings`: the earlier value (default OpenAIEmbeddings()) was handed to the
# test-set generator; from here on the name refers to the model used for the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [23]:
from langchain_community.vectorstores import Qdrant

# Index the split documents in an in-memory Qdrant collection using `embeddings`.
vectorstore = Qdrant.from_documents(
    rag_documents,
    embeddings,
    location=":memory:",
    collection_name="Responsible AI",
)

In [24]:
# Expose the vector store as a retriever; no search arguments are passed, so defaults apply.
retriever = vectorstore.as_retriever()

In [25]:
from langchain.prompts import ChatPromptTemplate

# Prompt text for the RAG chain. It has two placeholders, {context} and {question},
# and tells the model to answer only from the context or say "I don't know".
RAG_PROMPT = (
    "Given a provided context and question, you must answer the question based only on context.\n"
    "\n"
    'If you cannot answer the question based on the context - you must say "I don\'t know".\n'
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
)

# Build the chat prompt object from the template string; it is piped into the LLM in the chain below.
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

In [38]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain.schema import StrOutputParser
from langchain_openai import ChatOpenAI

primary_qa_llm = ChatOpenAI(model="gpt-4o-mini")

# Invoke the chain with: {"question": "<some user question>"}

# Stage 1: "context" is the retriever's result for the question;
#          "question" is passed along unchanged.
fetch_context = {
    "context": itemgetter("question") | retriever,
    "question": itemgetter("question"),
}

# Stage 2: re-assign "context" from itself, so the mapping continues with the same keys.
carry_context = RunnablePassthrough.assign(context=itemgetter("context"))

# Stage 3: "response" is the prompt (formatted from "context" and "question") piped into
#          the LLM; "context" is returned next to it so callers can see what was retrieved.
answer_with_context = {
    "response": rag_prompt | primary_qa_llm,
    "context": itemgetter("context"),
}

retrieval_augmented_qa_chain = fetch_context | carry_context | answer_with_context

In [39]:
# Smoke-test the chain with a single question; the result is a dict with "response" and "context" keys.
retrieval_augmented_qa_chain.invoke({"question" : "What are some responsible ways to use AI?"})

{'response': AIMessage(content='Some responsible ways to use AI include ensuring that AI is lawful and respectful of national values, purposeful and performance-driven, accurate, reliable, and effective, safe, secure, and resilient, understandable, responsible and traceable, regularly monitored, transparent, and accountable.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 52, 'prompt_tokens': 1169, 'total_tokens': 1221, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1bb46167f9', 'finish_reason': 'stop', 'logprobs': None}, id='run-f19b79a0-d837-4c8f-b8b9-4da21f440dce-0', usage_metadata={'input_tokens': 1169, 'output_tokens': 52, 'total_tokens': 1221}),
 'context': [Document(metadata={'source': 'Blueprint-for-an-AI-Bill-of-Rights.pdf', 'file_path': 'Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 20, 'total_pages': 73, 'format': 'PDF 1.6', 'title': 'Blueprint for an A

# Ragas Evaluation Set-up

In [34]:
# Pull the questions and their reference answers out of the test set as plain lists.
testset_df = testset.to_pandas()
test_questions = testset_df['question'].tolist()
test_groundtruths = testset_df['ground_truth'].tolist()

In [40]:
# Run every test question through the RAG chain, keeping the answer text and the
# text of each retrieved document for that question.
answers, contexts = [], []

for question in test_questions:
  chain_output = retrieval_augmented_qa_chain.invoke({"question" : question})
  answers.append(chain_output["response"].content)
  retrieved_texts = [doc.page_content for doc in chain_output["context"]]
  contexts.append(retrieved_texts)

In [41]:
from datasets import Dataset

# Column-wise records for evaluation: one entry per test question in every list.
response_columns = {
    "question" : test_questions,
    "answer" : answers,
    "contexts" : contexts,
    "ground_truth" : test_groundtruths,
}
response_dataset = Dataset.from_dict(response_columns)

In [43]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

# Metrics passed to `evaluate`, in this order (which differs from the import order above).
metrics = [faithfulness, answer_relevancy, context_recall, context_precision, answer_correctness]

In [44]:
# Score the collected answers and contexts with the selected Ragas metrics.
results = evaluate(dataset=response_dataset, metrics=metrics)

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

In [45]:
# Display the aggregate score for each metric.
results

{'faithfulness': 0.7963, 'answer_relevancy': 0.8297, 'context_recall': 0.7458, 'context_precision': 0.7903, 'answer_correctness': 0.5693}

In [46]:
# Per-question breakdown: one row per test question with a column for each metric.
results_df = results.to_pandas()
results_df

Unnamed: 0,question,contexts,answer,ground_truth,faithfulness,answer_relevancy,context_recall,context_precision,answer_correctness
0,What criteria are used to measure AI system pe...,[and Homogenization \nAI Actor Tasks: AI Deplo...,AI system performance or assurance criteria ar...,AI system performance or assurance criteria ar...,1.0,0.958068,1.0,1.0,1.0
1,What applications can GAI technologies be leve...,"[assessments, and alerting, dynamic risk asses...",GAI technologies can be leveraged for applicat...,GAI technologies can be leveraged for many app...,1.0,0.968036,1.0,0.5,0.362548
2,What was the purpose of the meetings conducted...,[APPENDIX\n• OSTP conducted meetings with a va...,The purpose of the meetings conducted by OSTP ...,The purpose of the meetings conducted by OSTP ...,1.0,1.0,1.0,0.805556,0.999684
3,What are the intellectual property risks assoc...,[2.10. \nIntellectual Property \nIntellectual ...,Intellectual property risks from GAI systems m...,Intellectual property risks from GAI systems m...,1.0,0.99158,0.5,1.0,0.747246
4,What measures are suggested for ensuring the a...,[Suggested Action \nGAI Risks \nMP-2.2-001 \nI...,The suggested measures for ensuring the accura...,The suggested measures for ensuring the accura...,1.0,1.0,0.5,1.0,0.362176
5,What measures are suggested to manage GAI risk...,[GAI resources; Apply organizational risk tole...,The suggested measures to manage GAI risks ass...,The suggested measures to manage GAI risks ass...,1.0,0.999999,0.75,1.0,0.577877
6,What measures are suggested to ensure informat...,[or risks of bias or homogenization. \nThere m...,The suggested measures to ensure information i...,Suggested measures to ensure information integ...,1.0,0.991402,0.0,0.416667,0.697974
7,What are the key considerations regarding data...,[Applying The Blueprint for an AI Bill of Righ...,The key considerations regarding data privacy ...,The answer to given question is not present in...,1.0,1.0,1.0,0.0,0.179462
8,What are some examples of how data privacy pri...,[DATA PRIVACY \nWHY THIS PRINCIPLE IS IMPORTAN...,I don't know.,Some examples of how data privacy principles a...,0.0,0.0,1.0,1.0,0.177616
9,What organizational practices are necessary fo...,[GOVERN 4.3: Organizational practices are in p...,Organizational practices necessary for enablin...,Organizational practices necessary for enablin...,1.0,1.0,0.666667,1.0,0.843065
