# Task 0:  Installs and Environment Variables 

In [None]:
!pip install -U -q langchain langchain-openai langchain_core langchain-community langchainhub openai langchain-qdrant
!pip install -qU ragas
!pip install -qU qdrant-client pymupdf pandas

In [2]:
import os
import openai
from getpass import getpass

openai.api_key = getpass("Please provide your OpenAI Key: ")
os.environ["OPENAI_API_KEY"] = openai.api_key

# Task 1:  Dealing with the Data

In [3]:
from langchain_community.document_loaders import PyMuPDFLoader

# List of file paths for the PDFs you want to load
paths = ["https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf", "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf"]

# Create a list to store loaded documents
documents = []

# Loop through each PDF and load it
for path in paths:
    loader = PyMuPDFLoader(path)
    documents.extend(loader.load())  # Add the documents to the list

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

CHUNK_SIZE = 200
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP
)

chunks = text_splitter.split_documents(documents)
len(chunks)

In [5]:
from langchain_openai import OpenAIEmbeddings

# Specify the embedding model
EMBEDDING_MODEL = "text-embedding-ada-002"

# Initialize the OpenAIEmbeddings class
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

LOCATION = ":memory:"
COLLECTION_NAME = "Midterm"
VECTOR_SIZE = 1536

# Initialize the Qdrant client
qdrant_client = QdrantClient(
    location=LOCATION
    )

# Create a collection in Qdrant
qdrant_client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(
        size=VECTOR_SIZE, 
        distance=Distance.COSINE
        )
    )

# Initialize QdrantVectorStore with the Qdrant client
qdrant_vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
)

# Add documents to the collection
qdrant_vector_store.add_documents(documents)

In [21]:
retriever = qdrant_vector_store.as_retriever()

# retrieved_documents = retriever.invoke("What are underserved communities?")
# retrieved_documents = retriever.invoke("What should be expected of automated systems?")
retrieved_documents = retriever.invoke("What is action ID GV-1.3-001?")


for doc in retrieved_documents:
  print(doc)

page_content=' 
13 
• 
Not every suggested action applies to every AI Actor14 or is relevant to every AI Actor Task. For 
example, suggested actions relevant to GAI developers may not be relevant to GAI deployers. 
The applicability of suggested actions to relevant AI actors should be determined based on 
organizational considerations and their unique uses of GAI systems. 
Each table of suggested actions includes: 
• 
Action ID: Each Action ID corresponds to the relevant AI RMF function and subcategory (e.g., GV-
1.1-001 corresponds to the ﬁrst suggested action for Govern 1.1, GV-1.1-002 corresponds to the 
second suggested action for Govern 1.1). AI RMF functions are tagged as follows: GV = Govern; 
MP = Map; MS = Measure; MG = Manage. 
• 
Suggested Action: Steps an organization or AI actor can take to manage GAI risks.  
• 
GAI Risks: Tags linking suggested actions with relevant GAI risks.  
• 
AI Actor Tasks: Pertinent AI Actor Tasks for each subcategory. Not every AI Actor Task lis

# Task 2:  Building a Quick End-to-End Prototype

In [14]:
from langchain.prompts import ChatPromptTemplate

template = """
Only answer the question using the context below.  If the answer can't be found in the context, respond "I don't know". 

Question:
{question}

Context:
{context}
"""

prompt = ChatPromptTemplate.from_template(template)

In [15]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")} 
    | RunnablePassthrough.assign(context=itemgetter("context")) 
    | {"response": prompt | llm, "context": itemgetter("context")}
    )  

In [20]:
#question = "What is confabulation?"
question = "What is action ID GV-1.3-002?"

result = retrieval_augmented_qa_chain.invoke({"question" : question})

print(result["response"].content)

I don't know.
