# Task 0:  Installs and Environment Variables 

In [2]:
# Uninstall incompatible version of langchain-core
%pip uninstall langchain-core -y

# Install compatible version of langchain-core
%pip install langchain-core==0.2.40

# Install langchain-huggingface and ragas (if needed)
%pip install langchain-huggingface==0.0.3 ragas==0.1.20

# Check for any remaining package conflicts
%pip check


Found existing installation: langchain-core 0.2.41
Uninstalling langchain-core-0.2.41:
  Successfully uninstalled langchain-core-0.2.41
Note: you may need to restart the kernel to use updated packages.
Collecting langchain-core==0.2.40
  Using cached langchain_core-0.2.40-py3-none-any.whl.metadata (6.2 kB)
Using cached langchain_core-0.2.40-py3-none-any.whl (396 kB)
Installing collected packages: langchain-core
Successfully installed langchain-core-0.2.40
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
No broken requirements found.
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import openai
from getpass import getpass

openai.api_key = getpass("Please provide your OpenAI Key: ")
os.environ["OPENAI_API_KEY"] = openai.api_key

# Task 1:  Dealing with the Data

In [4]:
from langchain_community.document_loaders import PyMuPDFLoader

# List of file paths for the PDFs you want to load
paths = ["https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf", "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf"]

# Create a list to store loaded documents
documents = []

# Loop through each PDF and load it
for path in paths:
    loader = PyMuPDFLoader(path)
    documents.extend(loader.load())  # Add the documents to the list

len(documents)

137

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

CHUNK_SIZE = 200
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP
)

chunks = text_splitter.split_documents(documents)
len(chunks)

2898

In [5]:
from langchain_openai import OpenAIEmbeddings

# Specify the embedding model
EMBEDDING_MODEL = "text-embedding-ada-002"

# Initialize the OpenAIEmbeddings class
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

In [6]:
from langchain_huggingface import HuggingFaceEmbeddings

# Specify the open-source embedding model from Hugging Face
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Initialize the HuggingFaceEmbeddings class with the chosen model
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# Example
# text = "This is an example sentence for generating embeddings."
# embedding_vector = embeddings.embed_query(text)
# print(embedding_vector)

  from tqdm.autonotebook import tqdm, trange


[-0.00017142681463155895, -0.017052756622433662, 0.0466596782207489, 0.07441818714141846, 0.03471486642956734, 0.05450190603733063, 0.01252576895058155, -0.049386944621801376, 0.029524460434913635, -0.04392328858375549, 0.06013912707567215, -0.05279029905796051, 0.0978655070066452, -0.009004428051412106, -0.014963291585445404, 0.0728435218334198, 0.052706003189086914, 0.006455709226429462, -0.061940401792526245, -0.01770511269569397, 0.022993385791778564, 0.04449153319001198, 0.07146460562944412, -0.04201526194810867, 0.012547018937766552, -0.05232679471373558, -0.03799056634306908, 0.06573976576328278, 0.15933726727962494, 0.00011792029545176774, 0.05847509205341339, -0.007675816770642996, -0.04133718088269234, 0.043803952634334564, 0.026761561632156372, 0.11080886423587799, -0.01858428306877613, 0.07720502465963364, -0.02903367020189762, -0.001317247748374939, 0.04944780841469765, 0.028358951210975647, 0.0126786008477211, 0.0531744509935379, 0.019705265760421753, -0.09619426727294922

In [8]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

LOCATION = ":memory:"
COLLECTION_NAME = "Midterm"
VECTOR_SIZE = 384

# Initialize the Qdrant client
qdrant_client = QdrantClient(
    location=LOCATION
    )

# Create a collection in Qdrant
qdrant_client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(
        size=VECTOR_SIZE, 
        distance=Distance.COSINE
        )
    )

# Initialize QdrantVectorStore with the Qdrant client
qdrant_vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
)

# Add documents to the collection
qdrant_vector_store.add_documents(documents)

['4574c371047e4ba7bd975acabdce0db7',
 'd8e045f5aaf6468685310572b1cfff9e',
 '384a80aaff7c498e8e06ff41c544273f',
 '8ab68d1cca554b72839f16a17795d635',
 '7464efba3fbe42c5ac7a417f460f8087',
 '6eb2b45547814ee4aa5ee98fb23a93f3',
 '381900be04e14e0d8895ebe1c897ee85',
 '52c6353ad3924efba30afe66b9507562',
 '1c3c4b418bd3481abd0bd901f7acea57',
 '3688592f231d4916883812b3e593dc8b',
 '2532118ca7004c468944cf19115af808',
 '1059c893814146e6be59e54883c3e733',
 'cb69e4bf40ac4e65ac7f378948867276',
 'a5ba6a781ffb42c9a99aad9b4d70d734',
 'b02613072c0740178297e3c5d33d258c',
 '2ac3dbb0f42c4afab50e3ae9316b587b',
 '328db6367e6c49fdbdcffd3be772131f',
 '434337fd308d4532b2f01072fbec8854',
 'a1da0c148c234d27a8851bc9b3dde839',
 '06feeaa955684d1c8009d70234e27269',
 '5e25b908a6954522ae53b8f788de8d6c',
 '886b5b597cb14b50a35351c6878a114b',
 'c673c1f754ae47d69a625cfef82015cf',
 '1e4319b5170b4843928f1949cd920fdb',
 'aca03e106c594d6e9a446d7c23146d42',
 'c0807ce327314be1bf8380ecdf1829bc',
 '27aaf0d62abb4fa2b1ae50b1d29eb603',
 

In [9]:
retriever = qdrant_vector_store.as_retriever()

# retrieved_documents = retriever.invoke("What are underserved communities?")
# retrieved_documents = retriever.invoke("What should be expected of automated systems?")
retrieved_documents = retriever.invoke("What is action ID GV-1.3-001?")


for doc in retrieved_documents:
  print(doc)

page_content=' 
19 
GV-4.1-003 
Establish policies, procedures, and processes for oversight functions (e.g., senior 
leadership, legal, compliance, including internal evaluation) across the GAI 
lifecycle, from problem formulation and supply chains to system decommission. 
Value Chain and Component 
Integration 
AI Actor Tasks: AI Deployment, AI Design, AI Development, Operation and Monitoring 
 
GOVERN 4.2: Organizational teams document the risks and potential impacts of the AI technology they design, develop, deploy, 
evaluate, and use, and they communicate about the impacts more broadly. 
Action ID 
Suggested Action 
GAI Risks 
GV-4.2-001 
Establish terms of use and terms of service for GAI systems. 
Intellectual Property; Dangerous, 
Violent, or Hateful Content; 
Obscene, Degrading, and/or 
Abusive Content 
GV-4.2-002 
Include relevant AI Actors in the GAI system risk identiﬁcation process. 
Human-AI Conﬁguration 
GV-4.2-003 
Verify that downstream GAI system impacts (such as the u

# Task 2:  Building a Quick End-to-End Prototype

In [10]:
from langchain.prompts import ChatPromptTemplate

template = """
Only answer the question using the context below.  If the answer can't be found in the context, respond "I don't know". 

Question:
{question}

Context:
{context}
"""

prompt = ChatPromptTemplate.from_template(template)

In [11]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

chain = (
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")} 
    | RunnablePassthrough.assign(context=itemgetter("context")) 
    | {"response": prompt | llm, "context": itemgetter("context")}
    )  

In [22]:
%pip install bitsandbytes transformers

import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "NousResearch/Meta-Llama-3.1-8B-Instruct"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map='auto',
)

tokenizer = AutoTokenizer.from_pretrained(model_id)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


RuntimeError: No GPU found. A GPU is needed for quantization.

In [13]:
question = "What is confabulation?"
#question = "What is action ID GV-1.3-002?"

result = retrieval_augmented_qa_chain.invoke({"question" : question})

print(result["response"].content)

Confabulation refers to a phenomenon in which GAI systems generate and confidently present erroneous or false content in response to prompts. It includes generated outputs that diverge from the prompts or contradict previously generated statements in the same context. These phenomena are also known as "hallucinations" or "fabrications."
