# Task 0: Installs and Environment Variables

In [8]:
# Dependency setup. Per pip's recorded output in this notebook, langchain-huggingface 0.0.3
# and ragas 0.1.20 both require langchain-core<0.3, so langchain-core is pinned to a 0.2.x release.
# pip notes that the kernel may need a restart before the updated packages are picked up.
# NOTE(review): the next install cell pins langchain-core==0.2.39 instead of 0.2.40 -
# confirm which pin is intended and keep only one.

# Uninstall whatever version of langchain-core is currently installed
%pip uninstall langchain-core -y

# Install compatible version of langchain-core
%pip install langchain-core==0.2.40

# Install pinned versions of langchain-huggingface and ragas (if needed)
%pip install langchain-huggingface==0.0.3 ragas==0.1.20

# Check for any remaining package conflicts
%pip check


Found existing installation: langchain-core 0.2.40
Uninstalling langchain-core-0.2.40:
  Successfully uninstalled langchain-core-0.2.40
Note: you may need to restart the kernel to use updated packages.
Collecting langchain-core==0.2.40
  Using cached langchain_core-0.2.40-py3-none-any.whl.metadata (6.2 kB)
Using cached langchain_core-0.2.40-py3-none-any.whl (396 kB)
Installing collected packages: langchain-core
Successfully installed langchain-core-0.2.40
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
No broken requirements found.
Note: you may need to restart the kernel to use updated packages.


In [1]:
# One-shot pinned install of langchain-core, langchain-huggingface and ragas.
# NOTE(review): this pins langchain-core==0.2.39 while the install cell above pins 0.2.40;
# the version that ends up installed depends on which cell ran last - confirm and unify.
%pip install langchain-core==0.2.39 langchain-huggingface==0.0.3 ragas==0.1.20

Collecting langchain-core==0.2.39
  Using cached langchain_core-0.2.39-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-huggingface==0.0.3
  Using cached langchain_huggingface-0.0.3-py3-none-any.whl.metadata (1.2 kB)
Collecting ragas==0.1.20
  Using cached ragas-0.1.20-py3-none-any.whl.metadata (5.5 kB)
INFO: pip is looking at multiple versions of langchain-openai to determine which version is compatible with other requirements. This could take a while.
Collecting langchain-openai (from ragas==0.1.20)
  Using cached langchain_openai-0.2.0-py3-none-any.whl.metadata (2.6 kB)
  Using cached langchain_openai-0.1.24-py3-none-any.whl.metadata (2.6 kB)
Using cached langchain_core-0.2.39-py3-none-any.whl (396 kB)
Using cached langchain_huggingface-0.0.3-py3-none-any.whl (17 kB)
Using cached ragas-0.1.20-py3-none-any.whl (190 kB)
Downloading langchain_openai-0.1.24-py3-none-any.whl (51 kB)
Installing collected packages: langchain-core, langchain-openai, langchain-huggingface, ragas
  Att

In [10]:
import os
import openai
from getpass import getpass

# Ask for the key interactively (never hard-code it) and publish the same value both
# on the openai module and in the OPENAI_API_KEY environment variable.
openai.api_key = os.environ["OPENAI_API_KEY"] = getpass("Please provide your OpenAI Key: ")

# Task 1: Dealing with the Data

In [11]:
from langchain_community.document_loaders import PyMuPDFLoader

# Source PDFs, given as URLs that are passed directly to PyMuPDFLoader
paths = [
    "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf",
    "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf",
]

# Load each PDF in turn and pool everything the loaders return into one flat list
documents = []
for path in paths:
    documents += PyMuPDFLoader(path).load()

# Number of loaded document objects (displayed as the cell result)
len(documents)

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-huggingface 0.0.3 requires langchain-core<0.3,>=0.1.52, but you have langchain-core 0.3.5 which is incompatible.
ragas 0.1.20 requires langchain-core<0.3, but you have langchain-core 0.3.5 which is incompatible.[0m[31m
[0m

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: target chunk size and the overlap shared by neighbouring chunks
CHUNK_SIZE, CHUNK_OVERLAP = 200, 50

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Split the loaded documents into chunks
chunks = text_splitter.split_documents(documents)

# Number of chunks produced (displayed as the cell result)
len(chunks)

NameError: name 'documents' is not defined

In [5]:
from langchain_openai import OpenAIEmbeddings

# OpenAI embedding model to use.
# NOTE: the following cell rebinds both EMBEDDING_MODEL and `embeddings` to a
# Hugging Face model, so whichever of the two cells ran last is the one in effect.
EMBEDDING_MODEL = "text-embedding-ada-002"

# Embedder handed to the vector store further down
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Specify the open-source embedding model from Hugging Face.
# NOTE: this cell rebinds EMBEDDING_MODEL and `embeddings`, replacing the OpenAI
# embedder from the previous cell for every cell that runs after this one.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Initialize the HuggingFaceEmbeddings class with the chosen model
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# Smoke test - embed a sample text
text = "This is an example sentence for generating embeddings."
embedding_vector = embeddings.embed_query(text)

# Show the dimensionality plus a short preview instead of dumping every component;
# the length is what the vector store's collection size has to match.
print(f"{EMBEDDING_MODEL}: {len(embedding_vector)} dimensions; first values: {embedding_vector[:5]}")

In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

LOCATION = ":memory:"
COLLECTION_NAME = "Midterm"

# Size the collection from the embedder currently bound to `embeddings` rather than
# hard-coding it: this notebook binds that name to an OpenAI embedder and then to a
# Hugging Face one, and a fixed size can match at most one of them. A mismatch between
# the collection's vector size and the embedder's output makes inserts fail.
# (This costs one extra embedding call.)
VECTOR_SIZE = len(embeddings.embed_query("vector size probe"))

# Initialize the Qdrant client (in-memory instance)
qdrant_client = QdrantClient(location=LOCATION)

# Create a collection in Qdrant sized for the active embedder, using cosine distance
qdrant_client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(
        size=VECTOR_SIZE,
        distance=Distance.COSINE,
    ),
)

# Initialize QdrantVectorStore with the Qdrant client
qdrant_vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
)

# Index the chunks produced by the text splitter (not the unsplit `documents`), so
# that retrieval operates on the chunked text the splitting cell prepared.
# The trailing semicolon keeps the returned ID list out of the cell output.
qdrant_vector_store.add_documents(chunks);

In [None]:
# Expose the vector store as a retriever with its default settings
retriever = qdrant_vector_store.as_retriever()

# Spot-check retrieval with a single query. Other queries tried while exploring:
#   "What are underserved communities?"
#   "What should be expected of automated systems?"
retrieved_documents = retriever.invoke("What is action ID GV-1.3-001?")

# Print every retrieved document, one per line of output
for doc in retrieved_documents:
    print(doc)

# Task 2: Building a Quick End-to-End Prototype

In [14]:
from langchain.prompts import ChatPromptTemplate

# Prompt text for the RAG chain: it instructs the model to answer only from the supplied
# context and to reply "I don't know" otherwise. {question} and {context} are the two
# placeholders that get filled in when the prompt is formatted.
template = """
Only answer the question using the context below.  If the answer can't be found in the context, respond "I don't know". 

Question:
{question}

Context:
{context}
"""

# Build the chat prompt object from the raw template string
prompt = ChatPromptTemplate.from_template(template)

In [15]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter

# Chat model that writes the answer; temperature is set to 0
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# RAG pipeline. Input: {"question": ...}. Output: {"response": ..., "context": ...}.
retrieval_augmented_qa_chain = (
    # Stage 1: feed the question to the retriever to get context, and carry the question along
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    # Stage 2: pass the dict through, assigning "context" from the existing "context" entry
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Stage 3: format the prompt and call the model; return the context next to the response
    | {
        "response": prompt | llm,
        "context": itemgetter("context"),
    }
)

In [None]:
# Question put to the RAG chain (alternative tried earlier: "What is confabulation?")
question = "What is action ID GV-1.3-002?"

# Run the chain; the result carries both the model response and the retrieved context
result = retrieval_augmented_qa_chain.invoke({"question": question})

# Show only the text content of the model's response
print(result["response"].content)