In [1]:
%pip install langchain_community langchain  langchain_ollama langchain_qdrant


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
from langchain_ollama import OllamaLLM
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


In [3]:

# Local Ollama-served model. The stdout handler echoes tokens as they are generated,
# so the answer is printed while streaming and then shown again as the cell's return value.
# `callbacks=` is the supported way to attach handlers; `callback_manager=` is deprecated.
llm = OllamaLLM(
    model="llama3.2",
    callbacks=[StreamingStdOutCallbackHandler()],
)

# Quick sanity check that the model is reachable and responding.
llm.invoke("The first man on the summit of Mount Everest, the highest peak on Earth, was ...")

Tenzing Norgay. Tenzing Norgay, a Nepali Sherpa mountaineer, and Sir Edmund Hillary, a New Zealand mountaineer and explorer, were the first two people to reach the summit of Mount Everest on May 29, 1953.

'Tenzing Norgay. Tenzing Norgay, a Nepali Sherpa mountaineer, and Sir Edmund Hillary, a New Zealand mountaineer and explorer, were the first two people to reach the summit of Mount Everest on May 29, 1953.'

In [4]:
from langchain_core.prompts import PromptTemplate

# Prompt used by the RAG chain. The system turn tells the model to answer only from the
# supplied context and to say it doesn't know otherwise; the user turn carries the two
# template variables, {query} and {context}.
# NOTE(review): the <|start_header_id|> / <|eot_id|> markers look like the Llama 3 chat
# format — confirm they match what the served model expects.
RAG_PROMPT_TEMPLATE = """\
<|start_header_id|>system<|end_header_id|>
You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>

<|start_header_id|>user<|end_header_id|>
User Query:
{query}

Context:
{context}<|eot_id|>

<|start_header_id|>assistant<|end_header_id|>
"""

# Build a PromptTemplate from the raw string above.
rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

In [5]:
# Compose prompt -> model into a single runnable: the filled-in prompt is fed to the LLM.
rag_chain = rag_prompt.pipe(llm)

In [13]:
# Smoke-test the chain with a hand-written context that contains the answer.
rag_chain.invoke({"query": "How old is Carl?", "context": "Carl is a sweet dude, he's 40."})

Carl is 40 years old.

'Carl is 40 years old.'

In [6]:
from langchain_ollama import OllamaEmbeddings

# Embedding model served by Ollama; this object is handed to the Qdrant vector store below.
embeddings = OllamaEmbeddings(model="snowflake-arctic-embed")

In [7]:
from langchain_community.document_loaders import PyMuPDFLoader

# Read the source PDF from the local pdf_files/ directory.
loader = PyMuPDFLoader("./pdf_files/Blueprint-for-an-AI-Bill-of-Rights.pdf")
documents = loader.load()

In [8]:
# Number of Document objects returned by the loader.
len(documents)

73

In [9]:
# Flatten every loaded document into one string, terminating each document's text
# with a newline.
all_text = "".join(doc.page_content + "\n" for doc in documents)


In [10]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters handed to the splitter below.
chunk_size, chunk_overlap = 1000, 50

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

In [11]:
# Split the concatenated text into a list of chunk strings.
chunks = text_splitter.split_text(all_text)

In [12]:
from langchain_core.documents.base import Document

# Wrap each text chunk in a Document so it can be handed to the vector store.
chunked_documents = []
for chunk_text in chunks:
    chunked_documents.append(Document(page_content=chunk_text))

In [13]:
from langchain_qdrant import QdrantVectorStore
# Target collection and the address of the locally running Qdrant server.
collection_name = "BillOfRights"
url = "http://localhost:6333/"

# Embed the chunks and upload them into the collection.
# force_recreate=True rebuilds the collection from scratch, so re-running this cell
# (e.g. Restart & Run All) does not append a second copy of every chunk.
# NOTE(review): this discards whatever the collection already contains — drop the flag
# if the collection is shared with other data.
qdrant = QdrantVectorStore.from_documents(
    chunked_documents,
    embeddings,
    url=url,
    prefer_grpc=True,
    collection_name=collection_name,
    force_recreate=True,
)

In [14]:
# Open the already-populated collection as a vector store for querying.
# `url` is the Qdrant address defined in the ingestion cell.
collection_name = "BillOfRights"
qdrant_vectorstore = QdrantVectorStore.from_existing_collection(
    url=url,
    collection_name=collection_name,
    embedding=embeddings,
)

In [15]:
# Expose the vector store through the retriever interface, using the library's default search settings.
retriever = qdrant_vectorstore.as_retriever()

In [16]:
# Sanity-check retrieval: the cell output lists the Documents returned for this query.
retriever.invoke("What is the AI Bill of Rights?")

[Document(metadata={'_id': '3ca8a4f6-de9d-421e-89ab-e190bbeced7a', '_collection_name': 'BillOfRights'}, page_content='BLUEPRINT FOR AN \nAI BILL OF \nRIGHTS \nMAKING AUTOMATED \nSYSTEMS WORK FOR \nTHE AMERICAN PEOPLE \nOCTOBER 2022'),
 Document(metadata={'_id': 'f6d12c09-4d93-4e6d-a87b-887f7c06a56f', '_collection_name': 'BillOfRights'}, page_content='FROM \nPRINCIPLES \nTO PRACTICE \nA TECHINCAL COMPANION TO\nTHE Blueprint for an \nAI BILL OF RIGHTS\n12\n\nTABLE OF CONTENTS\nFROM PRINCIPLES TO PRACTICE: A TECHNICAL COMPANION TO THE BLUEPRINT \nFOR AN AI BILL OF RIGHTS \n \nUSING THIS TECHNICAL COMPANION\n \nSAFE AND EFFECTIVE SYSTEMS\n \nALGORITHMIC DISCRIMINATION PROTECTIONS\n \nDATA PRIVACY\n \nNOTICE AND EXPLANATION\n \nHUMAN ALTERNATIVES, CONSIDERATION, AND FALLBACK\nAPPENDIX\n \nEXAMPLES OF AUTOMATED SYSTEMS\n \nLISTENING TO THE AMERICAN PEOPLE\nENDNOTES \n12\n14\n15\n23\n30\n40\n46\n53\n53\n55\n63\n13'),
 Document(metadata={'_id': '8f662373-13f8-4053-92fa-63534f5010ed', '_collect