1. Read PDF

In [1]:
import nest_asyncio
from llama_parse import LlamaParse
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Retrieve API key from environment variable
os.environ["LLAMA_CLOUD_API_KEY"] = os.getenv("LLAMA_CLOUD_API_KEY")

# Allow async event loop nesting (required in Jupyter or notebooks)
nest_asyncio.apply()

# Step 1: Load and parse PDF using LlamaParse
parser = LlamaParse(result_type="markdown")  # or "text" if you want plain output
pdf_path = "./data/Lecture1-a.pdf"

print("Parsing PDF...")
llama_parse_documents = parser.load_data(pdf_path)
print("Parsing complete.")

# Step 2: Combine all parsed document texts into a single markdown string
markdown_text = "\n".join(doc.text for doc in llama_parse_documents)

# Count the number of documents
num_documents = len(llama_parse_documents)
print(f"Total number of documents: {num_documents}")

Parsing PDF...
Started parsing the file under job_id 97889a67-e4e4-4684-832d-7588dfd53aee
Parsing complete.
Total number of documents: 25


In [2]:
# Display documents in a loop
doc_number = 8
if doc_number <= len(llama_parse_documents):
    print(f"--- Document {doc_number} ---")
    print(llama_parse_documents[doc_number - 1].text)
else:
    print(f"Document {doc_number} does not exist. Total documents: {len(llama_parse_documents)}")


--- Document 8 ---
# Key Areas in DevOps

- # Reduce Organizational Silos

Everyone shares the ownership of production and information is shared among everyone.
- # Accept Failure as Normal

Blameless PMs/ RCA. Risk taking mindset.
- # Implement Gradual Changes

Frequent deployments, frequent deterministic releases in small chunks which can be rolled back.
- # Leverage Tooling and Automation

Automate and reduce manual work as much as possible.
- # Measure Everything

Application, systems monitoring and metrics etc...


2. Split text into chunks 

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Step 4: Split the parsed text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
text_chunks = text_splitter.split_text(markdown_text)

# Display info about chunks
print(f"\n✅ Total Chunks Created: {len(text_chunks)}")



✅ Total Chunks Created: 22


In [4]:
# print the first two chunks
text_chunks[:24]

['# Intro to DevOps and Beyond\n\n# Ravindu Nirmal Fernando\n# About Me\n\n- STL - DevOps @ Sysco LABS - Sri Lanka\n- MSc in Computer Science specialized in Cloud Computing (UOM)\n- AWS Certified Solutions Architect - Professional\n- Certified Kubernetes Administrator (CKA)\n- AWS Community Builder\n\nRavindu Nirmal Fernando\n\nhttps://ravindunfernando.com\n# The Era before DevOps\n# Developers\n\nFocused on Agility\n\n# Operators\n\nFocused on Stability\n# Destructive downward spiral in IT\n\n# Act 01 - Operations teams',
 "# Act 01 - Operations teams\n\nmaintaining large fragile applications\n\nDoesn't have any visibility on the application, whether or not its working as expected\n\n# Act 02 - The product managers\n\nLarger, unrealistic commitments made to the outside world (client/investors) without understanding the complexities behind development and operations\n\n# Act 03 - The Developers\n\nDevelopers taking shortcuts and putting more and more fragile code on top of existing one

3. Embedding Chunks

In [5]:
# huggingface embeddings models lot of them available there
import torch
from sentence_transformers import SentenceTransformer

# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_name = "BAAI/bge-small-en-v1.5"
# model_name = "all-MiniLM-L6-v2"

embedding_model = SentenceTransformer(model_name, device=device)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
embeddings = embedding_model.encode(text_chunks, show_progress_bar=True)

Batches: 100%|██████████| 1/1 [00:13<00:00, 13.77s/it]


In [7]:
embeddings[0].shape # store the embeddings in a list dimension

(384,)

4. Store in the Vector Database

In [8]:
# Import client library
import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

# Load environment variables from .env file
load_dotenv()

# Access environment variables
qdrant_api_key = os.getenv("QDRANT_API_KEY")



# Connect to Qdrant using credentials from .env
client = QdrantClient(
    url="https://74fbf056-a412-4035-9c9b-b85d0055af43.us-west-1-0.aws.cloud.qdrant.io",
    api_key=qdrant_api_key,
)

try:
    client.get_collections()
    print("✅ Successfully connected to Qdrant and retrieved collections.")
except Exception as e:
    print(f"❌ Connection failed: {e}")


✅ Successfully connected to Qdrant and retrieved collections.


Delete Collection If already Created

In [9]:
# Delete the collection
client.delete_collection(collection_name="qa_index")

True

In [10]:
# embedding_model.get_sentence_embedding_dimension()
collection_name = "qa_index"
client.delete_collection(collection_name)

client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
    
)
# demention of vector is 384
# create a collection in qdrant
# metrix is cosine for semantic similarity
# if return true collection is created we can stroe vectors in it


True

5. Create payloads and ids

In [11]:
ids = [] # list of ids for each vectors we can use to retrieve the vectors text chunks
payload = [] # metadata for each vector we can use to retrieve the text chunks

for id, text in enumerate(text_chunks):
    ids.append(id)
    payload.append({"source": pdf_path, "content": text})

payload[0]

{'source': './data/Lecture1-a.pdf',
 'content': '# Intro to DevOps and Beyond\n\n# Ravindu Nirmal Fernando\n# About Me\n\n- STL - DevOps @ Sysco LABS - Sri Lanka\n- MSc in Computer Science specialized in Cloud Computing (UOM)\n- AWS Certified Solutions Architect - Professional\n- Certified Kubernetes Administrator (CKA)\n- AWS Community Builder\n\nRavindu Nirmal Fernando\n\nhttps://ravindunfernando.com\n# The Era before DevOps\n# Developers\n\nFocused on Agility\n\n# Operators\n\nFocused on Stability\n# Destructive downward spiral in IT\n\n# Act 01 - Operations teams'}

In [12]:
# store the vectors in qdrant 
client.upload_collection(
    collection_name=collection_name,
    vectors=embeddings,
    payload=payload,
    ids=ids,
    batch_size=256,  # How many vectors will be uploaded in a single request?
)

In [13]:
# count the number of vectors in the collection
client.count(collection_name)

CountResult(count=22)

In [127]:
# Check the distance metric in your Qdrant collection setup
client.get_collection(collection_name).config

CollectionConfig(params=CollectionParams(vectors=VectorParams(size=384, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantization_config=None, strict_mode_config=StrictModeConfigOutput(enabled=True, max_query_limit=None, max_timeout=None, unindexed_filtering_retrieve=False, unindexed_filtering_update=False, search_

6. Retrieval Component

In [14]:
def search(text: str, top_k: int):# search for the text in the collection
    query_embedding = embedding_model.encode(text).tolist()
    
    search_result = client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        query_filter=None,  
        limit=top_k
    )
    return search_result

In [15]:
# Retrieve the top 5 most similar vectors to the query
question = "what are the key areas in devops?"
results = search(question, top_k=5) # retrieve the top 5 most similar vectors to the query
results

  search_result = client.search(


[ScoredPoint(id=3, version=0, score=0.7886511, payload={'source': './data/Lecture1-a.pdf', 'content': 'DevOps allows evolving and improving products at a faster pace than businesses using traditional software development and infrastructure management processes. This speed allows businesses to serve their customers better and compete effectively.\n# Key Areas in DevOps\n\n- # Reduce Organizational Silos\n\nEveryone shares the ownership of production and information is shared among everyone.\n- # Accept Failure as Normal\n\nBlameless PMs/ RCA. Risk taking mindset.\n- # Implement Gradual Changes'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=2, version=0, score=0.7245391, payload={'source': './data/Lecture1-a.pdf', 'content': '# Act 04 - Dev and Ops at war\n\n"It worked on my machine" phenomenon\n# How can we overcome these issues?\n# What is DevOps?\n\n“DevOps is the combination of cultural philosophies, practices, and tools that increases an organization’s ability to

In [16]:
text_chunks[2]

'# Act 04 - Dev and Ops at war\n\n"It worked on my machine" phenomenon\n# How can we overcome these issues?\n# What is DevOps?\n\n“DevOps is the combination of cultural philosophies, practices, and tools that increases an organization’s ability to deliver applications and services at high velocity” - What is DevOps? [AWS]\n\n“A compound of development (Dev) and operations (Ops), DevOps is the union of people, process, and technology to continually provide value to customers.” - What is DevOps? [Azure]'

7. Response Generation

In [17]:
system_prompt = """You are an ai assistant for question-answering tasks. Answer the question according only to the given context.
If question cannot be answered using the context, simply say I don't know. Do not make stuff up.

Context: {context}
"""

user_prompt = """
Question: {question}

Answer:"""

references = [obj.payload["content"] for obj in results]


context = "\n\n".join(references)

8. Response with References

RAG (Retrieval-Augmented Generation) principles: discourage hallucinations, only answer from the given documents

In [18]:
import requests
import json

# 1. Format prompt
final_prompt = system_prompt.format(context=context) + user_prompt.format(question=question)

# 2. API URL for completion mode
api_url = "http://localhost:11434/api/generate"

# 3. Payload
payload = {
    "model": "gemma3:1b",
    "prompt": final_prompt,
    "stream": True,
    "temperature": 0.1
}

# 4. Send request
response = requests.post(api_url, json=payload, stream=True)

# 5. Read streamed chunks and build the answer
print("\n\nANSWER:\n")
full_answer = ""

if response.status_code == 200:
    for line in response.iter_lines():
        if line:
            data = line.decode('utf-8')
            chunk = json.loads(data)
            if 'response' in chunk:
                token = chunk['response']
                full_answer += token
                print(token, end='', flush=True)
else:
    print(f"Error: {response.status_code} - {response.text}")

# 6. After streaming is done, print references
print("\n\nREFERENCES:\n")
for index, ref in enumerate(references):
    cleaned_ref = ref.strip()
    if cleaned_ref:
        print(f"Reference [{index + 1}]: {cleaned_ref}\n")




ANSWER:

- Reduce Organizational Silos
- Accept Failure as Normal
- Implement Gradual Changes
- Leverage Tooling and Automation
- Measure Everything

REFERENCES:

Reference [1]: DevOps allows evolving and improving products at a faster pace than businesses using traditional software development and infrastructure management processes. This speed allows businesses to serve their customers better and compete effectively.
# Key Areas in DevOps

- # Reduce Organizational Silos

Everyone shares the ownership of production and information is shared among everyone.
- # Accept Failure as Normal

Blameless PMs/ RCA. Risk taking mindset.
- # Implement Gradual Changes

Reference [2]: # Act 04 - Dev and Ops at war

"It worked on my machine" phenomenon
# How can we overcome these issues?
# What is DevOps?

“DevOps is the combination of cultural philosophies, practices, and tools that increases an organization’s ability to deliver applications and services at high velocity” - What is DevOps? [AWS]