1. Read PDF

In [31]:
import nest_asyncio
from llama_parse import LlamaParse
import os
from getpass import getpass

# SECURITY: the LlamaCloud API key must never be written into the notebook.
# It is read from the LLAMA_CLOUD_API_KEY environment variable; if that is not
# set, prompt for it interactively so the secret is not saved with the notebook.
# Any key that was previously committed in this cell should be revoked.
if not os.environ.get("LLAMA_CLOUD_API_KEY"):
    os.environ["LLAMA_CLOUD_API_KEY"] = getpass("LLAMA_CLOUD_API_KEY: ")

# Allow async event loop nesting (required in Jupyter or notebooks)
nest_asyncio.apply()

# Step 1: Load and parse PDF using LlamaParse
parser = LlamaParse(result_type="markdown")  # or "text" if you want plain output
pdf_path = "./data/Lecture1-a.pdf"

print("Parsing PDF...")
llama_parse_documents = parser.load_data(pdf_path)
print("Parsing complete.")

# Step 2: Combine all parsed document texts into a single markdown string
markdown_text = "\n".join(doc.text for doc in llama_parse_documents)

# Count the number of documents returned by the parser
num_documents = len(llama_parse_documents)
print(f"Total number of documents: {num_documents}")


Parsing PDF...
Started parsing the file under job_id a0284efc-cb1f-4c34-8fa6-c5dda006b94c
Parsing complete.
Total number of documents: 25


In [32]:
# Display one parsed document, selected by its 1-based position.
# The lower bound matters: without it, doc_number = 0 would index [-1] and
# silently print the last document instead of reporting an invalid number.
doc_number = 8
if 1 <= doc_number <= len(llama_parse_documents):
    print(f"--- Document {doc_number} ---")
    print(llama_parse_documents[doc_number - 1].text)
else:
    print(f"Document {doc_number} does not exist. Total documents: {len(llama_parse_documents)}")


--- Document 8 ---
# Key Areas in DevOps

# Reduce Organizational Silos

Everyone shares the ownership of production and information is shared among everyone.

# Accept Failure as Normal

Blameless PMs/ RCA. Risk taking mindset.

# Implement Gradual Changes

Frequent deployments, frequent deterministic releases in small chunks which can be rolled back.

# Leverage Tooling and Automation

Automate and reduce manual work as much as possible.

# Measure Everything

Application, systems monitoring and metrics etc...


2. Split text into chunks 

In [33]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the combined markdown into chunks of at most 500 characters, with
# 100 characters shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
text_chunks = text_splitter.split_text(markdown_text)

# Report how many chunks the splitter produced
print(f"\n✅ Total Chunks Created: {len(text_chunks)}")



✅ Total Chunks Created: 22


In [10]:
# Preview the first two chunks. Because of the chunk overlap configured in the
# splitter, the tail of chunk 0 reappears at the head of chunk 1.
text_chunks[:2]

['# Intro to DevOps and Beyond\n\n# Ravindu Nirmal Fernando\n# About Me\n\n- STL - DevOps @ Sysco LABS - Sri Lanka\n- MSc in Computer Science specialized in Cloud Computing (UOM)\n- AWS Certified Solutions Architect - Professional\n- Certified Kubernetes Administrator (CKA)\n- AWS Community Builder\n\nRavindu Nirmal Fernando\n\nhttps://ravindunfernando.com\n# The Era before DevOps\n# Developers\n\nFocused on Agility\n\n# Operators\n\nFocused on Stability\n# Destructive downward spiral in IT\n\n# Act 01 - Operations teams',
 "# Operators\n\nFocused on Stability\n# Destructive downward spiral in IT\n\n# Act 01 - Operations teams\n\nmaintaining large fragile applications\n\nDoesn't have any visibility on the application, whether or not its working as expected\n\n# Act 02 - The product managers\n\nLarger, unrealistic commitments made to the outside world (client/investors) without understanding the complexities behind development and operations\n\n# Act 03 - The Developers"]

3. Embedding Chunks

In [34]:
# Sentence-embedding model from the Hugging Face hub (many others are available there)
import torch
from sentence_transformers import SentenceTransformer

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Alternative model that was also considered: "all-MiniLM-L6-v2"
model_name = "BAAI/bge-small-en-v1.5"

embedding_model = SentenceTransformer(model_name, device=device)

In [35]:
# Embed every text chunk in one call, showing a progress bar while encoding
embeddings = embedding_model.encode(
    text_chunks,
    show_progress_bar=True,
)

Batches: 100%|██████████| 1/1 [00:14<00:00, 14.69s/it]


In [36]:
embeddings[0].shape  # shape of a single chunk's embedding vector

(384,)

4. Store in the Vector Database

In [None]:
# Import client library
import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

# Load environment variables from .env file
load_dotenv()

# The API key is required; fail here with a clear message rather than later
# with an opaque authentication error from the server.
qdrant_api_key = os.getenv("QDRANT_API_KEY")
if not qdrant_api_key:
    raise RuntimeError(
        "QDRANT_API_KEY is not set. Add it to your .env file or environment."
    )

# The cluster URL can be overridden with QDRANT_URL; the default is the
# cluster this notebook was originally written against.
qdrant_url = os.getenv(
    "QDRANT_URL",
    "https://74fbf056-a412-4035-9c9b-b85d0055af43.us-west-1-0.aws.cloud.qdrant.io",
)

# Connect to Qdrant using credentials from .env
client = QdrantClient(
    url=qdrant_url,
    api_key=qdrant_api_key,
)

# Make one real request so the message below is only printed after the
# cluster has actually answered with these credentials.
client.get_collections()

print("✅ Connected to Qdrant")


✅ Connected to Qdrant


Delete the collection if it already exists

In [15]:
from qdrant_client import QdrantClient

# Remove the "qa_index" collection; the call's return value is shown as the cell output
client.delete_collection("qa_index")

True

In [14]:
collection_name = "qa_index"

# Drop any previous collection of the same name so this cell can be re-run.
client.delete_collection(collection_name)

# Create the collection that will hold the chunk embeddings.
# - The vector size is taken from the embedding model instead of a hard-coded
#   384, so switching models cannot leave the collection with the wrong size.
# - Cosine distance is used as the metric for semantic similarity.
# The call's return value is shown as the cell output.
client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(
        size=embedding_model.get_sentence_embedding_dimension(),
        distance=Distance.COSINE,
    ),
)


True

5. Create payloads and ids

In [42]:
# Point ids: the position of each chunk in text_chunks, so a retrieved point's
# id can be mapped straight back to text_chunks[id].
# Built without a module-level loop variable named `id`, which would shadow
# Python's builtin id() for the rest of the notebook session.
ids = list(range(len(text_chunks)))

# Payload (metadata) stored alongside each vector: the source file and the
# chunk text itself, so search results carry the text that was matched.
payload = [{"source": pdf_path, "content": chunk_text} for chunk_text in text_chunks]

payload[0]

{'source': './data/Lecture1-a.pdf',
 'content': '# Intro to DevOps and Beyond\n\n# Ravindu Nirmal Fernando\n# About Me\n\n- STL - DevOps @ Sysco LABS - Sri Lanka\n- MSc in Computer Science specialized in Cloud Computing (UOM)\n- AWS Certified Solutions Architect - Professional\n- Certified Kubernetes Administrator (CKA)\n- AWS Community Builder\n\nRavindu Nirmal Fernando\n\nhttps://ravindunfernando.com\n# The Era before DevOps\n# Developers\n\nFocused on Agility\n\n# Operators\n\nFocused on Stability\n# Destructive downward spiral in IT\n\n# Act 01 - Operations teams'}

In [43]:
# Store the vectors, their payloads and ids in the Qdrant collection.
# wait=True makes each upload request wait for the server to apply it, so the
# count in the next cell reflects the full upload rather than a partial one.
client.upload_collection(
    collection_name=collection_name,
    vectors=embeddings,
    payload=payload,
    ids=ids,
    batch_size=256,  # How many vectors will be uploaded in a single request?
    wait=True,
)

In [44]:
# How many points does the collection hold now?
client.count(collection_name=collection_name)

CountResult(count=22)

6. Retrieval Component

In [45]:
def search(text: str, top_k: int = 5):
    """Return the points in the collection most similar to ``text``.

    The query text is embedded with the notebook's ``embedding_model`` and the
    resulting vector is used for a nearest-neighbour query against
    ``collection_name`` through the notebook's Qdrant ``client``.

    Args:
        text: The query string to search for.
        top_k: Maximum number of points to return (defaults to 5).

    Returns:
        A list of scored points, best match first; each point's ``payload``
        holds the data stored with the vector at upload time.
    """
    query_embedding = embedding_model.encode(text).tolist()

    # `client.search` is deprecated in current qdrant-client releases and emits
    # a warning on every call; `query_points` is its replacement. The `.points`
    # attribute of the response is the list that `search` used to return.
    response = client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        query_filter=None,
        limit=top_k,
    )
    return response.points

In [56]:
# Ask a question and fetch the five chunks whose embeddings are closest to it
question = "Key Areas in DevOps?"
results = search(text=question, top_k=5)
results

  search_result = client.search(


[ScoredPoint(id=2, version=0, score=0.73754996, payload={'source': './data/Lecture1-a.pdf', 'content': '# Act 03 - The Developers\n\nDevelopers taking shortcuts and putting more and more fragile code on top of existing ones\n\n# Act 04 - Dev and Ops at war\n\n"It worked on my machine" phenomenon\n# How can we overcome these issues?\n# What is DevOps?\n\n“DevOps is the combination of cultural philosophies, practices, and tools that increases an organization’s ability to deliver applications and services at high velocity” - AWS'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=5, version=0, score=0.7245599, payload={'source': './data/Lecture1-a.pdf', 'content': 'Automate and reduce manual work as much as possible.\n\n# Measure Everything\n\nApplication, systems monitoring and metrics etc...\nDevOps\nPractices\n# Continuous Integration (CI)\n\nSoftware development practice where developers regularly merge their code changes into a central repository, after which automated

In [53]:
# Cross-check: the best-scoring hit has id=2 and ids were assigned from chunk
# positions, so this chunk should match that hit's payload content.
text_chunks[2]

'# Act 03 - The Developers\n\nDevelopers taking shortcuts and putting more and more fragile code on top of existing ones\n\n# Act 04 - Dev and Ops at war\n\n"It worked on my machine" phenomenon\n# How can we overcome these issues?\n# What is DevOps?\n\n“DevOps is the combination of cultural philosophies, practices, and tools that increases an organization’s ability to deliver applications and services at high velocity” - AWS'

7. Response Generation

In [57]:
# System-side template: instructs the model to answer only from the supplied
# context; the {context} placeholder is filled in when the prompt is built.
system_prompt = """You are an ai assistant for question-answering tasks. Answer the question according only to the given context.
If question cannot be answered using the context, simply say I don't know. Do not make stuff up.

Context: {context}
"""

# User-side template: carries the question and leaves the answer open.
user_prompt = """
Question: {question}

Answer:"""

# Text of every retrieved chunk, in the order the search returned them
references = [hit.payload["content"] for hit in results]

# All retrieved chunks concatenated, separated by a blank line
context = "\n\n".join(references)

8. Response with References

RAG (Retrieval-Augmented Generation) principles: discourage hallucinations, only answer from the given documents

In [62]:
import requests
import json

# 1. Format prompt: system instructions + retrieved context, then the question
final_prompt = system_prompt.format(context=context) + user_prompt.format(question=question)

# 2. API URL for completion mode (local Ollama server)
api_url = "http://localhost:11434/api/generate"

# 3. Request body.
#    - Named `generation_request` rather than `payload`, so the Qdrant payload
#      list built earlier in this notebook is not overwritten (re-running the
#      upload cell afterwards would otherwise send this dict instead).
#    - Sampling parameters such as temperature belong under "options"; a
#      top-level "temperature" key is not a field of /api/generate.
generation_request = {
    "model": "gemma3:1b",
    "prompt": final_prompt,
    "stream": True,
    "options": {"temperature": 0.1},
}

# 4. Send the request and 5. read streamed chunks while building the answer
print("\n\nANSWER:\n")
full_answer = ""

try:
    # The context manager closes the streamed connection when done. The timeout
    # is (connect, read) in seconds, so a missing server fails fast while a slow
    # model still has time to produce tokens.
    with requests.post(
        api_url, json=generation_request, stream=True, timeout=(10, 300)
    ) as response:
        if response.status_code == 200:
            # The body is newline-delimited JSON: one object per line
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line.decode('utf-8'))
                if 'error' in chunk:
                    # The server can report a failure part-way through the stream
                    print(f"\nError: {chunk['error']}")
                    break
                if 'response' in chunk:
                    token = chunk['response']
                    full_answer += token
                    print(token, end='', flush=True)
        else:
            print(f"Error: {response.status_code} - {response.text}")
except requests.exceptions.RequestException as exc:
    # Covers connection refused, timeouts, and other transport failures
    print(f"Error: request to {api_url} failed - {exc}")

# 6. After streaming is done, print the chunks the answer was grounded on
print("\n\nREFERENCES:\n")
for index, ref in enumerate(references):
    cleaned_ref = ref.strip()
    if cleaned_ref:
        print(f"Reference [{index + 1}]: {cleaned_ref}\n")




ANSWER:

# Key Areas in DevOps

# Reduce Organizational Silos
# Cloud Infrastructure
# Continuous Monitoring, Logging and Alerting
# Continuous Delivery (CD)
# DevOps Tools and Technologies


REFERENCES:

Reference [1]: # Act 03 - The Developers

Developers taking shortcuts and putting more and more fragile code on top of existing ones

# Act 04 - Dev and Ops at war

"It worked on my machine" phenomenon
# How can we overcome these issues?
# What is DevOps?

“DevOps is the combination of cultural philosophies, practices, and tools that increases an organization’s ability to deliver applications and services at high velocity” - AWS

Reference [2]: Automate and reduce manual work as much as possible.

# Measure Everything

Application, systems monitoring and metrics etc...
DevOps
Practices
# Continuous Integration (CI)

Software development practice where developers regularly merge their code changes into a central repository, after which automated builds and tests are run.

# Continuou