1. Read PDF

In [1]:
from pypdf import PdfReader
import os

FILE_PATH = os.path.join("data","Lecture1-a.pdf")
reader = PdfReader(FILE_PATH)
number_of_pages = len(reader.pages)

entire_text = ""
for page_num in range(number_of_pages):
    page = reader.pages[page_num]
    entire_text += page.extract_text()

entire_text[:200]

'Intro to DevOps and Beyond\nRavindu Nirmal FernandoAbout Me\n• STL - DevOps @ Sysco LABS - Sri Lanka\n• MSc in Computer Science specialized in \nCloud Computing (UOM)\n• AWS Certified Solutions Architect -'

2. Split text into chunks 

In [2]:
# split text based on number of characters in the text
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

In [3]:
text_chunks = text_splitter.split_text(entire_text)
print(f"Total chunks: {len(text_chunks)}")

Total chunks: 13


In [4]:
# print the first two chunks
text_chunks[:2]

['Intro to DevOps and Beyond\nRavindu Nirmal FernandoAbout Me\n• STL - DevOps @ Sysco LABS - Sri Lanka\n• MSc in Computer Science specialized in \nCloud Computing (UOM)\n• AWS Certified Solutions Architect - \nProfessional \n• Certified Kubernetes Administrator \n(CKA)\n• AWS Community Builder\nRavindu Nirmal Fernando\nhttps://ravindunfernando.com\nThe Era before \nDevOpsDevelopers\nFocused on Agility\nOperators\nFocused on StabilityAct 01 - Operations teams \nmaintaining large fragile \napplications',
 'maintaining large fragile \napplications\nDoesn\'t have any visibility on the \napplication, whether or not its \nworking as expected\nAct 03 - The Developers\nDevelopers taking shortcuts and \nputting more and more fragile \ncode on top of existing ones \nAct 02 - The product \nmanagers\nLarger, unrealistic commitments \nmade to the outside world (client/ \ninvestors) without understanding \nthe complexities behind \ndevelopment and operations\nAct 04 - Dev and Ops at war\n"It worked 

3. Embedding Chunks

In [5]:
# huggingface embeddings models lot of them available there
import torch
from sentence_transformers import SentenceTransformer

# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_name = "BAAI/bge-small-en-v1.5"
# model_name = "all-MiniLM-L6-v2"

embedding_model = SentenceTransformer(model_name, device=device)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
embeddings = embedding_model.encode(text_chunks, show_progress_bar=True)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches: 100%|██████████| 1/1 [00:07<00:00,  7.50s/it]


In [7]:
embeddings[0].shape
# store the embeddings in a list dimension

(384,)

4. Store in the Vector Database

In [8]:
# Import client library
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

client = QdrantClient("http://localhost:6333")

In [9]:
from qdrant_client import QdrantClient

# Connect to Qdrant inside Docker (default port 6333)
client = QdrantClient(host="localhost", port=6333)

# Delete the collection
client.delete_collection(collection_name="qa_index")

True

In [10]:
# embedding_model.get_sentence_embedding_dimension()
collection_name = "qa_index"
client.delete_collection(collection_name)

client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
    
)
# demention of vector is 384
# create a collection in qdrant
# metrix is cosine for semantic similarity
# if return true collection is created we can stroe vectors in it


True

5. Create payloads and ids

In [11]:
ids = [] # list of ids for each vectors we can use to retrieve the vectors text chunks
payload = [] # metadata for each vector we can use to retrieve the text chunks

for id, text in enumerate(text_chunks):
    ids.append(id)
    payload.append({"source": FILE_PATH, "content": text})

payload[0]

{'source': 'data\\Lecture1-a.pdf',
 'content': 'Intro to DevOps and Beyond\nRavindu Nirmal FernandoAbout Me\n• STL - DevOps @ Sysco LABS - Sri Lanka\n• MSc in Computer Science specialized in \nCloud Computing (UOM)\n• AWS Certified Solutions Architect - \nProfessional \n• Certified Kubernetes Administrator \n(CKA)\n• AWS Community Builder\nRavindu Nirmal Fernando\nhttps://ravindunfernando.com\nThe Era before \nDevOpsDevelopers\nFocused on Agility\nOperators\nFocused on StabilityAct 01 - Operations teams \nmaintaining large fragile \napplications'}

In [12]:
# store the vectors in qdrant 
client.upload_collection(
    collection_name=collection_name,
    vectors=embeddings,
    payload=payload,
    ids=ids,
    batch_size=256,  # How many vectors will be uploaded in a single request?
)

In [13]:
# count the number of vectors in the collection
client.count(collection_name)

CountResult(count=13)

6. Retrieval Component

In [14]:
def search(text: str, top_k: int):# search for the text in the collection
    query_embedding = embedding_model.encode(text).tolist()
    
    search_result = client.search(
        collection_name=collection_name,
        query_vector=query_embedding,
        query_filter=None,  
        limit=top_k
    )
    return search_result

In [22]:
# Retrieve the top 5 most similar vectors to the query
question = "who is lecturer?"
results = search(question, top_k=5) # retrieve the top 5 most similar vectors to the query
results

  search_result = client.search(


[ScoredPoint(id=12, version=0, score=0.5374555, payload={'source': 'data\\Lecture1-a.pdf', 'content': 'https://www.linkedin.com/in/ravindufernando/ \nLinkedIn\n@ravindunf'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=0, version=0, score=0.49508512, payload={'source': 'data\\Lecture1-a.pdf', 'content': 'Intro to DevOps and Beyond\nRavindu Nirmal FernandoAbout Me\n• STL - DevOps @ Sysco LABS - Sri Lanka\n• MSc in Computer Science specialized in \nCloud Computing (UOM)\n• AWS Certified Solutions Architect - \nProfessional \n• Certified Kubernetes Administrator \n(CKA)\n• AWS Community Builder\nRavindu Nirmal Fernando\nhttps://ravindunfernando.com\nThe Era before \nDevOpsDevelopers\nFocused on Agility\nOperators\nFocused on StabilityAct 01 - Operations teams \nmaintaining large fragile \napplications'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=11, version=0, score=0.45575845, payload={'source': 'data\\Lecture1-a.pdf', 'content': 'DevOps \nEnginee

In [16]:
text_chunks[2]

'"It worked on my machine" \nphenomenon \n"Destructive downward spiral in IT" - Gene Kim\nHow can we \novercome \nthese issues?“DevOps is the combination of cultural philosophies, practices, and tools \nthat increases an organization’s ability to deliver applications and services \nat high velocity”\n- What is DevOps? [AWS] -\n“A compound of development (Dev) and operations (Ops), DevOps is the \nunion of people, process, and technology to continually provide value to \ncustomers.”'

7. Response Generation

In [23]:
system_prompt = """You are an assistant for question-answering tasks. Answer the question according only to the given context.
If question cannot be answered using the context, simply say I don't know. Do not make stuff up.

Context: {context}
"""

user_prompt = """
Question: {question}

Answer:"""

references = [obj.payload["content"] for obj in results]


context = "\n\n".join(references)

In [24]:
import requests
import json

# 1. Format system and user messages
system_message = system_prompt.format(context=context)
user_message = user_prompt.format(question=question)

# 2. API URL
api_url = "http://localhost:11434/api/chat"

# 3. Payload
payload = {
    "model": "gemma3:1b",
    "messages": [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message}
    ],
    "stream": True   # <-- Tell Ollama to stream
}

# 4. Send request with streaming
response = requests.post(api_url, json=payload, stream=True)

# 5. Read the response stream
if response.status_code == 200:
    for line in response.iter_lines():
        if line:
            data = line.decode('utf-8')
            chunk = json.loads(data)
            if 'message' in chunk and 'content' in chunk['message']:
                print(chunk['message']['content'], end='', flush=True)
else:
    print(f"Error: {response.status_code} - {response.text}")


Ravindu Nirmal Fernando

8. Response with References

In [25]:
import requests
import json

# 1. Format system and user messages
system_message = system_prompt.format(context=context)
user_message = user_prompt.format(question=question)

# 2. API URL
api_url = "http://localhost:11434/api/chat"

# 3. Payload
payload = {
    "model": "gemma3:1b",
    "messages": [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message}
    ],
    "stream": True   # Stream the response
}

# 4. Send request
response = requests.post(api_url, json=payload, stream=True)

# 5. Read streamed chunks and build the answer
full_answer = ""

if response.status_code == 200:
    for line in response.iter_lines():
        if line:
            data = line.decode('utf-8')
            chunk = json.loads(data)
            if 'message' in chunk and 'content' in chunk['message']:
                token = chunk['message']['content']
                full_answer += token
                print(token, end='', flush=True)  # Live typing effect
else:
    print(f"Error: {response.status_code} - {response.text}")

# 6. After streaming is done, print references
print("\n\nREFERENCES:\n")
for index, ref in enumerate(references):
    cleaned_ref = ref.strip()  # Remove extra spaces
    if cleaned_ref:  # Skip any empty references
        print(f"Reference [{index + 1}]: {cleaned_ref}\n")


Ravindu Nirmal Fernando

REFERENCES:

Reference [1]: https://www.linkedin.com/in/ravindufernando/ 
LinkedIn
@ravindunf

Reference [2]: Intro to DevOps and Beyond
Ravindu Nirmal FernandoAbout Me
• STL - DevOps @ Sysco LABS - Sri Lanka
• MSc in Computer Science specialized in 
Cloud Computing (UOM)
• AWS Certified Solutions Architect - 
Professional 
• Certified Kubernetes Administrator 
(CKA)
• AWS Community Builder
Ravindu Nirmal Fernando
https://ravindunfernando.com
The Era before 
DevOpsDevelopers
Focused on Agility
Operators
Focused on StabilityAct 01 - Operations teams 
maintaining large fragile 
applications

Reference [3]: DevOps 
Engineer
CI/ CD Management & Automation
Writing Specifications and 
Documentation
Infrastructure Management
Cloud Deployment and 
Management
Performance Assessment and 
Monitoring
DevOps Engineer Role
Assisting with DevOps culture 
apdotion References
• https://sre.google/sre-book/table-of-contents/
• https://www.gartner.com/en/articles/what-is-platform