In [152]:
# pip install -qU langchain-text-splitters
# pip install qdrant-client
# pip install python-dotenv
# pip install sentence-transformers
# pip install qdrant-client
# pip install -qU langchain_community pypdf

import torch

from dotenv import load_dotenv
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance
from litellm import completion
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader

load_dotenv()

DATA_PATH = "data"
EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"
COLLECTION_NAME = "qa_index"

LLM_MODEL = "ollama/llama3.2"
API_BASE = "http://localhost:11434"


SYSTEM_PROMPT = """You are an assistant for question-answering tasks. Answer the question according only to the given context.
If question cannot be answered using the context, simply say I don't know. Do not make stuff up.
Context: {context}

"""

USER_PROMPT = """
Question: Based on the provided documents {question} If the information is not provided on the documents then answer I don't know.

Answer:"""

QUESTION = "Which are the 12 Factors on the 12 Factor App Framework?"


In [153]:
# 1. Load all PDFs from the 'data' folder
loader = DirectoryLoader(
    path=DATA_PATH,
    glob="*.pdf",
    loader_cls=PyPDFLoader
)
documents = loader.load()

# 2. Split documents into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
text_chunks = text_splitter.split_documents(documents)

In [154]:
# 3. Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 4. Initialize the embedding model
embedding_model = SentenceTransformer(EMBEDDING_MODEL, device=device)

# 5. Encode the page_content from each chunk
#    (rather than passing the entire Document object)
string_chunks = [chunk.page_content for chunk in text_chunks]
embeddings = embedding_model.encode(string_chunks, show_progress_bar=True)

# 6. Inspect the shape of the first embedding
print("Shape of the first embedding vector:", embeddings[0].shape)

Using device: cpu


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Shape of the first embedding vector: (384,)


docker pull qdrant/qdrant

docker run -p 6333:6333 -v $(pwd)/qdrant_storage:/qdrant/storage qdrant/qdrant

In [155]:
# 3. Upsert to Qdrant
client = QdrantClient("http://localhost:6333")
client.delete_collection(COLLECTION_NAME)

client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)

True

In [156]:
ids = []
payload = []

for i, chunk in enumerate(text_chunks):
    ids.append(i)
    payload.append({
        "source": chunk.metadata,
        "content": chunk.page_content
    })

In [157]:
client.upload_collection(
    collection_name=COLLECTION_NAME,
    vectors=embeddings,
    payload=payload,
    ids=ids,
    batch_size=256,
)

client.count(COLLECTION_NAME)

CountResult(count=40)

In [158]:
def search(text: str, top_k: int):
    query_embedding = embedding_model.encode(text).tolist()

    search_result = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_embedding,
        query_filter=None,
        limit=top_k
    )
    return search_result

In [159]:
results = search(QUESTION, top_k=5)
#results

  search_result = client.search(


In [160]:
# Retrieve the actual strings from Qdrant
references = [obj.payload["content"] for obj in results]

# 'references' is now a list of strings
context = "\n\n".join(references)

In [161]:
response = completion(
    model=LLM_MODEL,
    messages=[{"content": SYSTEM_PROMPT.format(context=context),"role": "system"}, {"content": USER_PROMPT.format(question=QUESTION),"role": "user"}],
    api_base=API_BASE,
    stream=True
)

for chunk in response:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")

The 12 factors of the 12-Factor App framework, as described in the provided documentation, are:

1. Codebase
2. Dependencies
3. Config
4. Backing Services
5. Build, Release, Run
6. Processes
7. Port Binding
8. Concurrency
9. Disposability
10. Dev/Prod Parity
11. Logs
12. Admin Processes

Note that the list provided in the document is not exactly as mentioned in your query but, it seems that this is what was provided by you