### Imports

In [1]:
import os
import pdfplumber
import openai
from qdrant_client.http.models import Distance, VectorParams
from qdrant_client.http.models import PointStruct
from qdrant_client import QdrantClient
from qdrant_client.http import models
from dotenv import load_dotenv

# Load variables from the local .env file into the process environment; the
# Qdrant connection cell reads QDRANT_URL and QDRANT_KEY via os.getenv.
# NOTE(review): presumably the OpenAI API key is supplied the same way — confirm.
load_dotenv('./.env')

True

### Parse document

In [2]:
# Extract the text of every page of the resume and combine it into one string.
with pdfplumber.open("resume-2024.pdf") as pdf:
    # `or ""` guards against pages for which extract_text() yields nothing
    # (e.g. image-only pages), so one text-less page cannot break the run.
    page_texts = [page.extract_text() or "" for page in pdf.pages]

# Join with a newline so the last word of one page is not fused with the
# first word of the next page.
fulltext = "\n".join(page_texts)

In [3]:
def split_into_chunks(text, max_chars=500):
    """Split ``text`` into chunks of at most ``max_chars`` characters.

    Each chunk ends at the last period found within the first ``max_chars``
    characters of the remaining text, and the period stays with its sentence.
    If that window contains no period, the text is cut exactly at
    ``max_chars`` and no character is discarded.

    Args:
        text: The full document text.
        max_chars: Maximum chunk length in characters; must be positive.

    Returns:
        A list of non-blank chunks in document order. Concatenating the
        unfiltered pieces reproduces ``text`` exactly; whitespace-only pieces
        are dropped because they carry nothing worth embedding.

    Raises:
        ValueError: If ``max_chars`` is not positive.
    """
    if max_chars <= 0:
        raise ValueError("max_chars must be positive")

    pieces = []
    remaining = text
    while len(remaining) > max_chars:
        period_index = remaining.rfind('.', 0, max_chars)
        # Cut just after the period so it is kept; with no period in the
        # window, fall back to a hard cut at max_chars.
        cut = max_chars if period_index == -1 else period_index + 1
        pieces.append(remaining[:cut])
        remaining = remaining[cut:]
    pieces.append(remaining)

    return [piece for piece in pieces if piece.strip()]


chunks = split_into_chunks(fulltext)

In [4]:
# Spot-check the second chunk to see how the text was split.
chunks[1]

"\nFour years ago, in a recent shift to machine learning, I deepened myAIskillsetanddeployedrobustMLmodelsinproduction\nenvironments. Eager to leverage this well-rounded experience, I seek a full-time data scientist or machine learning engineer\nrole.I'mexcitedtocombinemyvastexperiencebyaddingvaluetoanewjourney"

### Qdrant connection

In [5]:
# Connection settings: endpoint and API key are read from the environment,
# the port is fixed in the notebook.
port = 6333
url, api_key = os.getenv("QDRANT_URL"), os.getenv("QDRANT_KEY")

qdrant_client = QdrantClient(url=url, port=port, api_key=api_key)

# The "demo" collection is configured for 1536-dimensional vectors compared
# with cosine distance.
vector_config = models.VectorParams(size=1536, distance=models.Distance.COSINE)
qdrant_client.recreate_collection(collection_name="demo", vectors_config=vector_config)

True

### Generate embeddings

In [6]:
# Embed every chunk and wrap it in a Qdrant point. The chunk text is stored
# in the payload so it can be returned as context at query time.
points = []

# Blank chunks are skipped: they add nothing to the index and an empty string
# is not a valid embeddings input.
embeddable_chunks = (chunk for chunk in chunks if chunk.strip())

# IDs are sequential and start at 1.
for point_id, chunk in enumerate(embeddable_chunks, start=1):
    embedding = openai.embeddings.create(
        input=chunk,
        model="text-embedding-3-small"
    ).data[0].embedding

    points.append(PointStruct(id=point_id, vector=embedding, payload={"text": chunk}))

### Index the embeddings

In [7]:
# Write all prepared points into the "demo" collection; wait=True is passed
# so the call returns only once the operation has been applied.
operation_info = qdrant_client.upsert(collection_name="demo", points=points, wait=True)

### Query index

In [8]:
def create_answer_with_context(
    query,
    limit=3,
    collection_name="demo",
    embedding_model="text-embedding-3-small",
    chat_model="gpt-4-0125-preview",
):
    """Answer ``query`` using the most similar indexed chunks as context.

    The query is embedded, the closest stored chunks are fetched from Qdrant,
    and their text is placed in a prompt that is sent to the chat model.

    Args:
        query: The question to answer.
        limit: Maximum number of chunks to retrieve as context.
        collection_name: Qdrant collection to search.
        embedding_model: OpenAI embedding model used for the query; it should
            be the same model that produced the indexed vectors.
        chat_model: OpenAI chat model that writes the answer.

    Returns:
        The message content of the first completion choice.

    Raises:
        ValueError: If ``query`` is empty or contains only whitespace.
    """
    # Fail fast locally instead of sending an empty input to the API.
    if not query or not query.strip():
        raise ValueError("query must be a non-empty string")

    query_vector = openai.embeddings.create(
        input=query,
        model=embedding_model
    ).data[0].embedding

    search_result = qdrant_client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        limit=limit
    )

    prompt = """You are a helpful HR assistant who answers 
                questions in brief based on the context below.
                Context:\n"""
    # Each retrieved chunk is followed by a separator line.
    prompt += "".join(hit.payload['text'] + "\n---\n" for hit in search_result)
    prompt += "Question:" + query + "\n---\n" + "Answer:"

    completion = openai.chat.completions.create(
        model=chat_model,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )

    return completion.choices[0].message.content

In [9]:
# The question is held in `question` rather than `input`, so the built-in
# input() is not shadowed for the rest of the kernel session.
question = "How many years of experience does Daniel have?"
answer = create_answer_with_context(question)
print(answer)

More than two decades.


In [10]:
# The question is held in `question` rather than `input`, so the built-in
# input() is not shadowed for the rest of the kernel session.
question = "When did Daniel shift to Machine Learning?"
answer = create_answer_with_context(question)
print(answer)

Four years ago.
