In [2]:
%pip install qdrant-client langchain-openai langchain-qdrant python-dotenv


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
import numpy as np
import pandas as pd
import getpass
import os
import hashlib

from uuid import uuid4

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from qdrant_client import QdrantClient
from langchain_qdrant import QdrantVectorStore
from qdrant_client.http.models import Distance, VectorParams
from dotenv import load_dotenv

In [4]:
load_dotenv()

if os.getenv("OPENAI_API_KEY"):
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
else:
    OPENAI_API_KEY = getpass.getpass("Enter key for OpenAI key: ")

if os.getenv("QDRANT_API_KEY"):
    QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
else:
    QDRANT_API_KEY = getpass.getpass("QDRANT_API_KEY")

if os.getenv("QDRANT_ENDPOINT"):
    QDRANT_ENDPOINT = os.getenv("QDRANT_ENDPOINT")
else:
    QDRANT_ENDPOINT = getpass.getpass("QDRANT_ENDPOINT")

In [5]:
qdrant_client = QdrantClient(
    api_key=QDRANT_API_KEY,
    url=QDRANT_ENDPOINT
)

embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.3
)

## Retrieve Data 

In [6]:
INPUT_FILE = "./data/jobs.jsonl"

df = pd.read_json(INPUT_FILE, lines=True)
df.head()

Unnamed: 0,job_title,company_name,location,work_type,salary,job_description,_scrape_timestamp
0,Data Analyst,PT Matahari Department Store Tbk,"Karawaci, Banten",Full time,,"Responsibilities:\nCollect, manage, and analyz...",2025-11-24T11:33:16.277561
1,Data Analyst Intern,"PT Surya Semesta Internusa, Tbk","Jakarta Selatan, Jakarta Raya",Paruh waktu,,Job Description\nSupport in conducting thoroug...,2025-11-24T11:33:32.629908
2,Data Analyst Supervisor,PT Pangan Lestari,"Surabaya, Jawa Timur",Full time,,Tugas dan Tanggung Jawab:\nMembuat dan mengemb...,2025-11-24T11:33:48.982129
3,Data Analyst,PT 8 ELEMENTS,"Jakarta Barat, Jakarta Raya",Full time,,Job Description\nData Analysis and Interpretat...,2025-11-24T11:34:05.465459
4,Data Analyst,PT Quadra Inti Solusi,"Bogor, Jawa Barat",Full time,,"Kualifikasi:\nPendidikan minimal S1 Statistik,...",2025-11-24T11:34:22.379464


In [16]:
documents = []

for index, row in df.iterrows():

    job_title = str(row.get('job_title', ''))
    company_name = str(row.get('company_name', ''))
    job_location = str(row.get('location', '')).replace('\n', ' ')
    job_salary = str(row.get('salary', '')) if row.get('salary') != "None" else "Tidak Ditampilkan"
    job_type = str(row.get('work_type', ''))

    raw_desc = str(row.get('job_description', ''))
    clean_desc = " ".join(raw_desc.split())

    page_content_template = f"""
        Job: {job_title}
        Company: {company_name}
        Location: {job_location}
        Salary: {job_salary}
        Work Type: {job_type}
        Job Description: {clean_desc}
    """.strip()

    metadata_template = {
        "job_title": job_title,
        "company_name": company_name,
        "location": job_location,
        "salary": job_salary,
        "work_type": job_type,
    }
    
    unique_identifier = f"{job_title}_{company_name}_{job_location}".lower().encode('utf-8')
    doc_id = hashlib.md5(unique_identifier).hexdigest()

    doc = Document(
        page_content=page_content_template,
        metadata=metadata_template,
        id=doc_id
    )

    documents.append(doc)


print(f"Total Dokumen: {len(documents)}")
print("\nContoh Dokumen Pertama:")
print(documents[0].page_content)
print("\nMetadata:")
print(documents[0].metadata)
print("\nID:")
print(documents[0].id)

Total Dokumen: 473

Contoh Dokumen Pertama:
Job: Data Analyst
        Company: PT Matahari Department Store Tbk
        Location: Karawaci, Banten
        Salary: Tidak Ditampilkan
        Work Type: Full time
        Job Description: Responsibilities: Collect, manage, and analyze large datasets related to store performance and operational efficiency. Support ongoing business and improvement projects by providing data insights, reports, and dashboards. Collaborate with cross-functional teams (Operations, Merchandising, and Finance) to ensure accuracy and consistency of business data. Assist in identifying trends, issues, and opportunities based on data analysis to support decision-making. Develop and improve data tracking systems or templates to increase reporting efficiency. Requirements : Bachelor’s degree in Business, Statistics, Industrial Engineering, or a related field. 1–2 years of experience in data analysis, business operations, or project support. Strong analytical and numeri

## Store Documents to Qdrant

In [17]:
qdrant = QdrantVectorStore.from_documents(
    documents,
    embedding_model,
    url=QDRANT_ENDPOINT,
    prefer_grpc=True,
    api_key=QDRANT_API_KEY,
    collection_name="Jobs_Documents"
)   

collection_response = qdrant_client.get_collections()
print(f"Collections: {collection_response.collections}")

Collections: [CollectionDescription(name='autoness-collection'), CollectionDescription(name='Jobs_Documents'), CollectionDescription(name='capstone')]


In [18]:
qdrant_query = QdrantVectorStore.from_existing_collection(
    "Jobs_Documents",
    embedding_model,
    url=QDRANT_ENDPOINT,
    api_key=QDRANT_API_KEY
)

In [19]:
results = qdrant_query.similarity_search_with_score(
    "Pekerjaan Remote di Jakarta",
    k=3
)

results

[(Document(metadata={'work_type': 'Full time', 'salary': 'Tidak Ditampilkan', 'company_name': 'Savant Degrees Pte Ltd', 'job_title': 'Senior Software Developer', 'location': 'Jakarta Raya (Hibrid)', '_id': '8820eb8c-7cbc-fa24-8aba-64e7974c7bdb', '_collection_name': 'Jobs_Documents'}, page_content='Job: Senior Software Developer\n        Company: Savant Degrees Pte Ltd\n        Location: Jakarta Raya (Hibrid)\n        Salary: Tidak Ditampilkan\n        Work Type: Full time\n        Job Description: We’re looking for a passionate and technically talented individual to join us as a software developer in Indonesia. You will be working in an hybrid-remote working environment. The ideal candidate focuses on building great products with delightful user experiences. You’ll be expected to be super organized, proactive and have a strong point of view on how digital products should be made. Skills & Experience Needed Minimum 8 - 10 years of current, hands-on experience in software development Bac