----------------------------------------------------------------------------------------------------

In [1]:
import pandas as pd

df = pd.read_csv("Dataset/all_items_test.csv")
df.head()

Unnamed: 0,item_code,item_name,type_name,group_name
0,D10002,js/ ARIMIDEX 1MG TAB,Drugs,Chemotherapy(BPJS)
1,D10003,js/ CASODEX 50MG TAB,Drugs,Chemotherapy(BPJS)
2,D10005,js/ DORNER 20MCG TAB,Drugs,Chronic(BPJS)
3,D10007,js/ FERRIPROX FCT 500MG TAB,Drugs,Other(BPJS)
4,D10008,js/ GLIVEC 100MG TAB,Drugs,Chemotherapy(BPJS)


In [31]:
# Menghapus "/js" dari kolom item_name
df['item_name'] = df['item_name'].str.replace('/js', '', regex=False)

# Filter data dengan type_name = 'Drugs'
filtered_df = df[df['type_name'] == 'Drugs']

# Ekspor data yang telah difilter dan diubah ke file CSV baru
filtered_df.to_csv("Dataset/drugs_items_filtered.csv", index=False)

# Menampilkan 5 baris pertama dari hasil filter untuk verifikasi
print(filtered_df.head())

  item_code                    item_name type_name          group_name
0    D10002         js/ ARIMIDEX 1MG TAB     Drugs  Chemotherapy(BPJS)
1    D10003         js/ CASODEX 50MG TAB     Drugs  Chemotherapy(BPJS)
2    D10005         js/ DORNER 20MCG TAB     Drugs       Chronic(BPJS)
3    D10007  js/ FERRIPROX FCT 500MG TAB     Drugs         Other(BPJS)
4    D10008         js/ GLIVEC 100MG TAB     Drugs  Chemotherapy(BPJS)


In [32]:
import pandas as pd

df = pd.read_csv("Dataset/drugs_items_filtered.csv")
df.head()

Unnamed: 0,item_code,item_name,type_name,group_name
0,D10002,js/ ARIMIDEX 1MG TAB,Drugs,Chemotherapy(BPJS)
1,D10003,js/ CASODEX 50MG TAB,Drugs,Chemotherapy(BPJS)
2,D10005,js/ DORNER 20MCG TAB,Drugs,Chronic(BPJS)
3,D10007,js/ FERRIPROX FCT 500MG TAB,Drugs,Other(BPJS)
4,D10008,js/ GLIVEC 100MG TAB,Drugs,Chemotherapy(BPJS)


In [33]:
import os
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
import pandas as pd
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Inisialisasi Qdrant Client
client = QdrantClient(host='localhost', port=6333)

# Load dataset
df = pd.read_csv("Dataset/drugs_items_filtered.csv")

# Inisialisasi model embeddings
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.getenv("GOOGLE_API_KEY")  # pastikan Anda set GOOGLE_API_KEY di environment
)

# Konversi item_name menjadi embeddings
def generate_embeddings(texts):
    # Gunakan embed_documents untuk menghasilkan embeddings
    return embedding_model.embed_documents(texts)

# Generate embeddings untuk item_name
embeddings = generate_embeddings(df['item_name'].tolist())

# Periksa apakah koleksi sudah ada, jika tidak, buat koleksi baru
if not client.collection_exists("drug_collection"):
    client.create_collection(
        collection_name="drug_collection",
        vectors_config=VectorParams(size=len(embeddings[0]), distance=Distance.COSINE)
    )

# Menambah data ke Qdrant
points = [
    PointStruct(
        id=i,  # Setiap titik diberi ID unik
        vector=embedding,  # Menggunakan embedding untuk vektor
        payload={"item_name": item_name, "item_code": item_code}  # Payload berisi data tambahan
    )
    for i, (embedding, item_name, item_code) in enumerate(zip(embeddings, df['item_name'], df['item_code']))
]

# Sekarang upload titik menggunakan upload_points, bukan upload_collection
client.upload_points(
    collection_name="item_collection",
    points=points
)

# Memeriksa koleksi yang telah dibuat
collections = client.get_collections()
print("Daftar koleksi yang ada di Qdrant:", collections)

# Memeriksa jumlah titik data yang ada di dalam koleksi
# Memeriksa jumlah titik data yang ada di dalam koleksi
count_result = client.count(
    collection_name="item_collection",
    exact=True  # agar hasilnya benar-benar akurat
)
print(f"Jumlah titik dalam koleksi 'item_collection': {count_result.count}")


Daftar koleksi yang ada di Qdrant: collections=[CollectionDescription(name='drug_collection'), CollectionDescription(name='item_collection')]
Jumlah titik dalam koleksi 'item_collection': 12003


In [3]:
# 1. Load Environment Variables
from dotenv import load_dotenv
import os
load_dotenv()

# 1.5 Setup LangSmith
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
LANGSMITH_TRACING = os.getenv("LANGSMITH_TRACING")
LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT")

from langchain.callbacks import tracing_v2_enabled

# 2. Import yang perlu
from qdrant_client import QdrantClient
from PIL import Image

# 3. Inisialisasi kembali
client = QdrantClient(host='localhost', port=6333)
print(client.get_collections())
count = client.count(collection_name="item_collection", exact=True)
print(count.count)

# 4. Import run_pipeline
from app.pipeline import run_pipeline

# 5. Load Images
images = [
    Image.open("Dataset/66a.JPG"),
    Image.open("Dataset/66b.JPG"),
    Image.open("Dataset/66c.JPG")
]

# 6. Run pipeline dengan LangSmith Tracking
with tracing_v2_enabled(project_name=LANGSMITH_PROJECT):
    output = run_pipeline(images)


collections=[CollectionDescription(name='item_collection'), CollectionDescription(name='drug_collection')]
12003


KeyboardInterrupt: 

In [54]:
output

{'item_id': 'LVD00966',
 'item_name': 'NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml',
 'quantity': 14,
 'batch_number': 'D4H527GA',
 'expiry_date': 'AUG 27'}

In [15]:
import pandas as pd

df = pd.read_csv("Dataset/drugs_items_filtered.csv")
df.head()

Unnamed: 0,item_code,item_name,type_name,group_name
0,D10002,js/ ARIMIDEX 1MG TAB,Drugs,Chemotherapy(BPJS)
1,D10003,js/ CASODEX 50MG TAB,Drugs,Chemotherapy(BPJS)
2,D10005,js/ DORNER 20MCG TAB,Drugs,Chronic(BPJS)
3,D10007,js/ FERRIPROX FCT 500MG TAB,Drugs,Other(BPJS)
4,D10008,js/ GLIVEC 100MG TAB,Drugs,Chemotherapy(BPJS)


In [3]:
df[df['item_code'] == output['item_id']]


NameError: name 'output' is not defined

In [8]:
df[df['item_name'].str.contains("MINOSEP ", case=False, na=False)]

Unnamed: 0,item_code,item_name,type_name,group_name
539,D10643,MINOSEP 10ML SOL,Drugs,Other
3278,BPD00716,"MINOSEP (MERAH) 0,2% 150ML GARGLE -D/C-",Drugs,Other CE
3279,BPD00717,"MINOSEP (HIJAU) 0,1% 150ML GARGLE -D/C-",Drugs,Other CE
4775,D0001146,"MINOSEP (HIJAU) 0,1% 200ML GARGLE",Drugs,Other CE
4776,D0001147,"MINOSEP (MERAH) 0,2% 200ML GARGLE",Drugs,Other CE
5252,LCD00247,"MINOSEP (MERAH) 0,2% 60ML GARGLE",Drugs,Other
6956,DTD00094,js/ MINOSEP GARGLE 150ML GARGLE,Drugs,Other(BPJS)
9854,SRD00010,MINOSEP 100ML SOL (REPACK),Drugs,Other
10376,SRD00009,"MINOSEP 0,2 % 1000 ML SOLUTION",Drugs,Other
10685,LVD01685,MINOSEP 10% 30ML SOL,Drugs,Other


In [4]:
from qdrant_client import QdrantClient
client = QdrantClient(host='localhost', port=6333)
print(client.get_collections())
count = client.count(collection_name="item_collection", exact=True)
print(count.count)

collections=[CollectionDescription(name='item_collection'), CollectionDescription(name='drug_collection')]
12003


In [20]:
# 1. Import Library
from qdrant_client import QdrantClient
from qdrant_client.http.models import SearchRequest, Filter, FieldCondition, MatchValue
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os

embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# 4. Fungsi untuk cari item berdasarkan query LLM
def search_items_from_query(query: str, top_k: int = 10):
    # Step 1: Ubah query jadi embedding
    query_vector = embedding_model.embed_query(query)

    # Step 2: Cari ke Qdrant dengan vector tersebut
    search_result = client.search(
        collection_name="item_collection",
        query_vector=query_vector,
        limit=top_k
    )
    
    # Step 3: Ambil item_name dan item_code
    results = []
    for hit in search_result:
        payload = hit.payload
        item_name = payload.get("item_name", "Unknown")
        item_code = payload.get("item_code", "Unknown")
        results.append({"item_name": item_name, "item_code": item_code})
    
    return results


In [21]:
# Masukkan query pengguna
query = "NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml injeksi"

# Cari item
results = search_items_from_query(query, top_k=5)

# Tampilkan hasil
for item in results:
    print(f"Item Name: {item['item_name']}, Item Code: {item['item_code']}")


Item Name: NARFOZ 8MG TAB, Item Code: LVD00966
Item Name: FARNELTIK 200MG TAB COV (DONASI), Item Code: KJD01199
Item Name: NARFOZ 4MG/5ML-60ML SYR, Item Code: DN00131R
Item Name: NARFOZ 4MG TAB, Item Code: LVD00964
Item Name: ONBREZ 150MCG/CAP BREEZHALER + ALAT, Item Code: LVD02044


  search_result = client.search(


In [3]:
df[df['item_name'].str.contains("Repacor ", case=False, na=False)]

Unnamed: 0,item_code,item_name,type_name,group_name
23915,D0000875,REPACOR 40MG INJ,Drugs,Other


############################

In [5]:
# === STEP 1: Import & Setup ===

from qdrant_client import QdrantClient
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
import os

# Inisialisasi LLM dan Embedding
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.2)
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Inisialisasi Qdrant client
client = QdrantClient(host="localhost", port=6333)


In [6]:
# === STEP 2: Fungsi item_name dari query menggunakan LLM ===

def get_item_name(raw_query: str) -> str:
    prompt = f"""
You are a medical inventory assistant. From the following query, extract the most accurate and standardized item_name for searching in a medical product database. 
Only return the item_name as it appears in the database — no dosage, no packaging, unless it is part of the official name. Do not explain.

Query: "{raw_query}"
item_name:"""
    
    response = llm.invoke(prompt)
    return response.content.strip().strip('"')


# === STEP 3: Fungsi pencarian di Qdrant berdasarkan item_name  ===

def search_exact_item(item_name: str, top_k: int = 5):
    query_vector = embedding_model.embed_query(item_name)

    results = client.search(
        collection_name="item_collection",
        query_vector=query_vector,
        limit=top_k
    )

    # Cari hasil paling cocok
    best_match = None
    for result in results:
        payload = result.payload
        if "item_name" in payload and item_name.lower() in payload["item_name"].lower():
            best_match = payload
            break

    return best_match or (results[0].payload if results else None)

# === STEP 4: Fungsi Utama (gabungkan semua langkah) ===

def search_item_pipeline(user_query: str):
    print(f"Original Query: {user_query}")
    
    # Step 1: Normalisasi dengan LLM
    normalized_name = get_item_name(user_query)
    print(f"Normalized item_name: {normalized_name}")
    
    # Step 2: Cari ke Qdrant
    match = search_exact_item(normalized_name)
    
    if match:
        print("Match Found:")
        print(f"Item Name : {match['item_name']}")
        print(f"Item Code : {match['item_code']}")
        return match
    else:
        print("No match found.")
        return None




In [7]:
# === STEP 5: Contoh Penggunaan ===

# Contoh query dari user
query = "NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml injeksi"

# Jalankan pipeline
search_item_pipeline(query)


Original Query: NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml injeksi
Normalized item_name: Ondansetron
Match Found:
Item Name : ONDANSETRON 8MG TAB
Item Code : D10724


  results = client.search(


{'item_name': 'ONDANSETRON 8MG TAB', 'item_code': 'D10724'}

In [66]:
# === STEP 1 RAG: Ambil semua item dari Qdrant ===

def get_all_items_from_qdrant(limit: int = 12000):
    scroll = client.scroll(
        collection_name="item_collection",
        limit=limit,
        with_payload=True
    )
    items = [point.payload for point in scroll[0]]
    return items


In [67]:
# === STEP 3: Gunakan LLM untuk pilih item terbaik dari daftar ===

def llm_pick_best_item(query: str, items: list[dict]):
    # Siapkan daftar item sebagai string
    item_list_str = "\n".join([
        f"{i+1}. {item['item_name']} (item_code: {item['item_code']})"
        for i, item in enumerate(items)
    ])

    prompt = f"""
Given the following user query and a list of inventory items, your task is to find the item that best matches the user's query.

Your task:
1. Identify the item that best matches the query, focusing on its name.
2. The item with the closest match should be returned.
3. Only output the item_name and item_code in the format below (do not explain your reasoning).

### Format:
item_name | item_code

### User Query:
{query}

### List of Items:
{item_list_str}

### Your Answer:

"""

    response = llm.invoke(prompt)
    return response.content.strip()


In [68]:
# === STEP 4: Pipeline utama ===

def search_item_using_llm(user_query: str):
    print(f"User Query: {user_query}")

    # Step 1: Ambil semua item dari Qdrant
    all_items = get_all_items_from_qdrant()
    print(all_items)

    # Step 2: Minta LLM pilih yang paling cocok
    result = llm_pick_best_item(user_query, all_items)

    # Step 3: Tampilkan hasil
    print("LLM Match:")
    return result


In [69]:
search_item_using_llm("NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml injeksi")


User Query: NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml injeksi
[{'item_name': 'js/ ARIMIDEX 1MG TAB', 'item_code': 'D10002'}, {'item_name': 'js/ CASODEX 50MG TAB', 'item_code': 'D10003'}, {'item_name': 'js/ DORNER 20MCG TAB', 'item_code': 'D10005'}, {'item_name': 'js/ FERRIPROX FCT 500MG TAB', 'item_code': 'D10007'}, {'item_name': 'js/ GLIVEC 100MG TAB', 'item_code': 'D10008'}, {'item_name': 'js/ HARNAL OCAS 0,4MG TAB', 'item_code': 'D10009'}, {'item_name': 'js/ HEMAPO 3.000UNIT PFS', 'item_code': 'D10010'}, {'item_name': 'js/ KALITAKE 5GRAM SACH', 'item_code': 'D10011'}, {'item_name': 'js/ KOATE 250UNIT INJ', 'item_code': 'D10012'}, {'item_name': 'js/ LANTUS SOLOSTAR 100UNIT/ML-3ML FLEXPEN', 'item_code': 'D10013'}, {'item_name': 'js/ LOVENOX 40MG/0,4ML PFS', 'item_code': 'D10015'}, {'item_name': 'js/ NOVORAPID 100UNIT/ML-3ML FLEXPEN', 'item_code': 'D10016'}, {'item_name': 'SANDIMMUN NEORAL 100MG SOFT CAP', 'item_code': 'D10018'}, {'item_name': 'SEBIVO 600MG TAB -D/C-', 'item_code': 

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised Cancelled: 499 The operation was cancelled..


LLM Match:


'NARFOZ 4MG/2ML INJ | D10678'

In [60]:
df[df['item_name'].str.contains("NARFOZ ", case=False, na=False)]

Unnamed: 0,item_code,item_name,type_name,group_name
565,D10678,NARFOZ 4MG/2ML INJ,Drugs,Other
2211,LVD00964,NARFOZ 4MG TAB,Drugs,Other
2212,LVD00965,NARFOZ 4MG/5ML-30ML SYR,Drugs,Other
2213,LVD00966,NARFOZ 8MG TAB,Drugs,Other
2214,LVD00967,NARFOZ 8MG/4ML INJ,Drugs,Other
5026,COV00110,NARFOZ 4MG (HO),Drugs,Other
7523,AMD00303,am/ NARFOZ 30ML SYR,Drugs,Other
11317,DN00131R,NARFOZ 4MG/5ML-60ML SYR,Drugs,Other


In [55]:
df[df['item_name'].str.contains("Ondansetron ", case=False, na=False)]

Unnamed: 0,item_code,item_name,type_name,group_name
596,D10722,ONDANSETRON 4MG TAB,Drugs,Other(Gen)
597,D10723,ONDANSETRON 4MG/2ML INJ,Drugs,Other(Gen)
598,D10724,ONDANSETRON 8MG TAB,Drugs,Other(Gen)
599,D10725,ONDANSETRON 8MG/4ML INJ,Drugs,Other(Gen)
3534,BPD00260,in/ ONDANSETRON 8MG TAB,Drugs,Other(Inhealth)
3535,BPD00261,in/ ONDANSETRON 4MG TAB,Drugs,Other(Inhealth)
5010,LVD02275,js/ ONDANSETRON 4MG/2ML INJ,Drugs,Other(BPJS)
6970,DTD00112,js/ ONDANSETRON 4MG TAB,Drugs,Other(BPJS)
6971,DTD00113,js/ ONDANSETRON 8MG/4ML INJ,Drugs,Other(BPJS)
9555,CRD00165,ONDANSETRON 4MG/5ML-60ML SYR,Drugs,Other(Gen)
