In [1]:
# main.ipynb
import json
import os

from dotenv import load_dotenv
from PIL import Image

# Load .env before the pipeline import below; the original cell ran these in
# the same order, so it is kept in case the pipeline reads env vars on import.
load_dotenv()
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
LANGSMITH_TRACING = os.getenv("LANGSMITH_TRACING")

from app.pipeline import run_pipeline

# Open the three sample photographs, in the same order as before.
images = list(
    map(Image.open, ("Dataset/66a.JPG", "Dataset/66b.JPG", "Dataset/66c.JPG"))
)

# Hand the images to the pipeline; the result stays bound to `output`.
output = run_pipeline(images)




Detection Result: {'batch_and_expiry_image': [2], 'quantity_detection_images': [0, 1, 2], 'item_name': 'NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml'}
Batch and Expiry Result: {'batch_number': 'D4H527GA', 'expiry_date': 'AUG 27'}
Quantity Result: 14
Final Output: {'item_name': 'NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml', 'quantity': 14, 'batch_number': 'D4H527GA', 'expiry_date': 'AUG 27'}


In [3]:
# Check the Python type of the value returned by run_pipeline.
type(output)

dict

In [10]:
# Display the pipeline result held in `output`.
output

{'item_name': 'NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml',
 'quantity': 14,
 'batch_number': 'D4H527GA',
 'expiry_date': 'AUG 27'}

----------------------------------------------------------------------------------------------------

In [11]:
import pandas as pd

# Read Dataset/all_items_test.csv and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="Dataset/all_items_test.csv")
df.head(n=5)

Unnamed: 0,item_code,item_name,type_name,group_name
0,D10002,js/ ARIMIDEX 1MG TAB,Drugs,Chemotherapy(BPJS)
1,D10003,js/ CASODEX 50MG TAB,Drugs,Chemotherapy(BPJS)
2,D10005,js/ DORNER 20MCG TAB,Drugs,Chronic(BPJS)
3,D10007,js/ FERRIPROX FCT 500MG TAB,Drugs,Other(BPJS)
4,D10008,js/ GLIVEC 100MG TAB,Drugs,Chemotherapy(BPJS)


In [11]:
# Strip the "js/" marker from the item_name column.
# The preview of the raw file shows the marker as a leading "js/ "
# (e.g. "js/ ARIMIDEX 1MG TAB"), so the original literal '/js' never matched
# those rows. The literal removal is kept for backward compatibility; the
# anchored regex removes the prefix that is actually present. Re-running the
# cell is harmless because both replacements are no-ops on clean names.
df['item_name'] = (
    df['item_name']
    .str.replace('/js', '', regex=False)
    .str.replace(r'^\s*js/\s*', '', regex=True)
)

# Keep only the rows whose type_name is 'Drugs'.
filtered_df = df[df['type_name'] == 'Drugs']

# Export the cleaned, filtered rows to a new CSV file (index not written).
filtered_df.to_csv("Dataset/drugs_items_filtered.csv", index=False)

# Print the first 5 filtered rows for verification.
print(filtered_df.head())

  item_code                 item_name type_name          group_name
0    D10002          ARIMIDEX 1MG TAB     Drugs  Chemotherapy(BPJS)
1    D10003          CASODEX 50MG TAB     Drugs  Chemotherapy(BPJS)
2    D10005          DORNER 20MCG TAB     Drugs       Chronic(BPJS)
3    D10007   FERRIPROX FCT 500MG TAB     Drugs         Other(BPJS)
4    D10008          GLIVEC 100MG TAB     Drugs  Chemotherapy(BPJS)


In [16]:
import pandas as pd

# Read Dataset/drugs_items_filtered.csv and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="Dataset/drugs_items_filtered.csv")
df.head(n=5)

Unnamed: 0,item_code,item_name,type_name,group_name
0,D10002,ARIMIDEX 1MG TAB,Drugs,Chemotherapy(BPJS)
1,D10003,CASODEX 50MG TAB,Drugs,Chemotherapy(BPJS)
2,D10005,DORNER 20MCG TAB,Drugs,Chronic(BPJS)
3,D10007,FERRIPROX FCT 500MG TAB,Drugs,Other(BPJS)
4,D10008,GLIVEC 100MG TAB,Drugs,Chemotherapy(BPJS)


In [29]:
import os

import pandas as pd
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

# Qdrant client pointed at localhost:6333.
client = QdrantClient(port=6333, host="localhost")

# Load the filtered drug catalogue.
df = pd.read_csv(filepath_or_buffer="Dataset/drugs_items_filtered.csv")

# Embedding model. The API key is read from the environment, so
# GOOGLE_API_KEY must be set before this cell runs.
embedding_model = GoogleGenerativeAIEmbeddings(
    google_api_key=os.getenv("GOOGLE_API_KEY"),
    model="models/embedding-001",
)

# Convert item_name values into embeddings
def generate_embeddings(texts):
    """Embed ``texts`` with the module-level ``embedding_model``.

    Args:
        texts: The strings to embed; passed unchanged to ``embed_documents``.

    Returns:
        Whatever ``embedding_model.embed_documents(texts)`` returns
        (presumably one vector per input text -- confirm against the
        embedding library).
    """
    vectors = embedding_model.embed_documents(texts)
    return vectors

# Embed every item_name in the catalogue.
embeddings = generate_embeddings(df["item_name"].tolist())

# Create the collection only when it does not exist yet; the vector size is
# taken from the length of the first embedding.
if not client.collection_exists("item_collection"):
    client.create_collection(
        collection_name="item_collection",
        vectors_config=VectorParams(distance=Distance.COSINE, size=len(embeddings[0])),
    )

# One point per (embedding, item_name, item_code) triple: the running index is
# the point id and the payload carries the name and code.
points = [
    PointStruct(
        id=point_id,
        vector=vector,
        payload={"item_name": name, "item_code": code},
    )
    for point_id, (vector, name, code) in enumerate(
        zip(embeddings, df["item_name"], df["item_code"])
    )
]

# Upload the points with upload_points (not upload_collection).
client.upload_points(collection_name="item_collection", points=points)

# List the collections reported by the server.
collections = client.get_collections()
print("Daftar koleksi yang ada di Qdrant:", collections)

# Count the points in the collection; exact=True asks for an exact count.
count_result = client.count(exact=True, collection_name="item_collection")
print(f"Jumlah titik dalam koleksi 'item_collection': {count_result.count}")


Daftar koleksi yang ada di Qdrant: collections=[CollectionDescription(name='item_collection')]
Jumlah titik dalam koleksi 'item_collection': 12003


In [1]:
# 1. Load environment variables (before anything that may read them)
import os

from dotenv import load_dotenv

load_dotenv()

# 2. Remaining imports
import json

from langchain_google_genai import GoogleGenerativeAIEmbeddings
from PIL import Image
from qdrant_client import QdrantClient

# 3. Re-initialise the Qdrant client, then print the collections and the
#    exact point count of item_collection
client = QdrantClient(port=6333, host="localhost")
print(client.get_collections())
count = client.count(exact=True, collection_name="item_collection")
print(count.count)

embedding_model = GoogleGenerativeAIEmbeddings(
    google_api_key=os.getenv("GOOGLE_API_KEY"),
    model="models/embedding-001",
)

# 4. Import run_pipeline (kept after load_dotenv, as in the original order)
from app.pipeline import run_pipeline

# 5. Open the sample images
images = list(
    map(Image.open, ("Dataset/66a.JPG", "Dataset/66b.JPG", "Dataset/66c.JPG"))
)

# 6. Run the pipeline
output = run_pipeline(images)

# 7. Print the result as indented JSON
print("Final Output:", json.dumps(output, indent=2))


collections=[CollectionDescription(name='item_collection')]
12003
Detection Result: {'batch_and_expiry_image': [2], 'quantity_detection_images': [0, 1, 2], 'item_name': 'NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml'}
Batch and Expiry Result: {'batch_number': 'D4H527GA', 'expiry_date': 'AUG 27'}
Quantity Result: 14
Final Output: {'item_id': 'AMD00303', 'item_name': 'NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml', 'quantity': 14, 'batch_number': 'D4H527GA', 'expiry_date': 'AUG 27'}
Final Output: {
  "item_id": "AMD00303",
  "item_name": "NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml",
  "quantity": 14,
  "batch_number": "D4H527GA",
  "expiry_date": "AUG 27"
}


In [6]:
# Display the pipeline result held in `output`.
output

{'item_id': 'AMD00303',
 'item_name': 'NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml',
 'quantity': 14,
 'batch_number': 'D4H527GA',
 'expiry_date': 'AUG 27'}

In [2]:
import pandas as pd

# Read Dataset/drugs_items_filtered.csv and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="Dataset/drugs_items_filtered.csv")
df.head(n=5)

Unnamed: 0,item_code,item_name,type_name,group_name
0,D10002,ARIMIDEX 1MG TAB,Drugs,Chemotherapy(BPJS)
1,D10003,CASODEX 50MG TAB,Drugs,Chemotherapy(BPJS)
2,D10005,DORNER 20MCG TAB,Drugs,Chronic(BPJS)
3,D10007,FERRIPROX FCT 500MG TAB,Drugs,Other(BPJS)
4,D10008,GLIVEC 100MG TAB,Drugs,Chemotherapy(BPJS)


In [9]:
# Look up the catalogue row whose item_code equals the item_id returned by
# the pipeline.
# NOTE(review): in the saved output this row is "NARFOZ 30ML SYR", while the
# pipeline reported "NARFOZ Ondansetron HCl dihydrate 8 mg / 4 ml" -- confirm
# the matched id is the intended product.
df.loc[df["item_code"] == output["item_id"]]


Unnamed: 0,item_code,item_name,type_name,group_name
7523,AMD00303,NARFOZ 30ML SYR,Drugs,Other
