In [1]:
%pip install pydantic_settings langchain langchain-core langchain-google-genai langchain-community langgraph

Collecting langchain-community
  Using cached langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langgraph
  Downloading langgraph-0.6.4-py3-none-any.whl.metadata (6.8 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain-community)
  Using cached aiohttp-3.12.15-cp311-cp311-win_amd64.whl.metadata (7.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Using cached dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Using cached httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting numpy>=1.26.2 (from langchain-community)
  Using cached numpy-2.3.2-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)
  Using cached aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.4.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)
  Using cached aiosignal-1.4.0-py3-none-any.wh

In [2]:
import base64
import json

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate

In [23]:
# Process the image to base64 so it can be sent to the LLM

image_path = "images/16.jpg"

with open(image_path, "rb") as image_file:
    base64_image = base64.b64encode(image_file.read()).decode('utf-8')

In [24]:
# Load environment variables

from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    GOOGLE_API_KEY: str
    model_config = SettingsConfigDict(env_file=".env")

env = Settings()

In [25]:
# Initialize the LLM with the Google Generative AI model

llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        api_key=env.GOOGLE_API_KEY,
    )

In [26]:
system_prompt = """
You are an expert AI assistant specializing in the analysis of drug discovery images. Your task is to analyze the provided microscopic image of a drug or medicine using a systematic chain-of-thought approach.

## CHAIN OF THOUGHT ANALYSIS PROCESS:

### Step 1: Initial Image Assessment
First, carefully examine the overall image quality and identify:
- Is this a clear microscopic image of drug or medicine?
- What is the overall shape and structure visible?
- Are there any obvious artifacts or issues with image quality?
### Step 2: Identify the medicine or drugs
- What is the visible structure of the drug or medicine?
- Are there any visible labels or markings that indicate the type of drug or medicine?
- Are they in a recognizable form (e.g., tablets, capsules, liquid)?
- Are there in a box, bottle, or blister pack?
- Are there any visible signs of degradation or contamination?
- Are there any visible signs of the drug or medicine being in a specific stage of development (e.g., early-stage, late-stage, etc.)?

### Step 3: Structural Feature Identification
Systematically identify the following key structures (note presence/absence):
- **Name**: The clear name of the medicine or drugs
- **Cell boundaries**: Can individual cells (blastomeres) be distinguished?
- **Dosage**: the dosage of the drug or medicine
- **Production Date **: the production date of the drug or medicine with day, month, and year
- **Expiration Date **: the expirate date of the drug or medicine with day, month, and year
- **Batch number**: the unique code that identifies the batch of the drug or medicine, usually printed on the packaging near the barcode
- **Barcode**: unique barcode that identify for BPOM identified
- **HET**: Maximum price to sell the medicines or drugs, usually have 'HET' text and 'Rp' means Indonesian Rupiah
- **Manufactured**: the name of the company that manufactured the drug or medicine

### Step 4: Identify the type of drug or medicine
- **Type**: Is it a tablet, capsule, liquid, or other form?
- **Active Ingredients**: What are the main active ingredients visible?
- **Inactive Ingredients**: Are there any excipients or fillers visible?
- **Packaging**: What type of packaging is used (bottle, blister pack, etc
- **Labeling**: Are there any visible labels or markings that indicate the type of drug or medicine?
- **Color**: What is the predominant color of the drug or medicine?
- **Shape**: What is the shape of the drug or medicine (round, oval, etc.)?
- **Size**: What is the approximate size of the drug or medicine?
- **Texture**: Is the surface smooth, rough, or coated?
- **Presence of Coating**: Is there any visible coating on the drug or medicine?
- **Presence of Imprints**: Are there any imprints or engravings on the drug or medicine?

### Step 6: Final Assessment
Based on the above analysis, provide a final assessment of the drug or medicine, including:
- **Overall Quality**: Is the drug or medicine in good condition, or are there signs of degradation or contamination?
- **Suitability for Use**: Is the drug or medicine suitable for use based on the visible features and structural integrity?
- **Recommendations**: Any recommendations for further analysis or actions based on the findings.

## CONTEXT AND GRADING SYSTEMS:

**Cleavage the drug class: unique circle (dot) to indicate the drug class**
- green dot : is free medicine or OTC (over the counter) medicine
- red dot : is prescription medicine or RX (prescription only) medicine
- blue dot : is limited free medicine or cautionary medicine
- narcotic medicine : is narcotic medicine with the narcotic symbol

**Cleavage the antibiotic class
identify the antibiotic class based on the visible features and structural integrity of the drug or medicine.**
- Antibiotic: yes, is there is content in antibiotic class
- Non-antibiotics: no, is there is no content in antibiotic class

## OUTPUT FORMAT:
Provide your step-by-step analysis followed by the final assessment in JSON format:

```json
{{
  "Name": "string",
  "Dosage": "string",
  "Production Date": "string",
  "Expiration Date": "string",
  "Batch Number": "string",
  "cleavage_drug_class": "string",
  "cleavage_antibiotic_class": "string",
  "for_doctor_explanation": "string",
  "type_of_drug_or_medicine": "string",
  "packaging_of_drug_or_medicine": "string",
  "color_of_drug_or_medicine": "string",
  "shape_of_drug_or_medicine": "string",
  "size_of_drug_or_medicine": "string",
  "for_patient_explanation": "string"
}}
```

Now analyze the provided medicines or drugs image following this systematic chain-of-thought approach.
"""

In [27]:
# Create the prompt for the LLM 

prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", [
            {
                "type": "text",
                "text": "Analyze this medicines or drugs image following the chain-of-thought approach."
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                }
            }
        ])
    ])

In [28]:
# Create the chain to invoke the LLM

chain = prompt | llm
response = chain.invoke({})
response.content

'```json\n{\n  "Name": "Anadex",\n  "Dosage": "Tiap 10 ml mengandung: Paracetamol 240 mg, Dextromethorphan HBr 7 mg, Chlorphenamine maleate 1 mg, Phenylpropanolamine HCI 7 mg",\n  "Production Date": "FEB 25",\n  "Expiration Date": "FEB 28",\n  "Batch Number": "S0020002",\n  "cleavage_drug_class": "blue dot",\n  "cleavage_antibiotic_class": "Non-antibiotics",\n  "for_doctor_explanation": "Anadex syrup contains Paracetamol, Dextromethorphan HBr, Chlorphenamine maleate, and Phenylpropanolamine HCl. It is indicated for relieving flu symptoms such as fever, headache, nasal congestion, sneezing, and cough. Contraindications include patients with heart disorders, diabetes mellitus, severe liver function impairment, and hypersensitivity to any component of the drug. Side effects and drug interactions should be referred to the package insert. The drug should be used with caution as directed.",\n  "type_of_drug_or_medicine": "syrup",\n  "packaging_of_drug_or_medicine": "box",\n  "color_of_drug_o

In [29]:
def print_readable_result(response_text: str) -> None:
    """
    Print the analysis result in a readable format
    
    Args:
        response_text (str): Raw response from LLM
    """
    try:
        # Try to extract JSON from response
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1
        
        if json_start != -1 and json_end != -1:
            # Extract and parse JSON
            json_str = response_text[json_start:json_end]
            result = json.loads(json_str)
            
            # Print formatted result
            print("=" * 60)
            print("🔬 MEDICINES ANALYSIS RESULT")
            print("=" * 60)
            
            # Show grade based on stage
            print(f"💊 Name: {result.get('Name', 'N/A')}")
            print(f"💊 Dosage: {result.get('Dosage', 'N/A')}")
            print(f"📅 Production Date: {result.get('Production Date', 'N/A')}")
            print(f"📅 Expiration Date: {result.get('Expiration Date', 'N/A')}")
            print(f"📦 Batch Number: {result.get('Batch Number', 'N/A')}")
            print(f"💊 Cleavage Drug Class: {result.get('cleavage_drug_class', 'N/A')}")
            print(f"💊 Cleavage Antibiotic Class: {result.get('cleavage_antibiotic_class', 'N/A')}")
            print(f"💊 Type of Drug or Medicine: {result.get('type_of_drug_or_medicine', 'N/A')}")
            print(f"📦 Packaging of Drug or Medicine: {result.get('packaging_of_drug_or_medicine', 'N/A')}")
            print(f"🎨 Color of Drug or Medicine: {result.get('color_of_drug_or_medicine', 'N/A')}")
            print(f"🔵 Shape of Drug or Medicine: {result.get('shape_of_drug_or_medicine', 'N/A')}")
            print(f"🔵 Size of Drug or Medicine: {result.get('size_of_drug_or_medicine', 'N/A')}")
            
            print("\n📋 TECHNICAL EXPLANATION:")
            print("-" * 40)
            print(result.get('for_doctor_explanation', 'N/A'))
            
            print("\n👤 PATIENT EXPLANATION:")
            print("-" * 40)
            print(result.get('for_patient_explanation', 'N/A'))
            
            # Show chain of thought if available
            chain_of_thought = response_text[:json_start].strip()
            if chain_of_thought:
                print("\n🧠 ANALYSIS PROCESS:")
                print("-" * 40)
                print(chain_of_thought)
            
            print("=" * 60)
            
        else:
            # If no JSON found, just print the raw response
            print("🤖 RAW RESPONSE:")
            print("-" * 40)
            print(response_text)
            
    except Exception as e:
        print(f"❌ Error formatting result: {e}")
        print("\n🤖 RAW RESPONSE:")
        print("-" * 40)
        print(response_text)

In [30]:
print_readable_result(response.content)

🔬 MEDICINES ANALYSIS RESULT
💊 Name: Anadex
💊 Dosage: Tiap 10 ml mengandung: Paracetamol 240 mg, Dextromethorphan HBr 7 mg, Chlorphenamine maleate 1 mg, Phenylpropanolamine HCI 7 mg
📅 Production Date: FEB 25
📅 Expiration Date: FEB 28
📦 Batch Number: S0020002
💊 Cleavage Drug Class: blue dot
💊 Cleavage Antibiotic Class: Non-antibiotics
💊 Type of Drug or Medicine: syrup
📦 Packaging of Drug or Medicine: box
🎨 Color of Drug or Medicine: white
🔵 Shape of Drug or Medicine: box
🔵 Size of Drug or Medicine: 60 ml

📋 TECHNICAL EXPLANATION:
----------------------------------------
Anadex syrup contains Paracetamol, Dextromethorphan HBr, Chlorphenamine maleate, and Phenylpropanolamine HCl. It is indicated for relieving flu symptoms such as fever, headache, nasal congestion, sneezing, and cough. Contraindications include patients with heart disorders, diabetes mellitus, severe liver function impairment, and hypersensitivity to any component of the drug. Side effects and drug interactions should be re

In [11]:
import json

# Save response to a file
with open("insto.txt", "w", encoding="utf-8") as f:
    if isinstance(response.content, dict):
        f.write(json.dumps(response.content, ensure_ascii=False, indent=2))
    else:
        f.write(str(response.content))

In [31]:
import json

def save_readable_result(response_text: str, filename: str) -> None:
    try:
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1

        with open(filename, "w", encoding="utf-8") as f:
            if json_start != -1 and json_end != -1:
                json_str = response_text[json_start:json_end]
                result = json.loads(json_str)

                f.write("=" * 60 + "\n")
                f.write("🔬 MEDICINES ANALYSIS RESULT\n")
                f.write("=" * 60 + "\n")
                f.write(f"💊 Name: {result.get('Name', 'N/A')}\n")
                f.write(f"💊 Dosage: {result.get('Dosage', 'N/A')}\n")
                f.write(f"📅 Production Date: {result.get('Production Date', 'N/A')}\n")
                f.write(f"📅 Expiration Date: {result.get('Expiration Date', 'N/A')}\n")
                f.write(f"📦 Batch Number: {result.get('Batch Number', 'N/A')}\n")
                f.write(f"💊 Cleavage Drug Class: {result.get('cleavage_drug_class', 'N/A')}\n")
                f.write(f"💊 Cleavage Antibiotic Class: {result.get('cleavage_antibiotic_class', 'N/A')}\n")
                f.write(f"💊 Type of Drug or Medicine: {result.get('type_of_drug_or_medicine', 'N/A')}\n")
                f.write(f"📦 Packaging of Drug or Medicine: {result.get('packaging_of_drug_or_medicine', 'N/A')}\n")
                f.write(f"🎨 Color of Drug or Medicine: {result.get('color_of_drug_or_medicine', 'N/A')}\n")
                f.write(f"🔵 Shape of Drug or Medicine: {result.get('shape_of_drug_or_medicine', 'N/A')}\n")
                f.write(f"🔵 Size of Drug or Medicine: {result.get('size_of_drug_or_medicine', 'N/A')}\n")
                f.write("\n📋 TECHNICAL EXPLANATION:\n")
                f.write("-" * 40 + "\n")
                f.write(result.get('for_doctor_explanation', 'N/A') + "\n")
                f.write("\n👤 PATIENT EXPLANATION:\n")
                f.write("-" * 40 + "\n")
                f.write(result.get('for_patient_explanation', 'N/A') + "\n")

                chain_of_thought = response_text[:json_start].strip()
                if chain_of_thought:
                    f.write("\n🧠 ANALYSIS PROCESS:\n")
                    f.write("-" * 40 + "\n")
                    f.write(chain_of_thought + "\n")
                f.write("=" * 60 + "\n")
            else:
                f.write("🤖 RAW RESPONSE:\n")
                f.write("-" * 40 + "\n")
                f.write(response_text + "\n")
    except Exception as e:
        with open(filename, "w", encoding="utf-8") as f:
            f.write(f"❌ Error formatting result: {e}\n")
            f.write("\n🤖 RAW RESPONSE:\n")
            f.write("-" * 40 + "\n")
            f.write(response_text + "\n")

# Usage example:
if isinstance(response.content, dict):
    response_text = json.dumps(response.content, ensure_ascii=False, indent=2)
else:
    response_text = str(response.content)

save_readable_result(response_text, "anadex.txt")

In [None]:
### JSON FILE

In [7]:
#try to fake the quota embedding

from langchain.embeddings import FakeEmbeddings
embeddings_3 = FakeEmbeddings(size=3072) #create fake embeddeing to reach the quota

In [25]:
from qdrant_client.http.models import Distance

collection_name = "drug_packages"
dimension = 3072
distance = Distance.COSINE

In [20]:
# load mcu.json data
import json

with open("drugs.json", "r") as f:
    drug_data = json.load(f)

print(drug_data[0])

{'item_code': 'A0001', 'item_name': 'AQUABIDEST STERIL 1000ML (OTSU)'}


In [21]:
from qdrant_client import QdrantClient

client = QdrantClient(":memory:")

In [22]:
from qdrant_client.http.models import VectorParams

if(client.collection_exists(collection_name=collection_name) == False):
    client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=dimension, distance=distance),
    )

In [27]:
from qdrant_client.models import PointStruct
import uuid
i = 0
for row in drug_data:
    i += 1
    text = f"Drug Name: {row['item_name']}, Drug Code: {row['item_code']}"
    emb = embeddings_3.embed_query(text)
    print(i)
    client.upsert(
        collection_name=collection_name,
        points=[
            PointStruct(
                id=str(uuid.uuid4()),  # Generate a unique ID for each point
                vector=emb, 
                payload={
                    "page_content": text,
                    "metadata": {
                            "name": row['item_name'],
                            "description": row['item_code'],
                    },
                },
            )
        ],
    )
    print(text)

1
Drug Name: AQUABIDEST STERIL 1000ML (OTSU), Drug Code: A0001
2
Drug Name: BACTESYN 1.5 GRAM INJ, Drug Code: B0004
3
Drug Name: BACTESYN 250/125MG TAB, Drug Code: B0005
4
Drug Name: BACTESYN 375MG TAB, Drug Code: B0006
5
Drug Name: BRINTELLIX VOSTIOXETINE 10MG TAB, Drug Code: B0057
6
Drug Name: BRINTELLIX VOSTIOXETINE 15MG TAB, Drug Code: B0058
7
Drug Name: BRINTELLIX VOSTIOXETINE 20MG TAB, Drug Code: B0059
8
Drug Name: BRINTELLIX VOSTIOXETINE 5MG TAB, Drug Code: B0060
9
Drug Name: CALADINE 30ML LOTION, Drug Code: C0001
10
Drug Name: CALADINE 60ML LOTION, Drug Code: C0002
11
Drug Name: CALADINE 95ML LOTION, Drug Code: C0003
12
Drug Name: CALADINE POWDER, Drug Code: C0004
13
Drug Name: CALADINE SUSPENSION, Drug Code: C0005
14
Drug Name: CALADINE 60ML LOTION, Drug Code: C0046
15
Drug Name: CALADINE 95ML LOTION, Drug Code: C0047
16
Drug Name: CALAMIN LOTION, Drug Code: C0048
17
Drug Name: CALAMIN POWDER, Drug Code: C0049
18
Drug Name: CALCIFAR PLUS TAB, Drug Code: C0050
19
Drug Name: CAL

In [None]:
#untuk buat prompt nya harus combine anatara image and text di file json