# Medical Prescription Model - Inference Only

This notebook loads the fine-tuned clinic chatbot model and provides an interface for generating medical prescriptions based on patient symptoms.


In [None]:
import torch
import json
from unsloth import FastLanguageModel


## Load Fine-tuned Model

Load the fine-tuned model from the saved LoRA checkpoint in 4-bit precision and switch it to inference mode.


In [None]:
# Sequence-length limit handed to the model loader.
max_seq_length = 1024

# Collect the loader arguments in one place so they are easy to inspect or tweak.
loader_options = dict(
    model_name="../models/clinic-chatbot-lora",
    max_seq_length=max_seq_length,
    dtype=None,  # presumably lets the loader choose the dtype itself -- TODO confirm
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**loader_options)

# Put the model into Unsloth's inference mode before any generation call.
FastLanguageModel.for_inference(model)

print("✅ Model loaded successfully!")


## Inference Function

Function to generate structured prescription responses.


In [None]:
def _extract_json_object(text):
    """Return the JSON object that starts at the first '{' in ``text``, or ``None``."""
    start = text.find("{")
    if start == -1:
        return None
    try:
        # raw_decode stops at the end of the first JSON value, so trailing
        # Markdown fences or commentary after the object are ignored.
        candidate, _ = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError:
        return None
    return candidate if isinstance(candidate, dict) else None


def get_prescription(patient_symptoms, max_new_tokens=512, temperature=0.3, top_p=0.9):
    """
    Generate prescription from patient symptoms.

    Relies on the notebook-level ``model`` and ``tokenizer`` objects.

    Args:
        patient_symptoms (str): Description of patient symptoms
        max_new_tokens (int): Maximum tokens to generate
        temperature (float): Sampling temperature (lower = more deterministic).
            A value of 0 (or None) switches to greedy decoding instead of sampling.
        top_p (float): Nucleus sampling parameter (only used when sampling)

    Returns:
        dict: The parsed JSON object produced by the model. When no JSON object
        can be obtained, a dict with ``raw_response`` (the decoded model text)
        and ``error`` (a short reason) is returned instead.
    """
    # Refuse to generate a prescription from nothing.
    if not isinstance(patient_symptoms, str) or not patient_symptoms.strip():
        return {"raw_response": "", "error": "No patient symptoms provided"}

    conversation = [
        {
            "role": "system",
            "content": "You are a professional medical doctor. When a patient describes their symptoms, provide a structured prescription response in JSON format with: prescription_text, medicine_name, dose_size, frequency, duration, and speech (natural language explanation)."
        },
        {
            "role": "user",
            "content": patient_symptoms
        }
    ]

    # NOTE(review): the templated prompt is tokenized again with default settings;
    # if the chat template already inserts a BOS token this may duplicate it --
    # confirm against the training setup.
    prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Some tokenizers define no pad token; fall back to EOS rather than passing None.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "eos_token_id": tokenizer.eos_token_id,
        "pad_token_id": pad_token_id,
    }
    if temperature is not None and temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        # Sampling requires a strictly positive temperature; decode greedily instead.
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # Drop the prompt tokens so only the newly generated text is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    try:
        prescription = json.loads(response)
    except json.JSONDecodeError:
        # The reply may wrap the JSON in code fences or surrounding prose.
        prescription = _extract_json_object(response)
        if prescription is None:
            return {"raw_response": response, "error": "Failed to parse JSON response"}
        return prescription

    if not isinstance(prescription, dict):
        # Valid JSON but not an object (e.g. a bare string or list): keep the
        # documented dict contract so callers can safely look up keys.
        return {"raw_response": response, "error": "Model response is not a JSON object"}
    return prescription

print("✅ Inference function ready!")


## Test Cases

Run inference on multiple test cases.


In [None]:
# Symptom descriptions to run through the model; append more strings to extend the check.
test_cases = [
    "I have leg pains and my nails are coming off",
]

print("🏥 Testing Fine-tuned Clinic Chatbot\n" + "="*80 + "\n")

for symptom in test_cases:
    print(f"PATIENT: {symptom}")
    prescription = get_prescription(symptom)
    print("DOCTOR PRESCRIPTION:")
    # ensure_ascii=False keeps non-ASCII characters readable instead of \uXXXX escapes.
    print(json.dumps(prescription, indent=2, ensure_ascii=False))

    # Only look up 'speech' on a dict: on a plain string the `in` test would be a
    # substring match and the subsequent indexing would raise TypeError.
    if isinstance(prescription, dict) and 'speech' in prescription:
        print(f"\n💬 DOCTOR SAYS: {prescription['speech']}")

    print("\n" + "-"*80 + "\n")
