<a href="https://colab.research.google.com/github/gayearmut/EmlakIsletmeProjesi/blob/main/model_yukleme_mistral.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==========================================
# 1. SETUP AND LIBRARIES
# ==========================================
import os
import subprocess
import sys

# Install the required libraries in quiet mode (-q).
print("‚öôÔ∏è K√ºt√ºphaneler kuruluyor (Bu i≈ülem 1-2 dakika s√ºrebilir)...")
# Run pip through the interpreter that is executing this script so the
# packages land in the environment the imports below will actually use, and
# pass the arguments as a list so no shell is involved.
_pip_result = subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "-q", "-U",
        "torch", "transformers", "bitsandbytes", "accelerate", "tqdm",
    ],
    check=False,
)
# Surface a failed install instead of ignoring the exit status; execution
# continues because the packages may already be present.
if _pip_result.returncode != 0:
    print(f"pip kurulumu hata verdi (kod: {_pip_result.returncode})")

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import json
import time
import re
import gc
from tqdm import tqdm

# ==========================================
# 2. LOADING THE MODEL (BioMistral-7B)
# ==========================================

# Free memory left over from a previous run of this cell.
# Each name is dropped independently so that a missing `model` does not
# prevent `tokenizer` from being released (and vice versa).
for _stale_name in ("model", "tokenizer"):
    globals().pop(_stale_name, None)
# Collect garbage first so unreachable tensors are actually freed, then ask
# the CUDA caching allocator to hand the now-unused blocks back.
gc.collect()
torch.cuda.empty_cache()

model_id = "BioMistral/BioMistral-7B"
print(f"üè• Medikal Model Y√ºkleniyor: {model_id}...")

# 4-bit quantization settings (critical for the Colab T4 GPU)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

try:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bnb_config,
        device_map={"": 0},  # Used instead of "auto": forces every layer onto GPU 0.
    )
    print("‚úÖ Model ba≈üarƒ±yla y√ºklendi.")
except Exception as e:
    print(f"‚ùå Model y√ºkleme hatasƒ±: {e}")
    print("L√ºtfen Colab men√ºs√ºnden 'Runtime > Change runtime type' kƒ±smƒ±ndan T4 GPU se√ßili olduƒüundan emin olun.")
    # Nothing below can work without a model; stop here rather than letting
    # the generation loop fail on every single diagnosis.
    raise

# ==========================================
# 3. HELPER FUNCTIONS AND PROMPT
# ==========================================

def extract_json_from_text(text):
    """
    Extract the first JSON object embedded in raw model output.

    Markdown code fences are stripped, then a JSON value is decoded starting
    at the first ``{`` in the text. Anything after the end of that object
    (explanations, a second object, stray braces) is ignored.

    Only the first ``{`` is tried on purpose: if the outer object is invalid
    or truncated, ``None`` is returned instead of falling back to one of its
    nested sub-objects.

    Args:
        text: Decoded model response. Empty or ``None`` input yields ``None``.

    Returns:
        The parsed object as a ``dict``, or ``None`` when the text contains
        no ``{`` or the JSON starting there cannot be parsed.
    """
    if not text:
        return None

    # Strip Markdown code fences
    cleaned = text.replace("```json", "").replace("```", "")

    start = cleaned.find("{")
    if start == -1:
        return None

    try:
        # raw_decode parses exactly one JSON value and tolerates trailing
        # text, unlike json.loads on a greedy "first { to last }" span.
        parsed, _end = json.JSONDecoder().raw_decode(cleaned, start)
    except json.JSONDecodeError:
        return None
    return parsed

def create_medical_case_prompt(diagnosis):
    """
    Build the BioMistral instruction prompt for one diagnosis.

    The prompt is written in English and tells the model to produce the JSON
    content in Turkish ("think in English, write in Turkish"). The diagnosis
    is substituted into the instructions and into the JSON template, and the
    whole text is wrapped in ``[INST] ... [/INST]``.
    """
    # Doubled braces are literal braces of the JSON template; the single
    # {diagnosis} fields are filled in by str.format below.
    prompt_template = """[INST] You are an expert Internal Medicine specialist creating a high-quality synthetic patient dataset for medical students.
Your task is to generate a realistic "Virtual Patient" case in JSON format for the diagnosis: **{diagnosis}**.

### INSTRUCTIONS:
1. **Medical Accuracy:** Think step-by-step. Recall the specific pathophysiology, symptoms, and lab values for {diagnosis}.
2. **Language:** The content of the JSON (complaints, findings) MUST be in **TURKISH**.
3. **Format:** Output ONLY a valid JSON object. Do not add introductory text.
4. **Data Integrity:**
   - Vital signs must reflect the severity of {diagnosis}.
   - Lab results must include values with reference ranges, e.g., "WBC: 18.000 (4-10)".

### JSON TEMPLATE:
{{
    "id": "AUTO_ID",
    "category": "Internal Medicine",
    "role": {{
        "name": "Turkish Name",
        "age": 45,
        "gender": "M/F",
        "job": "Job"
    }},
    "chief_complaint": "Patient's primary complaint in Turkish (Hastanƒ±n aƒüzƒ±ndan)",
    "diagnosis": "{diagnosis}",
    "history_instruction": "Detailed instructions for the actor playing the patient (in Turkish). Explain duration, triggers, and pain quality.",
    "vitals": "BP, Pulse, Temp, SpO2 (Pathological values)",
    "physical_exam": "Physical exam findings in Turkish medical terminology (e.g., Ral, Defans, √úf√ºr√ºm)",
    "labs": "Pathological lab results relevant to {diagnosis} with reference ranges.",
    "imaging": "Key imaging findings (CT/USG/MRI) in Turkish radiology terminology."
}}
[/INST]"""
    return prompt_template.format(diagnosis=diagnosis)

# ==========================================
# 4. DIAGNOSIS LIST (paste your own list here if you want a different set)
# ==========================================
# One synthetic case is generated per entry. The section headers below are
# rough groupings only; the index ranges of the original outline did not
# match the actual number of entries, so they are not repeated here.
# Entries must be unique: the resume logic identifies a finished case by its
# diagnosis string.
target_diagnoses = [
    # --- MOST COMMON ---
    "Essential Hypertension",
    "Type 2 Diabetes Mellitus",
    "Hyperlipidemia",
    "Acute Upper Respiratory Infection",
    "Gastroesophageal Reflux Disease (GERD)",
    "Anxiety Disorder",
    "Major Depressive Disorder",
    "Back Pain (Dorsalgia)",
    "Acute Pharyngitis",
    "Acute Bronchitis",
    "Hypothyroidism",
    "Obesity",
    "Osteoarthritis",
    "Urinary Tract Infection",
    "Allergic Rhinitis",
    "Asthma",
    "Vitamin D Deficiency",
    "Insomnia",
    "Migraine",
    "Tension Headache",
    "Acute Otitis Media",
    "Dermatitis/Eczema",
    "Gastritis",
    "Iron Deficiency Anemia",
    "Acne Vulgaris",
    "Sinusitis (Acute/Chronic)",
    "Constipation",
    "Diarrhea (Acute)",
    "Fever of Unknown Origin",
    "Chest Pain (Non-specific)",
    "Palpitations",
    "Dyspepsia",
    "Irritable Bowel Syndrome (IBS)",
    "Vertigo (Benign Paroxysmal Positional)",
    "Tinea Pedis (Athlete's Foot)",
    "Viral Gastroenteritis",
    "Conjunctivitis",
    "Sprain/Strain (Ankle/Wrist)",
    "Dysmenorrhea",
    "Vaginitis",
    "Erectile Dysfunction",
    "Benign Prostatic Hyperplasia (BPH)",
    "Hemorrhoids",
    "Varicose Veins",
    "Vitamin B12 Deficiency",
    "Folate Deficiency",
    "Pre-diabetes",
    "Fatty Liver Disease (NAFLD)",
    "Carpal Tunnel Syndrome",
    "Plantar Fasciitis",

    # --- COMMON CHRONIC AND ACUTE CONDITIONS ---
    "Chronic Obstructive Pulmonary Disease (COPD)",
    "Atrial Fibrillation",
    "Heart Failure (Congestive)",
    "Coronary Artery Disease (CAD)",
    "Chronic Kidney Disease (Stage 1-5)",
    "Pneumonia (Community Acquired)",
    "Influenza",
    "Cellulitis",
    "Deep Vein Thrombosis (DVT)",
    "Pulmonary Embolism",
    "Stroke (Ischemic)",
    "Transient Ischemic Attack (TIA)",
    "Myocardial Infarction (Acute)",
    "Angina Pectoris (Stable)",
    "Angina Pectoris (Unstable)",
    "Peripheral Artery Disease (PAD)",
    "Aortic Stenosis",
    "Mitral Regurgitation",
    "Pericarditis",
    "Endocarditis",
    "Myocarditis",
    "Rheumatoid Arthritis",
    "Systemic Lupus Erythematosus (SLE)",
    "Gout",
    "Psoriatic Arthritis",
    "Ankylosing Spondylitis",
    "Sjogren's Syndrome",
    "Osteoporosis",
    "Hyperthyroidism (Graves' Disease)",
    "Hashimoto's Thyroiditis",
    "Type 1 Diabetes Mellitus",
    "Adrenal Insufficiency (Addison's)",
    "Cushing's Syndrome",
    "Hyperparathyroidism",
    "Polycystic Ovary Syndrome (PCOS)",
    "Helicobacter Pylori Infection",
    "Peptic Ulcer Disease",
    "Celiac Disease",
    "Crohn's Disease",
    "Ulcerative Colitis",
    "Diverticulosis",
    "Diverticulitis",
    "Cholelithiasis (Gallstones)",
    "Cholecystitis",
    "Cirrhosis",
    "Hepatitis B (Chronic)",
    "Hepatitis C (Chronic)",
    "Pancreatitis (Acute)",
    "Pancreatitis (Chronic)",
    "Appendicitis",
    "Hernia (Inguinal)",
    "Hernia (Hiatal)",
    "Kidney Stones (Nephrolithiasis)",
    "Pyelonephritis",
    "Glomerulonephritis",
    "Nephrotic Syndrome",
    "Prostatitis",
    "Testicular Torsion",
    "Epididymitis",
    "Endometriosis",
    "Uterine Fibroids",
    "Ovarian Cysts",
    "Pelvic Inflammatory Disease (PID)",
    "Menopause Symptoms",
    "Pregnancy (Normal)",
    "Preeclampsia",
    "Gestational Diabetes",
    "Breast Cancer",
    "Lung Cancer",
    "Colorectal Cancer",
    "Prostate Cancer",
    "Skin Cancer (Basal Cell)",
    "Skin Cancer (Squamous Cell)",
    "Melanoma",
    "Leukemia (AML)",
    "Leukemia (CLL)",
    "Lymphoma (Hodgkin's)",
    "Lymphoma (Non-Hodgkin's)",
    "Multiple Myeloma",
    "Thrombocytopenia",
    "Hemophilia A",
    "Von Willebrand Disease",
    "Sickle Cell Anemia",
    "Thalassemia",
    "Epilepsy",
    "Multiple Sclerosis",
    "Parkinson's Disease",
    "Alzheimer's Disease",
    "Dementia (Vascular)",
    "Neuropathy (Peripheral)",
    "Sciatica",
    "Bell's Palsy",
    "Trigeminal Neuralgia",

    # --- INFECTIOUS AND SYSTEMIC ---
    "Meningitis (Bacterial)",
    "Meningitis (Viral)",
    "Encephalitis",
    "Sepsis",
    "Septic Shock",
    "Tuberculosis",
    "HIV/AIDS",
    "Malaria",
    "Lyme Disease",
    "Syphilis",
    "Gonorrhea",
    "Chlamydia",
    "Herpes Zoster (Shingles)",
    "Herpes Simplex (Oral/Genital)",
    "Scabies",
    "Candidiasis (Oral Thrush)",
    "Psoriasis",
    "Rosacea",
    "Urticaria (Hives)",
    "Alopecia Areata",
    "Vitiligo",
    "Glaucoma",
    "Cataract",
    "Macular Degeneration",
    "Diabetic Retinopathy",
    "Otitis Externa",
    "Hearing Loss (Sensorineural)",
    "Tinnitus",
    "Epistaxis",
    "Allergic Reaction (Anaphylaxis)",
    "Food Allergy",
    "Schizophrenia",
    "Bipolar Disorder",
    "Obsessive-Compulsive Disorder (OCD)",
    "Post-Traumatic Stress Disorder (PTSD)",
    "Anorexia Nervosa",
    "Bulimia Nervosa",
    "Alcohol Use Disorder",
    "Substance Use Disorder",
    "ADHD (Adult/Child)",
    "Autism Spectrum Disorder",
    "Hyperkalemia",
    "Hypokalemia",
    "Hyponatremia",
    "Hypernatremia",
    "Hypocalcemia",
    "Hypercalcemia",
    "Metabolic Acidosis",
    "Metabolic Alkalosis",
    "Respiratory Acidosis",
    "Respiratory Alkalosis",
    "Pleural Effusion",
    "Pneumothorax",
    "Sleep Apnea (Obstructive)",
    "Sarcoidosis",
    "Pulmonary Fibrosis",
    "Cystic Fibrosis",
    "Bronchiectasis",
    "Aortic Aneurysm (Abdominal)",
    "Aortic Dissection",
    "Carotid Artery Stenosis",
    "Raynaud's Phenomenon",
    "Vasculitis",
    "Giant Cell Arteritis",
    "Polymyalgia Rheumatica",
    "Fibromyalgia",
    "Chronic Fatigue Syndrome",
    "Scleroderma",
    "Myasthenia Gravis",
    "Guillain-Barre Syndrome",
    "Amyotrophic Lateral Sclerosis (ALS)",
    "Huntington's Disease",
    "Restless Legs Syndrome",
    "Essential Tremor",
    "Subarachnoid Hemorrhage",
    "Subdural Hematoma",
    "Epidural Hematoma",
    "Concussion",
    "Esophageal Varices",
    "Barrett's Esophagus",
    "Achalasia",
    "Gastric Ulcer",
    "Duodenal Ulcer",
    "Small Intestinal Bacterial Overgrowth (SIBO)",
    "Lactose Intolerance",
    "Hepatitis A",
    "Hepatitis E",
    "Liver Abscess",
    "Primary Biliary Cholangitis",
    "Primary Sclerosing Cholangitis",
    "Hemochromatosis",
    "Wilson's Disease",
    "Splenomegaly",
    "Polycythemia Vera",
    "Aplastic Anemia",
    "Hemolytic Anemia",
    "G6PD Deficiency",
    "Disseminated Intravascular Coagulation (DIC)",

    # --- SPECIFIC AND RARE CONDITIONS ---
    "Idiopathic Thrombocytopenic Purpura (ITP)",
    "Thrombotic Thrombocytopenic Purpura (TTP)",
    "Diabetes Insipidus",
    "Acromegaly",
    "Prolactinoma",
    "Pheochromocytoma",
    "Conn's Syndrome (Hyperaldosteronism)",
    "Hypopituitarism",
    "Thyroid Nodule",
    "Thyroid Cancer",
    "Bladder Cancer",
    "Kidney Cancer (Renal Cell)",
    "Pancreatic Cancer",
    "Esophageal Cancer",
    "Stomach Cancer",
    "Liver Cancer (HCC)",
    "Brain Tumor (Glioblastoma)",
    "Bone Cancer (Osteosarcoma)",
    "Cervical Cancer",
    "Ovarian Cancer",
    "Testicular Cancer",
    "Hodgkin's Lymphoma",
    "Mesothelioma",
    "Kaposi Sarcoma",
    "Impetigo",
    "Folliculitis",
    "Abscess (Cutaneous)",
    "Lipoma",
    "Seborrheic Dermatitis",
    "Contact Dermatitis",
    "Pemphigus Vulgaris",
    "Bullous Pemphigoid",
    "Lichen Planus",
    "Erythema Multiforme",
    "Stevens-Johnson Syndrome",
    "Toxic Epidermal Necrolysis",
    "Molluscum Contagiosum",
    "Warts (Verruca Vulgaris)",
    "Ganglion Cyst",
    "Baker's Cyst",
    "Rotator Cuff Tear",
    "Frozen Shoulder",
    "Tennis Elbow (Lateral Epicondylitis)",
    "Golfer's Elbow (Medial Epicondylitis)",
    "ACL Tear",
    "Meniscus Tear",
    "Hip Fracture",
    "Scoliosis",
    "Kyphosis",
    "Spinal Stenosis",
    "Herniated Disc",
    "Spondylolisthesis",
    "Bursitis",
    "Tendonitis",
    "Rhabdomyolysis",
    "Osteomyelitis",
    "Septic Arthritis",
    "Rabies",
    "Tetanus",
    "Botulism",
    "Cholera",
    "Typhoid Fever",
    "Dengue Fever",
    "Yellow Fever",
    "Zika Virus",
    "Ebola Virus",
    "Measles",
    "Mumps",
    "Rubella",
    "Pertussis (Whooping Cough)",
    "Diphtheria",
    "Polio",
    "Varicella (Chickenpox)",
    "Infectious Mononucleosis",
    "Cytomegalovirus (CMV)",
    "Toxoplasmosis",
    "Brucellosis",
    "Leishmaniasis",
    "Hydatid Cyst",
    "Amebiasis",
    "Giardiasis",
    "Pinworm Infection",
    "Tapeworm Infection",
    "Ascariasis",
    "Pneumocystis Pneumonia",
    "Histoplasmosis",
    "Aspergillosis",
    "Cryptococcosis",
    "Blastomycosis",
    "Coccidioidomycosis",
    "Hypothermia",
    "Heat Stroke",
    "Dehydration",
    "Burn (1st/2nd/3rd Degree)",
    "Electric Shock",
    "Drowning",
    "Carbon Monoxide Poisoning",
    "Paracetamol Poisoning",
    "Salicylate Poisoning",
    "Alcohol Intoxication",
    "Opioid Overdose",
    "Snake Bite",
    "Insect Sting Allergy",
    "Foreign Body Aspiration",
    "Pneumomediastinum",
    "Pulmonary Hypertension",
    "Cor Pulmonale",
    "Brugada Syndrome",
    "Long QT Syndrome",
    "Wolff-Parkinson-White Syndrome",
    "Hypertrophic Cardiomyopathy",
    "Dilated Cardiomyopathy",
    "Restrictive Cardiomyopathy",
    "Takotsubo Cardiomyopathy",
    "Rheumatic Heart Disease",
    "Coarctation of the Aorta",
    "Patent Foramen Ovale",
    "Atrial Septal Defect",
    "Ventricular Septal Defect",
    "Tetralogy of Fallot",
    "Kawasaki Disease",
    "Henoch-Schonlein Purpura",
    "Croup",
    "Epiglottitis",
    "Bronchiolitis",
    "Hand, Foot, and Mouth Disease",
    "Fifth Disease (Erythema Infectiosum)",
    "Roseola",
    "Scarlet Fever",
    "Rickets",
    "Intussusception",
    "Pyloric Stenosis",
    "Hirschsprung Disease",
    "Meckel's Diverticulum",
    "Wilms Tumor",
    "Neuroblastoma",
    "Retinoblastoma",
    "Turner Syndrome",
    "Klinefelter Syndrome",
    "Down Syndrome",
    "Marfan Syndrome",
    "Ehlers-Danlos Syndrome",
    "Cystic Kidney Disease",
    "Alport Syndrome",
    "Goodpasture Syndrome",
    "Wegener's Granulomatosis",
    "Behcet's Disease",
    "Familial Mediterranean Fever",
    "Amyloidosis",
    "Sarcoma",
    "Myelodysplastic Syndrome",
    "Essential Thrombocythemia",
    "Primary Myelofibrosis",
    "Hairy Cell Leukemia",
    "Burkitt Lymphoma",
    "Mycosis Fungoides",
    "Sezary Syndrome",
    "Waldenstrom Macroglobulinemia",
    "Mastocytosis",
    "Porphyria",
    "Hemochromatosis (Hereditary)",
    "Alpha-1 Antitrypsin Deficiency",
    "Gilbert's Syndrome",
    "Dubin-Johnson Syndrome",
    "Rotor Syndrome",
    "Crigler-Najjar Syndrome",
    "Zollinger-Ellison Syndrome",
    "VIPoma",
    "Insulinoma",
    "Glucagonoma",
    "Somatostatinoma",
    "Carcinoid Syndrome",
    "Multiple Endocrine Neoplasia (MEN 1)",
    "Multiple Endocrine Neoplasia (MEN 2)",
    "Autoimmune Polyendocrine Syndrome",
    "Diabetes Mellitus Type 1.5 (LADA)",
    "MODY (Maturity Onset Diabetes of Young)",
    "Central Pontine Myelinolysis",
    "Wernicke's Encephalopathy",
    "Korsakoff Syndrome",
    "Normal Pressure Hydrocephalus",
    "Pseudotumor Cerebri",
    "Transverse Myelitis",
    "Creutzfeldt-Jakob Disease",
    "Reye Syndrome",
    "Sudden Infant Death Syndrome (SIDS)",
    "Failure to Thrive",
    "Preterm Birth Complications",
    "Neonatal Jaundice",
    "Neonatal Sepsis",
    "Respiratory Distress Syndrome (Newborn)",
    "Meconium Aspiration Syndrome",
    "Transient Tachypnea of Newborn",
    "Necrotizing Enterocolitis",
    "Cleft Lip/Palate",
    "Clubfoot",
    "Developmental Dysplasia of Hip",
    "Osgood-Schlatter Disease",
    "Legg-Calve-Perthes Disease",
    "Slipped Capital Femoral Epiphysis",
    "Osteogenesis Imperfecta",
    "Achondroplasia",
    "Spinal Muscular Atrophy",
    "Duchenne Muscular Dystrophy",
    "Becker Muscular Dystrophy",
    "Charcot-Marie-Tooth Disease",
    "Friedreich's Ataxia",
    "Neurofibromatosis",
    "Tuberous Sclerosis",
    "Sturge-Weber Syndrome",
    "Von Hippel-Lindau Disease",
    "Li-Fraumeni Syndrome",
    "Lynch Syndrome",
    "FAP (Familial Adenomatous Polyposis)",
    "Peutz-Jeghers Syndrome",
    "Cowden Syndrome",
    "Gardner Syndrome",
    "Turcot Syndrome",
    "Cronkhite-Canada Syndrome",
    "Menetrier's Disease",
    "Whipple's Disease",
    "Tropical Sprue",
    "Short Bowel Syndrome",
    "Blind Loop Syndrome",
    "Dumping Syndrome",
    "Afferent Loop Syndrome",
    "Mallory-Weiss Tear",
    "Boerhaave Syndrome",
    "Plummer-Vinson Syndrome",
    "Zenker's Diverticulum",
    "Sialadenitis",
    "Sialolithiasis",
    "Parotitis",
    "Ludwig's Angina",
    "Retropharyngeal Abscess",
    "Peritonsillar Abscess",
    "Vocal Cord Nodules",
    "Vocal Cord Paralysis",
    "Laryngitis",
    "Pharyngitis (Streptococcal)",
    "Dacryocystitis",
    "Blepharitis",
    "Chalazion",
    "Hordeolum (Stye)",
    "Pterygium",
    "Pinguecula",
    "Keratitis",
    "Uveitis",
    "Iritis",
    "Retinal Detachment",
    "Central Retinal Artery Occlusion",
    "Central Retinal Vein Occlusion",
    "Optic Neuritis",
    "Papilledema",
    "Amaurosis Fugax",
    "Strabismus",
    "Amblyopia",
    "Nystagmus",
    "Presbycusis",
    "Meniere's Disease",
    "Labyrinthitis",
    "Vestibular Neuritis",
    "Acoustic Neuroma",
    "Cholesteatoma",
    "Otosclerosis",
    "Mastoiditis",
    "Barotrauma",
    "Motion Sickness",
    "Altitude Sickness",
    "Decompression Sickness",
    "Radiation Sickness",
    "Lead Poisoning",
    "Mercury Poisoning",
    "Arsenic Poisoning",
    "Cyanide Poisoning",
    "Organophosphate Poisoning",
    "Mushroom Poisoning",
    "Botulism (Foodborne)",
    "Food Poisoning (Staph Aureus)",
    "Food Poisoning (Bacillus Cereus)",
    "Food Poisoning (E. Coli)",
    "Food Poisoning (Salmonella)",
    "Food Poisoning (Campylobacter)",
    "Food Poisoning (Shigella)",
    "Food Poisoning (Listeria)",
    "Food Poisoning (Yersinia)",
    "Food Poisoning (Vibrio)",
    "Pseudomembranous Colitis (C. Diff)",
    "Gas Gangrene",
    "Necrotizing Fasciitis",
    "Fournier's Gangrene",
    "Toxic Shock Syndrome",
    "Scalded Skin Syndrome",
    "Erysipelas",
    "Carbuncle",
    "Furuncle",
    "Paronychia",
    "Onychomycosis",
    "Tinea Corporis (Ringworm)",
    "Tinea Capitis",
    "Tinea Cruris (Jock Itch)",
    "Tinea Versicolor",
    "Sporotrichosis",
    "Actinomycosis",
    "Nocardiosis",
    "Leprosy",
    "Plague",
    "Tularemia",
    "Anthrax",
    "Q Fever",
    "Typhus",
    "Rocky Mountain Spotted Fever",
    "Ehrlichiosis",
    "Babesiosis",
    "Chagas Disease",
    "Sleeping Sickness (Trypanosomiasis)",
    "Filariasis",
    "Schistosomiasis",
    "Strongyloidiasis",
    "Trichinosis",
    "Hookworm Infection",
    "Whipworm Infection",
    "Guinea Worm Disease",
    "River Blindness (Onchocerciasis)",
    "Trachoma",
    "Mycetoma",
    "Chromoblastomycosis",
    "Granuloma Inguinale",
    "Chancroid",
    "Lymphogranuloma Venereum",
    "Bacterial Vaginosis",
    "Trichomoniasis",
    "Atrophic Vaginitis",
    "Bartholin's Cyst",
    "Lichen Sclerosus",
    "Vulvodynia",
    "Vaginismus",
    "Premenstrual Syndrome (PMS)",
    "Premenstrual Dysphoric Disorder",
    "Mittelschmerz",
    "Ovarian Torsion",
    "Ectopic Pregnancy",
    "Molar Pregnancy",
    "Hyperemesis Gravidarum",
    "Placenta Previa",
    "Placental Abruption",
    "Postpartum Hemorrhage",
    "Postpartum Depression",
    "Mastitis",
    "Galactorrhea",
    "Gynecomastia",
    "Male Hypogonadism",
    "Male Infertility",
    "Female Infertility",
    "Premature Ovarian Failure",
    "Asherman's Syndrome",
    "Sheehan's Syndrome",
    "Empty Sella Syndrome",
    "Kallmann Syndrome",
    "Prader-Willi Syndrome",
    "Angelman Syndrome",
    "Fragile X Syndrome",
    "Tay-Sachs Disease",
    "Gaucher's Disease",
    "Niemann-Pick Disease",
    "Fabry Disease",
    "Pompe Disease",
    "McArdle Disease",
    "Von Gierke Disease",
    "Galactosemia",
    "Phenylketonuria (PKU)",
    "Maple Syrup Urine Disease",
    "Homocystinuria",
    "Albinism",
    "Progeria",
    "Werner Syndrome",
    "Munchausen Syndrome",
    "Munchausen by Proxy",
    "Factitious Disorder",
    "Conversion Disorder",
    "Hypochondriasis (Illness Anxiety)",
    "Body Dysmorphic Disorder",
    "Trichotillomania",
    "Kleptomania",
    "Night Terrors",
    "Sleepwalking",
    "Bruxism",
    "Temporomandibular Joint Disorder (TMJ)",
    "Gingivitis",
    "Periodontitis",
    "Dental Caries",
    "Oral Leukoplakia",
    "Oral Lichen Planus",
    "Geographic Tongue",
    "Black Hairy Tongue",
    "Burning Mouth Syndrome",
    "Xerostomia",
    "Halitosis",
    "Aphthous Ulcer",
]

# Guard against accidental repeats (e.g. after pasting in a longer list):
# drop duplicates while keeping the first occurrence and the original order.
target_diagnoses = list(dict.fromkeys(target_diagnoses))

# ==========================================
# 5. MAIN GENERATION LOOP (SAFE SAVING)
# ==========================================

output_filename = "medikal_vaka_dataset_final.jsonl"
print(f"\nüöÄ Vaka √úretimi Ba≈ülƒ±yor... Hedef: {len(target_diagnoses)} Vaka")
print(f"üíæ Veriler anlƒ±k olarak '{output_filename}' dosyasƒ±na kaydedilecek.\n")

# Resume support: collect the diagnoses that already have a saved case.
existing_ids = set()
if os.path.exists(output_filename):
    with open(output_filename, "r", encoding="utf-8") as f:
        for line in f:
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Blank or partially written line; ignore it.
                continue
            done_diagnosis = data.get("diagnosis") if isinstance(data, dict) else None
            # Only real diagnosis strings count as "done"; a missing field
            # must not be recorded as None.
            if isinstance(done_diagnosis, str):
                existing_ids.add(done_diagnosis)
    print(f"‚ôªÔ∏è {len(existing_ids)} vaka daha √∂nce √ºretilmi≈ü, bunlar atlanacak.")

saved_count = 0
failed_count = 0

# Open the output file in append mode so earlier results are kept.
with open(output_filename, "a", encoding="utf-8") as f_out:

    progress_bar = tqdm(target_diagnoses, desc="√úretiliyor")

    for i, diagnosis in enumerate(progress_bar):
        # Skip diagnoses that already have a case (from a previous run, or
        # from an earlier duplicate entry in this run).
        if diagnosis in existing_ids:
            continue

        try:
            # 1. Build the prompt and move the tokenized input to the model's device.
            prompt = create_medical_case_prompt(diagnosis)
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

            # 2. Generate (BioMistral)
            outputs = model.generate(
                **inputs,
                max_new_tokens=1024,
                do_sample=True,
                temperature=0.4,  # low temperature for accuracy
                top_p=0.9,
                pad_token_id=tokenizer.eos_token_id,
            )

            # 3. Decode only the newly generated tokens (drop the echoed prompt).
            prompt_length = inputs["input_ids"].shape[1]
            response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

            # 4. Extract the JSON object from the response.
            case_data = extract_json_from_text(response)

            if isinstance(case_data, dict) and case_data:
                # Attach a timestamp-based id.
                case_data["id"] = f"AUTO_{int(time.time()*1000)}_{i}"
                # Store the canonical diagnosis string rather than whatever
                # the model wrote: the resume logic above matches on this
                # field, and the model may translate or reword it.
                case_data["diagnosis"] = diagnosis

                # 5. Write to disk immediately (flush after every case).
                json_line = json.dumps(case_data, ensure_ascii=False)
                f_out.write(json_line + "\n")
                f_out.flush()

                existing_ids.add(diagnosis)
                saved_count += 1
            else:
                # No usable JSON: report it, but keep going.
                failed_count += 1
                tqdm.write(f"\n‚ö†Ô∏è JSON olu≈üturulamadƒ±: {diagnosis}")

        except Exception as e:
            # Do not break the loop on a per-case error, but make it visible.
            failed_count += 1
            tqdm.write(f"\n‚ùå Hata ({diagnosis}): {e}")

        # Periodic GPU cache cleanup (every 10th list position), regardless
        # of whether this case succeeded.
        if i % 10 == 0:
            torch.cuda.empty_cache()

print(f"Toplam: {saved_count} vaka kaydedildi, {failed_count} hata.")
print(f"\nüéâ ƒ∞≈ûLEM TAMAMLANDI! T√ºm vakalar '{output_filename}' dosyasƒ±na kaydedildi.")
print("Dosyayƒ± sol paneldeki klas√∂r simgesinden indirebilirsin.")

‚öôÔ∏è K√ºt√ºphaneler kuruluyor (Bu i≈ülem 1-2 dakika s√ºrebilir)...
