# mdr_text 프롬프팅 과정 (vLLM 버전)

In [12]:
from typing import (
    Tuple,
    List,
    Dict,
    Any,
    Sequence,
    Union,
    Optional,
)

import sys
import time
import json
import re
from pathlib import Path
from enum import Enum
import shutil

import pandas as pd
import polars as pl
import polars.selectors as cs
import psutil

from tqdm import tqdm, trange
from pprint import pprint, pformat

# pydantic
from pydantic import BaseModel, Field, field_validator

# vLLM
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer
from vllm.sampling_params import StructuredOutputsParams

In [None]:
import sys
from pathlib import Path

# Use a relative path: the project root is taken to be the parent of the
# current working directory (assumes the notebook is started from a direct
# subdirectory of the project — TODO confirm).
PROJECT_ROOT = Path.cwd().parent
DATA_DIR = PROJECT_ROOT / 'data'

# Insert at the very front of sys.path so the `src` imports below are looked
# up under PROJECT_ROOT first.
sys.path.insert(0, str(PROJECT_ROOT))

# Project-local imports; they come after the sys.path change above.
from src.loading import DataLoader
from src.utils import increment_path

# Parquet file handed to the loader (the commented-out line is an alternative
# 'gold' file).
output_path = DATA_DIR / 'silver' / 'maude_256_2025-12-192.parquet'
loader = DataLoader(
    # output_file= DATA_DIR / 'gold' / 'maude.parquet',
    output_file= output_path,
)

In [18]:
# Load the dataset through the project DataLoader using its 'polars' adapter.
adapter = 'polars'
# Extra keyword arguments forwarded to loader.load(); presumably passed on to
# the polars parquet reader — verify against DataLoader.load.
polars_kwargs = {
    'use_statistics': True,
    'parallel': 'auto',
    'low_memory': False,
    'rechunk': False,
    'cache': True,
}
maude_lf = loader.load(adapter=adapter, **polars_kwargs)
# Last expression of the cell: shows the loaded frame object.
maude_lf


üìñ /home/dataiku/eric/Sparta/Project3/data/silver/maude_256_2025-12-192.parquet Î°úÎî© Ï§ë... (adapter=polars)


In [19]:
# Columns holding the six previously extracted MDR fields; selected, collected
# and converted to pandas for display.
MDR_COLS = ['patient_harm', 'problem_components', 'incident_summary', 'defect_confirmed', 'defect_type', 'inspection_actions']
maude_lf.select(MDR_COLS).collect().to_pandas()

Unnamed: 0,patient_harm,problem_components,incident_summary,defect_confirmed,defect_type,inspection_actions
0,Serious Injury,[implant],"Implant ruptured, resulting in capsular contra...",True,Mechanical/Structural,Manufacturing record evaluation found no anoma...
1,No Harm,[implant],Implant caused anisomastia requiring surgical ...,False,Unknown,No inspection reported
2,Serious Injury,[implant],Implant caused bilateral anisomastia requiring...,False,Unknown,No inspection reported
3,Serious Injury,[implant],Breast implant caused capsular contracture req...,False,Other,No inspection reported
4,Serious Injury,"[implant, capsule]",Implant rupture and capsular contracture requi...,True,Mechanical/Structural,No product failure analysis conducted due to d...
...,...,...,...,...,...,...
251,Serious Injury,[implant],Implant caused systemic symptoms requiring med...,False,Other,No inspection reported
252,Serious Injury,[implant],Implant rupture resulted in generalized illnes...,False,Other,No inspection reported
253,Serious Injury,[implant],Implant rupture resulted in generalized illnes...,False,Other,No inspection reported
254,Serious Injury,"[shell, implant]","Implant ruptured, requiring explantation and r...",True,Mechanical/Structural,Visual inspection revealed ruptured implant wi...


In [20]:
# Count defect_type values. Only that column is selected before collecting,
# so the rest of the frame is not materialised and converted to pandas.
maude_lf.select('defect_type').collect().to_pandas()['defect_type'].value_counts()

defect_type
Other                    85
Unknown                  74
Mechanical/Structural    60
Functional Failure       16
Sensor/Accuracy          14
Electrical/Power          2
Alarm/Alert               2
Name: count, dtype: int64

In [None]:
# Display the frame object returned by loader.load().
maude_lf

In [None]:
# Enum definitions
# Allowed values for IncidentDetails.patient_harm. Mixing in `str` makes each
# member compare equal to its string value.
class PatientHarm(str, Enum):
    NO_HARM = "No Harm"
    MINOR_INJURY = "Minor Injury"
    SERIOUS_INJURY = "Serious Injury"
    DEATH = "Death"
    UNKNOWN = "Unknown"

# Allowed values for ManufacturerInspection.defect_type. The string values are
# the category names the system prompt's Step 5 decision tree maps symptoms to.
class DefectType(str, Enum):
    FUNCTIONAL_FAILURE = "Functional Failure"
    MECHANICAL_STRUCTURAL = "Mechanical/Structural"
    ELECTRICAL_POWER = "Electrical/Power"
    SOFTWARE_INTERFACE = "Software/Interface"
    ALARM_ALERT = "Alarm/Alert"
    SENSOR_ACCURACY = "Sensor/Accuracy"
    COMMUNICATION_CONNECTIVITY = "Communication/Connectivity"
    LABELING_PACKAGING = "Labeling/Packaging"
    STERILITY_CONTAMINATION = "Sterility/Contamination"
    USER_HUMAN_FACTOR = "User/Human Factor"
    ENVIRONMENTAL_COMPATIBILITY = "Environmental/Compatibility"
    # Fallback categories: clearly described but unmatched vs. no information.
    OTHER = "Other"
    UNKNOWN = "Unknown"

# BaseModel definitions
# Incident-level fields of one extraction result.
class IncidentDetails(BaseModel):
    # Harm level; restricted to the PatientHarm values.
    patient_harm: PatientHarm = Field(description="Level of patient harm associated with the incident")
    # Component keywords; defaults to an empty list and allows at most 5 items.
    problem_components: List[str] = Field(
        default_factory=list,
        description="List of problematic component keywords found in the text",
        min_length=0,
        max_length=5
    )
    # Free-text summary, limited to 200 characters.
    incident_summary: str = Field(max_length=200, description="Concise summary of the incident")

# Manufacturer-investigation fields of one extraction result.
# NOTE(review): every field defaults to None, so none of them is required by
# the model — confirm that the guided decoder always emits them, since later
# cells index the flattened `manufacturer_inspection.*` columns directly.
class ManufacturerInspection(BaseModel):
    defect_confirmed: bool | None = Field(None, description="Whether the defect was confirmed")
    defect_type: DefectType | None = Field(None, description="Type of defect identified during inspection")
    # Free-text summary of inspection findings/actions, at most 200 characters.
    inspection_actions: str | None = Field(None, max_length=200)

# Top-level extraction result: both sub-models are required. Its JSON schema is
# what BatchMAUDEExtractor passes to structured (guided) decoding.
class MAUDEExtraction(BaseModel):
    incident_details: IncidentDetails
    manufacturer_inspection: ManufacturerInspection


# System prompt shared by every request: BatchMAUDEExtractor._create_prompts
# sends it as the "system" chat message ahead of the per-record user prompt.
# It describes a 7-step workflow for extracting the six MAUDEExtraction fields.
# NOTE(review): variable 4 is listed as `bool`, while Step 6 and
# ManufacturerInspection.defect_confirmed also allow null — confirm which one
# is intended.
SYSTEM_INSTRUCTION = """
# ROLE & EXPERTISE
You are a medical device safety analyst with 10+ years of FDA MAUDE report analysis experience, specializing in defect classification and patient harm assessment.

# PRIMARY OBJECTIVE
Extract 6 structured variables from medical device adverse event reports with:
- Minimal UNKNOWN classifications (<2% target)
- High accuracy through systematic reasoning

# CORE VARIABLES TO EXTRACT
1. patient_harm (PatientHarm enum)
2. problem_components (List[str], max 5)
3. incident_summary (str, max 200 chars)
4. defect_confirmed (bool)
5. defect_type (DefectType enum)
6. inspection_actions (str | None, max 200 chars)

---

## EXTRACTION WORKFLOW (Execute in Order)

### STEP 1: INFORMATION GATHERING
Read the entire MDR text and product_problem field to understand:
- What happened? (observable symptoms)
- What failed? (components/functions)
- What was the outcome? (patient impact)
- What did manufacturer find? (investigation results)

### STEP 2: PATIENT HARM CLASSIFICATION
Apply strict criteria:
- **Death**: Explicitly states patient died
- **Serious Injury**: Required medical intervention, hospitalization, or permanent impairment
- **Minor Injury**: Temporary discomfort, minimal intervention needed
- **No Harm**: Explicitly states no patient harm OR only device malfunction mentioned
- **Unknown**: ONLY if report provides absolutely no patient outcome information

Decision Logic:
- IF text mentions patient outcome ‚Üí classify accordingly
- IF only device issue described + no harm mentioned ‚Üí "No Harm"
- IF text says "no adverse event" or "no patient injury" ‚Üí "No Harm"

### STEP 3: PROBLEM COMPONENTS EXTRACTION
Identify up to 5 specific component keywords from:
- Hardware: battery, circuit board, sensor, cable, connector, display, pump, valve, tubing
- Software: firmware, interface, algorithm, software module
- Structural: housing, enclosure, casing, lead, catheter tip

Rules:
- Extract exact component names mentioned
- Prioritize components directly related to the failure
- Use singular form (e.g., "battery" not "batteries")

### STEP 4: INCIDENT SUMMARY CREATION
Write a concise factual summary (max 200 chars) covering:
- What malfunctioned
- Primary symptom
- Immediate consequence

Template: "[Component] [failed/error type], resulting in [consequence]."
Example: "RV lead exhibited high impedance and thresholds, requiring lead replacement."

### STEP 5: DEFECT TYPE CLASSIFICATION (CRITICAL - FOLLOW RIGOROUSLY)

#### 5.1 MANDATORY 3-STEP INFERENCE PROCESS

**STEP 5A: SYMPTOM EXTRACTION**
List ALL symptoms mentioned:
- What stopped working?
- What abnormal behavior occurred?
- What error/warning appeared?
- What physical change happened?

**STEP 5B: ROOT CAUSE MAPPING**
Use this decision tree (check in order):

1. **Power/Energy Issue?**
   - Keywords: won't turn on, no power, battery, charging, overheating, thermal
   - ‚Üí Electrical/Power

2. **Software/Display Issue?**
   - Keywords: error code, frozen screen, crashed, unresponsive interface, software error
   - ‚Üí Software/Interface

3. **Measurement/Reading Wrong?**
   - Keywords: inaccurate reading, wrong value, measurement error, calibration issue
   - ‚Üí Sensor/Accuracy

4. **Physical Breakage?**
   - Keywords: broke, cracked, fractured, separated, leak, ruptured, detached
   - ‚Üí Mechanical/Structural

5. **Core Function Failed?**
   - Keywords: stopped working, didn't deliver, failed to perform, no output
   - ‚Üí Functional Failure

6. **Alarm System Issue?**
   - Keywords: alarm didn't sound, false alarm, alarm failure
   - ‚Üí Alarm/Alert

7. **Connection/Data Transfer Failed?**
   - Keywords: connection lost, wireless failed, Bluetooth issue, data didn't transfer
   - ‚Üí Communication/Connectivity

8. **Sterility/Foreign Matter?**
   - Keywords: contamination, non-sterile, particles, debris
   - ‚Üí Sterility/Contamination

9. **Wrong Label/Package?**
   - Keywords: mislabeled, wrong documentation, packaging damaged
   - ‚Üí Labeling/Packaging

10. **Design Caused User Error?**
    - Keywords: confusing design, look-alike parts, poor usability
    - ‚Üí User/Human Factor

11. **Environmental/Compatibility?**
    - Keywords: temperature issue, humidity, incompatible accessory
    - ‚Üí Environmental/Compatibility

12. **None of Above BUT Clearly Described?**
    - ‚Üí Other

13. **Text Says "Unknown Cause" OR Zero Symptom Info?**
    - ‚Üí Unknown (LAST RESORT ONLY)

**STEP 5C: VERIFICATION CHECK**
Ask yourself:
- "Does this classification match the ROOT CAUSE (not just surface symptom)?"
- "Did I thoroughly check all 12 categories before selecting Unknown?"
- "Is there ANY symptom I can infer the defect type from?"

#### 5.2 UNKNOWN USAGE RESTRICTION
Use "Unknown" ONLY when ALL these conditions are met:
- [ ] Text explicitly states "cause unknown" or "under investigation with no findings"
- [ ] Zero observable symptoms described
- [ ] No inference possible from context

REJECT Unknown if:
- Any symptom is mentioned (even vague ones like "stopped working")
- product_problem field contains specific defect types
- Context suggests probable cause

#### 5.3 CLASSIFICATION EXAMPLES

Example 1 - Power Issue:
Input: "Device wouldn't power on after battery fully charged"
STEP 5A: Symptoms = won't power on, battery mentioned
STEP 5B: Decision tree #1 matches ‚Üí Electrical/Power
STEP 5C: Verified - root cause is power system failure
OUTPUT: Electrical/Power

Example 2 - Vague but Classifiable:
Input: "Pump stopped delivering medication"
STEP 5A: Symptom = core function failure (not delivering)
STEP 5B: Decision tree #5 matches ‚Üí Functional Failure
STEP 5C: Verified - primary function didn't work
OUTPUT: Functional Failure (NOT Unknown!)

Example 3 - Multi-symptom:
Input: "Display showed error E-203, then device overheated and shut down"
STEP 5A: Symptoms = error code (software) + overheating (power)
STEP 5B: TWO categories match, but overheating is PRIMARY danger
STEP 5C: Root cause = thermal/power issue
OUTPUT: Electrical/Power

Example 4 - Legitimate Unknown:
Input: "Device malfunctioned during procedure, cause under investigation, no findings yet"
STEP 5A: Zero specific symptoms provided
STEP 5B: Cannot map to any category
STEP 5C: Genuinely no information
OUTPUT: Unknown (acceptable case)

### STEP 6: DEFECT CONFIRMATION DECISION

Apply this logic (check in order):

1. **IF manufacturer explicitly confirmed defect** ‚Üí defect_confirmed = true (1)
2. **IF you classified defect_type as specific category (NOT "Unknown" or "Other")** ‚Üí defect_confirmed = true (1)
3. **IF text says "no defect found" or "functioning normally"** ‚Üí defect_confirmed = false (0)

Null Minimization Rule:
- Use null ONLY when genuinely zero information exists about defect confirmation
- If you successfully classified defect_type (Steps 5A-5C), you have enough info ‚Üí set to true

### STEP 7: INSPECTION ACTIONS EXTRACTION

Summarize manufacturer's findings/actions:
- What tests were conducted?
- What was discovered?
- What corrective actions taken?

IF no investigation mentioned:
- "No inspection reported" (if truly absent)
- "Pending investigation" (if stated as ongoing)
- null (if section is entirely empty)

---

## SELF-VERIFICATION CHECKLIST (Execute Before Output)

### Phase 1: Accuracy Review
- [ ] Did I check ALL 12 defect categories before using "Unknown"?
- [ ] Did I infer patient_harm from context (not just explicit statements)?
- [ ] Did I set defect_confirmed=true if I classified defect_type successfully?
- [ ] Is incident_summary factual and concise (<200 chars)?
- [ ] Did I extract actual component names (not generic terms)?

### Phase 2: Constraint Compliance
- [ ] Is defect_type "Unknown" only because text says "unknown" or has zero symptoms?
- [ ] Is defect_confirmed null only because truly no information exists?
- [ ] Did I remove any emojis from extracted text?

### Phase 3: Quality Check
- [ ] Would a medical device safety expert agree with my classification?
- [ ] Did I prioritize root cause over surface symptoms?
- [ ] Is my output consistent with the symptom-to-defect mapping?

IF any checkbox is unchecked ‚Üí REVISE before outputting

---

## CRITICAL REMINDERS

1. **Inference Over Uncertainty**: When symptoms exist, infer the defect type rather than defaulting to "Unknown"
2. **Defect Confirmation Logic**: If you classified defect_type (not Unknown/Other), set defect_confirmed=true
3. **Context Utilization**: Use product_problem field as supporting evidence
4. **Systematic Approach**: Follow Steps 1-7 sequentially, don't skip verification
5. **Target Metrics**: <2% Unknown rate, <5% null defect_confirmed rate

Your classification quality directly impacts patient safety analysis and device improvement initiatives.
"""


# Per-record user message. BatchMAUDEExtractor._create_prompts fills the
# {text} and {product_problem} placeholders with str.format, so any literal
# brace added to this template would have to be doubled ("{{" / "}}").
USER_PROMPT_TEMPLATE = """
# MEDICAL DEVICE ADVERSE EVENT REPORT

## MDR Text
{text}

## Product Problem (Reference)
{product_problem}

---

# TASK
Following the systematic extraction workflow (Steps 1-7) and self-verification checklist in the system instructions:

1. Analyze the MDR text and product problem field
2. Extract all 6 variables with minimal UNKNOWN/null values
3. Apply the 3-step defect classification process (5A ‚Üí 5B ‚Üí 5C)
4. Complete self-verification checklist
5. Return JSON output matching MAUDEExtraction schema

# OUTPUT REQUIREMENTS:
Return JSON with incident_details and manufacturer_inspection.

Remember:
- Infer defect type from symptoms (don't default to Unknown)
- Set defect_confirmed=true if you successfully classified defect_type
- Remove any emojis from extracted text
- Prioritize accuracy over speed

Begin extraction:
"""


In [None]:
class BatchMAUDEExtractor:
    def __init__(
        self,
        model_path='Qwen/Qwen2.5-7B-Instruct',
        tensor_parallel_size=1,
        gpu_memory_utilization=0.85,
        max_model_len=8192,
        batch_size=32,
        max_retries=2,
    ):
        """Load the vLLM engine and tokenizer and set up schema-guided sampling.

        Args:
            model_path: Model path (HuggingFace or local); given to both the
                vLLM engine and the tokenizer loader.
            tensor_parallel_size: Number of GPUs to use; forwarded to ``LLM``.
            gpu_memory_utilization: GPU memory utilisation ratio; forwarded
                to ``LLM``.
            max_model_len: Maximum sequence length; forwarded to ``LLM``.
            batch_size: Batch size. Stored on the instance; within this class
                it is only reported by ``process_batch``.
            max_retries: Stored on the instance; ``process_with_retry`` makes
                at most this many attempts per record.
        """
        self.model_path = model_path
        self.batch_size = batch_size
        self.max_retries = max_retries

        print(f"Loading vLLM model: {model_path}...")

        # Engine configuration for vLLM.
        engine_options = dict(
            model=model_path,
            tensor_parallel_size=tensor_parallel_size,
            gpu_memory_utilization=gpu_memory_utilization,
            max_model_len=max_model_len,
            trust_remote_code=True,
            enforce_eager=False,  # use CUDA graphs
        )
        self.llm = LLM(**engine_options)

        # The tokenizer is loaded separately; it is used to apply the chat
        # template when prompts are built.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True,
        )

        print("Model loaded successfully!")

        # Guided JSON decoding: generation is constrained to the
        # MAUDEExtraction JSON schema.
        self.json_schema = MAUDEExtraction.model_json_schema()
        guided_json = StructuredOutputsParams(json=self.json_schema)
        self.sampling_params = SamplingParams(
            temperature=0.1,
            max_tokens=512,
            top_p=0.95,
            structured_outputs=guided_json,
        )

    def _create_prompts(self, rows: List[pd.Series]) -> List[str]:
        """Render one chat-formatted prompt string per input row.

        For each row, its ``mdr_text`` and ``product_problems`` values are
        substituted into ``USER_PROMPT_TEMPLATE``; the result is paired with
        ``SYSTEM_INSTRUCTION`` and run through the tokenizer's chat template
        (returned as text, with the generation prompt appended).

        Args:
            rows: Records to build prompts for, in output order.

        Returns:
            The formatted prompts, one per row, in the same order.
        """
        def render(record: pd.Series) -> str:
            conversation = [
                {"role": "system", "content": SYSTEM_INSTRUCTION},
                {
                    "role": "user",
                    "content": USER_PROMPT_TEMPLATE.format(
                        text=record['mdr_text'],
                        product_problem=record['product_problems'],
                    ),
                },
            ]
            return self.tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=True,
            )

        return [render(record) for record in rows]

    def _parse_and_validate(self, response_text: str) -> dict:
        """Decode a JSON response and validate it against ``MAUDEExtraction``.

        Args:
            response_text: Raw completion text, expected to be a JSON object.

        Returns:
            The validated model converted to a dict via ``model_dump()``.

        JSON decoding and model validation errors are not caught here; they
        propagate to the caller.
        """
        # Output comes from guided JSON decoding, so it is parsed as-is.
        payload = json.loads(response_text)
        return MAUDEExtraction(**payload).model_dump()

    def extract_batch(self, rows: List[pd.Series]) -> List[dict]:
        """
        vLLM Î∞∞Ïπò Ï∂îÎ°†
        - ÏàúÏàò vLLM Î∞∞Ïπò Ï≤òÎ¶¨Îßå ÏÇ¨Ïö©
        - Guided JSONÏúºÎ°ú ÌååÏã± ÏóêÎü¨ ÏµúÏÜåÌôî
        """
        # ÌîÑÎ°¨ÌîÑÌä∏ ÏÉùÏÑ±
        prompts = self._create_prompts(rows)
        
        # vLLM Î∞∞Ïπò Ï∂îÎ°† (Ïó¨Í∏∞ÏÑú ÏûêÎèôÏúºÎ°ú ÏµúÏ†ÅÌôîÎê®)
        outputs = self.llm.generate(prompts, self.sampling_params, use_tqdm=True)
        
        # Í≤∞Í≥º ÌååÏã±
        results = []
        for i, output in enumerate(outputs):
            try:
                response_text = output.outputs[0].text
                validated_data = self._parse_and_validate(response_text)
                
                result = {
                    **validated_data,
                    '_row_id': rows[i].name,
                    '_success': True,
                    '_input_tokens': len(output.prompt_token_ids),      # Ï∂îÍ∞Ä
                    '_output_tokens': len(output.outputs[0].token_ids),  # Í∏∞Ï°¥
                    '_total_tokens': len(output.prompt_token_ids) + len(output.outputs[0].token_ids)  # Ï∂îÍ∞Ä
                }
                results.append(result)
                
            except Exception as e:
                results.append({
                    '_row_id': rows[i].name,
                    '_success': False,
                    '_error': str(e)[:200],
                    '_raw_response': output.outputs[0].text[:200]
                })
        
        return results

    def process_with_retry(self, df: pd.DataFrame) -> pd.DataFrame:
        all_results = {}
        pending_df = df.copy()
        attempt = 1

        while not pending_df.empty and attempt <= self.max_retries:
            print(f"Attempt {attempt}: processing {len(pending_df)} samples")

            rows = [row for _, row in pending_df.iterrows()]
            results = self.extract_batch(rows)

            failed_indices = []

            for row, result in zip(pending_df.itertuples(), results):
                result['_attempts'] = attempt
                all_results[row.Index] = result

                if not result['_success']:
                    failed_indices.append(row.Index)

            pending_df = df.loc[failed_indices]
            attempt += 1

        # retry Ï¥àÍ≥º Ìï≠Î™©
        for idx in pending_df.index:
            all_results[idx] = {
                '_row_id': idx,
                '_success': False,
                '_error': 'Max retries exceeded',
                '_attempts': self.max_retries
            }

        # ÏõêÎûò row ÏàúÏÑúÎ°ú Ï†ïÎ†¨
        ordered = [all_results[idx] for idx in sorted(all_results)]
        return pd.json_normalize(ordered)

    def process_batch(self, 
                     df: pd.DataFrame, 
                     checkpoint_dir: Union[str|Path], 
                     checkpoint_interval: int = 1000,
                     checkpoint_prefix: str = 'checkpoint',
        ) -> pd.DataFrame:
        """
        Ï†ÑÏ≤¥ Îç∞Ïù¥ÌÑ∞ÌîÑÎ†àÏûÑ Ï≤òÎ¶¨ with Ï≤¥ÌÅ¨Ìè¨Ïù∏Ìä∏
        """
        print(f"="*60)
        print(f"vLLM Batch Processing")
        print(f"="*60)
        print(f"Total records: {len(df):,}")
        print(f"Batch size: {self.batch_size}")
        print(f"Max retries: {self.max_retries}")
        print(f"Checkpoint every: {checkpoint_interval} records\n")
        
        overall_start = time.time()
        all_results = []
        
        Path(checkpoint_dir).mkdir(parents=True, exist_ok=True)
        # Ï≤¥ÌÅ¨Ìè¨Ïù∏Ìä∏ Îã®ÏúÑÎ°ú Ï≤òÎ¶¨
        try:
            num_chunks = (len(df) - 1) // checkpoint_interval + 1
            
            for chunk_idx in tqdm(range(num_chunks), desc="Processing chunks"):
                start_idx = chunk_idx * checkpoint_interval
                end_idx = min((chunk_idx + 1) * checkpoint_interval, len(df))
                chunk_df = df.iloc[start_idx:end_idx]
                
                # print(f"\n{'='*60}")
                # print(f"Chunk {chunk_idx + 1}/{num_chunks}: Rows {start_idx:,}-{end_idx-1:,}")
                # print(f"{'='*60}")
                
                chunk_start = time.time()
                
                # Ïû¨ÏãúÎèÑ Ìè¨Ìï® Ï≤òÎ¶¨
                chunk_result_df = self.process_with_retry(chunk_df)
                all_results.append(chunk_result_df)
                
                # Ï≤≠ÌÅ¨ ÌÜµÍ≥Ñ
                elapsed = time.time() - chunk_start
                success = chunk_result_df['_success'].sum()
                throughput = len(chunk_df) / elapsed
                
                # print(f"\nChunk completed:")
                # print(f"  Success: {success}/{len(chunk_df)} ({100*success/len(chunk_df):.1f}%)")
                # print(f"  Time: {elapsed:.1f}s")
                # print(f"  Throughput: {throughput:.2f} samples/s")
                
                # Ï≤¥ÌÅ¨Ìè¨Ïù∏Ìä∏ Ï†ÄÏû•
                checkpoint_file = f'{checkpoint_prefix}_chunk{chunk_idx+1}.csv'
                checkpoint_path = Path(checkpoint_dir) / checkpoint_file
                chunk_result_df.to_csv(checkpoint_path, index=False)
                # print(f"  Checkpoint: {checkpoint_file}")
            
            # ÏµúÏ¢Ö Í≤∞Í≥º Ìï©ÏπòÍ∏∞
            final_df = pd.concat(all_results, ignore_index=True)
            
            # ÏµúÏ¢Ö ÌÜµÍ≥Ñ
            total_time = time.time() - overall_start
            total_success = final_df['_success'].sum()
            
            print(f"\n{'='*60}")
            print(f"FINAL RESULTS")
            print(f"{'='*60}")
            print(f"Total processed: {len(final_df):,}")
            print(f"Success: {total_success:,} ({100*total_success/len(final_df):.1f}%)")
            print(f"Failed: {len(final_df)-total_success:,}")
            print(f"Total time: {total_time/60:.1f} min")
            print(f"Throughput: {len(final_df)/total_time:.2f} samples/s")
            print(f"Total tokens: {final_df['_total_tokens'].sum():,}")
            print(f"Avg input: {final_df['_input_tokens'].mean():.1f}")
            print(f"Avg output: {final_df['_output_tokens'].mean():.1f}")
            print(f"{'='*60}")
            
            return final_df
        
        finally:
            # 5. ÏûÑÏãú ÌååÏùº Ï†ïÎ¶¨
            if checkpoint_dir.exists():
                shutil.rmtree(checkpoint_dir)

In [None]:
# Draw a reproducible random sample without replacement. The requested size is
# capped at the number of available rows, because sampling more rows than
# exist with with_replacement=False raises an error.
SAMPLE_SIZE = 1024
n_available = maude_lf.select(pl.len()).collect().item()

sampled_df = maude_lf.select(
    pl.all().sample(
        n=min(SAMPLE_SIZE, n_available),
        with_replacement=False,
        shuffle=True, # Shuffle the order of sampled rows
        seed=4242
    )
).collect().to_pandas()

sampled_df.head()

In [None]:
import torch
# Release cached, unused GPU memory held by PyTorch before the model is loaded
# in the following cell.
torch.cuda.empty_cache()

In [None]:
# Time the whole run, including model loading.
start_time = time.time()

extractor = BatchMAUDEExtractor(
    model_path='Qwen/Qwen3-8B',  # or a local path
    tensor_parallel_size=1,  # number of GPUs
    batch_size=64,  # batch size
    max_retries=2
)

checkpoint_dir = DATA_DIR / 'temp'
# Run the extraction
result_df = extractor.process_batch(sampled_df, checkpoint_interval=128, checkpoint_dir=checkpoint_dir)

elapsed = time.time() - start_time
print(f"Elapsed time: {elapsed:.2f} seconds")

In [None]:
# Token-usage summary for the extraction results.
banner = '=' * 60
n_processed = len(result_df)

print(f"\n{banner}")
print("FINAL RESULTS")
print(banner)
print(f"Total processed: {n_processed:,}")
print(f"Total tokens: {result_df['_total_tokens'].sum():,}")
print(f"Avg input: {result_df['_input_tokens'].mean():.1f}")
print(f"Avg output: {result_df['_output_tokens'].mean():.1f}")
print(banner)

In [None]:
# ÌïÑÏöîÌïú Ïó¥Îßå ÏÑ†ÌÉù ÌõÑ Ïó¥ Ïù¥Î¶Ñ Î≥ÄÍ≤Ω
# Keep only the six extracted fields and strip the nested-model prefix that
# json_normalize added to their column names.
column_aliases = {
    'incident_details.patient_harm': 'patient_harm',
    'incident_details.problem_components': 'problem_components',
    'incident_details.incident_summary': 'incident_summary',
    'manufacturer_inspection.defect_confirmed': 'defect_confirmed',
    'manufacturer_inspection.defect_type': 'defect_type',
    'manufacturer_inspection.inspection_actions': 'inspection_actions',
}

result_df2 = result_df[list(column_aliases)].rename(columns=column_aliases)

result_df2.head(100)

In [None]:
# Distribution of the extracted defect_type values.
result_df2['defect_type'].value_counts()

In [None]:
# Distribution of the extracted defect_confirmed values (value_counts leaves
# out missing values by default).
result_df2['defect_confirmed'].value_counts()

In [None]:
# Output locations for the prompt dump and the extracted sample.
save_dir = DATA_DIR / 'adhoc'
prompt_name = 'prompt.txt'
result_name = 'maude_extracted_sample.csv'

# increment_path is a project helper (src.utils); presumably it returns a path
# that does not clobber an existing one — verify its exact semantics.
save_dir = increment_path(save_dir, exist_ok=True, mkdir=True)
prompt_path = increment_path(save_dir / prompt_name)
result_path = increment_path(save_dir / result_name)


In [None]:
# Store the exact prompt pair next to the results so the run can be traced
# back to the prompt that produced it.
prompt = f"[SYSTEM]\n{SYSTEM_INSTRUCTION}\n[USER]{USER_PROMPT_TEMPLATE}"

with open(prompt_path, mode='w', encoding='utf-8') as prompt_file:
    prompt_file.write(prompt)

In [None]:
# Put the sampled inputs and the extracted fields side by side.
# NOTE(review): an axis=1 concat aligns rows on index labels, so both frames
# must share the same index (and row order) — confirm.
df_concat = pd.concat([sampled_df, result_df2], axis=1)

df_concat.to_csv(result_path, index=False)

# Preview last: a notebook only renders the final expression of a cell, so
# placing this before to_csv meant it was never displayed.
df_concat[['mdr_text', 'patient_harm', 'defect_type']]

## vLLM Î≤ÑÏ†ÑÏùò Ï£ºÏöî Í∞úÏÑ†ÏÇ¨Ìï≠

### 1. ÏÑ±Îä• Ìñ•ÏÉÅ
- **Î∞∞Ïπò Ï∂îÎ°†**: vLLMÏùò ÎÑ§Ïù¥Ìã∞Î∏å Î∞∞Ïπò Ï≤òÎ¶¨Î°ú Ï≤òÎ¶¨ ÏÜçÎèÑ ÎåÄÌè≠ Ìñ•ÏÉÅ
- **PagedAttention**: Î©îÎ™®Î¶¨ Ìö®Ïú®Ï†ÅÏù∏ attention Î©îÏª§ÎãàÏ¶ò
- **Continuous Batching**: ÎèôÏ†Å Î∞∞Ïπò Ïä§ÏºÄÏ§ÑÎßÅÏúºÎ°ú Ï≤òÎ¶¨Îüâ ÏµúÏ†ÅÌôî

### 2. GPU 활용 최적화
- Tensor Parallelism 지원 (다중 GPU)
- 높은 GPU 메모리 활용률 (기본 0.85)
- 효율적인 KV 캐시 관리

### 3. Ï≤òÎ¶¨ ÏÜçÎèÑ ÎπÑÍµê (ÏòàÏÉÅ)
- **Ollama Î≤ÑÏ†Ñ**: ~1-2 samples/s (CPU ÎòêÎäî Îã®Ïùº GPU)
- **vLLM Î≤ÑÏ†Ñ**: ~10-50 samples/s (GPU, Î∞∞Ïπò ÌÅ¨Í∏∞Ïóê Îî∞Îùº)
- **ÏÜçÎèÑ Ìñ•ÏÉÅ**: 10-30Î∞∞ Îπ†Î¶Ñ

### 4. ÏÇ¨Ïö©Î≤ï
```python
# ÏÑ§Ïπò
# pip install vllm

# Îã®Ïùº GPU
extractor = BatchMAUDEExtractor(
    model_path='Qwen/Qwen2.5-7B-Instruct',
    tensor_parallel_size=1,
    batch_size=32
)

# Îã§Ï§ë GPU (4Í∞ú ÏÇ¨Ïö©)
extractor = BatchMAUDEExtractor(
    model_path='meta-llama/Llama-3.1-70B-Instruct',
    tensor_parallel_size=4,
    batch_size=64
)
```

### 5. Ï∂îÍ∞Ä ÏµúÏ†ÅÌôî ÏòµÏÖò
- `max_model_len`: ÏãúÌÄÄÏä§ Í∏∏Ïù¥ Ï†úÌïú (Î©îÎ™®Î¶¨ Ï†àÏïΩ)
- `gpu_memory_utilization`: GPU Î©îÎ™®Î¶¨ ÏÇ¨Ïö©Î•† Ï°∞Ï†à
- `quantization`: ÏñëÏûêÌôî (AWQ, GPTQ Îì±) ÏßÄÏõê

### 6. Ï£ºÏùòÏÇ¨Ìï≠
- Chat ÌÖúÌîåÎ¶øÏùÄ Î™®Îç∏Ïóê Îî∞Îùº Ï°∞Ï†ï ÌïÑÏöî (Qwen, Llama Îì±)
- GPU Î©îÎ™®Î¶¨Í∞Ä Î∂ÄÏ°±ÌïòÎ©¥ `batch_size` ÎòêÎäî `max_model_len` Ï§ÑÏù¥Í∏∞
- `tensor_parallel_size`Îäî ÏÇ¨Ïö© Í∞ÄÎä•Ìïú GPU ÏàòÏôÄ ÏùºÏπòÌï¥Ïïº Ìï®

# ÌîÑÎ°¨ÌîÑÌåÖ Í¥ÄÎ†® Î¨∏Ï†ú (ÏõêÎ≥∏)
1. ÏòàÏô∏ Ï≤òÎ¶¨Í∞Ä ÏóÜÏñ¥ÏÑú llm Îã§Ï∞®ÏõêÎ∂ÑÎ¶¨Ïóê Ïã§Ìå®ÌïòÎçîÎùºÎèÑ Í∑∏ÎåÄÎ°ú Í∑∏ ÌñâÏù¥ Îπà Ï±ÑÎ°ú ÎÑòÏñ¥Í∞ê <- Í∞úÏÑ† ÌïÑÏöî
2. ÎìúÎäî ÏãúÍ∞ÑÏù¥ ÎÑàÎ¨¥ ÎßéÏù¥ Í±∏Î†§ÏÑú ÌîÑÎ°¨ÌîÑÌä∏Î•º Ï¢Ä ÌÅ¨Í∏∞Î•º Îã®Ï∂ïÏãúÏºúÏïº Îê®.
    * Ïã§Ï†úÎ°úÎäî Ïó¨Í∏∞ÏÑú Îçî Îã®Ï∂ïÏãúÌÇ§Í∏∞Í∞Ä ÌûòÎì¶.
3. system promptÎäî Îçî Í∏∏Ïñ¥Ï†∏ÎèÑ ÌïúÎ≤àÎßå Îì§Ïñ¥Í∞ÄÍ∏∞ ÎïåÎ¨∏Ïóê Î∂ÄÎã¥ ÏóÜÏù¥ Í∏∏Í≤å Ìï† Ïàò ÏûàÏùå
    * Ïó¨Í∏∞Í∞Ä Ï£ºÎ°ú ÎßåÏ†∏Ïïº ÎêòÎäî Î∂ÄÎ∂Ñ(ÌÄÑÎ¶¨Ìã∞ ÏÉÅÏäπÏùÑ ÏúÑÌï¥ÏÑú)

# vLLM Î≤ÑÏ†ÑÏóêÏÑúÏùò Í∞úÏÑ†ÏÇ¨Ìï≠

## 1. 예외 처리 강화 ✓
- 개별 샘플 실패시에도 다른 샘플은 정상 처리
- `_success`, `_error`, `_raw_response` 필드로 실패 원인 추적
- 실패한 항목 자동 재시도 (`max_retries=2` 기준: 최초 시도 포함 최대 2회 시도)

## 2. Ï≤òÎ¶¨ ÏãúÍ∞Ñ ÎåÄÌè≠ Îã®Ï∂ï ‚úì
- **10-30Î∞∞ Îπ†Î•∏ Ï≤òÎ¶¨ ÏÜçÎèÑ**
- Î∞∞Ïπò Ï∂îÎ°†ÏúºÎ°ú GPU Ìö®Ïú® Í∑πÎåÄÌôî
- 1000Í∞ú ÏÉòÌîå Í∏∞Ï§Ä: Ollama 15-20Î∂Ñ ‚Üí vLLM 1-2Î∂Ñ

## 3. System Prompt ÌôúÏö©
- System promptÏóê ÏÉÅÏÑ∏Ìïú Í∞ÄÏù¥ÎìúÎùºÏù∏ Ï∂îÍ∞Ä Í∞ÄÎä•
- ÌíàÏßà Ìñ•ÏÉÅÏùÑ ÏúÑÌïú ÏòàÏãú Î∞è ÏÑ§Î™Ö Ìè¨Ìï®
- Ìïú Î≤àÎßå Ïù∏ÏΩîÎî©ÎêòÎØÄÎ°ú ÏÑ±Îä• ÏòÅÌñ• ÏµúÏÜåÌôî

## 4. Ï∂îÍ∞Ä Í∞úÏÑ†ÏÇ¨Ìï≠
- Ïã§ÏãúÍ∞Ñ Ï≤òÎ¶¨ ÏßÑÌñâÎ•† ÌëúÏãú
- ÏûêÎèô Ï≤¥ÌÅ¨Ìè¨Ïù∏Ìä∏ Ï†ÄÏû•
- ÏÉÅÏÑ∏Ìïú ÌÜµÍ≥Ñ Ï†ïÎ≥¥ Ï†úÍ≥µ
- Î©îÎ™®Î¶¨ Ìö®Ïú®Ï†ÅÏù∏ Ï≤òÎ¶¨