In [None]:

# Prompt for a binary biomedical-content check on a piece of text.
# Placeholder: {context}.
# The doubled braces in the example line are escapes that render as single
# JSON braces when the template is filled with str.format — presumably how
# this template is consumed.
BIOMED_CHECK_PROMPT = """
You are a data classifier. Your task is to determine if the provided text context describes Biomedical, Medical, or Clinical content (e.g., pathology, anatomy, cell biology, medical imaging, clinical reports).

Input Context:
{context}

Output Requirement:
Return ONLY a JSON object with a single boolean field `is_biomedical`.
Example: {{"is_biomedical": true}} or {{"is_biomedical": false}}
"""



# Prompt asking the model to condense background text into a 100-200 word
# summary centred on biomedical entities.
# Placeholder: {back_info}.
BACKGROUND_DISTILLATION_PROMPT_TEMPLATE = """
As an expert biomedical scientific editor, your task is to distill the provided [Background] text into a concise summary focusing on biomedical entity information.

Input:
[Background]:
{back_info}

The distilled summary MUST meet the following requirements:
1.  Length: a reasonable summary between 100 and 200 words.
2.  Focus: Focus on explaining the core scientific problems within the background context, including the important knowledge related to them.
3.  Style: Use formal, clear, and objective scientific language.

Output:
"""



# Two-step prompt: choose ONE of four main categories, then extract 10-15
# keywords guided by that category. The required output is a single line of
# the form "[Main Category Name]: keyword1, keyword2, ...".
# Placeholders: {context}, {image_caption}.
# NOTE(review): the text refers to an "Observation" and "Interpretation", but
# the only inputs templated in are [Context] and [Image_caption] — confirm
# whether callers pass those inside {image_caption} or the wording is stale.
# NOTE(review): the instruction says "Main Category (1-4)" although the
# framework entries below are not numbered.
KEYWORD_Category_PROMPT_TEMPLATE = """
You are a top-tier biomedical research analyst, skilled at structured information extraction and thematic classification. Your task is to perform a two-step analysis on the provided [Context] and [Image_caption].

I. INPUT DATA

[Context]:
{context}
*(Note: The [Context] may contain `[Image]` tokens indicating image positions. You CANNOT see these images; you MUST rely *only* on the Observation for all visual details.)*

[Image_caption]:
{image_caption}

II. STEP 1: THEMATIC CLASSIFICATION
Analyze the content and select **ONE** Main Category (1-4) that best describes the core research domain. Use the professional framework below.

CLASSIFICATION FRAMEWORK:
- Basic Medical Science :
  Focuses on the fundamental mechanisms of life and disease.
  (Keywords: Molecular biology, genetics, biochemistry, immunology, physiology, anatomy, neurosciences, cellular pathways, pathogenesis models).
- Clinical Medicine :
  Focuses on the diagnosis, treatment, and management of human diseases in patients.
  (Keywords: Specific diseases (e.g., Heart Disease, Endocarditis), surgical procedures, patient case studies, treatment outcomes, clinical neurology, ophthalmology, urology, orthopaedics).
- Diagnostics & Laboratory Medicine :
  Focuses on the methods and technologies used to detect and diagnose diseases.
  (Keywords: Pathology, histopathology, cytopathology, medical imaging (Radiology, MRI, CT), biomarkers, lab tests, assay development, neuropathology, forensic analysis, electrocardiorgraphy).
- Pharmacy & Therapeutics :
  Focuses on the discovery, development, and application of drugs.
  (Keywords: Pharmacology, drug synthesis, medicinal chemistry, drug targets, therapeutic strategies, drug resistance, clinical trials for drugs, pharmaceutical sciences).

III. STEP 2: THEME-GUIDED KEYWORD EXTRACTION
Based on the [Context] and the provided Observation and Interpretation as well as the classification result from STEP 1, extract a list of 10-15 highly specific biological or medical keywords.
- **CRITICAL:** Ensure keywords are directly relevant to the selected Main Category.
- **Focus on:** Specific protein/gene names, cell types/morphologies, disease names, diagnostic criteria (e.g., grading), specific drugs, or key experimental findings.
- **Avoid:** Generic words or phrases.



IV. REQUIRED OUTPUT FORMAT
Output *only* the result in the following exact structure:
[Main Category Name]: keyword1, keyword2, keyword3, ..., keyword15
"""


# Prompt for a vision-language model: describe the visual content of one
# image as a single paragraph, grounded in its caption.
# Placeholder: {caption}.
# NOTE(review): the input label is "[Image Caption]" while the instructions
# say "[Image Captions]" — confirm which spelling is intended.
VLM_PROMPT_TEMPLATE = """
You are an expert biologist and biomedical researcher. You will be given a image and [Image Captions].Your task is to describe the visual content of the image. 

# Input Data
[Image Caption]: 
{caption}

# Task
Provide A detailed description of the visual features present in the image,  grounded in the [Image Captions], Avoid using any conclusive statements. Focusing on the observation of visual features in biomedical images.

# Constraints
- Do NOT output a list. 
- Do NOT mention "Image 1" or other image indices.
- Output ONLY the description paragraph.
"""


# Prompt that merges four model-generated descriptions of the same image into
# one purely visual consensus paragraph (majority vote on overlapping
# features, non-contradicting unique details merged in).
# Placeholders: {desc_fleming}, {desc_hulu}, {desc_lingshu}, {desc_qwenvl}.
# Fixes over the previous text: the duplicated opening sentence is removed,
# the missing space after "image." is restored, and the quotation mark around
# "comprehensive observation report" is closed.
CONSENSUS_PROMPT_TEMPLATE = """
You are a senior biomedical image analyst. You will receive observations from four different biomedical experts regarding the same biomedical image. These observations may include their interpretations or inferences based on the image, which you should disregard.

#Task:
You should limit yourself to purely visual descriptions, avoid adding any explanatory logic, and extract the common visual information from these observations, while avoiding contradictions and ensuring the information is biomedically right. Generate a highly accurate "comprehensive observation report".

## Bad example:It combines Qwenvl's incorrect "cytoplasmic" description with a correct "prominent nuclear staining" description later,  resulting in a confusing and anatomically impossible description for a single stain.

# Input Observations:

[Model: Fleming]:
{desc_fleming}

[Model: Hulu]:
{desc_hulu}

[Model: Lingshu]:
{desc_lingshu}

[Model: QwenVL]:
{desc_qwenvl}

# Key Requirements:
1. Voting & Merging Strategy:
    - For overlapping features mentioned by multiple models, use **majority voting** to establish the **corroborated facts**.
    - For distinct/unique details mentioned by only one model, **naturally merge** them into the description to enrich detail, provided they DO NOT contradict the **corroborated facts** or biomedical logic.

2. Pure Observation: Describe ONLY the visible morphological features (e.g. cells, staining, structures). Do not include any reasoning, such as "consistent with...", "indicates...", "seems to...", "suggests some expression...", "may represent...".  Focus on the visual features of the image itself; do not draw conclusions or inferences based on interpretations or deductions from the image.
3. Integration: Output a single, coherent paragraph merging the corroborated facts and valid unique details naturally.

# Output
(Output ONLY the Integration description paragraph.)
"""



# Prompt that verifies per-image captions against the surrounding text and
# returns corrected, purely descriptive observations plus a summary as JSON.
# Placeholders: {distilled_background}, {keywords}, {context},
# {vl_captions_json}. Doubled braces are str.format escapes for the JSON
# example.
# Fixes over the previous text: the ```json fence is now closed, the output
# description no longer asks for a "JSON list of strings" (the schema is an
# object), the count rule accounts for the extra "summary" key, and a stray
# "**" after "Anti-Hallucination Rule" is removed.
ENHANCED_CAPTION_PROMPT_TEMPLATE = """
You are an expert biologist and biomedical researcher. You will be given [Context], [Background], [Keywords], and a set of initial [Image_captions].

# Your goal is to generate a "Context_Enhanced_Captions" object. You must process the data in a distinct step for each image: Verification ([Observation]).

## Verification ([Observation]): Rigorously validate the [Image_captions] to correct only factual errors based on [Context] while strictly preserving all non-conflicting visual details, ensuring the output remains a purely descriptive report devoid of any explanatory logic.

# Input Data

[Background]:
{distilled_background}

[Keywords]:
{keywords}

[Context]:
{context}

[Image_captions]:
{vl_captions_json}

# Task Guidelines & Logic

# 1. Alignment Strategy
The [Context] text contains `[Image]` tokens (e.g., [Image 1], [Image 2]). These tokens mark the exact location where the image is discussed.
You must use the text immediately surrounding these `[Image]` tokens to verify the identity and features of the corresponding image in [Image_captions].

# 2. Field: "observations" (Strict Visual Verification)
Goal: Correct the [Image_captions] ONLY if they are factually wrong based on the [Context], while preserving correct visual details.
## Minimal Modification Rule: Do not rewrite the caption if it is consistent with the text. Only edit specific words or phrases that contradict the [Context].
### Correction Protocol:
    * If [Image_captions] says "blue stain" but [Context] specifies "red stain", change it to "red stain".
    * If [Image_captions] mentions a visual detail (e.g., "irregular shape") that is NOT mentioned in [Context], PRESERVE IT. Do not delete valid visual details just because the text doesn't mention them.
### Anti-Hallucination Rule: Do NOT add biological reasoning, causal relationships, or background knowledge into this field. Keep it purely descriptive (shapes, colors, positions ).

# 3. Summary Generation
## [Observation] summary: If there is only one image, provide a concise visual overview of that specific image. If there are multiple images, synthesize the common visual themes across all panels. MUST remain purely descriptive (no reasoning).

# Output Format
Provide the final answer as a JSON object with a single root key "Context_Enhanced_Captions".
The output must strictly separate visual descriptions from analytical insights.
Ensure the output is a valid JSON object whose "Image N" entries under "observations" correspond one-to-one with the original captions, followed by a single "summary" entry.

```json
{{
  "Context_Enhanced_Captions": {{
    "observations": {{
      "Image 1": "...",
      "Image 2": "...",
      ...
      "summary": "..."
    }}
  }}
}}
```

# Key Requirements
1. JSON Validity: The output must be directly parseable by json.loads.
2. Context Fidelity: Do not hallucinate details not present in the image or the text.
3. Count Match: The number of "Image N" keys in "observations" must match the number of input images (the "summary" key is additional).

Generate [Context-Enhanced Captions]: 
"""


# Prompt that generates purely visual question-answer pairs (plus a list of
# biomedical entities) from an observation text.
# Placeholder: {Observation}. Doubled braces are str.format escapes for the
# JSON example.
# Fixes over the previous text: the JSON example had trailing commas after
# each "answer" and no comma after the "qa_pairs" object, so a model copying
# it produced invalid JSON; the ```json fence was never closed; the task list
# started at "2." and contained the typo "exactly mltiple".
VISUAL_ELEMENT_QA_PROMPT_TEMPLATE = """
You are an expert in biomedical image analysis. Your task is to generate vision-centered [Question]-[Answer] pairs based ONLY on the provided [Observation]. Extract a list of all unique biomedical entities (e.g., cell types, staining, anatomical structures) mentioned in the [Observation].

# Input Data
[Observation] refers to objective visual descriptions of each images, and the summary consolidates the visual findings to provide a holistic overview,inter image relationship across the images
{Observation}

# Output

Generate a [Question]-[Answer] pair that asks for the specific biomedical visual features mentioned in the [Observation]. If there is only one image available, then only use that single image for the question. If there are multiple images, selecting several (but not necessarily all) closely related images can generate reasonable questions.
The goal is to verify if the model can "see" the low-level details before performing high-level reasoning.

## Key Requirements (MUST FOLLOW):
1. Strictly Visual: The [Question] MUST focus ONLY on visual attributes. Aim for high diversity in question suitable for multi-image biomedical analysis.
**IF [Observation] contains Only One Image**: You MUST generate a Descriptive question specific to that image (e.g., "Describe the staining intensity of the cytoplasm in Image 1."). Do NOT hallucinate other images.

IF [Observation] contains multiple images, you can generate questions about the relationships between these images.

Examples include:
Comparative Morphology: 'Compare the nuclear irregularity observed in different images.'
Feature Characterization: 'Describe the texture and staining intensity of the cytoplasm in...'
Structural Architecture: 'How does the arrangement of inflammatory cells differ between different images?'

2. No Interpretation: The [Question] and [Answer] MUST NOT contain diagnostic conclusions, biological significance, or "Why" reasoning. Do not use words like "suggests", "indicates", or "diagnosis".
3. Image Reference in [Question]:
    - **IF [Observation] contains Multiple Images**:  The [Question] string MUST include at least 2 explicit image references (e.g., [Image 1, Image2, Image3, ...]).
    - **IF [Observation] contains Only One Image**: You MUST generate a  question specific to that image. Do NOT hallucinate other images. The [Question] string MUST include  explicit image references (e.g., Image 1).
4. Fact-Based: The [Answer] must rely on the [Observation] text.
5. Atomic [Question]: the [Question] string must be a single query. It MUST NOT be a compound question or contain any sub-questions.

## OUTPUT FORMAT AND CONSTRAINTS (MUST FOLLOW):
Return a valid JSON **List** of objects:
```json
[
  {{
    "qa_pairs": {{
      "qa1": {{
        "question": "...",
        "answer": "..."
      }},
      "qa2": {{
        "question": "...",
        "answer": "..."
      }},
      ...
    }},
    "image_indices": [...],
    "biomedical_entities": ["entity1", "entity2", "..."]
  }}
]
```

# Task
1. Generate multiple visual description QA pairs based on the [Observation].
2. Extract a list of all unique biomedical entities (e.g., cell types, staining, anatomical structures) mentioned in the [Observation] and put them in "biomedical_entities".
[Your Output]: """


# Prompt that asks the model to build structured reasoning chains
# (research context -> experiments -> conclusion) from text plus visual
# evidence, returned as a JSON list.
# Placeholders: {context}, {observation}. Doubled braces are str.format
# escapes for the JSON schema example.
# Each experiment's visual phenomenon is tagged [Image X], [Context] or
# [Missing], as spelled out in the requirements below.
LOGIC_CHAIN_PROMPT_TEMPLATE = """
You are a rigorous biomedical expert. You need to construct logical reasoning chains based on the provided Original Text and Visual Evidence.

Original Text:
{context}

Visual Evidence:
{observation}

Please integrate the Context and Visual Evidence to form detailed logical reasoning chains that lead to conclusions.
Requirements:
- Each independent research in the Context should correspond to a separate logical reasoning chain.
- Each logical reasoning chain should follow the logic:
  Research Context -> Experiments -> Conclusion
- Each Experiment should follow the logic:
  Experimental Setting -> Experiment Goal -> Visual Phenomenon -> Interpretation -> Sub-Conclusion
    - Experimental Setting: Describe the experimental setup, including materials, methods, and conditions.
    - Experiment Goal: Purpose of the experiment.
    - Visual Phenomenon: Specific **visual** observations from the experiment, not interpretations.
    - Interpretation: Scientific explanation of the visual phenomenon.
    - Sub-Conclusion: Conclusion drawn from the interpretation, related to the final conclusion.
- If a visual phenomenon of experiment is mentioned in the Visual Evidence, mark which image it corresponds to in the format [Image X].
- Some experiments may not have any visual phenomenon in the Visual Evidence. There are two cases:
    - The Context provides the visual phenomenon directly. In this case, provide the visual phenomenon and mark it as [Context].
    - The visual phenomenon is missing. In this case, provide [Missing] as the visual phenomenon.
- Avoid precise numerical measurement data unless exactly the same numbers are present in Experimental Setting.
- The process of achieving the conclusion should include all necessary intermediate sub-conclusions and corresponding experiments.
- Each logical reasoning chain should end with a clear conclusion.
- If a certain experiment has no contribution to the final conclusion, it should be omitted from the whole logical reasoning chain.
Output Format:
Provide the logical reasoning chains in JSON format as a list of objects with the following structure:
```json
[
  {{
    "research_context": "Description of the research context.",
    "experiments": [
      {{
        "experimental_setting": "Description of the experimental setting.",
        "experiment_goal": "Description of the experiment goal.",
        "visual_phenomenon": "Visual phenomenon details with [Image X] or [Context] or [Missing].",
        "interpretation": "Interpretation of the visual phenomenon.",
        "sub_conclusion": "Conclusion drawn from the interpretation, related to the final conclusion."
      }},
      ...
    ],
    "reasoning": {{
      "intermediate_inferences": [
        {{
          "sub_conclusion": "Description of the intermediate inference.",
          "based_on_experiments": [Indices of experiments contributing to this inference]
        }},
        ...
      ],
      "content": "Detailed reasoning process leading to the conclusion.",
      "conclusion": "Final conclusion derived from the reasoning."
    }}
  }}
]
```
"""

# Prompt that turns a logic chain into one hard, open-ended exam question with
# its answer and an explanation of how the pair was built.
# Placeholders: {logic_chain}, {visual_evidence}, {original_text}. Doubled
# braces are str.format escapes; "{{your ... here}}" renders as
# "{your ... here}".
# Fixes over the previous text: the output-format sentence announced three
# fields but named them in a garbled order, the JSON example read
# "our question here", plus the grammar slips "a exam" and "The questions is".
OPEN_ENDED_QA_GENERATION_PROMPT_TEMPLATE = """
You are a biomedical expert.
You are given a logic chain, and you need to generate an exam question to test students' comprehension of the logic chain.

Logic Chain:
{logic_chain}

Some extra information that may help you:
Visual Evidence:
{visual_evidence}

Original Text:
{original_text}

The question is expected to be hard, requiring both accurate observations and deep understanding of the logic chain. This includes the following aspects:
- For the answer:
    - The question should be open-ended.
    - The answer should contain the whole logical reasoning chain above.

- For the information provided in the question:
    - The Research Context should be provided.
    - The Setting of each Experiment should be provided.
    - The Goal of each Experiment should NEVER be provided.
    - For Visual Phenomenon of each Experiment:
        - If at least one visual phenomenon of the experiment is mentioned in the Visual Evidence, do NOT provide the Visual Phenomenon or Result. I.e. sentence like "[Image X] shows ..." should NEVER appear in the question.
          - Only one EXEMPT: If Visual Phenomenon contains Scale Bars, mention the scale ratio in the question.
        - If all visual phenomena of the experiment are provided in Context, provide the Visual Phenomenon. Do NOT provide the Result.
        - If the visual phenomenon is marked as Missing, provide the experiment result instead.
    - The direct result (NOT further Interpretation or Sub-conclusion) of each experiment should be provided only if the Visual Phenomenon is marked as Missing.
    - The Interpretation and Sub-Conclusion of each Experiment should NEVER be provided.
    - Intermediate Inferences should NEVER be provided.
    - Reasoning from Intermediate Inferences to Conclusion should NEVER be provided.
    - The Conclusion should NEVER be provided.

- For how to ask the question:
    - The question should not easily guide students to the answer. That is:
        - The question should not give any clues about how to reason to the answer.
        - The question should not give away intermediate steps or conclusions.

For example:
The logic chain is:

Research Context RC
Experiment E1:
  Setting: S1
  Visual Phenomenon: P1 [Image X]
  Interpretation: I1
  Sub-Conclusion: SC1
Experiment E2:
  Setting: S2
  Visual Phenomenon: P2 [Context]
  Interpretation: I2
  Sub-Conclusion: SC2
Experiment E3:
  Setting: S3
  Visual Phenomenon: [Missing]
  Interpretation: I3
  Sub-Conclusion: SC3
Final Reasoning:
  Based on SC1, SC2 and SC3, we conclude Conclusion C.

Do NOT ask:
- [BAD CASE] "How is conclusion C derived?" (gives away the conclusion)
- [BAD CASE] "Research RC, conducted experiment E1, setting S1, observed P1, ..." (gives away visible phenomenon in the images)
- [BAD CASE] "Research RC, ..., conducted experiment E2, setting S2, result R2, ..." (gives away experiment result where phenomenon is provided in Context)
- [BAD CASE] "Research RC, ..., conducted experiment E3, interpretation I3, ..." (gives away interpretation)
- [BAD CASE] "Research RC, conducted experiment E1, (Did not provide S1), ..." (misses the setup of an experiment)
- [BAD CASE] "Research RC, ..., conducted experiment E2, setting S2, (Did not provide P2), ..." (misses the visual phenomenon that is not provided in the images but provided in Context)
- [BAD CASE] "Research RC, ..., conducted experiment E3, setting S3, (Neither phenomenon or direct result), ..." (misses the direct result of an experiment where the visual phenomenon is marked as Missing)
- [BAD CASE] "Based on SC1, SC2 and SC3, what is the conclusion?" (gives away intermediate inferences)
Ask instead:
[GOOD CASE] "Research RC, conducted experiment E1, setting S1; conducted experiment E2, setting S2, observed P2; conducted experiment E3, setting S3, got result R3. What can be concluded from these experiments?"
Note that you do not need to directly ask "Please give a detailed reasoning process". A clever student should know to provide the reasoning process to reach the conclusion.


Format your output as a JSON object with three fields: "explanation", "question" and "answer", where "question" contains the generated question, "answer" contains the reference answer, and "explanation" explains how you generated this question-answer pair according to the requirements above.
```json
{{
  "explanation": "{{your explanation here}}",
  "question": "{{your question here}}",
  "answer": "{{your answer here}}"
}}
```
"""



# Quality-check prompt #1: scores a flattened logic chain on two 1-5 criteria
# ("Evidence Support Strength", "Logical Flow and Coherence").
# Placeholder: {flattened_logic_chain}.
# The reply format is a JSON object wrapped in <scores>...</scores> followed by
# free text in <explanation>...</explanation>; doubled braces are str.format
# escapes.
LOGIC_CHAIN_QC_1_TEMPLATE = \
"""
You are an expert in biomedical reasoning and logic evaluation.
Your task is to evaluate the integrity and coherence of a logic chain.
The input is a structured list of strings representing the progression from experimental facts to intermediate inferences, and finally to a conclusion.

# Input Data

[Logic Chain] (The reasoning path to evaluate):
{flattened_logic_chain}

# Evaluation Criteria (1-5 Scale)

1. Evidence Support Strength
   Assess if the intermediate inferences provide sufficient and accurate support for the final reasoning content.
   - Score 1 (Critical Fail): Contradictory or Unsupported. The final content makes claims that contradict the intermediate inferences or relies on evidence not present in the chain.
   - Score 3 (Borderline): Weak or Partial Support. The final content is somewhat related but contains major leaps in logic or includes details not fully backed by the intermediate steps.
   - Score 5 (Pass): Strong Support. The final content is a robust and accurate synthesis strictly derived from the provided intermediate inferences.

2. Logical Flow and Coherence
   Assess if the transition from Intermediate Inferences to the Final Conclusion is logically sound and seamless.
   - Score 1 (Critical Fail): Fragmented or Disjointed. The logic jumps randomly; the connection between the inference layer and the conclusion layer is broken or nonsensical.
   - Score 3 (Borderline): Rough or Repetitive. The flow is understandable but clunky, redundant, or requires the reader to guess the connection between steps.
   - Score 5 (Pass): Seamless and Coherent. The reasoning flows naturally like a scientific argument; the conclusion feels like the inevitable result of the preceding steps.

# Output Format (Strict JSON)

You must return the result strictly in the following format:

<scores>
{{
  "Evidence Support Strength": A,
  "Logical Flow and Coherence": B
}}
</scores>

<explanation>
[Provide a brief explanation for your scoring. explicitly stating if there are logical gaps, contradictions, or if the chain is solid.]
</explanation>

(Where A, B are integer scores from 1 to 5)
"""



# Quality-check prompt #2: scores (1-5) whether the visual phenomena quoted in
# a logic chain are grounded in the observation text or the context.
# Placeholders: {Observation}, {Context}, {VisualPhenomena} (capitalised,
# unlike the lower-case placeholders of the generation prompts).
# The reply format is a JSON object wrapped in <scores>...</scores> followed by
# <explanation>...</explanation>; doubled braces are str.format escapes.
LOGIC_CHAIN_QC_2_TEMPLATE = \
"""
You are an expert in biomedical text verification and fact-checking.
Your task is to verify if the [Visual Phenomena] described in the logic chain are supported by the provided Source Data ([Observation] and [Context]).

# Input Data

[Observation] (Objective visual descriptions of the images):
{Observation}

[Context] (Background containing [Image] tags):
{Context}

[Visual Phenomena] (The descriptions extracted from the logic chain to be verified):
{VisualPhenomena}

# Evaluation Criteria (1-5 Scale)

1. Source Grounding & Verification
   Assess if every visual phenomenon listed in the Target is explicitly mentioned or clearly visible in the [Context] or [Observation].
   - Score 1 (Critical Fail): Hallucination. The target describes features that are completely absent from both the Observation and Context, or contradicts them.
   - Score 3 (Borderline): Partial Match. Some descriptions are supported, but others are missing source evidence, or the target adds significant details not found in the source.
   - Score 5 (Pass): Fully Grounded. Every statement in the [Visual Phenomena] is directly supported by evidence found in the Source Observation or Source Context (textual descriptions of visual outcomes).

# Output Format (Strict JSON)

You must return the result strictly in the following format:

<scores>
{{
  "Source Grounding & Verification": A
}}
</scores>

<explanation>
[Provide a brief explanation. If there is a hallucination or missing reference, explicitly quote the unsupported part.]
</explanation>

(Where A is an integer score from 1 to 5)
"""


# Quality-check prompt #3: scores (1-5) a generated question against its logic
# chain on three criteria: question/conclusion alignment, scale-bar
# consistency, and reasoning validity.
# Placeholders: {Question}, {Observation}, {LogicChain}, {Conclusion}.
# The reply format is a JSON object wrapped in <scores>...</scores> followed by
# <explanation>...</explanation>; doubled braces are str.format escapes.
# NOTE(review): criterion 2 speaks of the "problem statement" where the rest
# of the prompt says "Question" — presumably the same thing; confirm.
LOGIC_CHAIN_QC_3_TEMPLATE = \
"""
You are an expert in evaluating question-answering logic.
Your task is to verify if the provided Conclusion effectively answers or corresponds to the specific Question asked.

# Input Data

Question:
{Question}

Observation (Visual Evidence containing scale info):
{Observation}

Logic Chain:
{LogicChain}

Conclusion (Derived from Logic Chain):
{Conclusion}

# Evaluation Criteria (1-5 Scale)

1. Question-Conclusion Alignment
   Assess if the Conclusion directly addresses the core inquiry of the Question.
   - Score 1 (Fail): The conclusion is irrelevant, unrelated, or contradicts the premise of the question. It does not provide an answer.
   - Score 3 (Passable): The conclusion is related and provides a partial answer, but may be slightly tangential or misses the specific format requested.
   - Score 5 (Pass): The conclusion provides a clear, logical, and direct answer to the question. It functions effectively as the final output.

2. Scale/Legend Consistency Check
Check if the problem statement lacks a scale/legend, but the observation results, reasoning content, and conclusion clearly include scale numbers or scale information.
- Score 1 point (Serious Failure): The problem statement lacks a scale/legend, but the observation results contain explicit scale numbers (e.g., "50 nm," "scale"), and the reasoning content utilizes this scale information from the observation.
- Score 5 points (Pass): The problem statement and observation results are consistent; either both include a scale/legend, or neither includes scale-related information. If the problem statement includes scale-related information, but the conclusion and reasoning content do not use it, it is not considered an error.

3. Reasoning Validity 
   Assess if the Logic Chain steps contains excessive speculation or hallucinations not supported by the Observation.
   - Score 1 (Critical Fail): Given ONLY Research Context, Experimental Settings, and Visual Phenomenon, the "inference", "sub_conclusion", "content", "conclusion" parts contains details impossible to know.
   - Score 5 (Pass): Given ONLY Research Context, Experimental Settings, and Visual Phenomenon, the "inference", "sub_conclusion", "content", "conclusion" parts are all supported without any hallucination.

# Output Format (Strict JSON)

You must return the result strictly in the following format:

<scores>
{{
  "Question-Conclusion Alignment": A,
  "Scale/Legend Consistency Check" : B,
  "Reasoning Validity" : C
}}
</scores>

<explanation>
[Briefly explain why the conclusion satisfies or fails to answer the question.]
</explanation>


(Where A, B, C is an integer score from 1 to 5)
"""

In [None]:
def pack_content(prompt, images):
    """Build a message content list: one image part per URL, then the prompt.

    Args:
        prompt: Text placed in the final ``{"type": "text"}`` part.
        images: Iterable of image URLs; a falsy value (``None`` or empty)
            produces a text-only list.

    Returns:
        A list of dicts, image parts first and the text part last.
    """
    parts = []
    for url in images or []:
        parts.append({"type": "image_url", "image_url": url})
    parts.append({"type": "text", "text": prompt})
    return parts

def openai_pack_content(prompt, images):
    """Build a message content list with each image URL wrapped in a dict.

    Each image becomes ``{"type": "image_url", "image_url": {"url": ...,
    "detail": "auto"}}``; the prompt is appended last as a text part.

    Args:
        prompt: Text placed in the final ``{"type": "text"}`` part.
        images: Iterable of image URLs; a falsy value (``None`` or empty)
            produces a text-only list.

    Returns:
        A list of dicts, image parts first and the text part last.
    """
    image_parts = [
        {"type": "image_url", "image_url": {"url": url, "detail": "auto"}}
        for url in (images or [])
    ]
    text_part = {"type": "text", "text": prompt}
    return [*image_parts, text_part]

In [None]:
def process_qa_output(output_str):
    """Parse a model reply as JSON, tolerating a Markdown code fence.

    The previous version only removed a fence when the reply both started
    with exactly "```json" and ended with "```"; replies fenced with a bare
    "```", an upper-case language tag, or a missing closing fence failed to
    parse, and a non-string reply (e.g. ``None``) raised ``AttributeError``.

    Args:
        output_str: Raw model output.

    Returns:
        The decoded JSON value, or ``None`` when the input is not a string
        or is not valid JSON.
    """
    if not isinstance(output_str, str):
        return None

    text = output_str.strip()
    if text.startswith("```"):
        text = text[len("```"):]
        # Drop an optional language tag right after the opening fence.
        if text[:4].lower() == "json":
            text = text[4:]
        # The closing fence may be absent if the reply was cut short.
        if text.endswith("```"):
            text = text[:-len("```")]
        text = text.strip()

    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        print(f"JSON decoding error: {e}")
        return None

In [None]:
def extract_quality_score(quality_check_output_str):
    """Pull the score JSON and the explanation text out of a QC reply.

    The reply is expected to contain ``<scores>{...}</scores>`` and
    ``<explanation>...</explanation>``. Each closing tag is now searched for
    *after* its opening tag, so a stray closing tag earlier in the reply no
    longer yields an empty slice.

    Args:
        quality_check_output_str: Raw model output.

    Returns:
        ``(score, explanation)`` where ``score`` is the decoded JSON value
        and ``explanation`` is the stripped explanation text.

    Raises:
        ValueError: If a tag is missing, or the scores body is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    def _tag_body(tag):
        opening, closing = f"<{tag}>", f"</{tag}>"
        start = quality_check_output_str.index(opening) + len(opening)
        end = quality_check_output_str.index(closing, start)
        return quality_check_output_str[start:end].strip()

    # Scores are parsed first so a malformed score block fails before the
    # explanation is looked up, as in the original order of operations.
    score = json.loads(_tag_body("scores"))
    explanation = _tag_body("explanation")
    return score, explanation

In [None]:
def _matching_brace_end(text: str, start: int) -> int:
    """Return the index of the ``}`` closing a brace opened just before *start*.

    Nested ``{...}`` pairs are skipped. If the braces never balance, fall back
    to the first ``}`` at or after *start* (``-1`` when there is none).
    """
    depth = 1
    for pos in range(start, len(text)):
        ch = text[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return pos
    return text.find("}", start)


def check_answer_format_rule(answer_string: str):
    """Check that an answer has description, reason and boxed sections.

    Required markers are ``<description>...</description>``,
    ``<reason>...</reason>`` and ``\\boxed{...}``. Each closing marker is
    searched for after its opening marker; previously the search started at
    the beginning of the string, so any ``}`` appearing before ``\\boxed{``
    made a well-formed answer fail. Braces nested inside ``\\boxed{...}`` are
    kept in the extracted content.

    Args:
        answer_string: The answer text to validate.

    Returns:
        ``(True, contents)`` with the stripped text of each section keyed by
        ``"description"``, ``"reason"`` and ``"boxed"``, or ``(False, None)``
        when any section is missing.
    """
    required_tags = {
        "description": ["<description>", "</description>"],
        "reason": ["<reason>", "</reason>"],
        "boxed": ["\\boxed{", "}"]
    }
    extracted_contents = {}
    for tag, (start_tag, end_tag) in required_tags.items():
        start_index = answer_string.find(start_tag)
        if start_index == -1:
            return False, None
        content_start = start_index + len(start_tag)
        if tag == "boxed":
            end_index = _matching_brace_end(answer_string, content_start)
        else:
            end_index = answer_string.find(end_tag, content_start)
        if end_index == -1:
            return False, None
        extracted_contents[tag] = answer_string[content_start:end_index].strip()
    return True, extracted_contents

In [None]:
def alter_tags(answer_string: str):
    """Backslash-escape the description/reason tags in *answer_string*.

    ``<description>``, ``</description>``, ``<reason>`` and ``</reason>`` are
    rewritten with a backslash before each angle bracket; everything else is
    returned unchanged.
    """
    escaped = answer_string
    for tag_name in ("description", "reason"):
        for slash in ("", "/"):
            escaped = escaped.replace(
                f"<{slash}{tag_name}>", f"\\<{slash}{tag_name}\\>"
            )
    return escaped

def format_data_md(results):
    """Render QA results as a Markdown report.

    Entries whose ``"qa_pair"`` is ``None`` are skipped; the sample number in
    each heading is the entry's position in *results* (1-based), so skipped
    entries leave gaps in the numbering.

    Args:
        results: Sequence of dicts with the keys read below (``"qa_pair"``,
            ``"index"``, ``"context"``, ``"image_captions"``,
            ``"format_check"``, ``"quality_score"``,
            ``"quality_explanation"`` and optionally
            ``"keyword_category_result"``).

    Returns:
        The report as one string, lines joined with newlines.
    """
    report = []
    for position, record in enumerate(results):
        qa_pair = record["qa_pair"]
        if qa_pair is None:
            continue

        # Header, context and captions.
        report.append(f"## Sample {position + 1}\n")
        report.append(f"**Original Sample Index:** {record['index']}\n")
        report.append("**Context:**\n")
        report.append(f"{record['context']}\n")
        report.append("**Image Captions:**\n")
        for image_no, caption in enumerate(record["image_captions"], 1):
            report.append(f"- Image {image_no}: {caption}\n")

        # Classification / keyword line.
        keyword_result = record.get('keyword_category_result', 'N/A')
        report.append("**I. Thematic Classification and Keywords:**\n")
        report.append(f"> {keyword_result}\n")
        report.append("---\n")

        # The QA pair itself.
        report.append("**Question-Answer Pair:**\n")
        image_indices = qa_pair.get('image_indices', [])
        report.append(f"**Image Indices Used (1-indexed):** {image_indices}\n")
        report.append(f"**Question:**\n{qa_pair['question']}\n")
        report.append(f"**Answer:**\n{alter_tags(qa_pair['answer'])}\n")

        # Format and quality checks.
        report.append("**Format Check Result:**\n")
        report.append(f"{record['format_check']}\n")
        report.append("**Quality Scores:**\n")
        scores_json = json.dumps(record['quality_score'], indent=2)
        report.append(f"```json\n{scores_json}\n```\n")
        report.append("**Quality Explanation:**\n")
        report.append(f"{alter_tags(record['quality_explanation'])}\n")
        report.append("---\n")
    return "\n".join(report)

In [None]:
import re 

def build_caption_with_id(img_info):
    """Return the caption, prefixed with its figure id when one is present.

    Reads ``"caption"``, ``"fig_id"`` and ``"subfig_label"`` from *img_info*
    (each defaulting to ``""``). With a truthy ``fig_id`` the result is
    ``"This is <fig_id><subfig_label>. <caption>"``; otherwise the caption
    alone.
    """
    text = img_info.get("caption", "")
    figure = img_info.get("fig_id", "")
    panel = img_info.get("subfig_label", "")

    if not figure:
        return text
    return f"This is {figure}{panel}. " + text



def extract_specific_context(item, target_indices):
    """Collect observation and interpretation text for the selected images.

    Args:
        item: Dict read via ``"context_enhanced_captions"`` (list of entries
            with ``"image_index"``, ``"observation"``, ``"interpretation"``)
            and ``"context_enhanced_summary"`` (dict with
            ``"observation_summary"`` / ``"interpretation_summary"``).
        target_indices: Container of image indices to keep.

    Returns:
        ``(observations, interpretations)``: two newline-joined strings, each
        listing the per-image lines followed by the summary line when the
        corresponding summary is non-empty.
    """
    summary = item.get("context_enhanced_summary", {})
    observations = []
    interpretations = []

    for caption in item.get("context_enhanced_captions", []):
        image_idx = caption.get("image_index")
        if image_idx not in target_indices:
            continue
        observation = caption.get("observation", "")
        if observation:
            observations.append(f"[Image {image_idx}]: {observation}")
        interpretation = caption.get("interpretation", "")
        if interpretation:
            interpretations.append(f"[Image {image_idx}]: {interpretation}")

    observation_summary = summary.get("observation_summary")
    if observation_summary:
        observations.append(f"[observation_summary]: {observation_summary}")
    interpretation_summary = summary.get("interpretation_summary")
    if interpretation_summary:
        interpretations.append(f"[interpretation_summary]: {interpretation_summary}")

    return "\n".join(observations), "\n".join(interpretations)


def extract_interpretation_text(item, target_indices=None):
    """Join per-image interpretations and the overall interpretation summary.

    Args:
        item: Mapping read for ``context_enhanced_captions`` and
            ``context_enhanced_summary``.
        target_indices: Optional container of ``image_index`` values to keep.
            A falsy value (``None`` or empty) keeps every image.

    Returns:
        Newline-joined ``[Image N Interpretation]: ...`` lines (entries that
        are empty or equal to ``"Not found"`` are skipped), followed by an
        ``[Overall Summary]: ...`` line when a summary exists.
    """
    overall = item.get("context_enhanced_summary", {}).get("interpretation_summary", "")

    lines = []
    for entry in item.get("context_enhanced_captions", []):
        image_no = entry.get("image_index")
        selected = not target_indices or image_no in target_indices
        if not selected:
            continue
        text = entry.get("interpretation", "")
        if not text or text == "Not found":
            continue
        lines.append(f"[Image {image_no} Interpretation]: {text}")

    if overall:
        lines.append(f"[Overall Summary]: {overall}")
    return "\n".join(lines)


def format_background_intro(theme_data, target_indices):
    """Serialise the experimental background plus selected image settings.

    Args:
        theme_data: Mapping read for ``"Experimental background"`` and
            ``"Image Settings"`` (itself a mapping keyed ``"Image <n>"``).
            A falsy value yields an empty string.
        target_indices: Image numbers whose ``"Image <n>"`` settings should be
            kept. When falsy, every setting whose key contains ``"Image"`` is
            kept.

    Returns:
        A pretty-printed JSON string with the keys ``"Experimental background"``
        and ``"Image Settings"``, or ``""`` when ``theme_data`` is falsy.
    """
    if not theme_data:
        return ""

    exp_bg = theme_data.get("Experimental background", "N/A")
    # `or {}` also covers an explicit None stored under the key.
    all_themes = theme_data.get("Image Settings") or {}

    if target_indices:
        wanted_keys = (f"Image {idx}" for idx in target_indices)
        selected_themes = {key: all_themes[key] for key in wanted_keys if key in all_themes}
    else:
        # Select by key directly. Parsing the number out of the key and
        # rebuilding it would raise on non-numeric suffixes and would lose
        # keys such as "Image 01".
        selected_themes = {
            key: value
            for key, value in all_themes.items()
            if isinstance(key, str) and "Image" in key
        }

    background_dict = {
        "Experimental background": exp_bg,
        "Image Settings": selected_themes,
    }
    return json.dumps(background_dict, ensure_ascii=False, indent=2)


In [None]:
from openai import OpenAI
import asyncio
import time
import os
import json
import pickle
import base64
from io import BytesIO
from PIL import Image
import numpy as np
from openai import AsyncOpenAI, APIConnectionError, InternalServerError
from asyncio import as_completed
from tqdm import tqdm
import httpx

import logging
from datetime import datetime, timezone, timedelta

# --- Hosted DashScope endpoint ---
# The key comes from the environment; "1" is only a placeholder so that the
# client object can still be constructed when the variable is unset.
dashscope_api_key = os.getenv("DASHSCOPE_API_KEY") or "1"
vl_model = "qwen3-vl-plus"
text_model = "qwen-plus"

# --- Async Clients ---
dashscope_client = AsyncOpenAI(
    api_key=dashscope_api_key,
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    timeout=120.0
)

# --- Additional OpenAI-compatible endpoints (text + vision-language) ---
# Keys, model names and base URLs are read from the environment so that no
# credential or private address is stored in the notebook. When a variable is
# unset, the "xxx" placeholder that this cell previously ended up with is used,
# so the resulting module state is unchanged by default.

# Configuration for Text Model (e.g., Gemini-2.0-flash via OpenAI compat)
local_text_api_key = os.getenv("LOCAL_TEXT_API_KEY") or "xxx"
local_text_model = os.getenv("LOCAL_TEXT_MODEL") or "xxx"
local_text_client = AsyncOpenAI(
    api_key=local_text_api_key,
    base_url=os.getenv("LOCAL_TEXT_BASE_URL") or "xxx",
    timeout=120.0
)


# Configuration for VL Model
local_vl_api_key = os.getenv("LOCAL_VL_API_KEY") or "xxx"
local_vl_model = os.getenv("LOCAL_VL_MODEL") or "xxx"
local_vl_client = AsyncOpenAI(
    api_key=local_vl_api_key,
    base_url=os.getenv("LOCAL_VL_BASE_URL") or "xxx",
    timeout=120.0
)

async def get_response_async(prev_messages,
                             next_content,
                             model,
                             client,
                             tools=None,
                             max_retries=3):
    """Send one user turn to a chat-completions endpoint and collect the streamed reply.

    Args:
        prev_messages: Earlier chat messages. The list is not mutated; a new
            list is built for the request.
        next_content: Content of the new user message. It is passed through
            unchanged, so it may be a plain string or a multimodal parts list.
        model: Model name forwarded to the endpoint.
        client: Object exposing ``chat.completions.create`` as an awaitable
            that returns an async-iterable stream of chunks.
        tools: Optional tool definitions. When given, they are forwarded with
            ``parallel_tool_calls=True``.
        max_retries: Number of attempts before a retryable error is re-raised.

    Returns:
        Dict with ``content`` (answer text), ``reasoning_content``, ``usage``
        (always ``None``), ``prev_messages`` (request messages plus the new
        assistant message) and ``tool_info`` (accumulated tool-call
        fragments). If ``max_retries`` is below 1, no request is made and
        ``None`` is returned.

    Raises:
        APIConnectionError, InternalServerError: After the last failed attempt.
        Exception: Immediately for non-retryable errors, or after the last
            attempt for stream cut-off errors.
    """
    messages = prev_messages + [{"role": "user", "content": next_content}]
    MAX_TOKENS_LIMIT = 4096

    request_kwargs = {
        "model": model,
        "messages": messages,
        "stream": True,
        "max_tokens": MAX_TOKENS_LIMIT,
    }
    if tools is not None:
        request_kwargs["tools"] = tools
        request_kwargs["parallel_tool_calls"] = True

    for attempt in range(max_retries):
        try:
            # Accumulators are reset per attempt so that a stream that was cut
            # off half-way does not leak partial text into the retry.
            reasoning_content = ""
            answer_content = ""
            tool_info = []

            response = await client.chat.completions.create(**request_kwargs)

            async for chunk in response:
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta

                # Extract reasoning content if supported by the model
                reasoning_piece = getattr(delta, "reasoning_content", None)
                if reasoning_piece is not None:
                    reasoning_content += reasoning_piece
                    continue

                if delta.content is not None:
                    answer_content += delta.content

                # Handle tool calls in stream: fragments of one call share an index
                if delta.tool_calls is not None:
                    for tool_call in delta.tool_calls:
                        index = tool_call.index
                        while len(tool_info) <= index:
                            tool_info.append({})
                        slot = tool_info[index]
                        if tool_call.id:
                            slot['id'] = slot.get('id', '') + tool_call.id
                        if tool_call.function and tool_call.function.name:
                            slot['name'] = slot.get('name', '') + tool_call.function.name
                        if tool_call.function and tool_call.function.arguments:
                            slot['arguments'] = slot.get('arguments', '') + tool_call.function.arguments
                        if tool_call.type:
                            slot['type'] = tool_call.type

            # Fallback for models that wrap reasoning in <think> tags
            if not reasoning_content and answer_content.startswith("<think>"):
                end_think_idx = answer_content.find("</think>")
                if end_think_idx != -1:
                    reasoning_content = answer_content[len("<think>"):end_think_idx]
                    answer_content = answer_content[end_think_idx + len("</think>"):]

            new_message = {
                "role": "assistant",
                "content": answer_content,
            }

            if tool_info:
                # A fragment key is only set when the stream delivered a
                # non-empty value (e.g. a call without arguments never sets
                # 'arguments'), so read with defaults instead of indexing.
                new_message["tool_calls"] = [{
                    "id": call.get("id", ""),
                    "function": {
                        "name": call.get("name", ""),
                        "arguments": call.get("arguments", "")
                    },
                    "type": call.get("type", "function"),
                    "index": i
                } for i, call in enumerate(tool_info)]

            messages.append(new_message)

            return {
                "content": answer_content,
                "reasoning_content": reasoning_content,
                "usage": None,
                "prev_messages": messages,
                "tool_info": tool_info
            }

        except (APIConnectionError, InternalServerError) as e:
            print(f"--- [Retryable Error] (Attempt {attempt + 1}/{max_retries}): {e}")
            if attempt == max_retries - 1:
                raise
            await asyncio.sleep(5)

        except Exception as e:
            error_str = str(e).lower()
            # Handle specific network/stream cutoff issues
            if any(msg in error_str for msg in ["incomplete chunked read", "peer closed connection", "connection closed"]):
                print(f"--- [Network/Server Cutoff] (Attempt {attempt + 1}/{max_retries}): {e}")
                if attempt == max_retries - 1:
                    print("--- Max retries reached for cutoff error.")
                    raise
                await asyncio.sleep(10)
            else:
                print(f"--- [Fatal Error]: {e}")
                raise

In [None]:
import asyncio
import json
import os
from tqdm.asyncio import tqdm_asyncio

# Step 0 file locations and concurrency cap.
INPUT_FILE = 'sorce_data.json'
OUTPUT_FILE_STEP0 = "step0_filtered_data.json"
OUTPUT_FILE_DISCARDED = "step0_discarded_data.json"
CONCURRENT_LIMIT_STEP0 = 10

# Load the raw samples once; a missing file leaves Step 0 with nothing to process.
if not os.path.exists(INPUT_FILE):
    print(f"Error: File {INPUT_FILE} not found.")
    RAW_DATA_SOURCE = []
else:
    with open(INPUT_FILE, 'r', encoding='utf-8') as f:
        data = json.load(f)
    print(f"Successfully loaded {len(data)} items from {INPUT_FILE}")
    RAW_DATA_SOURCE = data


async def check_biomedical_async(sample):
    """Ask the text model whether a sample is biomedical and package the verdict.

    The judged context is ``sample["back_info"]``; when that is empty, the
    string entries of ``sample["text_list"]`` joined by spaces are used.

    Args:
        sample: Mapping read for ``back_info``, ``text_list``, ``image_info``
            and ``original_sample_index``.

    Returns:
        One of three dicts, distinguished by ``status``:
        ``"valid"`` (with a trimmed copy of the sample under ``data``),
        ``"filtered"`` (with a context preview and the raw model reply), or
        ``"error"`` (with the exception text). Exceptions are not propagated.
    """
    try:
        back_info = sample.get("back_info", "")
        if not back_info:
            text_list = sample.get("text_list", [])
            back_info = " ".join([t for t in text_list if isinstance(t, str)])

        context_for_judge = back_info
        prompt = BIOMED_CHECK_PROMPT.format(context=context_for_judge)

        response = await get_response_async(
            [], prompt, local_text_model, local_text_client
        )
        content = response["content"].strip()

        # Strip an optional Markdown code fence, with or without a "json" tag.
        if content.startswith("```json"):
            content = content[7:].strip()
        elif content.startswith("```"):
            content = content[3:].strip()
        if content.endswith("```"):
            content = content[:-3].strip()

        try:
            verdict = json.loads(content)
        except json.JSONDecodeError:
            verdict = None

        if isinstance(verdict, dict):
            flag = verdict.get("is_biomedical", False)
            if isinstance(flag, str):
                # A quoted "false" is a non-empty string and would otherwise be truthy.
                is_biomedical = flag.strip().lower() == "true"
            else:
                is_biomedical = bool(flag)
        else:
            # Reply was not a JSON object: fall back to a keyword check.
            is_biomedical = "true" in content.lower()

        if is_biomedical:
            raw_image_info = sample.get("image_info", [])
            formatted_captions = [
                {"image_index": i + 1, "caption": img.get("caption", "")}
                for i, img in enumerate(raw_image_info)
            ]

            # Keep only the fields the later steps read.
            lightweight_sample = {
                "original_sample_index": sample.get("original_sample_index"),
                "text_list": sample.get("text_list", []),
                "back_info": sample.get("back_info", ""),
                "image_captions": formatted_captions
            }

            return {
                "status": "valid",
                "data": lightweight_sample
            }
        else:
            return {
                "status": "filtered",
                "original_sample_index": sample.get("original_sample_index"),
                "context_preview": context_for_judge[:100],
                "llm_raw_response": content
            }

    except Exception as e:
        return {"status": "error", "error": str(e), "original_sample_index": sample.get("original_sample_index")}

async def main_step0_filter():
    """Classify every raw sample and write the kept and discarded sets to disk.

    Runs ``check_biomedical_async`` over ``RAW_DATA_SOURCE`` with at most
    ``CONCURRENT_LIMIT_STEP0`` checks in flight. Samples with status
    ``"valid"`` go to ``OUTPUT_FILE_STEP0`` and ``"filtered"`` ones to
    ``OUTPUT_FILE_DISCARDED``. Any other status (e.g. ``"error"``) is written
    to neither file, but is counted and reported in the printed summary.
    """
    semaphore = asyncio.Semaphore(CONCURRENT_LIMIT_STEP0)
    print(f"Step 0: Processing {len(RAW_DATA_SOURCE)} items...")

    async def wrapped(s):
        async with semaphore:
            return await check_biomedical_async(s)

    tasks = [wrapped(sample) for sample in RAW_DATA_SOURCE]
    results = await tqdm_asyncio.gather(*tasks)

    valid_samples = []
    discarded_samples = []
    failed_samples = []

    for res in results:
        status = res["status"]
        if status == "valid":
            valid_samples.append(res["data"])
        elif status == "filtered":
            discarded_samples.append(res)
        else:
            # Previously these vanished silently, so the totals did not add up.
            failed_samples.append(res)

    with open(OUTPUT_FILE_STEP0, "w", encoding="utf-8") as f:
        json.dump(valid_samples, f, indent=4, ensure_ascii=False)

    with open(OUTPUT_FILE_DISCARDED, "w", encoding="utf-8") as f:
        json.dump(discarded_samples, f, indent=4, ensure_ascii=False)

    print("\nProcessing Complete")
    print(f"Total Input: {len(RAW_DATA_SOURCE)}")
    print(f"Kept (Valid): {len(valid_samples)}")
    print(f"Discarded: {len(discarded_samples)}")
    if failed_samples:
        print(f"Errors (written to neither file): {len(failed_samples)}")
        for res in failed_samples[:5]:
            print(f"  [ID: {res.get('original_sample_index')}] {res.get('error')}")

# Run Step 0.
# NOTE(review): top-level `await` is not valid in a plain Python script; this
# presumably relies on a notebook/IPython session (see the "In [ ]:" markers).
if __name__ == "__main__":
    await main_step0_filter()

In [None]:
import asyncio
import json
import os
from tqdm.asyncio import tqdm_asyncio

# Step 1 input: same file name as OUTPUT_FILE_STEP0 (the samples Step 0 kept).
INPUT_FILE_FROM_STEP0 = "step0_filtered_data.json"
OUTPUT_FILE_STEP1 = "step1_keywords_output.json" 
# Only the first NUM_SAMPLES_TO_TEST input samples are processed.
NUM_SAMPLES_TO_TEST = 200
# Size of the semaphore that bounds concurrent keyword-extraction calls.
CONCURRENT_LIMIT_STEP1 = 5 

async def extract_keywords_from_filtered_async(sample):
    """Build the prompt context for one sample and request keywords/category.

    When the sample has no ``context``, one is assembled from ``text_list``:
    while captions remain, an empty-string segment becomes an ``[Image N]``
    token and a segment starting with ``")"`` gets the token prepended. All
    other segments are stringified as they are.

    Args:
        sample: Mapping read for ``original_sample_index``, ``context``,
            ``image_captions``, ``text_list`` and ``back_info``.

    Returns:
        A dict with ``status`` ``"success"`` plus the context, captions, raw
        model reply (``extracted_keywords``) and ``back_info``; or a dict with
        ``status`` ``"failed"`` and the error text if anything raised.
    """
    orig_idx = sample.get("original_sample_index", "N/A")
    try:
        context = sample.get("context", "")
        image_captions = sample.get("image_captions", [])

        if not context:
            segments = sample.get("text_list", [])
            total_images = len(image_captions)
            placed = 0
            pieces = []
            for segment in (segments or []):
                slot_free = placed < total_images
                if slot_free and segment == "":
                    pieces.append(f" [Image {placed + 1}]")
                    placed += 1
                elif slot_free and isinstance(segment, str) and segment.startswith(")"):
                    pieces.append(f" [Image {placed + 1}]{segment}")
                    placed += 1
                else:
                    pieces.append(str(segment))
            context = "".join(pieces)

        caption_lines = []
        for img_item in image_captions:
            number = img_item.get("image_index", "?")
            text = img_item.get("caption", "No caption")
            caption_lines.append(f"Image {number}: {text}")

        kw_prompt = KEYWORD_Category_PROMPT_TEMPLATE.format(
            context=context,
            image_caption=json.dumps(caption_lines, ensure_ascii=False, indent=2),
        )
        reply = await get_response_async([], kw_prompt, local_text_model, local_text_client)

        return {
            "status": "success",
            "original_sample_index": orig_idx,
            "context": context,
            "image_captions": image_captions,
            "extracted_keywords": reply["content"].strip(),
            "back_info": sample.get("back_info", ""),
        }
    except Exception as e:
        return {"status": "failed", "original_sample_index": orig_idx, "error": str(e)}

async def main_keyword_extraction_step1():
    """Run keyword extraction over the Step 0 output and save the successes.

    Reads ``INPUT_FILE_FROM_STEP0``, processes at most ``NUM_SAMPLES_TO_TEST``
    samples with ``CONCURRENT_LIMIT_STEP1`` requests in flight, prints each
    failure, and writes the successful records to ``OUTPUT_FILE_STEP1``.
    Nothing is written when no sample succeeded.
    """
    if not os.path.exists(INPUT_FILE_FROM_STEP0):
        print(f"[Error] Step 0 file not found: {INPUT_FILE_FROM_STEP0}")
        return

    with open(INPUT_FILE_FROM_STEP0, "r", encoding="utf-8") as f:
        filtered_data = json.load(f)
    if not filtered_data:
        print("[Warning] Data empty.")
        return

    test_data = filtered_data[:NUM_SAMPLES_TO_TEST]
    print(f"Step 1: Processing {len(test_data)} items...")

    semaphore = asyncio.Semaphore(CONCURRENT_LIMIT_STEP1)

    async def limited(s):
        async with semaphore:
            return await extract_keywords_from_filtered_async(s)

    results = await tqdm_asyncio.gather(*[limited(sample) for sample in test_data])

    kept_fields = (
        "original_sample_index",
        "context",
        "image_captions",
        "extracted_keywords",
        "back_info",
    )
    final_output = []
    for res in results:
        if res["status"] != "success":
            print(f"[ID: {res.get('original_sample_index')}] Failed: {res.get('error')}")
            continue
        final_output.append({field: res[field] for field in kept_fields})
    success_count = len(final_output)

    if final_output:
        with open(OUTPUT_FILE_STEP1, "w", encoding="utf-8") as f:
            json.dump(final_output, f, indent=4, ensure_ascii=False)
        print(f"\n[SUCCESS] Completed. Success: {success_count}/{len(test_data)} | Saved to: {OUTPUT_FILE_STEP1}")

# Run Step 1. NOTE(review): top-level `await` presumably relies on a notebook/IPython session.
await main_keyword_extraction_step1()

In [None]:
import asyncio
import json
import os
from tqdm.asyncio import tqdm_asyncio

# ================= Configuration =================
# NOTE: INPUT_FILE is also assigned in the Step 0 cell; this rebinds the same global.
# The value matches OUTPUT_FILE_STEP1, i.e. the keyword-extraction output.
INPUT_FILE = "step1_keywords_output.json"
OUTPUT_FILE = "step2_distilled_output.json"
# Size of the semaphore created in main_background_distillation.
MAX_CONCURRENT_TASKS = 20

def should_skip_distillation(background_text: str) -> bool:
    """Return True when there is no background text to distill (falsy input)."""
    return not background_text

async def process_single_sample_background(result_item, client, model):
    """Distill one sample's background text, or pass a short one through.

    Args:
        result_item: Mapping read for ``original_sample_index`` and ``back_info``.
        client: Chat client forwarded to ``get_response_async``.
        model: Model name forwarded to ``get_response_async``.

    Returns:
        ``(oid, outcome)`` where ``outcome["status"]`` is ``"error"``
        (missing id or failed request), ``"skipped"`` (no background), or
        ``"success"`` (with ``distilled_background``). Backgrounds under 200
        whitespace-separated words are returned unchanged with a ``note``.
    """
    oid = result_item.get("original_sample_index")
    if oid is None:
        return oid, {"status": "error", "error_message": "Missing 'original_sample_index'"}

    back_info = result_item.get("back_info")
    nothing_to_distill = not back_info or should_skip_distillation(back_info)
    if nothing_to_distill:
        return oid, {"status": "skipped", "message": "Background content missing or empty"}

    THRESHOLD = 200
    if len(back_info.split()) < THRESHOLD:
        outcome = {
            "status": "success",
            "distilled_background": back_info,
            "note": "Short background, used original.",
        }
        return oid, outcome

    prompt = BACKGROUND_DISTILLATION_PROMPT_TEMPLATE.format(back_info=back_info)

    try:
        reply = await get_response_async(
            prev_messages=[],
            next_content=prompt,
            model=model,
            client=client,
        )
        distilled = reply['content'].strip()

        # A fenced reply loses its first and last line (taken to be the fence markers).
        if distilled.startswith("```"):
            fence_lines = distilled.split('\n')
            if len(fence_lines) >= 3:
                distilled = '\n'.join(fence_lines[1:-1]).strip()

        return oid, {"status": "success", "distilled_background": distilled}
    except Exception as e:
        return oid, {"status": "error", "error_message": str(e)}

async def main_background_distillation(input_path, output_path, client, model):
    """Add a ``distilled_background`` field to every record and save the result.

    Args:
        input_path: JSON file holding the list of records to process.
        output_path: Destination JSON file for the annotated records.
        client: Chat client forwarded to ``process_single_sample_background``.
        model: Model name forwarded to ``process_single_sample_background``.

    Each record receives the distilled (or original) text on success,
    ``"SKIPPED_MISSING"`` when skipped, ``"ERROR: <message>"`` on failure, or
    ``"ERROR_ID_NOT_FOUND"`` when no result exists for its id. A summary of
    the four counters is printed after saving.
    """
    if not os.path.exists(input_path):
        print(f"[Error] Input file not found: {input_path}")
        return

    print(f"Reading input file: {input_path} ...")
    with open(input_path, 'r', encoding='utf-8') as f:
        current_results = json.load(f)

    print(f"Load successful. Total items: {len(current_results)}")
    if not current_results:
        return

    semaphore = asyncio.Semaphore(MAX_CONCURRENT_TASKS)

    async def bounded(item):
        async with semaphore:
            return await process_single_sample_background(item, client, model)

    pending = [bounded(item) for item in current_results]

    print(f"Starting distillation process...")
    results = await tqdm_asyncio.gather(*pending)

    # Index outcomes by sample id; results without an id cannot be matched back.
    results_map = {}
    for oid, res in results:
        if oid is not None:
            results_map[oid] = res

    stats = {"original": 0, "distilled": 0, "error": 0, "skipped": 0}

    for item in current_results:
        res = results_map.get(item.get("original_sample_index"))

        if res is None:
            item["distilled_background"] = "ERROR_ID_NOT_FOUND"
            stats["error"] += 1
            continue

        status = res['status']
        if status == 'success':
            item["distilled_background"] = res['distilled_background']
            passthrough = res.get("note") == "Short background, used original."
            stats["original" if passthrough else "distilled"] += 1
        elif status == 'skipped':
            item["distilled_background"] = "SKIPPED_MISSING"
            stats["skipped"] += 1
        else:
            item["distilled_background"] = f"ERROR: {res.get('error_message')}"
            stats["error"] += 1

    try:
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(current_results, f, indent=4, ensure_ascii=False)
        print(f"\n[SUCCESS] Processing complete. Saved to: {output_path}")
        print(f"Stats: Kept Original: {stats['original']} | LLM Distilled: {stats['distilled']} | Errors: {stats['error']} | Skipped: {stats['skipped']}")
    except Exception as e:
        print(f"\n[ERROR] Failed to save file: {e}")

# ================= Execution =================
# Distill backgrounds with the text model client configured earlier.
# NOTE(review): top-level `await` presumably relies on a notebook/IPython session.
await main_background_distillation(
    INPUT_FILE, 
    OUTPUT_FILE, 
    local_text_client,   
    local_text_model     
)

In [None]:
# When using other VL models here, please configure the corresponding settings and file names.
import asyncio
import json
import os
import base64
from tqdm.asyncio import tqdm_asyncio
from openai import AsyncOpenAI

# Samples to caption; same file name as the Step 0 output. Rebinds the global INPUT_FILE.
INPUT_FILE = "step0_filtered_data.json"
# Full source records, matched by "original_sample_index" to obtain each sample's "image_info".
SOURCE_DATA_FILE = "qa_generation_quickly.json"
OUTPUT_FILE = "step2_model_enhanced_captions_qwenvl.json"

# Size of the single semaphore shared by all per-image VLM requests.
MAX_CONCURRENT_VLM_TASKS = 10

async def call_vlm_model_async(image_data_base64, prompt_text):
    """Send one text+image request to the VL model and return its reply text.

    Args:
        image_data_base64: Base64 payload embedded in a ``data:image/jpeg`` URL.
        prompt_text: Instruction text sent alongside the image.

    Returns:
        The content of the first choice, or ``"ERROR_VLM_CALL: <message>"``
        if the request raised. Exceptions are not propagated.
    """
    try:
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{image_data_base64}"},
        }
        text_part = {"type": "text", "text": prompt_text}
        completion = await local_vl_client.chat.completions.create(
            model=local_vl_model,
            messages=[{"role": "user", "content": [text_part, image_part]}],
            max_tokens=512,
            temperature=0.2,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        return f"ERROR_VLM_CALL: {str(e)}"

def get_image_base64_from_source(img_info):
    """Return base64 image data for an image record, or None if unavailable.

    Args:
        img_info: Mapping that may hold ``image_base64`` (used as-is when
            truthy) or ``local_path`` (a file that is read and encoded).

    Returns:
        The base64 string, or ``None`` when neither source yields data or the
        file cannot be read.
    """
    inline_data = img_info.get("image_base64")
    if inline_data:
        return inline_data

    local_path = img_info.get("local_path")
    if not local_path or not os.path.exists(local_path):
        return None

    try:
        with open(local_path, "rb") as f:
            return base64.b64encode(f.read()).decode('utf-8')
    except OSError:
        # Unreadable file (permissions, directory, vanished since the check).
        # Only I/O failures are best-effort; a bare `except:` would also
        # swallow KeyboardInterrupt/SystemExit.
        return None

async def process_single_image_vlm(img_info, semaphore):
    """Caption one image with the VL model, bounded by the shared semaphore.

    Args:
        img_info: Image record; its ``caption`` fills the prompt template and
            its image data is resolved by ``get_image_base64_from_source``.
        semaphore: Async context manager limiting concurrent model calls.

    Returns:
        The model reply, or ``"ERROR: Image data missing"`` when no image
        data could be obtained (no model call is made in that case).
    """
    caption = img_info.get("caption", "")
    encoded_image = get_image_base64_from_source(img_info)

    if encoded_image:
        prompt = VLM_PROMPT_TEMPLATE.format(caption=caption)
        # The semaphore is taken only around the network call itself.
        async with semaphore:
            return await call_vlm_model_async(encoded_image, prompt)

    return "ERROR: Image data missing"

async def process_sample_by_id(lightweight_item, source_map, semaphore):
    """Caption every image of the source record matching a filtered sample.

    Args:
        lightweight_item: Mapping read for ``original_sample_index``.
        source_map: Lookup from sample id (raw or stringified) to the full
            source record carrying ``image_info``.
        semaphore: Passed through to ``process_single_image_vlm``.

    Returns:
        ``(orig_id, captions)``. ``captions`` is a one-element error list when
        the id is unknown, an empty list when the record has no images, and
        otherwise one ``{"image_index", "description"}`` dict per image
        (1-based, in source order).
    """
    orig_id = lightweight_item.get("original_sample_index")

    # Try the id as given first, then its string form.
    raw_sample = source_map.get(orig_id) or source_map.get(str(orig_id))
    if not raw_sample:
        return orig_id, [{"error": "ID not found in source file"}]

    image_info_list = raw_sample.get("image_info", [])
    if not image_info_list:
        return orig_id, []

    replies = await asyncio.gather(
        *[process_single_image_vlm(info, semaphore) for info in image_info_list]
    )

    structured_captions = [
        {
            "image_index": position,
            "description": str(reply).replace("[Enhanced Captions]:", "").strip(),
        }
        for position, reply in enumerate(replies, start=1)
    ]
    return orig_id, structured_captions

async def main_vlm_enhancement():
    """Attach VL-model captions to every filtered sample and save the result.

    Reads the filtered samples from ``INPUT_FILE`` and the full source records
    from ``SOURCE_DATA_FILE``, captions each sample's images through
    ``process_sample_by_id`` and stores the list under
    ``"model-enhanced captions"`` before writing ``OUTPUT_FILE``.

    A sample counts as successful when a result exists for its id and no
    caption entry is an error. That means no ``"error"`` key and no
    description starting with ``"ERROR"``. Samples without images therefore
    count as successful.
    """
    if not os.path.exists(INPUT_FILE):
        print(f"[Error] Input file not found: {INPUT_FILE}")
        return

    if not os.path.exists(SOURCE_DATA_FILE):
        print(f"[Error] Source data file not found: {SOURCE_DATA_FILE}")
        return

    print(f"Loading filtered data: {INPUT_FILE} ...")
    with open(INPUT_FILE, 'r', encoding='utf-8') as f:
        step1_data = json.load(f)

    print(f"Loading source data (images): {SOURCE_DATA_FILE} ...")
    with open(SOURCE_DATA_FILE, 'r', encoding='utf-8') as f:
        raw_source_list = json.load(f)

    # Register each record under both the raw and the string id so that
    # int/str mismatches between the two files still resolve.
    source_map = {}
    for item in raw_source_list:
        oid = item.get("original_sample_index")
        if oid is not None:
            source_map[oid] = item
            source_map[str(oid)] = item

    print(f"Source map built. Total items: {len(raw_source_list)}")

    if not step1_data:
        print("[Warning] Input data is empty")
        return

    semaphore = asyncio.Semaphore(MAX_CONCURRENT_VLM_TASKS)

    print(f"Starting VLM processing for {len(step1_data)} items...")

    tasks = [process_sample_by_id(item, source_map, semaphore) for item in step1_data]
    results = await tqdm_asyncio.gather(*tasks)

    results_map = {oid: res for oid, res in results if oid is not None}

    def _caption_ok(entry):
        # Error markers: {"error": ...} for unknown ids, or a description that
        # begins with "ERROR" (missing image data / failed model call).
        return (
            isinstance(entry, dict)
            and "error" not in entry
            and not str(entry.get("description", "")).startswith("ERROR")
        )

    success_count = 0
    for item in step1_data:
        oid = item.get("original_sample_index")
        new_captions = results_map.get(oid, [])
        item["model-enhanced captions"] = new_captions

        if oid in results_map and all(_caption_ok(entry) for entry in new_captions):
            success_count += 1

    try:
        print(f"Saving results to {OUTPUT_FILE} ...")
        with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
            json.dump(step1_data, f, indent=4, ensure_ascii=False)
        print(f"\n[SUCCESS] Saved to: {OUTPUT_FILE}")
        print(f"Success count: {success_count}/{len(step1_data)}")

        if len(step1_data) > 0:
            print("\n[Preview Sample 0 - model-enhanced captions]:")
            print(json.dumps(step1_data[0].get("model-enhanced captions"), indent=2, ensure_ascii=False))

    except Exception as e:
        print(f"Failed to save file: {e}")

# Run the VLM captioning step. NOTE(review): top-level `await` presumably relies on a notebook/IPython session.
await main_vlm_enhancement()

In [None]:
import asyncio
import json
import os
from tqdm.asyncio import tqdm_asyncio

# Per-model caption files. The keys are the model labels looked up when the
# consensus prompt is filled in (desc_fleming / desc_hulu / desc_lingshu / desc_qwenvl).
# NOTE(review): only the "qwenvl" file name matches an OUTPUT_FILE written in this
# part of the notebook; the other three are presumably produced elsewhere — confirm.
INPUT_FILES_MAP = {
    "fleming": "step1_model_enhanced_captions_fleming.json",
    "hulu":    "step1_model_enhanced_captions_hulu.json",
    "lingshu": "step1_model_enhanced_captions_lingshu.json",
    "qwenvl":  "step2_model_enhanced_captions_qwenvl.json"
}

# Base records for integration: same file name as the background-distillation output.
FILTERED_DATA_FILE = "step2_distilled_output.json"
# NOTE(review): the variable says "STEP3_5" while the file name says "step2_5" — confirm which label is intended.
OUTPUT_FILE_STEP3_5 = "step2_5_Integration_vl_output.json"

# Size of the semaphore that bounds concurrently processed samples.
MAX_CONCURRENT_CONSENSUS = 20

def build_lookup_map(file_path, model_name):
    """Index one model's caption file as ``{sample_id: {image_index: description}}``.

    Args:
        file_path: JSON file containing a list of sample records.
        model_name: Label used only in the warning/error messages.

    Returns:
        The nested lookup dict. It is empty when the file is missing, and it
        holds whatever was indexed before the failure when parsing raises.
    """
    lookup = {}

    if not os.path.exists(file_path):
        print(f"[Warning] Model file not found: {file_path} ({model_name})")
        return lookup

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            samples = json.load(f)

        for sample in samples:
            sample_id = sample.get("original_sample_index")
            if sample_id is None:
                continue
            per_image = lookup.setdefault(sample_id, {})
            for caption_entry in sample.get("model-enhanced captions", []):
                image_no = caption_entry.get("image_index")
                text = caption_entry.get("description", "")
                if image_no is None:
                    continue
                per_image[image_no] = text
    except Exception as e:
        print(f"[Error] Failed to parse {model_name}: {e}")

    return lookup

async def generate_consensus_async(s_idx, i_idx, desc_map, client, model):
    """Merge several models' descriptions of one image into a consensus text.

    Args:
        s_idx: Sample id (not used in the request).
        i_idx: Image index (not used in the request).
        desc_map: Model label -> description. Only descriptions longer than
            five characters count as usable.
        client: Chat client forwarded to ``get_response_async``.
        model: Model name forwarded to ``get_response_async``.

    Returns:
        The stripped model reply; ``"ERROR: Insufficient model outputs for
        consensus."`` when no description is usable; or
        ``"ERROR_CONSENSUS_GEN: <message>"`` when the request raised.
    """
    has_usable_description = any(d and len(d) > 5 for d in desc_map.values())
    if not has_usable_description:
        # With nothing usable there is also no single description to fall back to.
        return "ERROR: Insufficient model outputs for consensus."

    prompt_fields = {
        f"desc_{label}": desc_map.get(label, "N/A")
        for label in ("fleming", "hulu", "lingshu", "qwenvl")
    }
    prompt = CONSENSUS_PROMPT_TEMPLATE.format(**prompt_fields)

    try:
        reply = await get_response_async([], prompt, model, client)
        return reply['content'].strip()
    except Exception as e:
        return f"ERROR_CONSENSUS_GEN: {str(e)}"

async def process_sample_step3_5(raw_sample, lookup_maps, semaphore, client, model):
    """Produce consensus descriptions for every image of one distilled sample.

    Args:
        raw_sample: Record from the distilled file; all of its fields are
            carried over into the output (shallow copy).
        lookup_maps: Model label -> ``{sample_id: {image_index: description}}``.
        semaphore: Held for the whole sample while its images are processed
            one after another.
        client: Chat client forwarded to ``generate_consensus_async``.
        model: Model name forwarded to ``generate_consensus_async``.

    Returns:
        ``None`` when the sample has no id; otherwise the copied record with a
        ``consensus_image_descriptions`` list (empty when there are no images).
    """
    s_idx = raw_sample.get("original_sample_index")
    if s_idx is None:
        return None

    # Inherit everything from the distilled record, then add the new field.
    output_entry = raw_sample.copy()
    consensus_list = []
    output_entry["consensus_image_descriptions"] = consensus_list

    image_info_list = raw_sample.get("image_captions") or raw_sample.get("original_captions", [])
    if not image_info_list:
        return output_entry

    async with semaphore:
        for position, img_info in enumerate(image_info_list, start=1):
            # Fall back to the 1-based position when the entry has no explicit index.
            image_idx = img_info.get("image_index", position)

            per_model = {
                label: table.get(s_idx, {}).get(image_idx, "")
                for label, table in lookup_maps.items()
            }
            consensus_text = await generate_consensus_async(s_idx, image_idx, per_model, client, model)

            consensus_list.append({
                "image_index": image_idx,
                "description": consensus_text,
            })

    return output_entry

async def main_step3_5_ensemble_structure():
    """Merge the per-model captions into consensus descriptions and save them.

    Loads the distilled records from ``FILTERED_DATA_FILE``, builds one lookup
    per entry of ``INPUT_FILES_MAP``, runs ``process_sample_step3_5`` on every
    record (bounded by ``MAX_CONCURRENT_CONSENSUS``), drops records without an
    id, sorts by ``original_sample_index`` and writes ``OUTPUT_FILE_STEP3_5``.
    """
    print("--- Step 3.5: Starting Multi-Model Integration ---")

    if not os.path.exists(FILTERED_DATA_FILE):
        print(f"[Error] Input distilled file not found: {FILTERED_DATA_FILE}")
        return

    print(f"Loading distilled data (Base): {FILTERED_DATA_FILE} ...")
    with open(FILTERED_DATA_FILE, "r", encoding="utf-8") as f:
        valid_samples_list = json.load(f)
    print(f"Base samples count: {len(valid_samples_list)}")

    print("Loading outputs from 4 models (for captions only)...")
    lookup_maps = {
        name: build_lookup_map(path, name)
        for name, path in INPUT_FILES_MAP.items()
    }
    print("Indexes built.")

    semaphore = asyncio.Semaphore(MAX_CONCURRENT_CONSENSUS)

    print(f"Preparing to process {len(valid_samples_list)} samples...")

    pending = [
        process_sample_step3_5(
            valid_sample,
            lookup_maps,
            semaphore,
            local_text_client,
            local_text_model,
        )
        for valid_sample in valid_samples_list
    ]
    gathered = await tqdm_asyncio.gather(*pending, desc="Integrating models")

    # Records without an id come back as None and are dropped here.
    results = [entry for entry in gathered if entry is not None]
    results.sort(key=lambda x: x.get("original_sample_index", 0))

    print(f"Saving final output to: {OUTPUT_FILE_STEP3_5} ...")
    with open(OUTPUT_FILE_STEP3_5, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4, ensure_ascii=False)

    print(f"\n[SUCCESS] Integration complete.")
    print(f"Output file: {OUTPUT_FILE_STEP3_5}")
    print(f"Final output count: {len(results)}")

    if results:
        print("\n[Preview Sample 0 Keys (Verification of Inheritance)]:")
        print(list(results[0].keys()))

# Run the multi-model integration step. NOTE(review): top-level `await` presumably relies on a notebook/IPython session.
await main_step3_5_ensemble_structure()

In [None]:
import asyncio
import json
import os
import re
from tqdm.asyncio import tqdm_asyncio

# Input has the same file name as OUTPUT_FILE_STEP3_5 (the integration output).
# NOTE: INPUT_FILE / OUTPUT_FILE / MAX_CONCURRENT_TASKS rebind globals that earlier cells also assign.
INPUT_FILE = "step2_5_Integration_vl_output.json"
OUTPUT_FILE = "step3_Context_Enhanced_Caption.json"
# NOTE(review): presumably the concurrency cap for caption generation in this cell — confirm where it is used.
MAX_CONCURRENT_TASKS = 2

async def process_single_sample_generation(item, client, model):
    """Ask the text model for context-enhanced captions of one sample.

    Args:
        item: Sample dict. Reads "original_sample_index",
            "distilled_background", "extracted_keywords", "context" and
            "consensus_image_descriptions".
        client: Client object forwarded to ``get_response_async``.
        model: Model identifier forwarded to ``get_response_async``.

    Returns:
        A ``(original_sample_index, result)`` tuple. ``result["status"]`` is:
        * "skipped"       - the sample has no image descriptions;
        * "partial_error" - the model answered but the payload is unusable
          (carries "raw_output" and "message");
        * "success"       - carries "context_enhanced_data" (a dict whose
          "observations" value is a dict) and "raw_output_str";
        * "error"         - the request or post-processing raised
          (carries "error_message").
    """
    oid = item.get("original_sample_index")

    distilled_bg = item.get("distilled_background", "")
    keywords = item.get("extracted_keywords", "")
    context = item.get("context", "")

    original_captions = item.get("consensus_image_descriptions", [])
    if not original_captions:
        return oid, {"status": "skipped", "message": "No images"}

    # Upstream steps store marker strings instead of a background on failure;
    # do not feed those markers to the model.
    if "ERROR" in str(distilled_bg) or "SKIPPED" in str(distilled_bg):
        distilled_bg = "Not available."

    captions_json_str = json.dumps(original_captions, indent=2, ensure_ascii=False)

    prompt = ENHANCED_CAPTION_PROMPT_TEMPLATE.format(
        distilled_background=distilled_bg,
        keywords=keywords,
        context=context,
        vl_captions_json=captions_json_str
    )

    def partial_error(raw_output, message):
        return oid, {
            "status": "partial_error",
            "raw_output": raw_output,
            "message": message
        }

    try:
        response = await get_response_async(
            prev_messages=[],
            next_content=prompt,
            model=model,
            client=client
        )
        content = response['content'].strip()

        # Unwrap a Markdown code fence (``` or ```json) if the model added one.
        cleaned_content = content
        if "```" in content:
            match = re.search(r"```(?:json)?(.*?)```", content, re.DOTALL)
            if match:
                cleaned_content = match.group(1).strip()

        try:
            parsed_json = json.loads(cleaned_content)
        except json.JSONDecodeError:
            return partial_error(content, "Failed to parse JSON output")

        root_key = "Context_Enhanced_Captions"
        # The payload may or may not be wrapped in the root key; accept both.
        has_root = isinstance(parsed_json, dict) and root_key in parsed_json
        core_data = parsed_json[root_key] if has_root else parsed_json

        # Require real JSON objects: `"observations" in x` would otherwise be
        # a substring/element test on strings and lists and let malformed
        # payloads through as "success".
        if not isinstance(core_data, dict) or "observations" not in core_data:
            if has_root:
                message = "Parsed JSON missing 'observations' or 'interpretations' fields."
            else:
                message = f"Root key '{root_key}' not found in output."
            return partial_error(content, message)

        if not isinstance(core_data["observations"], dict):
            return partial_error(content, "'observations' is not a JSON object.")

        return oid, {
            "status": "success",
            "context_enhanced_data": core_data,
            "raw_output_str": cleaned_content
        }

    except Exception as e:
        return oid, {"status": "error", "error_message": str(e)}


def _merge_generation_result(item, res):
    """Write one generation result into ``item`` in place; return True on success."""
    status = res['status']

    if status == 'success':
        data = res['context_enhanced_data']
        obs_dict = data.get("observations", {}) if isinstance(data, dict) else None
        if not isinstance(obs_dict, dict):
            # A malformed payload must not abort the whole run.
            item["_error_info"] = "ERROR_GENERATION"
            item["_error_detail"] = "'observations' is not a JSON object"
            return False

        # The prompt is built from "consensus_image_descriptions", so fall
        # back to that list when "image_captions" is absent or empty.
        original_caps = (
            item.get("image_captions")
            or item.get("consensus_image_descriptions")
            or []
        )

        item["context_enhanced_captions"] = [
            {
                "image_index": idx,
                "observation": obs_dict.get(f"Image {idx}", "Not found")
            }
            for idx in range(1, len(original_caps) + 1)
        ]
        item["context_enhanced_summary"] = {
            "observation_summary": obs_dict.get("summary") or obs_dict.get("Summary") or ""
        }
        return True

    if status == 'partial_error':
        item["_error_info"] = res.get("message")
        item["_raw_error_output"] = res.get("raw_output", "")
        return False

    # "error" and "skipped" keep the historical marker; the reason is kept
    # alongside it instead of being discarded.
    item["_error_info"] = "ERROR_GENERATION"
    detail = res.get("error_message") or res.get("message")
    if detail:
        item["_error_detail"] = detail
    return False


async def main_generation_from_file(input_path, output_path, client, model):
    """Generate context-enhanced captions for every sample of a JSON file.

    Loads the list of samples from ``input_path``, runs
    ``process_single_sample_generation`` on each one (at most
    ``MAX_CONCURRENT_TASKS`` at a time), merges the results back into the
    samples and dumps the whole list to ``output_path``.

    On success a sample gains "context_enhanced_captions" and
    "context_enhanced_summary"; otherwise it gains "_error_info" (plus
    "_raw_error_output" or "_error_detail" when available).

    Args:
        input_path: Path of the JSON file to read.
        output_path: Path of the JSON file to write.
        client: Client object forwarded to the generation call.
        model: Model identifier forwarded to the generation call.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    if not os.path.exists(input_path):
        print(f"[Error] Input file not found: {input_path}")
        return

    print(f"Reading input file: {input_path} ...")
    with open(input_path, 'r', encoding='utf-8') as f:
        current_data = json.load(f)

    if not current_data:
        print("Data is empty.")
        return

    semaphore = asyncio.Semaphore(MAX_CONCURRENT_TASKS)

    async def sem_task(item):
        async with semaphore:
            return await process_single_sample_generation(item, client, model)

    tasks = [sem_task(item) for item in current_data]

    print(f"Starting generation (Samples: {len(tasks)})...")
    results = await tqdm_asyncio.gather(*tasks)

    # gather() returns results in task order, so pair them with their samples
    # by position. Re-keying by original_sample_index would drop samples whose
    # id is None and overwrite results of samples sharing an id.
    success_count = 0
    for item, (_, res) in zip(current_data, results):
        if _merge_generation_result(item, res):
            success_count += 1

    try:
        print(f"Saving output file: {output_path} ...")
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(current_data, f, indent=4, ensure_ascii=False)
        print(f"Output file saved successfully!")

        print(f"\n[SUCCESS] All tasks completed! Success: {success_count}/{len(current_data)}")

        if current_data:
            print("\n[Preview Sample 0 Keys]:")
            print(list(current_data[0].keys()))

    except Exception as e:
        print(f"Failed to save file: {e}")

# Run the caption-enhancement step with the file names defined at the top of
# this cell. Top-level await: requires a notebook cell / async REPL.
await main_generation_from_file(
    INPUT_FILE, 
    OUTPUT_FILE, 
    local_text_client, 
    local_text_model
)

In [None]:
import json
import os

# File whose "image_info" entries supply fig_id / subfig_label per image.
SOURCE_FILE = "qa_generation_quickly.json"
# File whose "context_enhanced_captions" entries receive that metadata.
TARGET_FILE = "step3_Context_Enhanced_Caption.json"
# Same path as TARGET_FILE: main() overwrites the target file in place.
OUTPUT_FILE = "step3_Context_Enhanced_Caption.json"

def main():
    """Copy figure identifiers from SOURCE_FILE into the captions of TARGET_FILE.

    Samples are matched on "original_sample_index" and images on
    source "index" == caption "image_index". Every matching caption entry
    receives "fig_id" and "subfig_label"; the resulting list is written to
    OUTPUT_FILE. Returns None (also when an input file is missing).
    """
    if not (os.path.exists(SOURCE_FILE) and os.path.exists(TARGET_FILE)):
        print("Error: One or both input files not found.")
        return

    print(f"Loading source data from {SOURCE_FILE}...")
    with open(SOURCE_FILE, 'r', encoding='utf-8') as src_handle:
        source_records = json.load(src_handle)

    print(f"Loading target data from {TARGET_FILE}...")
    with open(TARGET_FILE, 'r', encoding='utf-8') as tgt_handle:
        target_records = json.load(tgt_handle)

    # sample id -> {image index -> figure metadata}
    figure_meta_by_sample = {}
    for record in source_records:
        sample_id = record.get("original_sample_index")
        if sample_id is None:
            continue
        figure_meta_by_sample[sample_id] = {
            info.get("index"): {
                "fig_id": info.get("fig_id", ""),
                "subfig_label": info.get("subfig_label", "")
            }
            for info in record.get("image_info", [])
            if info.get("index") is not None
        }

    print("Merging data fields...")
    updated_samples_count = 0
    updated_images_count = 0

    for record in target_records:
        per_image_meta = figure_meta_by_sample.get(record.get("original_sample_index"))
        if per_image_meta is None:
            continue

        captions = record.get("context_enhanced_captions", [])
        if not isinstance(captions, list):
            continue

        hits = 0
        for caption in captions:
            meta = per_image_meta.get(caption.get("image_index"))
            if meta is None:
                continue
            caption["fig_id"] = meta["fig_id"]
            caption["subfig_label"] = meta["subfig_label"]
            hits += 1

        updated_images_count += hits
        if hits:
            updated_samples_count += 1

    print("Process complete.")
    print(f"Total samples matched and updated: {updated_samples_count}")
    print(f"Total individual image entries updated: {updated_images_count}")

    print(f"Saving to {OUTPUT_FILE}...")
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as out_handle:
        json.dump(target_records, out_handle, indent=4, ensure_ascii=False)

    print("Done.")

# Run the merge when this code is the entry module
# (NOTE(review): in a notebook kernel __name__ is normally "__main__" too, so the cell runs it - confirm).
if __name__ == "__main__":
    main()

In [None]:
# ==================== Step 7: Generate Visual Recognition Simple QA (Structured Data Adapted Version) ====================

import asyncio
import json
import os
from tqdm.asyncio import tqdm_asyncio

# 1. Helper function: Extract and concatenate Observation directly from structured data
def extract_observation_text(item):
    """
    Flatten the structured captions of a sample into one prompt-ready string.

    Each usable entry of "context_enhanced_captions" becomes a line
    "[Image <image_index>]: <observation>" (empty and "Not found"
    observations are dropped). A non-empty
    "context_enhanced_summary"/"observation_summary" is appended as
    "[Summary]: <text>". Returns None when nothing is left.
    """
    caption_entries = item.get("context_enhanced_captions", [])
    summary_text = item.get("context_enhanced_summary", {}).get("observation_summary", "")

    indexed_observations = (
        (caption.get("image_index"), caption.get("observation", ""))
        for caption in caption_entries
    )
    lines = [
        f"[Image {image_no}]: {text}"
        for image_no, text in indexed_observations
        if text and text != "Not found"
    ]

    if summary_text:
        lines.append(f"[Summary]: {summary_text}")

    return "\n".join(lines) if lines else None

# 2. Generation function
async def generate_visual_element_qa_async(obs_str):
    """Request visual-element QA for an observation string.

    Fills VISUAL_ELEMENT_QA_PROMPT_TEMPLATE with ``obs_str``, sends it through
    ``get_response_async`` and returns the JSON-decoded reply (a Markdown
    code fence around the reply is removed first). Any failure of the request
    or of the decoding is printed and an empty list is returned.
    """
    import re

    prompt = VISUAL_ELEMENT_QA_PROMPT_TEMPLATE.format(
        Observation=obs_str
    )
    try:
        reply = await get_response_async([], prompt, local_text_model, local_text_client)
        payload = reply["content"].strip()

        # Models often wrap JSON in ``` or ```json fences; keep only the inside.
        fenced = None
        if "```" in payload:
            fenced = re.search(r"```(?:json)?(.*?)```", payload, re.DOTALL)
        if fenced is not None:
            payload = fenced.group(1).strip()

        return json.loads(payload)

    except Exception as e:
        print(f"Error generating Visual QA: {e}")
        return []

# 3. Single task logic (adapted to new structure)
async def run_visual_step_task(step1_data):
    """Produce the visual QA record for one sample.

    Args:
        step1_data: Sample dict carrying "original_sample_index", "context",
            "context_enhanced_captions" and "context_enhanced_summary"
            (plus optional "extracted_keywords" / "distilled_background").

    Returns:
        ``(result, None)`` on success or ``(None, failure)`` otherwise, where
        ``failure`` is ``{"original_sample_index": ..., "error": ...}``.
        This coroutine never raises for bad model output or bad ids, so a
        single bad sample cannot abort the surrounding ``gather``.
    """
    idx = step1_data.get("original_sample_index")
    context = step1_data.get("context", "")

    def failure(reason):
        return None, {"original_sample_index": idx, "error": reason}

    obs_str = extract_observation_text(step1_data)

    if not obs_str:
        return failure("No observations found in structured data")

    try:
        qa_list = await generate_visual_element_qa_async(obs_str)
    except Exception as e:
        return failure(f"Gen failed: {e}")

    if not qa_list:
        return failure("No QA returned from LLM")

    # The reply is parsed JSON of unknown shape: accept a list (use its first
    # element) or a bare object. Indexing a dict or string with [0] would
    # raise or yield a character.
    visual_qa = qa_list[0] if isinstance(qa_list, list) else qa_list
    if not isinstance(visual_qa, dict):
        return failure(f"Unexpected QA payload type: {type(visual_qa).__name__}")

    try:
        sample_index = int(idx) if idx is not None else -1
    except (TypeError, ValueError):
        return failure(f"Invalid original_sample_index: {idx!r}")

    result_data = {
        "original_sample_index": sample_index,
        "visual_qa": visual_qa,
        "input_observation": obs_str,
        "extracted_keywords": step1_data.get("extracted_keywords", ""),
        "distilled_background": step1_data.get("distilled_background", ""),
        "context": context,
        "context_enhanced_captions": step1_data.get("context_enhanced_captions"),
        "context_enhanced_summary": step1_data.get("context_enhanced_summary")
    }

    return result_data, None

# 4. Main execution logic
async def main_visual_element_gen():
    """Run visual QA generation over every sample of the step-3 output.

    Reads INPUT_FILE, runs ``run_visual_step_task`` on each sample with at
    most CONCURRENT_LIMIT in flight, then writes the successful records to
    OUTPUT_FILE and, if there are any, the failure records to FAILED_FILE.
    Returns None; a missing input file is reported and nothing is written.
    """
    INPUT_FILE = "step3_Context_Enhanced_Caption.json"
    OUTPUT_FILE = "step4_visual-element_qa_output.json"
    FAILED_FILE = "step4_visual-element_qa_output_failed.json"

    # Same guard as the other pipeline steps: report a missing input instead
    # of failing with a traceback.
    if not os.path.exists(INPUT_FILE):
        print(f"[ERROR] Input file {INPUT_FILE} not found!")
        return

    print(f"[INFO] Loading data from {INPUT_FILE}...")
    with open(INPUT_FILE, "r", encoding="utf-8") as f:
        data_list = json.load(f)

    # For a quick trial run, slice data_list here (e.g. the first 5 items).

    CONCURRENT_LIMIT = 10
    semaphore = asyncio.Semaphore(CONCURRENT_LIMIT)

    async def wrapped(item):
        async with semaphore:
            return await run_visual_step_task(item)

    tasks = [wrapped(item) for item in data_list]

    print(f"[INFO] Starting Visual QA Gen for {len(tasks)} samples...")
    results = await tqdm_asyncio.gather(*tasks)

    success_results = []
    failed_results = []

    # Each task returns (record, None) on success and (None, failure) otherwise.
    for passed, failed in results:
        if passed:
            success_results.append(passed)
        else:
            failed_results.append(failed)

    print(f"[INFO] Saving {len(success_results)} VALID items to {OUTPUT_FILE}...")
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(success_results, f, indent=4, ensure_ascii=False)

    if failed_results:
        print(f"[INFO] Saving {len(failed_results)} FAILED items to {FAILED_FILE}...")
        with open(FAILED_FILE, "w", encoding="utf-8") as f:
            json.dump(failed_results, f, indent=4, ensure_ascii=False)

    print(f"\n[INFO] Task Complete. Success Rate: {len(success_results)}/{len(data_list)}")

# Start
# Launch only if the prompt template is already defined in this session;
# otherwise print a warning instead of failing inside the run.
if 'VISUAL_ELEMENT_QA_PROMPT_TEMPLATE' not in globals():
    print("[WARNING] Please define the Prompt Template first!")
else:
    await main_visual_element_gen()

In [None]:
import asyncio
import json
import os
from tqdm.asyncio import tqdm_asyncio


# Passed as include_full to extract_visual_info: True keeps question and
# answer of every QA pair, False keeps only the answers.
INCLUDE_FULL_QA = False
def extract_visual_info(visual_qa_data, include_full=False):
    """
    Serialize the QA pairs of a visual QA record as a JSON string.

    Reads ``visual_qa_data["qa_pairs"]`` (a mapping of key -> QA dict) and
    returns ``{"visual_facts": ...}`` dumped with indent=2:

    * include_full=True  -> original keys, each mapped to its
      {"question", "answer"} (missing fields become "").
    * include_full=False -> keys "1", "2", ... in iteration order, each
      mapped to the answer text only.

    An empty / None ``visual_qa_data`` yields the string "{}".
    """
    if not visual_qa_data:
        return "{}"

    qa_pairs = visual_qa_data.get("qa_pairs", {})

    if include_full:
        facts = {
            pair_key: {
                "question": pair.get("question", ""),
                "answer": pair.get("answer", "")
            }
            for pair_key, pair in qa_pairs.items()
        }
    else:
        facts = {
            str(position): pair.get("answer", "")
            for position, pair in enumerate(qa_pairs.values(), start=1)
        }

    return json.dumps({"visual_facts": facts}, ensure_ascii=False, indent=2)


def split_caption_data(item):
    """Build the observation text block for a sample.

    Args:
        item: Sample dict. "context_enhanced_captions" (list of entries with
            "image_index", "observation" and optional "fig_id" /
            "subfig_label") and "context_enhanced_summary" (dict with
            "observation_summary") are read; either may be missing or None.

    Returns:
        ``(observation_text, "")``. The second element is always an empty
        string (callers unpack two values). Each usable entry becomes
        "<prefix> [Image <n>]: <observation>", where <prefix> is
        ":<fig_id> <subfig_label> :" or empty; a non-empty summary is
        appended as "[Observation Summary]: <text>".
    """
    # `or` rather than a .get default: the upstream step stores these keys
    # with item.get(...), so they can be present with a None value.
    captions_list = item.get("context_enhanced_captions") or []
    summary_data = item.get("context_enhanced_summary") or {}

    obs_parts = []
    for entry in captions_list:
        idx = entry.get("image_index")
        f_id = entry.get("fig_id", "")
        s_label = entry.get("subfig_label", "")
        id_prefix = f":{f_id} {s_label} :" if (f_id or s_label) else ""

        obs = entry.get("observation", "")
        if obs and obs != "Not found":
            obs_parts.append(f"{id_prefix} [Image {idx}]: {obs}")

    observation_summary = summary_data.get("observation_summary")
    if observation_summary:
        obs_parts.append(f"[Observation Summary]: {observation_summary}")

    return "\n".join(obs_parts), ""



# Main function
async def generate_logic_chain_async(visual_fact, obs_str, int_str, context_str):
    """Ask the text model for a logic chain and return the parsed output.

    Only ``obs_str`` and ``context_str`` are placed in
    LOGIC_CHAIN_PROMPT_TEMPLATE; ``visual_fact`` and ``int_str`` are accepted
    but not used. The request is tried up to three times with a one-second
    pause between tries; each failure is printed. Returns whatever
    ``process_qa_output`` yields, or None once every try has failed.
    """
    prompt = LOGIC_CHAIN_PROMPT_TEMPLATE.format(
        observation=obs_str,
        context=context_str
    )

    max_attempts = 3
    attempt_no = 0

    while attempt_no < max_attempts:
        attempt_no += 1
        try:
            # Text-only request: no image is packed alongside the prompt.
            packed = openai_pack_content(prompt, None)
            reply = await get_response_async([], packed, local_text_model, local_text_client)
            return process_qa_output(reply["content"])
        except Exception as e:
            print(f"Error generating Logic Chain: {e}")
            if attempt_no != max_attempts:
                await asyncio.sleep(1)  # brief pause before the next try

    return None

async def run_logic_chain_task(item):
    """Generate the logic chain for one step-4 record.

    Args:
        item: Record carrying "original_sample_index", "visual_qa",
            "context" and the structured caption fields.

    Returns:
        ``(result, None)`` on success, where ``result`` is a shallow copy of
        ``item`` extended with "logic_chain", "debug_model_input" and
        "debug_input_visual_facts"; ``(None, failure)`` otherwise, where
        ``failure`` is ``{"original_sample_index": ..., "error": ...}``.
    """
    # 1. Prepare input data
    idx = item.get("original_sample_index")
    visual_qa_data = item.get("visual_qa", {})
    context_str = item.get("context", "")

    # Test the raw record: extract_visual_info always returns a non-empty
    # string ("{}" at least), so checking its result could never detect a
    # missing visual_qa.
    if not visual_qa_data or not context_str:
        return None, {"original_sample_index": idx, "error": "Missing visual_qa or context"}

    visual_fact = extract_visual_info(visual_qa_data, include_full=INCLUDE_FULL_QA)
    obs_str, _ = split_caption_data(item)
    int_str = None  # no interpretation text is passed to the model

    # 2. Call LLM to generate.
    # NOTE: generate_logic_chain_async already retries internally, so this
    # outer loop multiplies the number of attempts.
    retry_count = 3
    for attempt in range(retry_count):
        try:
            logic_chain_json = await generate_logic_chain_async(visual_fact, obs_str, int_str, context_str)
            if logic_chain_json is None:
                raise Exception("Logic chain generation returned None")
            break
        except Exception as e:
            if attempt >= retry_count - 1:
                return None, {"original_sample_index": idx, "error": f"Gen failed: {e}"}
            await asyncio.sleep(2)  # Wait and retry

    # 3. Assemble results
    result_data = item.copy()
    result_data["logic_chain"] = logic_chain_json
    result_data["debug_model_input"] = {
        "observation_input": obs_str,
        "interpretation_input": int_str
    }

    # Store the facts as structured data when they decode; keep the raw
    # value otherwise.
    try:
        result_data["debug_input_visual_facts"] = json.loads(visual_fact)
    except (TypeError, ValueError):
        result_data["debug_input_visual_facts"] = visual_fact

    return result_data, None



async def main_logic_chain_gen():
    """Run logic-chain generation over the step-4 output file.

    Loads the records of INPUT_FILE, runs ``run_logic_chain_task`` on each
    with at most CONCURRENT_LIMIT in flight, and writes successes to
    OUTPUT_FILE and (if any) failures to FAILED_FILE. Returns None; a missing
    input file is reported and nothing is written.
    """
    INPUT_FILE = "step4_visual-element_qa_output.json"
    OUTPUT_FILE = "step5_logic_chain_output.json"
    FAILED_FILE = "step5_logic_chain_failed.json"

    if not os.path.exists(INPUT_FILE):
        print(f"[ERROR] Input file {INPUT_FILE} not found!")
        return

    print(f"[INFO] Loading data from {INPUT_FILE}...")
    with open(INPUT_FILE, "r", encoding="utf-8") as src:
        data_list = json.load(src)

    CONCURRENT_LIMIT = 32
    gate = asyncio.Semaphore(CONCURRENT_LIMIT)

    async def limited(record):
        async with gate:
            return await run_logic_chain_task(record)

    tasks = [limited(record) for record in data_list]

    print(f"[INFO] Starting Logic Chain Generation for {len(tasks)} samples...")
    outcomes = await tqdm_asyncio.gather(*tasks)

    # Every outcome is (record, None) on success or (None, failure).
    success_results, failed_results = [], []
    for ok_record, failure in outcomes:
        if ok_record:
            success_results.append(ok_record)
        else:
            failed_results.append(failure)

    print(f"[INFO] Saving {len(success_results)} VALID items to {OUTPUT_FILE}...")
    with open(OUTPUT_FILE, "w", encoding="utf-8") as dst:
        json.dump(success_results, dst, indent=2, ensure_ascii=False)

    if failed_results:
        print(f"[INFO] Saving {len(failed_results)} FAILED items to {FAILED_FILE}...")
        with open(FAILED_FILE, "w", encoding="utf-8") as dst:
            json.dump(failed_results, dst, indent=2, ensure_ascii=False)

    print(f"\n[INFO] Logic Chain Task Complete.")

In [None]:
# Launch only if the prompt template is already defined in this session;
# otherwise print an error instead of failing inside the run.
if 'LOGIC_CHAIN_PROMPT_TEMPLATE' not in globals():
    print("[ERROR] Please define LOGIC_CHAIN_PROMPT_TEMPLATE first!")
else:
    await main_logic_chain_gen()

In [None]:
from tqdm.asyncio import tqdm_asyncio
async def generate_logic_based_qa_async(logic_chain, visual_evidence, original_text):
    """Generate an open-ended QA pair from a logic chain.

    Fills OPEN_ENDED_QA_GENERATION_PROMPT_TEMPLATE with the three arguments,
    sends it to the text model and returns ``process_qa_output`` of the
    stripped reply. Up to three attempts are made, five seconds apart; each
    failure is printed and the exception of the last attempt is re-raised.
    """
    prompt = OPEN_ENDED_QA_GENERATION_PROMPT_TEMPLATE.format(
        logic_chain=logic_chain,
        visual_evidence=visual_evidence,
        original_text=original_text
    )

    max_retries = 3
    for attempt_no in range(1, max_retries + 1):
        try:
            packed = openai_pack_content(prompt, None)
            reply = await get_response_async([], packed, local_text_model, local_text_client)
            return process_qa_output(reply["content"].strip())
        except Exception as e:
            print(f"--- [Error] (Attempt {attempt_no}/{max_retries}): {e}")
            if attempt_no >= max_retries:
                print("--- Max retries reached for logic-based QA generation.")
                raise e
            await asyncio.sleep(5)

async def run_logic_based_qa_task(item):
    """Create the logic-based QA record for one step-5 record.

    The first element of ``item["logic_chain"]`` is serialized and sent,
    together with the observation text and the context, to
    ``generate_logic_based_qa_async``.

    Returns:
        ``(result, None)`` on success or ``(None, failure)`` when generation
        raised or returned nothing; ``failure`` holds
        "original_sample_index" and "error".
    """
    sample_id = item.get("original_sample_index")
    chain = item.get("logic_chain", {})
    context_text = item.get("context", "")

    observation_text, _ = split_caption_data(item)

    def failure(reason):
        return None, {"original_sample_index": sample_id, "error": reason}

    try:
        # Indexing stays inside the try: a chain without element 0 is
        # reported as a generation failure.
        chain_json = json.dumps(chain[0], ensure_ascii=False, indent=2)
        qa_pair = await generate_logic_based_qa_async(
            chain_json,
            observation_text,
            context_text
        )
    except Exception as e:
        return failure(f"Gen failed: {e}")

    if not qa_pair:
        return failure("No QA returned from LLM")

    return {
        "original_sample_index": int(sample_id) if sample_id is not None else -1,
        "basic_qa": qa_pair,
        "input_observation": observation_text,
        "input_context": context_text,
        "input_logic_chain": chain
    }, None

async def main_logic_based_qa_gen():
    """Run logic-based QA generation over the step-5 output file.

    Loads the records of INPUT_FILE, runs ``run_logic_based_qa_task`` on each
    with at most CONCURRENT_LIMIT in flight, and writes successes to
    OUTPUT_FILE and (if any) failures to FAILED_FILE. Returns None.
    """
    INPUT_FILE = "step5_logic_chain_output.json"
    OUTPUT_FILE = "step6_logic_based_qa_output.json"
    FAILED_FILE = "step6_logic_based_qa_failed.json"

    print(f"[INFO] Loading data from {INPUT_FILE}...")
    with open(INPUT_FILE, "r", encoding="utf-8") as f:
        data_list = json.load(f)

    CONCURRENT_LIMIT = 10
    semaphore = asyncio.Semaphore(CONCURRENT_LIMIT)

    # Route every task through the semaphore; without this wrapper the limit
    # is never applied and all requests start at once.
    async def wrapped(item):
        async with semaphore:
            return await run_logic_based_qa_task(item)

    tasks = [wrapped(item) for item in data_list]

    print(f"[INFO] Starting Logic-Based QA Generation for {len(tasks)} samples...")
    results = await tqdm_asyncio.gather(*tasks)

    success_results = []
    failed_results = []
    # Each task returns (record, None) on success and (None, failure) otherwise.
    for passed, failed in results:
        if passed:
            success_results.append(passed)
        else:
            failed_results.append(failed)

    print(f"[INFO] Saving {len(success_results)} VALID items to {OUTPUT_FILE}...")
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(success_results, f, indent=2, ensure_ascii=False)

    if failed_results:
        print(f"[INFO] Saving {len(failed_results)} FAILED items to {FAILED_FILE}...")
        with open(FAILED_FILE, "w", encoding="utf-8") as f:
            json.dump(failed_results, f, indent=2, ensure_ascii=False)

    print(f"\n[INFO] Logic-Based QA Task Complete.")
    


In [None]:
# Top-level await: this only works in a notebook cell / async REPL, not in a plain script.
await main_logic_based_qa_gen()