In [3]:
import difflib
import html
import os
from collections import defaultdict
from pprint import pprint

import yaml
from IPython.display import HTML
# ---------------------------------------------
# STEP 1: Load YAML files
# ---------------------------------------------

# Root folder that holds one sub-folder of annotation YAML files per version.
base_path = 'data/keno_1000/annotations'  # <-- Change this
# Sub-folders of base_path that are compared against each other.
version_dirs = ['v1.0', 'v1.1_Markus', 'v1.2', 'v1.3']

# Filled below as {version: set of '.yaml' filenames found in that version's folder}.
version_files = {}

def load_yaml(filepath):
    """Parse the YAML document stored at ``filepath``.

    The file is opened in binary mode so PyYAML performs its own encoding
    detection on the raw bytes, instead of the text first being decoded
    with the platform's locale encoding (which is not UTF-8 everywhere).

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document; this is
        ``None`` for an empty file.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    with open(filepath, 'rb') as stream:
        return yaml.safe_load(stream)

# Collect, per version, the names of the YAML files present in its folder.
for version in version_dirs:
    version_path = os.path.join(base_path, version)
    version_files[version] = {
        fname for fname in os.listdir(version_path) if fname.endswith('.yaml')
    }

# Only files that exist in every version can be compared side by side.
common_files = set.intersection(*version_files.values())
print(f"Found {len(common_files)} common files present in all versions.")

# Load YAML data only for common files, as {filename: {version: parsed yaml}}.
# The filenames are visited in sorted order: iterating the set directly would
# give an arbitrary order, and the order in which keys are inserted into
# `data` is the order of the sections in the generated report.
data = defaultdict(dict)
for version in version_dirs:
    version_path = os.path.join(base_path, version)
    for fname in sorted(common_files):
        full_path = os.path.join(version_path, fname)
        data[fname][version] = load_yaml(full_path)

# ---------------------------------------------
# STEP 2: Diff function per string
# ---------------------------------------------

def html_diff(a, b):
    """Return an HTML fragment showing ``b`` with its differences from ``a`` highlighted.

    The comparison is word based. Both texts are split on whitespace, with
    every line break kept as a token of its own so the line structure of
    the text survives into the output; words are re-joined with single
    spaces. Line breaks are emitted as ``'\\n'``, so the fragment has to be
    rendered where whitespace is preserved (e.g. inside ``<pre>``).

    Highlight colours:
      * ``#a6f3a6`` (green)  - words that occur only in ``b``
      * ``#f3a6a6`` (red)    - words that occur only in ``a``
      * ``#f3e7a6`` (yellow) - words of ``b`` that replace different words
        of ``a`` (the replaced words of ``a`` are not shown)

    All text is HTML-escaped, so ``<``, ``>`` and ``&`` in the input are
    displayed literally instead of being interpreted as markup.

    Args:
        a: Reference text. Falsy values (``''``, ``None``) count as empty.
        b: Text to display. Falsy values count as empty. Non-string values
            are converted with ``str()``.

    Returns:
        str: The highlighted HTML fragment; ``""`` when both inputs are empty.
    """
    def escape(text):
        return html.escape(str(text), quote=False)

    def highlight(markup, colour):
        return f'<span style="background-color: {colour}">{markup}</span>'

    if not a and not b:  # Both empty
        return ""
    if not a:  # Nothing to compare against: everything is an addition
        return highlight(escape(b), '#a6f3a6')
    if not b:  # Everything from the reference is missing
        return highlight(escape(a), '#f3a6a6')
    if a == b:  # Identical: no highlighting needed
        return escape(b)

    def tokenize(text):
        # Split per line first: a plain str.split() would swallow the line
        # breaks together with the other whitespace.
        tokens = []
        for index, line in enumerate(str(text).split('\n')):
            if index:
                tokens.append('\n')
            tokens.extend(line.split())
        return tokens

    def render(tokens):
        # Single spaces between words, but none around a line break.
        joined = ' '.join(tokens).replace(' \n', '\n').replace('\n ', '\n')
        return escape(joined)

    words_a = tokenize(a)
    words_b = tokenize(b)
    matcher = difflib.SequenceMatcher(None, words_a, words_b)

    pieces = []
    at_line_start = True  # suppresses the separating space after a line break
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            tokens, colour = words_b[j1:j2], None
        elif tag == 'insert':
            tokens, colour = words_b[j1:j2], '#a6f3a6'
        elif tag == 'delete':
            tokens, colour = words_a[i1:i2], '#f3a6a6'
        else:  # 'replace'
            tokens, colour = words_b[j1:j2], '#f3e7a6'
        if not tokens:
            continue

        markup = render(tokens)
        if colour:
            markup = highlight(markup, colour)
        if not (at_line_start or tokens[0] == '\n'):
            pieces.append(' ')
        pieces.append(markup)
        at_line_start = tokens[-1] == '\n'

    return ''.join(pieces)

# ---------------------------------------------
# STEP 3: Build HTML report
# ---------------------------------------------

# Static head of the report: styles, the UID filter script, the colour legend
# and the search box. The per-image sections are appended to this string.
html_output = """<!DOCTYPE html>
<html><head>
<style>
.legend {
    margin: 20px 0;
    padding: 10px;
    background-color: #f8f9fa;
    border: 1px solid #ddd;
}
.legend-item {
    display: inline-block;
    margin-right: 20px;
}
.legend-color {
    display: inline-block;
    width: 20px;
    height: 20px;
    margin-right: 5px;
    vertical-align: middle;
}
table { border-collapse: collapse; width: 100%; table-layout: fixed; }
th, td {
    border: 1px solid #999;
    padding: 0.5rem;
    vertical-align: top;
    width: 25%;
}
th { background-color: #eee; }
pre {
    white-space: pre-wrap;
    margin: 0;
    word-wrap: break-word;
}
td:first-child { width: 10%; }
td:not(:first-child) { width: 30%; }
.search-container {
    margin: 20px 0;
    padding: 10px;
    background-color: #f8f9fa;
    border: 1px solid #ddd;
    display: flex;
    gap: 10px;
    align-items: center;
}
.search-container input {
    width: 300px;
    padding: 8px;
    font-size: 16px;
    border: 1px solid #ddd;
    border-radius: 4px;
}
.search-button {
    padding: 8px 16px;
    font-size: 16px;
    background-color: #007bff;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
}
.search-button:hover {
    background-color: #0056b3;
}
.report-section {
    display: block;
}
.report-section.hidden {
    display: none;
}
</style>
<script>
function filterUIDs() {
    const searchTerm = document.getElementById('searchInput').value.toLowerCase();
    const sections = document.getElementsByClassName('report-section');

    for (let section of sections) {
        const uid = section.getAttribute('data-uid').toLowerCase();
        if (uid.includes(searchTerm)) {
            section.classList.remove('hidden');
        } else {
            section.classList.add('hidden');
        }
    }
}
</script>
</head><body>

<div class="legend">
    <div class="legend-item">
        <div class="legend-color" style="background-color: #a6f3a6;"></div>
        <span>Additional content</span>
    </div>
    <div class="legend-item">
        <div class="legend-color" style="background-color: #f3a6a6;"></div>
        <span>Missing content</span>
    </div>
    <div class="legend-item">
        <div class="legend-color" style="background-color: #f3e7a6;"></div>
        <span>Changed content</span>
    </div>
</div>

<div class="search-container">
    <input type="text"
           id="searchInput"
           placeholder="Search for UID..."
           autocomplete="off">
    <button class="search-button"
            onclick="filterUIDs()">
        Search
    </button>
</div>
"""


# Versions shown in every comparison table, in column order, with their headings.
REPORT_COLUMNS = {
    'v1.0': 'v1.0 (Llama-3-8B)',
    'v1.1_Markus': 'v1.1 (Markus) (reference)',
    'v1.2': 'v1.2 (Llama-3-70B)',
    'v1.3': 'v1.3 (GPT4-Turbo)',
}
# The column shown verbatim; every other column is diffed against it.
REFERENCE_VERSION = 'v1.1_Markus'


def _reasoning_of(annotation):
    """Return the 'reasoning' mapping of a parsed annotation, or {} if it has none."""
    section = annotation.get('reasoning') if isinstance(annotation, dict) else None
    return section if isinstance(section, dict) else {}


def _steps_of(annotation):
    """Return the list of reasoning steps of a parsed annotation, or [] if it has none."""
    steps = _reasoning_of(annotation).get('Reasoning')
    return steps if isinstance(steps, list) else []


def _step_fields(steps, index):
    """Return the field mapping of step ``index``, or {} when that step does not exist."""
    if index >= len(steps) or not isinstance(steps[index], dict):
        return {}
    # The recorded v2.0 output of this notebook shows steps wrapped as
    # {'Step': {...}}; accept that layout as well as the flat one.
    nested = steps[index].get('Step')
    return nested if isinstance(nested, dict) else steps[index]


def _as_text(value):
    """Flatten a field value (missing, string, or list of lines) into one string."""
    if value is None:
        return ''
    if isinstance(value, (list, tuple)):
        return '\n'.join(str(item) for item in value)
    return str(value)


def _table_header(title):
    """Return the opening tag, title row and column-heading row of a comparison table."""
    headings = ''.join(f'<th>{label}</th>' for label in REPORT_COLUMNS.values())
    return (
        '<table>'
        f'<tr><th colspan="{len(REPORT_COLUMNS) + 1}">{title}</th></tr>'
        f'<tr><th>Section</th>{headings}</tr>'
    )


def _comparison_row(label, text_by_version):
    """Return one table row: the reference text verbatim, the others diffed against it."""
    reference_text = text_by_version[REFERENCE_VERSION]
    cells = [f'<td><b>{label}</b></td>']
    for version in REPORT_COLUMNS:
        if version == REFERENCE_VERSION:
            cell = html.escape(reference_text, quote=False)
        else:
            cell = html_diff(reference_text, text_by_version[version])
        cells.append(f'<td><pre>{cell}</pre></td>')
    return '<tr>' + ''.join(cells) + '</tr>'


# One report section per image: a table per reasoning step, then a table for
# the final assessment. Fragments are collected in a list and joined once.
report_parts = []
for fname, versions in data.items():
    uid = html.escape(fname)  # used both as attribute value and as text
    report_parts.append(f'<div class="report-section" data-uid="{uid}">')
    report_parts.append(f'<h2>Image ID: {uid}</h2>')

    steps_by_version = {
        version: _steps_of(versions.get(version)) for version in REPORT_COLUMNS
    }
    max_steps = max(len(steps) for steps in steps_by_version.values())

    # Versions with fewer steps contribute empty cells for the missing ones.
    for step in range(max_steps):
        fields_by_version = {
            version: _step_fields(steps, step)
            for version, steps in steps_by_version.items()
        }
        report_parts.append(_table_header(f'Reasoning Step {step+1}'))
        for field in ('Description', 'Action', 'Result'):
            report_parts.append(_comparison_row(field, {
                version: _as_text(fields.get(field))
                for version, fields in fields_by_version.items()
            }))
        report_parts.append('</table><br>')

    report_parts.append(_table_header('Final Assessment'))
    report_parts.append(_comparison_row('FinalAssessment', {
        version: _as_text(_reasoning_of(versions.get(version)).get('FinalAssessment'))
        for version in REPORT_COLUMNS
    }))
    report_parts.append('</table><br>')
    report_parts.append('</div>')  # Close report-section div

html_output += ''.join(report_parts)
# Close the elements opened by the page template.
html_output += '</body></html>\n'

# ---------------------------------------------
# STEP 4: Save full HTML report
# ---------------------------------------------

output_path = 'reasoning_diff_report.html'
# The file is written as pure ASCII; any other character becomes a numeric
# character reference (&#NNN;). That way the result neither depends on the
# platform's default encoding nor on the browser guessing the charset of the
# page correctly.
with open(output_path, 'w', encoding='ascii', errors='xmlcharrefreplace') as f:
    f.write(html_output)

print(f"✅ HTML diff report generated: {output_path}")

Found 10 common files present in all versions.


✅ HTML diff report generated: reasoning_diff_report.html


In [4]:
import yaml
import json
from datasets import Dataset, DatasetDict
import os
from typing import Dict, List
from tqdm import tqdm

def load_yaml_file(filepath: str) -> Dict:
    """Load one annotation YAML file and check that it has the expected top-level keys.

    Problems are reported on stdout and signalled by an empty dict rather
    than by an exception, so a caller can skip bad files and carry on.

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        The parsed mapping when the file could be read, contains a YAML
        mapping and has the keys 'annotator', 'metadata' and 'reasoning';
        otherwise ``{}``.
    """
    try:
        # Binary mode: PyYAML detects the encoding itself instead of the
        # text being decoded with the platform's locale encoding.
        with open(filepath, 'rb') as f:
            data = yaml.safe_load(f)
    except Exception as e:
        print(f"Error loading {filepath}: {e}")
        return {}

    # An empty file parses to None and a file may also hold a list or a bare
    # scalar; none of those can carry the required keys.
    if not isinstance(data, dict):
        print(f"Warning: {filepath} does not contain a YAML mapping")
        return {}

    # Basic validation
    required_fields = ['annotator', 'metadata', 'reasoning']
    if not all(field in data for field in required_fields):
        print(f"Warning: Missing required fields in {filepath}")
        # skip this file
        return {}
    return data

def process_yaml_files(folder_path: str) -> "DatasetDict":
    """Build a train/val/test ``DatasetDict`` from the annotation YAML files in a folder.

    Every ``*.yaml`` file in ``folder_path`` is read with ``load_yaml_file``.
    Files that fail to load or validate, or whose 'metadata' entry is not a
    mapping, are skipped. Each remaining file contributes one row: its
    metadata values, its annotator, and its reasoning steps and final
    assessment serialised as JSON strings.

    Args:
        folder_path: Folder that contains the YAML files (not searched recursively).

    Returns:
        DatasetDict with the splits 'train', 'val' and 'test', assigned by
        the 'Split' value of each file's metadata. Rows with any other
        'Split' value end up in no split; their number is reported on stdout.
    """
    # Metadata keys that are copied into a column of the same name, together
    # with the value stored when a file lacks the key.
    metadata_defaults = {
        'UID': '', 'Split': '', 'PatientID': '', 'PhysicianID': '',
        'StudyDate': '', 'Age': -1, 'Sex': '',
        'HeartSize': -1, 'PulmonaryCongestion': -1,
        'PleuralEffusion_Right': -1, 'PleuralEffusion_Left': -1,
        'PulmonaryOpacities_Right': -1, 'PulmonaryOpacities_Left': -1,
        'Atelectasis_Right': -1, 'Atelectasis_Left': -1,
    }
    column_order = ['UID', 'Annotator', 'Reasoning', 'FinalAssessment']
    column_order += [key for key in metadata_defaults if key != 'UID']
    data_dict = {column: [] for column in column_order}

    # Sorted so the row order does not depend on the directory listing order.
    yaml_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.yaml'))

    for yaml_file in tqdm(yaml_files, desc="Processing YAML files"):
        data = load_yaml_file(os.path.join(folder_path, yaml_file))
        if not data:
            continue  # load_yaml_file has already reported the problem

        metadata = data.get('metadata')
        if not isinstance(metadata, dict):
            print(f"Warning: Skipping {yaml_file}: 'metadata' is not a mapping")
            continue

        for key, default in metadata_defaults.items():
            data_dict[key].append(metadata.get(key, default))
        data_dict['Annotator'].append(data.get('annotator', ''))

        # A 'reasoning' key without content parses to None.
        reasoning = data.get('reasoning')
        if not isinstance(reasoning, dict):
            reasoning = {}

        # Serialize reasoning to JSON strings to avoid mixed-type issues
        data_dict['Reasoning'].append(json.dumps(reasoning.get('Reasoning', [])))
        data_dict['FinalAssessment'].append(json.dumps(reasoning.get('FinalAssessment', [])))

    # Create dataset
    dataset = Dataset.from_dict(data_dict)

    # Split into train/val/test
    dataset_dict = DatasetDict({
        'train': dataset.filter(lambda x: x['Split'] == 'train'),
        'val': dataset.filter(lambda x: x['Split'] == 'val'),
        'test': dataset.filter(lambda x: x['Split'] == 'test')
    })

    print(f"Total samples: {len(dataset)}")
    print(f"Train: {len(dataset_dict['train'])}")
    print(f"val: {len(dataset_dict['val'])}")
    print(f"Test: {len(dataset_dict['test'])}")

    unassigned = len(dataset) - sum(len(part) for part in dataset_dict.values())
    if unassigned:
        print(f"Warning: {unassigned} samples have a Split other than train/val/test and are in no split")

    return dataset_dict

In [5]:
folder_path = 'data/keno_1000/annotations/v2.0'
dataset = process_yaml_files(folder_path)
print("Dataset created successfully!")
# Use the validation split as the test split as well (the recorded run of
# this cell reports "Test: 0", i.e. no file is marked as 'test').
# NOTE(review): val and test are the same data after this line - confirm that
# this is intended before reporting test metrics.
dataset["test"] = dataset["val"]
# Upload to HuggingFace Hub
dataset.push_to_hub("jomoll/TAIX-reasoning-v2.0", private=True)

Processing YAML files:  17%|█▋        | 657/3977 [00:08<00:42, 77.69it/s]



Processing YAML files:  47%|████▋     | 1858/3977 [00:25<00:27, 75.78it/s]



Processing YAML files:  52%|█████▏    | 2059/3977 [00:27<00:25, 75.05it/s]



Processing YAML files:  57%|█████▋    | 2252/3977 [00:30<00:22, 76.64it/s]



Processing YAML files:  63%|██████▎   | 2493/3977 [00:33<00:19, 76.02it/s]



Processing YAML files:  73%|███████▎  | 2894/3977 [00:39<00:14, 75.83it/s]



Processing YAML files:  80%|████████  | 3191/3977 [00:43<00:10, 73.97it/s]



Processing YAML files:  82%|████████▏ | 3248/3977 [00:44<00:09, 74.12it/s]



Processing YAML files:  98%|█████████▊| 3896/3977 [00:52<00:01, 75.42it/s]



Processing YAML files: 100%|██████████| 3977/3977 [00:54<00:00, 73.59it/s]


Filter:   0%|          | 0/3968 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3968 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3968 [00:00<?, ? examples/s]

Total samples: 3968
Train: 3655
val: 313
Test: 0
Dataset created successfully!


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/4 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/jomoll/TAIX-reasoning-v2.0/commit/cf27185c624608591e2ee128ba445b3b7e830e2f', commit_message='Upload dataset', commit_description='', oid='cf27185c624608591e2ee128ba445b3b7e830e2f', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/jomoll/TAIX-reasoning-v2.0', endpoint='https://huggingface.co', repo_type='dataset', repo_id='jomoll/TAIX-reasoning-v2.0'), pr_revision=None, pr_num=None)

In [6]:
from datasets import load_dataset
from datasets import Dataset
import pandas as pd

def create_merged_dataset(split: str = "train") -> Dataset:
    """Return the image samples of ``split`` that have a reasoning trace, with the trace attached.

    Loads the requested split of the image dataset ("TLAIM/TAIX-Ray") and of
    the reasoning dataset ("jomoll/TAIX-reasoning-v2.0"), keeps the image
    rows whose 'UID' has a non-null 'Reasoning' entry in the reasoning
    dataset, and adds that entry as a 'Reasoning' column. The size of the
    result and, if it is not empty, its first sample are printed.

    Args:
        split: Name of the split to load from both datasets.

    Returns:
        Dataset: The filtered image dataset with the extra 'Reasoning' column.
    """
    # Load both datasets
    image_dataset = load_dataset("TLAIM/TAIX-Ray", name="default")[split]
    label_dataset = load_dataset("jomoll/TAIX-reasoning-v2.0", cache_dir=None)[split]

    # Map UID to its reasoning trace (rows without a trace are left out).
    uid2reasoning = {
        row['UID']: row['Reasoning']
        for row in label_dataset
        if row['Reasoning'] is not None
    }

    # Filter first, and only on the UID column: the image dataset is far
    # larger than the label set, so the per-row work of adding the column is
    # then limited to the rows that are actually kept.
    merged_dataset = image_dataset.filter(
        lambda uid: uid in uid2reasoning, input_columns=["UID"]
    )
    merged_dataset = merged_dataset.map(
        lambda uid: {"Reasoning": uid2reasoning[uid]}, input_columns=["UID"]
    )
    print(f"Final dataset size: {len(merged_dataset)}")

    # Sample print (nothing to show when no UID matched)
    if len(merged_dataset) > 0:
        sample = merged_dataset[0]
        print("Sample:")
        print(f"UID: {sample['UID']}")
        print(f"Reasoning: {sample['Reasoning'][:100]}...")

    return merged_dataset

# Build the merged train and validation splits; the test split is the very
# same dataset object as the validation split.
train_dataset, val_dataset = (
    create_merged_dataset(split_name) for split_name in ("train", "val")
)
test_dataset = val_dataset

dataset = DatasetDict(
    {'train': train_dataset, 'val': val_dataset, 'test': test_dataset}
)


Resolving data files:   0%|          | 0/74 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/23 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/19 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/74 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/23 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/19 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/73 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/23 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/19 [00:00<?, ?it/s]

README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.62M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3655 [00:00<?, ? examples/s]

Generating val split:   0%|          | 0/313 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/313 [00:00<?, ? examples/s]

Map:   0%|          | 0/137595 [00:00<?, ? examples/s]

Filter:   0%|          | 0/137595 [00:00<?, ? examples/s]

Final dataset size: 3655
Sample:
UID: ebc3c8d0e455dee5118c7aedf93e2a4313639a688b4cb7b93068e54bcc705d4c
Reasoning: [{"Step": {"Description": "Assess the image quality.", "Action": ["I am looking at the chest X-ray i...


Resolving data files:   0%|          | 0/74 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/23 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/19 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/74 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/23 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/19 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/73 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/23 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/19 [00:00<?, ?it/s]

Final dataset size: 313
Sample:
UID: b6c2215aac6e552fa54e483b8e2a66c1d6ef3b481c75125e345ad6c27807bf4f
Reasoning: [{"Step": {"Description": "Assess the image quality.", "Action": ["I am looking at the chest X-ray i...


In [7]:
# Upload the DatasetDict currently bound to `dataset` to the Hugging Face Hub
# as a private dataset repository. In the recorded run this is the merged
# image + reasoning dataset assembled in the previous cell.
dataset.push_to_hub("jomoll/TAIX-reasoning-v2.1", private=True)

Uploading the dataset shards:   0%|          | 0/2 [00:00<?, ?it/s]

Map:   0%|          | 0/1828 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/19 [00:00<?, ?ba/s]

Map:   0%|          | 0/1827 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/19 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/313 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/4 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/313 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/4 [00:00<?, ?ba/s]

README.md: 0.00B [00:00, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/jomoll/TAIX-reasoning-v2.1/commit/b815534b6939043528504714b81fc5da812ecfbb', commit_message='Upload dataset', commit_description='', oid='b815534b6939043528504714b81fc5da812ecfbb', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/jomoll/TAIX-reasoning-v2.1', endpoint='https://huggingface.co', repo_type='dataset', repo_id='jomoll/TAIX-reasoning-v2.1'), pr_revision=None, pr_num=None)

In [8]:
# Helpers that delete annotation YAML files from a folder; the folder they are applied to is set at the bottom of this cell.
import yaml
import os
def delete_files_without_reasoning(folder_path: str):
    """Delete every YAML file in ``folder_path`` that has no reasoning steps.

    A file is deleted when its parsed content is not a mapping (this
    includes an empty file), when it has no 'reasoning' mapping, or when
    the 'Reasoning' entry of that mapping is missing or empty. Each
    deletion is announced on stdout. Deletion is permanent.

    Args:
        folder_path: Folder that contains the YAML files (not searched recursively).

    Raises:
        OSError: If a file cannot be read or removed.
        yaml.YAMLError: If a file is not valid YAML; files handled before
            the failing one have already been processed.
    """
    yaml_files = [f for f in os.listdir(folder_path) if f.endswith('.yaml')]

    for yaml_file in yaml_files:
        file_path = os.path.join(folder_path, yaml_file)
        # Binary mode: PyYAML detects the encoding itself.
        with open(file_path, 'rb') as f:
            data = yaml.safe_load(f)

        # An empty file parses to None and a bare 'reasoning:' key maps to
        # None, so neither level can be assumed to be a dict.
        reasoning = data.get('reasoning') if isinstance(data, dict) else None
        has_steps = isinstance(reasoning, dict) and bool(reasoning.get('Reasoning'))

        if not has_steps:
            print(f"Deleting {file_path} - no 'Reasoning' section found.")
            os.remove(file_path)

# delete all files whose 'annotator' field names a given annotator
def delete_files_with_gemini_annotator(
    folder_path: str,
    annotator: str = 'gemini-2.5-flash-lite-preview-06-17',
):
    """Delete every YAML file in ``folder_path`` whose 'annotator' equals ``annotator``.

    Files whose parsed content is not a mapping (e.g. empty files) have no
    annotator and are left in place. Each deletion is announced on stdout.
    Deletion is permanent.

    Args:
        folder_path: Folder that contains the YAML files (not searched recursively).
        annotator: Annotator value that marks a file for deletion.

    Raises:
        OSError: If a file cannot be read or removed.
        yaml.YAMLError: If a file is not valid YAML; files handled before
            the failing one have already been processed.
    """
    yaml_files = [f for f in os.listdir(folder_path) if f.endswith('.yaml')]

    for yaml_file in yaml_files:
        file_path = os.path.join(folder_path, yaml_file)
        # Binary mode: PyYAML detects the encoding itself.
        with open(file_path, 'rb') as f:
            data = yaml.safe_load(f)

        # An empty file parses to None, which has no .get().
        if isinstance(data, dict) and data.get('annotator') == annotator:
            print(f"Deleting {file_path} - annotator is {annotator}.")
            os.remove(file_path)



# Folder whose annotation files are cleaned up.
# WARNING: the call below removes the matching files from disk (os.remove).
folder_path = 'data/keno_1000/annotations/v2.2'
delete_files_without_reasoning(folder_path)
# Second clean-up pass, currently disabled:
#delete_files_with_gemini_annotator(folder_path)
