In [48]:
import os
import yaml
import difflib
from collections import defaultdict
from pprint import pprint
from IPython.display import HTML
# ---------------------------------------------
# STEP 1: Load YAML files
# ---------------------------------------------

# Root directory that holds one sub-directory per annotation version.
base_path = 'data/keno_1000/annotations'  # <-- Change this
# Sub-directory names, one per annotation version to compare.
version_dirs = ['v1.0', 'v1.1_Markus', 'v1.2', 'v1.3']

# Maps each version name to the set of YAML filenames found in its directory:
# {version: {filename, ...}}. It is filled in by the directory scan below.
version_files = {}

def load_yaml(filepath):
    """Parse the YAML document stored at ``filepath`` and return its content.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default text encoding.

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document (``None`` for an
        empty file).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)

# Record which YAML filenames exist in each version directory.
for version in version_dirs:
    version_path = os.path.join(base_path, version)
    version_files[version] = {
        fname for fname in os.listdir(version_path) if fname.endswith('.yaml')
    }

# Get intersection of filenames
common_files = set.intersection(*version_files.values())
print(f"Found {len(common_files)} common files present in all versions.")

# Load YAML data only for common files: {filename: {version: parsed yaml}}.
# Filenames are visited in sorted order so that `data` (and anything built by
# iterating it) has the same ordering on every run; iterating the set directly
# would give an order that can change between interpreter sessions.
data = defaultdict(dict)
for fname in sorted(common_files):
    for version in version_dirs:
        data[fname][version] = load_yaml(os.path.join(base_path, version, fname))

# ---------------------------------------------
# STEP 2: Diff function per string
# ---------------------------------------------

def html_diff(a, b):
    """Return an HTML fragment that highlights how ``b`` differs from ``a``.

    The comparison is word based. Words only present in ``b`` are wrapped in
    a green span, words only present in ``a`` in a red span, and replaced
    runs show the new words from ``b`` in a yellow span (the replaced words
    from ``a`` are not shown). Line breaks are kept as line breaks in the
    result, and all text is HTML-escaped so that markup characters in the
    input cannot break the surrounding page.

    Args:
        a: Reference text. Falsy values (``None``, ``''``) count as empty;
            other non-string values are converted with ``str``.
        b: Text to compare against the reference, same conventions as ``a``.

    Returns:
        A string of HTML. Empty when both inputs are empty.
    """
    import difflib
    from html import escape

    added, removed, changed = '#a6f3a6', '#f3a6a6', '#f3e7a6'

    def as_text(value):
        return str(value) if value else ''

    def safe(text):
        # Content is placed inside elements, never attributes, so quotes
        # can stay as they are.
        return escape(text, quote=False)

    def span(color, inner):
        return f'<span style="background-color: {color}">{inner}</span>'

    def tokenize(text):
        # Split per line so that every line break survives as its own '\n'
        # token; a bare str.split() would discard the newlines together
        # with the other whitespace.
        tokens = []
        for line_no, line in enumerate(text.split('\n')):
            if line_no:
                tokens.append('\n')
            tokens.extend(line.split())
        return tokens

    def join(tokens):
        # Single space between words, nothing around line breaks.
        pieces = []
        for token in tokens:
            if pieces and token != '\n' and pieces[-1] != '\n':
                pieces.append(' ')
            pieces.append(token)
        return ''.join(pieces)

    a, b = as_text(a), as_text(b)

    if not a and not b:  # Both empty
        return ""
    if not a:  # Everything in b is new
        return span(added, safe(b))
    if not b:  # Everything in a is gone
        return span(removed, safe(a))
    if a == b:  # Identical: nothing to highlight
        return safe(b)

    words_a = tokenize(a)
    words_b = tokenize(b)
    colors = {'insert': added, 'delete': removed, 'replace': changed}

    matcher = difflib.SequenceMatcher(None, words_a, words_b)
    output = []
    previous = None  # last token emitted so far

    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        # Deleted runs only exist in a; every other opcode shows b's words.
        tokens = words_a[i1:i2] if tag == 'delete' else words_b[j1:j2]
        if not tokens:
            continue
        if previous is not None and previous != '\n' and tokens[0] != '\n':
            output.append(' ')
        rendered = safe(join(tokens))
        color = colors.get(tag)
        output.append(rendered if color is None else span(color, rendered))
        previous = tokens[-1]

    return ''.join(output)

# ---------------------------------------------
# STEP 3: Build HTML report
# ---------------------------------------------

# Static head of the report: styles, the UID filter script, the colour legend
# and the search box. The per-file sections are appended after this.
html_output = """
<html><head>
<meta charset="utf-8">
<style>
.legend {
    margin: 20px 0;
    padding: 10px;
    background-color: #f8f9fa;
    border: 1px solid #ddd;
}
.legend-item {
    display: inline-block;
    margin-right: 20px;
}
.legend-color {
    display: inline-block;
    width: 20px;
    height: 20px;
    margin-right: 5px;
    vertical-align: middle;
}
table { border-collapse: collapse; width: 100%; table-layout: fixed; }
th, td { 
    border: 1px solid #999; 
    padding: 0.5rem; 
    vertical-align: top; 
    width: 25%;
}
th { background-color: #eee; }
pre { 
    white-space: pre-wrap; 
    margin: 0;
    word-wrap: break-word;
}
td:first-child { width: 10%; }
td:not(:first-child) { width: 30%; }
.search-container {
    margin: 20px 0;
    padding: 10px;
    background-color: #f8f9fa;
    border: 1px solid #ddd;
    display: flex;
    gap: 10px;
    align-items: center;
}
.search-container input {
    width: 300px;
    padding: 8px;
    font-size: 16px;
    border: 1px solid #ddd;
    border-radius: 4px;
}
.search-button {
    padding: 8px 16px;
    font-size: 16px;
    background-color: #007bff;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
}
.search-button:hover {
    background-color: #0056b3;
}
.report-section {
    display: block;
}
.report-section.hidden {
    display: none;
}
</style>
<script>
function filterUIDs() {
    const searchTerm = document.getElementById('searchInput').value.toLowerCase();
    const sections = document.getElementsByClassName('report-section');
    
    for (let section of sections) {
        const uid = section.getAttribute('data-uid').toLowerCase();
        if (uid.includes(searchTerm)) {
            section.classList.remove('hidden');
        } else {
            section.classList.add('hidden');
        }
    }
}
</script>
</head><body>

<div class="legend">
    <div class="legend-item">
        <div class="legend-color" style="background-color: #a6f3a6;"></div>
        <span>Additional content</span>
    </div>
    <div class="legend-item">
        <div class="legend-color" style="background-color: #f3a6a6;"></div>
        <span>Missing content</span>
    </div>
    <div class="legend-item">
        <div class="legend-color" style="background-color: #f3e7a6;"></div>
        <span>Changed content</span>
    </div>
</div>

<div class="search-container">
    <input type="text" 
           id="searchInput" 
           placeholder="Search for UID..." 
           autocomplete="off">
    <button class="search-button" 
            onclick="filterUIDs()">
        Search
    </button>
</div>
"""


# Build one report section per file and write the finished page to disk.
from html import escape  # stdlib; neutralises markup characters in YAML text

# The version every other column is diffed against, and the column order.
reference_version = 'v1.1_Markus'
column_versions = ['v1.0', 'v1.1_Markus', 'v1.2', 'v1.3']
header_row = '<tr><th>Section</th><th>v1.0 (Llama-3-8B)</th><th>v1.1 (Markus) (reference)</th><th>v1.2 (Llama-3-70B)</th><th>v1.3 (GPT4-Turbo)</th></tr>'


def _reasoning_of(document):
    """Return the mapping under 'reasoning', or {} if it is missing or malformed."""
    section = document.get('reasoning') if isinstance(document, dict) else None
    return section if isinstance(section, dict) else {}


def _steps_of(reasoning):
    """Return the list under 'Reasoning', or [] if it is missing or not a list."""
    steps = reasoning.get('Reasoning')
    return steps if isinstance(steps, list) else []


def _step_at(steps, index):
    """Return the field mapping of step ``index``; {} when the version has no such step."""
    entry = steps[index] if index < len(steps) else {}
    # Some files nest the fields one level down under a 'Step' key; unwrap
    # that form so both layouts are compared field by field.
    if isinstance(entry, dict) and isinstance(entry.get('Step'), dict):
        entry = entry['Step']
    return entry if isinstance(entry, dict) else {}


def _as_text(value):
    """Turn a YAML value into display text; lists become one item per line."""
    if value is None:
        return ''
    if isinstance(value, (list, tuple)):
        return '\n'.join(str(item) for item in value)
    return str(value)


def _comparison_row(label, texts):
    """Render one table row: the reference text verbatim, the others as diffs against it."""
    reference = texts[reference_version]
    cells = [f'<tr><td><b>{label}</b></td>']
    for version in column_versions:
        if version == reference_version:
            cells.append(f'<td><pre>{escape(reference, quote=False)}</pre></td>')
        else:
            cells.append(f'<td><pre>{html_diff(reference, texts[version])}</pre></td>')
    cells.append('</tr>')
    return ''.join(cells)


# Collect fragments in a list and join once at the end instead of growing a
# string with += inside nested loops.
report_parts = [html_output]

for fname, versions in data.items():
    safe_name = escape(fname)  # used in an attribute, so quotes are escaped too
    report_parts.append(f'<div class="report-section" data-uid="{safe_name}">')
    report_parts.append(f'<h2>Image ID: {safe_name}</h2>')

    reasoning = {v: _reasoning_of(versions.get(v)) for v in column_versions}
    steps = {v: _steps_of(reasoning[v]) for v in column_versions}
    max_steps = max(len(step_list) for step_list in steps.values())

    # One table per reasoning step; versions with fewer steps show empty cells.
    for step in range(max_steps):
        fields = {v: _step_at(steps[v], step) for v in column_versions}

        report_parts.append('<table>')
        report_parts.append(f'<tr><th colspan="5">Reasoning Step {step+1}</th></tr>')
        report_parts.append(header_row)
        for section in ('Description', 'Action', 'Result'):
            texts = {v: _as_text(fields[v].get(section)) for v in column_versions}
            report_parts.append(_comparison_row(section, texts))
        report_parts.append('</table><br>')

    # The final assessment is stored once per file, next to the step list.
    report_parts.append('<table>')
    report_parts.append('<tr><th colspan="5">Final Assessment</th></tr>')
    report_parts.append(header_row)
    finals = {v: _as_text(reasoning[v].get('FinalAssessment')) for v in column_versions}
    report_parts.append(_comparison_row('FinalAssessment', finals))
    report_parts.append('</table><br>')
    report_parts.append('</div>')  # Close report-section div

# Close the elements opened by the page template.
report_parts.append('</body></html>')
html_output = ''.join(report_parts)

# ---------------------------------------------
# STEP 4: Save full HTML report
# ---------------------------------------------

output_path = 'reasoning_diff_report.html'
# Non-ASCII characters are written as numeric character references, so the
# file displays correctly whatever encoding the browser assumes.
with open(output_path, 'w', encoding='ascii', errors='xmlcharrefreplace') as f:
    f.write(html_output)

print(f"✅ HTML diff report generated: {output_path}")

Found 10 common files present in all versions.
✅ HTML diff report generated: reasoning_diff_report.html


In [38]:
def fix_yaml_format(directory):
    """Rewrite list items that start with ``- Description:`` to start with ``- Step:``.

    In every ``*.yaml`` file directly inside ``directory``, each line of the
    form ``<indent>- Description:`` becomes two lines::

        <indent>- Step:
        <indent>  Description:

    so ``Description`` keeps its column and a ``Step`` key is added in front
    of it. Only lines whose first non-blank text is ``- Description:`` are
    touched; all other content, blank lines included, is left as it is.
    Running the function again on converted files changes nothing. Files are
    edited in place, and a file that needs no change is not rewritten.

    Args:
        directory: Directory containing the YAML files.

    Returns:
        None. Progress is reported with ``print``.
    """
    import re  # imported here because the surrounding file does not import it

    # Ensure directory exists
    if not os.path.isdir(directory):
        print(f"Directory {directory} does not exist!")
        return

    # Get all yaml files
    yaml_files = [f for f in os.listdir(directory) if f.endswith('.yaml')]
    print(f"Found {len(yaml_files)} YAML files")

    # Anchored per line and limited to spaces/tabs: the captured indent can
    # never contain a newline, so the replacement cannot create blank lines
    # that would need cleaning up afterwards.
    item_pattern = re.compile(r'^([ \t]*)- Description:', re.MULTILINE)

    for filename in yaml_files:
        filepath = os.path.join(directory, filename)
        print(f"Processing {filename}...")

        # Read file content
        with open(filepath, 'r', encoding='utf-8') as file:
            content = file.read()

        modified_content = item_pattern.sub(r'\1- Step:\n\1  Description:', content)

        # Leave files that need no change alone.
        if modified_content == content:
            continue

        # Write back to file
        with open(filepath, 'w', encoding='utf-8') as file:
            file.write(modified_content)

# Entry point: convert the v1.0 annotation files in place.
if __name__ == "__main__":
    yaml_dir = "data/keno_1000/annotations/v1.0"
    fix_yaml_format(yaml_dir)
    # NOTE: printed unconditionally, so it also appears when fix_yaml_format
    # returned early because the directory was missing.
    print("✅ YAML files have been updated")

Found 1000 YAML files
Processing 3c55095429b09f26d962fb93c59fcd0c27b619c2dbd0e17f646cd02cb9ee1398.yaml...
Processing aa711321ced00f08a63e5e379572f10280ef256930bc911bcccbdc1adc05c9d4.yaml...
Processing 1c018ee66f3d781c7583e20d42fcdaf332a043b275c231b0546ba5077a9f43f3.yaml...
Processing b7cb0014be14fe86f7da242d5adb54fd5380c42b9629af458bd49af0647e6aea.yaml...
Processing 11287ce58df1fe2aab670f9f17d276264ba70d8f7710fdfb90bd60464de8dc0f.yaml...
Processing 1409f3ee767ab4e52f97c2814846cfcbd356d7eda134bcf5e5d96631d734e154.yaml...
Processing 446de4d5faa04823cf31217763c000d2fd26e22ce241c8c73398de47da9005bf.yaml...
Processing f6828db2c60cf5ea307ae9bb8affe0bd6b8434efb8986206194b18b5b887a2ff.yaml...
Processing 8168b8b3e65281b9b5baff6dcf984082e88d92d7c79df4061c34025310192cc4.yaml...
Processing d26d9a51f63e9b2641e175ed8df81031a163a4a21949efb45d4dfbb38b75337a.yaml...
Processing 8b0c4bdb6bb706feb6b18d15e52b3518dba8ffb53d2515acedcd6c58fc60c70f.yaml...
Processing ab060fd7f0f6b3730302072e20c3f987770d455e53a

In [46]:
import os
import yaml

class IndentDumper(yaml.Dumper):
    """YAML dumper that indents block sequences under their parent key.

    The stock dumper may emit the items of a list that is a mapping value at
    the same column as the key ("indentless"). This subclass always asks for
    a real indent instead, so the ``- `` markers sit one level deeper than
    the key that owns the list.

    No ``indent`` method is defined on purpose: the emitter keeps its current
    indentation in an instance attribute called ``indent``, so a method with
    that name would be shadowed and never run.
    """

    def increase_indent(self, flow=False, *args, **kwargs):
        """Open a new indentation level, never an indentless one.

        Any ``indentless`` value passed by the emitter (positionally or by
        keyword) is ignored and replaced with ``False``.
        """
        return super().increase_indent(flow=flow, indentless=False)

def _normalize_step(step):
    """Return ``step`` in the canonical ``{'Step': {...}}`` layout.

    Entries that are not mappings, or have no ``Step`` key, are returned
    unchanged so that rewriting a file never discards them.
    """
    if not (isinstance(step, dict) and 'Step' in step):
        return step

    siblings = {key: value for key, value in step.items() if key != 'Step'}
    inner = step['Step']
    if not isinstance(inner, dict):
        # 'Step' has no mapping of its own; presumably the fields were written
        # next to it (Step: null, Description: ...), so treat those siblings
        # as the step's content.
        inner, siblings = siblings, {}

    fields = {
        'Description': inner.get('Description', ''),
        'Action': inner.get('Action', []),
        'Result': inner.get('Result', ''),
    }
    # Keep any additional keys after the three standard ones.
    fields.update((key, value) for key, value in inner.items() if key not in fields)
    return {'Step': fields, **siblings}


def fix_yaml_indentation(directory):
    """Re-serialise every ``*.yaml`` file in ``directory`` with uniform indentation.

    Each file is parsed, the entries of ``reasoning -> Reasoning`` that carry
    a ``Step`` key are rebuilt with ``Description``, ``Action`` and ``Result``
    first (missing ones get empty defaults), and the document is dumped back
    to the same path with ``IndentDumper``. Files that fail to parse, or that
    are empty, are reported and left untouched.

    Args:
        directory: Directory containing the YAML files.

    Returns:
        None. Progress is reported with ``print``.
    """
    yaml_files = [f for f in os.listdir(directory) if f.endswith('.yaml')]
    print(f"Found {len(yaml_files)} YAML files")

    for filename in yaml_files:
        filepath = os.path.join(directory, filename)
        print(f"Processing {filename}...")

        with open(filepath, 'r', encoding='utf-8') as file:
            try:
                data = yaml.safe_load(file)
            except yaml.YAMLError as e:
                print(f"Error reading {filename}: {e}")
                continue

        if data is None:
            # Nothing to reformat; dumping None would replace the empty file
            # with a literal "null" document.
            print(f"Skipping {filename}: file is empty")
            continue

        # Fix reasoning structure
        reasoning = data.get('reasoning') if isinstance(data, dict) else None
        if isinstance(reasoning, dict) and isinstance(reasoning.get('Reasoning'), list):
            reasoning['Reasoning'] = [
                _normalize_step(step) for step in reasoning['Reasoning']
            ]

        # Write back with fixed indentation
        with open(filepath, 'w', encoding='utf-8') as file:
            yaml.dump(data, file,
                      Dumper=IndentDumper,
                      default_flow_style=False,
                      sort_keys=False,
                      indent=2,
                      allow_unicode=True,
                      width=float("inf"))

# Entry point: re-serialise the v1.3 annotation files in place.
if __name__ == "__main__":
    yaml_dir = "data/keno_1000/annotations/v1.3"
    fix_yaml_indentation(yaml_dir)
    print("✅ YAML files have been updated")

Found 10 YAML files
Processing c1fc327835cf9370489271b920a8be3ac09123677af1eaf2aa2fcb84a803ec9e.yaml...
Processing b8af0db8c69bcec1a99691c75ab99b7e11bb471b07bdfe021947a57381a31f4b.yaml...
Processing dcf7ac6fe3973376643097f1cf11cfc3248e74847c3887802252a6d98d769fed.yaml...
Processing b92b33579601ec85a2fcc2936f02d071a0916e8cfcf2fd962e8d5b64d5136b97.yaml...
Processing c2b3dc407de7216610d11d67f090231d340fcf91113f7a82203296beb92887fd.yaml...
Processing b5c89f524a7564642a8581ee34f291d8c51a9aa3b61391a7af30ac79193e50f1.yaml...
Processing b15366ee19f42997a315fde435db7d16981d077aa6a2438401276cba0e4c101c.yaml...
Processing af6a7575e1e105db01157177c6290a718da9a5a386e291bad4e862c9d1c337c7.yaml...
Processing ba158aae1730e9d95a9ac1e074dd0a80f077b8260b8e5a61e9866740e290701f.yaml...
Processing b0be9a8cd4288d3723c285db53d9c5b0fe0df2ee7e861c36f78d42716ded8437.yaml...
✅ YAML files have been updated
