In [None]:
import pandas as pd
from IPython.display import display_html, HTML

def display_images_with_text_below(df):
    """Show every query image in the notebook with its path printed underneath.

    Builds a one-column HTML table (one row per DataFrame row) and renders it
    with ``display_html``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``query`` column. Each value is used both as the
        ``<img src>`` and as the caption text below the image.

    Returns
    -------
    None
        The table is displayed as a side effect; nothing is returned.
    """
    # Imported locally so the block does not depend on a file-level `import html`.
    from html import escape

    # Only the columns that are actually rendered get a header; the rows emit a
    # single <td>, so a second "Groundtruth Image" header would leave the table
    # with mismatched column counts.
    parts = [
        """<table border='1' style='border-collapse: collapse; text-align: center;'>
              <tr><th>Query Image</th></tr>"""
    ]

    for _, row in df.iterrows():
        # Escape once for both uses: the value lands inside a double-quoted
        # attribute and inside element text, so <, >, & and quotes must not
        # be interpreted as markup.
        query = escape(str(row['query']), quote=True)

        parts.append("<tr>")
        parts.append(f"""
            <td>
                <img src="{query}" style="width:300px;"><br>
                <div style="padding: 4px; word-break: break-word;">{query}</div>
            </td>
        """)
        parts.append("</tr>")

    parts.append("</table>")
    # Join once at the end instead of growing a string with += per row.
    display_html(HTML("".join(parts)))

# Read the tab-separated list of unmatched queries.
df = pd.read_csv('unmatched_queries.tsv', sep='\t')

# Replace each query identifier with the relative path of its PNG image.
df = df.assign(query='../data/test_suite/imgs/' + df['query'] + '.png')

# Render the resulting table of images in the notebook.
display_images_with_text_below(df)


In [44]:
import pandas as pd
import glob

# Component TSV files to merge (adjust the pattern as needed).
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the printed report and the concatenation order reproducible across machines.
tsv_files = sorted(glob.glob("components/*.tsv"))
if not tsv_files:
    # Without this guard pd.concat([]) fails later with an opaque
    # "No objects to concatenate" error.
    raise FileNotFoundError("No TSV files found matching 'components/*.tsv'")

all_dfs = []

print("File lengths (after header skip and cleaning):")
for file in tsv_files:
    # skiprows=1 with header=None: discard each file's own header line and
    # address columns by position, since only the first two are kept below.
    # NOTE(review): on_bad_lines='skip' silently drops malformed lines, so the
    # per-file counts printed here exclude any such lines.
    df = pd.read_csv(file, sep='\t', skiprows=1, header=None, engine='python', encoding='utf-8', on_bad_lines='skip')
    df = df.dropna(how='all')  # Remove completely empty rows
    print(f"{file}: {len(df)} rows")
    all_dfs.append(df)

# Rows read across all files, before rows with a missing query/gt are dropped.
total_rows = sum(len(frame) for frame in all_dfs)

# Concatenate all DataFrames
combined_df = pd.concat(all_dfs, ignore_index=True)

# Keep and rename only the first two columns
combined_df = combined_df.iloc[:, :2]
combined_df.columns = ['query', 'gt']

# Drop rows with missing query or gt
combined_df = combined_df.dropna(subset=['query', 'gt'])

# Sort alphabetically by 'query'. A stable sort keeps rows with the same query
# in file order, so the saved TSV is identical from run to run.
combined_df = combined_df.sort_values(by='query', kind='stable').reset_index(drop=True)

# Final length (after dropping incomplete rows)
print(f"\nTotal concatenated rows: {len(combined_df)}")

# Save to TSV
combined_df.to_csv("query_groundtruth.tsv", sep='\t', index=False)


File lengths (after header skip and cleaning):
components/query_GT_p9.tsv: 12 rows
components/query_GT_p8.tsv: 18 rows
components/query_GT_p2_chromoscope.tsv: 1924 rows
components/query_GT_p10.tsv: 4 rows
components/query_GT_p11.tsv: 2 rows
components/query_GT_p6.tsv: 6 rows
components/addition4.tsv: 36 rows
components/query_GT_p7.tsv: 14 rows
components/query_GT_p5.tsv: 7 rows
components/query_GT_p4.tsv: 133 rows
components/addition2.tsv: 36 rows
components/addition3.tsv: 58 rows
components/query_GT_p1.tsv: 1606 rows
components/query_GT_p3.tsv: 117 rows
components/addition1.tsv: 54 rows

Total concatenated rows: 4027


In [6]:
import pandas as pd

def generate_images_with_text_html(df):
    """Build a standalone HTML page comparing original, query and groundtruth images.

    Each DataFrame row becomes one table row with three cells; every cell
    shows an image with a text caption underneath.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the image-source columns ``image_original``, ``image``
        and ``image_groundtruth`` and the caption columns ``original``,
        ``query`` and ``groundtruth``.

    Returns
    -------
    str
        A complete HTML document (``<html>`` ... ``</html>``) containing the
        table. An empty DataFrame yields a table with only the header row.
    """
    # Imported locally so the block does not depend on a file-level `import html`.
    from html import escape

    # (image source column, caption column) for each cell, in header order:
    # Original, Query, Groundtruth.
    cell_columns = (
        ('image_original', 'original'),
        ('image', 'query'),
        ('image_groundtruth', 'groundtruth'),
    )

    parts = [
        """<html><head><meta charset="UTF-8"></head><body>
              <table border='1' style='border-collapse: collapse; text-align: center;'>
              <tr><th>Original</th><th>Query</th><th>Groundtruth</th></tr>"""
    ]

    for _, row in df.iterrows():
        parts.append("<tr>")

        for image_col, text_col in cell_columns:
            # Escape both values: the source lands inside a double-quoted
            # attribute, the caption inside element text. Unescaped <, >, &
            # or " would otherwise be parsed as markup and corrupt the table.
            src = escape(str(row[image_col]), quote=True)
            caption = escape(str(row[text_col]), quote=False)
            parts.append(f"""
            <td>
                <img src="{src}" style="width:300px;"><br>
                <div style="padding: 4px; word-break: break-word;">{caption}</div>
            </td>
        """)

        parts.append("</tr>")

    parts.append("</table></body></html>")
    # Join once at the end instead of growing a string with += per cell.
    return "".join(parts)

# Read the tab-separated groundtruth table.
df = pd.read_csv('../groundtruth.tsv', sep='\t')

# Add one image-path column per identifier column (query / original / groundtruth).
df = df.assign(
    image='../../data/test_suite/imgs/' + df['query'] + '.png',
    image_original='../../data/unified/imgs/' + df['original'] + '.png',
    image_groundtruth='../../data/unified/imgs/' + df['groundtruth'] + '.png',
)

# Render the comparison page as a single HTML string.
html_output = generate_images_with_text_html(df)

# Write the page to disk as UTF-8.
with open('groundtruth.html', 'w', encoding='utf-8') as out_file:
    out_file.write(html_output)

print("HTML file saved as 'groundtruth.html'")


HTML file saved as 'groundtruth.html'
