# TSV Viewer for DOME Recommendations (v31)
This notebook provides an interactive viewer for the dataset: `v31_Dome-Recommendations-ID_Sync_Manual_Simple.tsv`.

**Features:**
- Cycle through records.
- Randomize selection.
- Search by Title.
- Clickable links for DOI, PMID, and PMCID.
- Full field inspection.

In [1]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML
import random
import os

# Configuration
input_file = 'v31_Dome-Recommendations-ID_Sync_Manual_Simple.tsv'

# Columns that hold identifiers / titles and must be kept as text.
str_cols = ['publication/doi', 'publication/pmid', 'publication/pmcid', 'publication/title']


def load_records(path, text_cols=None):
    """Read the tab-separated file at ``path`` into a DataFrame.

    The columns named in ``text_cols`` (default: ``str_cols``) are parsed as
    text rather than being converted after the fact. Converting afterwards is
    lossy: an identifier column with gaps is inferred as float, so a value
    such as 12345678 would be stringified as "12345678.0".

    Args:
        path: Location of the TSV file.
        text_cols: Column names to keep as strings; names that are absent
            from the file are ignored.

    Returns:
        The loaded DataFrame, with missing values in the text columns
        replaced by empty strings.
    """
    wanted = str_cols if text_cols is None else text_cols
    # Peek at the header so we only request dtypes for columns that exist.
    header = pd.read_csv(path, sep='\t', nrows=0).columns
    present = [c for c in wanted if c in header]

    frame = pd.read_csv(path, sep='\t', dtype={c: str for c in present})
    for c in present:
        frame[c] = frame[c].fillna('')
    return frame


# Load Data
if os.path.exists(input_file):
    print(f"Loading '{input_file}'...")
    df = load_records(input_file)
    print(f"Loaded {len(df)} records with {len(df.columns)} columns.")
else:
    print(f"Error: File '{input_file}' not found in current directory.")
    df = pd.DataFrame() # Empty fallback

Loading 'v31_Dome-Recommendations-ID_Sync_Manual_Simple.tsv'...
Loaded 270 records with 51 columns.


In [None]:
# State and Helper Functions

# Zero-based position of the record currently shown. The navigation
# handlers rebind this module-level name via `global current_index`.
current_index = 0

def resolve_links(col_name, value):
    """Return an HTML-safe rendering of ``value`` for the column ``col_name``.

    Identifier columns are turned into hyperlinks:

    * a column whose name contains ``doi`` links to https://doi.org/
    * a column whose name contains ``pmid`` (but not ``pmcid``) links to PubMed
    * a column whose name contains ``pmcid`` links to PubMed Central

    Any other value is returned as HTML-escaped text so that characters such
    as ``<`` or ``&`` in the data are displayed literally instead of being
    interpreted as markup.

    Args:
        col_name: Name of the column the value comes from.
        value: The cell value.

    Returns:
        ``value`` unchanged when it is empty, whitespace-only or ``'nan'``;
        otherwise an HTML string.
    """
    import re
    from html import escape
    from urllib.parse import quote

    # Empty-ish values are handed back untouched.
    if not value or str(value).lower() == 'nan' or str(value).strip() == '':
        return value

    val_str = str(value)
    ident = val_str.strip()
    col = col_name.lower()

    def _anchor(url, label):
        # Escape both the attribute and the visible text.
        return f'<a href="{escape(url, quote=True)}" target="_blank">{escape(label)}</a>'

    if 'doi' in col:
        # Strip a resolver prefix or "doi:" scheme so the URL is not doubled.
        doi = re.sub(r'^(?:https?://(?:dx\.)?doi\.org/|doi:\s*)', '', ident, flags=re.IGNORECASE)
        return _anchor(f"https://doi.org/{quote(doi, safe='/')}", doi)

    if 'pmid' in col or 'pmcid' in col:
        # Numeric IDs that went through a float column look like "123.0".
        ident = re.sub(r'^(\d+)\.0+$', r'\1', ident)
        if 'pmcid' not in col:
            return _anchor(f"https://pubmed.ncbi.nlm.nih.gov/{quote(ident)}/", ident)
        # Handle PMC prefix if present or missing
        link_val = ident if ident.upper().startswith('PMC') else f"PMC{ident}"
        return _anchor(f"https://www.ncbi.nlm.nih.gov/pmc/articles/{quote(link_val)}/", ident)

    return escape(val_str)

def render_record(idx):
    """Build the HTML table for the record at position ``idx``.

    Columns are filtered (bookkeeping fields are hidden) and then shown in
    three coloured sections: publication fields, system identifiers and
    everything else. Returns a short message instead when there is no data
    or ``idx`` is out of range.
    """
    if df.empty or idx < 0 or idx >= len(df):
        return "No record selected."

    record = df.iloc[idx]

    # --- 1. Column filtering -------------------------------------------
    # Hidden: anything ending in /done or /skip, plus a fixed set of names.
    hidden_suffixes = ('/done', '/skip')
    hidden_names = {'public', 'created', 'updated', 'publication/created', 'publication/updated', 'createdAt', 'updatedAt'}
    visible = [
        name for name in df.columns.tolist()
        if name not in hidden_names and not name.endswith(hidden_suffixes)
    ]

    # --- 2. Grouping ----------------------------------------------------
    system_id_names = {'_id/$oid', 'uuid', 'shortid', 'oid', 'OID', 'ShortID', 'UUID'}

    def is_publication(name):
        return name.lower().startswith('publication/')

    id_cols = [name for name in visible if name in system_id_names]
    pub_cols = [name for name in visible if name not in system_id_names and is_publication(name)]
    other_cols = [name for name in visible if name not in system_id_names and not is_publication(name)]

    # Preferred identifiers first; the rest keep their relative order.
    id_rank = {'_id/$oid': 0, 'uuid': 1, 'shortid': 2}
    id_cols.sort(key=lambda name: id_rank.get(name, 99))

    # --- 3. HTML assembly -----------------------------------------------
    def render_group(title, cols, bg_header, bg_row_light):
        if not cols:
            return ""
        pieces = [
            f"<tr style='background-color:{bg_header}; border-bottom:1px solid #aaa;'><th colspan='2' style='text-align:left; padding:8px; font-size:1.1em;'>{title}</th></tr>"
        ]
        for name in cols:
            text = str(record[name])
            if text == 'nan':
                text = ''
            shown = resolve_links(name, text)
            pieces.append(
                f"<tr style='background-color:{bg_row_light};'><td style='padding:6px; border-bottom:1px solid #ddd; width:220px; font-weight:bold; color:#444;'>{name}</td>"
            )
            pieces.append(
                f"<td style='padding:6px; border-bottom:1px solid #ddd;'>{shown}</td></tr>"
            )
        return "".join(pieces)

    # Section order: Publication -> Identifiers -> Other
    fragments = [
        f"<div style='margin-bottom:10px;'><h3>Record Index: {idx} <span style='font-weight:normal; font-size:0.8em; color:#666;'>(Spreadsheet Line: {idx+2})</span></h3></div>",
        "<table style='width:100%; border-collapse: collapse; font-family: sans-serif; font-size: 14px;'>",
        render_group("Publication Details", pub_cols, "#bbdefb", "#e3f2fd"),  # Blue theme
        render_group("System Identifiers", id_cols, "#fff9c4", "#fffde7"),    # Yellow theme
        render_group("Other Fields", other_cols, "#dadada", "#f5f5f5"),       # Grey theme
        "</table>",
    ]
    return "".join(fragments)

def find_by_title(search_term, frame=None):
    """Find the position of the first record whose title contains ``search_term``.

    The match is a case-insensitive, literal (non-regex) substring search on
    the first column whose name contains ``publication/title``.

    Args:
        search_term: Text to look for; an empty value yields ``None``.
        frame: DataFrame to search. Defaults to the notebook-level ``df``.

    Returns:
        The zero-based row *position* of the first match as an ``int``
        (suitable for ``iloc``), or ``None`` when there is no match or no
        title column.
    """
    if not search_term:
        return None

    data = df if frame is None else frame

    # Identify title column
    title_col = next(
        (c for c in data.columns if 'publication/title' in str(c).lower()),
        None,
    )
    if title_col is None:
        return None

    mask = data[title_col].astype(str).str.contains(search_term, case=False, regex=False)
    # Use positions, not index labels: callers feed the result to iloc, and
    # the two only coincide for a default RangeIndex.
    hits = mask.to_numpy(dtype=bool).nonzero()[0]
    return int(hits[0]) if len(hits) else None

In [3]:
# UI Creation and Logic

# Widgets
def _fixed_width(px):
    """Return a fresh Layout whose width is ``px`` pixels."""
    return widgets.Layout(width=f'{px}px')

# Navigation buttons
w_prev = widgets.Button(description="< Prev", layout=_fixed_width(80))
w_next = widgets.Button(description="Next >", layout=_fixed_width(80))
w_rand = widgets.Button(description="Random", button_style='info', layout=_fixed_width(80))

# Title search controls and status line
w_search = widgets.Text(placeholder="Search Title (Press Enter)", layout=_fixed_width(300))
w_search_btn = widgets.Button(description="Go", icon="search", layout=_fixed_width(50))
w_status = widgets.Label(value="")

# Scrollable area the current record is rendered into
w_output = widgets.Output(
    layout=widgets.Layout(
        border='1px solid #ccc',
        padding='10px',
        max_height='600px',
        overflow='auto',
    )
)

# Layout
separator = widgets.HTML("&nbsp;&nbsp;|&nbsp;&nbsp;")
header_row = widgets.HBox([w_prev, w_next, w_rand, separator, w_search, w_search_btn])
ui = widgets.VBox([header_row, w_status, w_output])

# Event Handlers
def update_display(b=None):
    """Clamp ``current_index`` into range and redraw the status line and record.

    Args:
        b: Unused; accepted so the function can also serve as a widget
            callback.
    """
    global current_index
    total = len(df)

    if total > 0:
        # Bounds check
        current_index = max(0, min(current_index, total - 1))
        w_status.value = f"Showing record {current_index + 1} of {total}"
    else:
        # Avoid the misleading "Showing record 1 of 0" when nothing loaded.
        w_status.value = "No records loaded."

    with w_output:
        w_output.clear_output(wait=True)
        display(HTML(render_record(current_index)))

def on_next(b):
    """Advance to the following record and redraw."""
    global current_index
    current_index = current_index + 1
    update_display()

def on_prev(b):
    """Step back to the preceding record and redraw."""
    global current_index
    current_index = current_index - 1
    update_display()

def on_rand(b):
    """Jump to a randomly chosen record; does nothing when no data is loaded."""
    global current_index
    total = len(df)
    if total <= 0:
        return
    current_index = random.randint(0, total - 1)
    update_display()
        
def on_search(b):
    """Jump to the first record whose title contains the search box text.

    Reports the outcome in the status label. An empty search box is ignored.

    Args:
        b: The widget that triggered the callback (unused).
    """
    global current_index
    term = w_search.value.strip()
    if not term:
        return

    idx = find_by_title(term)
    if idx is None:
        w_status.value = f"No match found for '{term}'"
        return

    current_index = idx
    update_display()
    # Set the message after redrawing: update_display() rewrites the status
    # label, which previously wiped this message before it could be seen.
    w_status.value = f"Match found at index {idx}!"

# Bindings
for _button, _handler in (
    (w_prev, on_prev),
    (w_next, on_next),
    (w_rand, on_rand),
    (w_search_btn, on_search),
):
    _button.on_click(_handler)

# NOTE(review): Text.on_submit appears to be deprecated in ipywidgets 8 in
# favour of observing `value` with continuous_update=False; confirm against
# the installed version before migrating.
w_search.on_submit(on_search) # Handles 'Enter' key

# Initial Display
update_display()
display(ui)

VBox(children=(HBox(children=(Button(description='< Prev', layout=Layout(width='80px'), style=ButtonStyle()), …