# Cell 1: Setup and Configuration

In [1]:
# Cell 1: Setup and Configuration
import pandas as pd
import numpy as np
import subprocess
import os
import shlex
import dash
from dash import html, Input, Output, State, callback, dcc
import dash_bootstrap_components as dbc
from jupyter_dash import JupyterDash

# --- Configuration ---
# <<< Path to main CSV file containing image, mask, seg paths and identifiers (sub, ses, run, etc.) >>>

main_csv_path = "/Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/report/bids_csv.csv" # Or IQA.csv if it has IDs

# <<< Paths to the rating CSV files columns (like 'sub', 'ses', 'run') and a 'rating' columnv >>>
rating_paths = [
    "/Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/rating/ratings_meri.csv",
    "/Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/rating/ratings_jaime.csv",
    "/Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/rating/ratings_bene.csv"
]
# Assign names to the raters for the new columns
rater_names = ['meri', 'jaime', 'bene']

# <<< Columns to use for merging ratings with the main CSV >>>
merge_cols = ['sub', 'ses', 'run'] 

# Define rating thresholds for the AVERAGE rating
threshold_excluded = 1.0
threshold_bad_upper = 2.0
threshold_great = 3.0 # up to

# Number of samples per category
n_samples = 4

# ITK-SNAP command path (adjust if needed)
itksnap_command = "/Applications/ITK-SNAP.app/Contents/MacOS/ITK-SNAP"
# --- End Configuration ---

# --- Helper function for rating text ---
def rating_text(rating):
    """Convert the quality rating to a human readable category."""
    if pd.isna(rating):
        return "NaN"
    rating = float(rating)
    if rating < 0:
        return "invalid (<0)"
    elif rating < threshold_excluded: # < 1.0
        return "Excluded"
    elif rating < threshold_bad_upper: # < 2.0
        return "Bad"
    elif rating < threshold_great: # < 3.0
        return "Acceptable"
    elif rating <= 4.0:
        return "Great"
    else:
         return "invalid (>4)"

# Cell 2: Load Data and Basic Checks

In [2]:
# Cell 2: Load Data, Merge Ratings, and Calculate Average

df_merged = None # Initialize df_merged
all_dfs_loaded = True

try:
    # 1. Read the main CSV file
    print(f"Loading main CSV: {main_csv_path}")
    df_main = pd.read_csv(main_csv_path)

    df_merged = df_main.copy() 

    # 2. Read and merge each rating CSV
    for i, r_path in enumerate(rating_paths):
        rater = rater_names[i]
        print(f"Loading rating CSV for {rater}: {r_path}")
        df_rating = pd.read_csv(r_path)

        # Check for required columns in rating file
        if 'rating' not in df_rating.columns:
             print(f"Warning: 'rating' column not found in {r_path}. Skipping this rater.")
             continue
        if not all(col in df_rating.columns for col in merge_cols):
             print(f"Warning: Not all merge columns ({merge_cols}) found in {r_path}. Skipping this rater.")
             continue

        # Select only merge columns and the rating column, rename rating column
        rating_col_name = f'rating_{rater}'
        df_rating_subset = df_rating[merge_cols + ['rating']].rename(columns={'rating': rating_col_name})

        # Merge ratings into the main dataframe
        df_merged = pd.merge(df_merged, df_rating_subset, on=merge_cols, how='left')
        print(f"Merged ratings for {rater}.")

    # 3. Calculate Average Rating
    rating_cols_to_average = [f'rating_{r}' for r in rater_names if f'rating_{r}' in df_merged.columns]
    if not rating_cols_to_average:
        raise ValueError("No rating columns were successfully merged to calculate an average.")

    # Ensure rating columns are numeric (important for .mean())
    for col in rating_cols_to_average:
        df_merged[col] = pd.to_numeric(df_merged[col], errors='coerce')

    # Calculate the mean, skipping NaNs
    df_merged['rating'] = df_merged[rating_cols_to_average].mean(axis=1, skipna=True)
    df_merged['rating_text'] = df_merged['rating'].apply(rating_text) # Add average rating text
    print("Calculated average rating and rating_text columns.")

    # 4. Display results
    print("\nFirst 5 rows of the merged DataFrame with average rating:")
    display(df_merged.head(20))
    print("\nMerged DataFrame Info:")
    df_merged.info()
    print("\nCheck for missing average ratings:")
    print(df_merged['rating'].isnull().sum())


except FileNotFoundError as e:
    print(f"Error: CSV file not found - {e}")
    all_dfs_loaded = False
    df_merged = None
except ValueError as ve:
    print(f"Error: {ve}")
    all_dfs_loaded = False
    df_merged = None
except KeyError as ke:
    print(f"Error: Column not found during merge - {ke}. Check 'merge_cols' and CSV headers.")
    all_dfs_loaded = False
    df_merged = None
except Exception as e:
    print(f"An unexpected error occurred during loading/merging: {e}")
    all_dfs_loaded = False
    df_merged = None

Loading main CSV: /Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/report/bids_csv.csv
Loading rating CSV for meri: /Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/rating/ratings_meri.csv
Merged ratings for meri.
Loading rating CSV for jaime: /Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/rating/ratings_jaime.csv
Merged ratings for jaime.
Loading rating CSV for bene: /Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/rating/ratings_bene.csv
Merged ratings for bene.
Calculated average rating and rating_text columns.

First 5 rows of the merged DataFrame with average rating:


Unnamed: 0,name,sub,ses,run,im,mask,rating_meri,rating_jaime,rating_bene,rating,rating_text
0,sub-PLWLZ,1,,,/Users/cyriltelley/Desktop/MSE/Second_semester...,/Users/cyriltelley/Desktop/MSE/Second_semester...,2.375,2.2,2.425,2.333333,Acceptable
1,sub-YLYGX,2,,,/Users/cyriltelley/Desktop/MSE/Second_semester...,/Users/cyriltelley/Desktop/MSE/Second_semester...,2.3,1.6,1.25,1.716667,Bad
2,sub-003_T1w,3,,,/Users/cyriltelley/Desktop/MSE/Second_semester...,/Users/cyriltelley/Desktop/MSE/Second_semester...,,,,,
3,sub-KXKXY,4,,,/Users/cyriltelley/Desktop/MSE/Second_semester...,/Users/cyriltelley/Desktop/MSE/Second_semester...,2.525,1.5,1.2,1.741667,Bad
4,sub-FENBQ,5,,,/Users/cyriltelley/Desktop/MSE/Second_semester...,/Users/cyriltelley/Desktop/MSE/Second_semester...,2.125,3.075,2.875,2.691667,Acceptable
5,sub-IHVWW,6,,,/Users/cyriltelley/Desktop/MSE/Second_semester...,/Users/cyriltelley/Desktop/MSE/Second_semester...,1.95,1.875,3.175,2.333333,Acceptable
6,sub-007_T1w,7,,,/Users/cyriltelley/Desktop/MSE/Second_semester...,/Users/cyriltelley/Desktop/MSE/Second_semester...,,,,,
7,sub-UIEJI,8,,,/Users/cyriltelley/Desktop/MSE/Second_semester...,/Users/cyriltelley/Desktop/MSE/Second_semester...,1.625,2.6,1.925,2.05,Acceptable
8,sub-LYHPZ,9,,,/Users/cyriltelley/Desktop/MSE/Second_semester...,/Users/cyriltelley/Desktop/MSE/Second_semester...,2.375,2.95,3.675,3.0,Great
9,sub-DEYFZ,10,,,/Users/cyriltelley/Desktop/MSE/Second_semester...,/Users/cyriltelley/Desktop/MSE/Second_semester...,2.275,1.65,2.025,1.983333,Bad



Merged DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 83 entries, 0 to 82
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   name          83 non-null     object 
 1   sub           83 non-null     int64  
 2   ses           0 non-null      float64
 3   run           0 non-null      float64
 4   im            83 non-null     object 
 5   mask          83 non-null     object 
 6   rating_meri   68 non-null     float64
 7   rating_jaime  68 non-null     float64
 8   rating_bene   68 non-null     float64
 9   rating        68 non-null     float64
 10  rating_text   83 non-null     object 
dtypes: float64(6), int64(1), object(4)
memory usage: 7.3+ KB

Check for missing average ratings:
15


# Cell 3: Filter and Sample Data

In [3]:

final_samples = pd.DataFrame() # Initialize empty

if df_merged is not None and all_dfs_loaded:
    try:
        print("\nFiltering based on calculated average 'rating' column...")

        # Filter data for each category using the average rating, dropping rows where avg rating is NaN
        df_excluded = df_merged[df_merged['rating'] < threshold_excluded].dropna(subset=['rating'])
        df_bad = df_merged[(df_merged['rating'] >= threshold_excluded) & (df_merged['rating'] < threshold_bad_upper)].dropna(subset=['rating'])
        # Group Acceptable and Great together for 'Great' category
        df_great = df_merged[df_merged['rating'] >= threshold_great].dropna(subset=['rating']) # Original Great
        df_acceptable = df_merged[(df_merged['rating'] >= threshold_bad_upper) & (df_merged['rating'] < threshold_great)].dropna(subset=['rating']) # Acceptable
        df_great_combined = pd.concat([df_acceptable, df_great]) # Combine Acceptable and Great

        # Sample from each category
        samples_excluded = df_excluded.sample(n=min(n_samples, len(df_excluded)), random_state=42)
        samples_bad = df_bad.sample(n=min(n_samples, len(df_bad)), random_state=42)
        #samples_great = df_great_combined.sample(n=min(n_samples, len(df_great_combined)), random_state=42)
        samples_great = df_great.sample(n=min(n_samples, len(df_great)), random_state=42) # Sample only from Great

        # Combine the samples
        final_samples = pd.concat([samples_excluded, samples_bad, samples_great])
        final_samples = final_samples.reset_index(drop=True) # Reset index for easier access later

        print(f"\nSelected {len(samples_excluded)} 'Excluded' samples.")
        print(f"Selected {len(samples_bad)} 'Bad' samples.")
        print(f"Selected {len(samples_great)} 'Great' (Rating >= {threshold_bad_upper}) samples.") # Updated threshold description

    except Exception as e:
        print(f"An error occurred during filtering or sampling: {e}")
        final_samples = pd.DataFrame() # Ensure it's empty on error
else:
    print("Merged DataFrame ('df_merged') not available. Cannot proceed with sampling.")


Filtering based on calculated average 'rating' column...

Selected 2 'Excluded' samples.
Selected 4 'Bad' samples.
Selected 4 'Great' (Rating >= 2.0) samples.


# Cell 4: Display Results Clearly


In [4]:

if not final_samples.empty:
    print("\n--- Selected Segmentation Files for Review (Based on Average Rating) ---\n")

    # Function to display samples for a given category
    def display_category_info(category_name, rating_range, category_df):
        print(f"\n--- {category_name} {rating_range} (up to {n_samples}) ---")
        if category_df.empty:
            print("No samples found for this category.")
            return
        # Iterate using index from the reset_index in Cell 3
        for index in category_df.index:
            row = category_df.loc[index]
            # Use 'mask' column for segmentation path
            mask_path = row.get('mask', 'N/A') # Use .get for safety
            if pd.isna(mask_path):
                mask_path = 'N/A'
            # Display other relevant info if needed, e.g., individual ratings
            rating_cols = [f'rating_{r}' for r in rater_names if f'rating_{r}' in row]
            individual_ratings = ", ".join([f"{r.split('_')[1]}: {row[r]:.1f}" for r in rating_cols if pd.notna(row[r])])

            print(f"  Sample Index: {index}")
            print(f"  Avg Rating: {row['rating']:.2f} ({row['rating_text']})")
            print(f"  Individual Ratings: [{individual_ratings}]")
            print(f"  Segmentation File: {mask_path}")
            print("-" * 10) # Separator

    # Display each category based on the final_samples DataFrame structure
    display_category_info("Excluded", f"(Rating < {threshold_excluded})", samples_excluded)
    display_category_info("Bad", f"({threshold_excluded} <= Rating < {threshold_bad_upper})", samples_bad)
    display_category_info("Great", f"(Rating >= {threshold_bad_upper})", samples_great) # Using combined threshold

elif df_merged is not None:
     print("\nNo samples were selected. Check rating thresholds or calculated average ratings.")
else:
     print("\nCannot display results as DataFrame was not loaded or merged.")


--- Selected Segmentation Files for Review (Based on Average Rating) ---


--- Excluded (Rating < 1.0) (up to 4) ---
  Sample Index: 70
  Avg Rating: 0.93 (Excluded)
  Individual Ratings: [meri: 1.8, jaime: 0.7, bene: 0.4]
  Segmentation File: /Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/samples_v3_bids/derivatives/masks/sub-071_mask.nii.gz
----------
  Sample Index: 66
  Avg Rating: 0.80 (Excluded)
  Individual Ratings: [meri: 1.4, jaime: 0.6, bene: 0.4]
  Segmentation File: /Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/samples_v3_bids/derivatives/masks/sub-067_mask.nii.gz
----------

--- Bad (1.0 <= Rating < 2.0) (up to 4) ---
  Sample Index: 29
  Avg Rating: 1.96 (Bad)
  Individual Ratings: [meri: 1.8, jaime: 2.0, bene: 2.0]
  Segmentation File: /Users/cyriltelley/Desktop/MSE/Second_semester/PA-MReye/Codes/mreyeqc/data/samples_v3_bids/derivatives/masks/sub-030_mask.nii.gz
----------
  Sample Index: 61
  Avg Rating: 1.11 (Ba

# Cell 5: Launch ITK-SNAP with a selected segmentation file

In [None]:

# --- Configuration ---
itksnap_command = "/Applications/ITK-SNAP.app/Contents/MacOS/ITK-SNAP"
# --- End Configuration ---

# --- Check if final_samples DataFrame exists ---
if 'final_samples' not in globals() or not isinstance(final_samples, pd.DataFrame):
    print("CRITICAL ERROR: 'final_samples' DataFrame not found.")
    print("Please ensure Cell 3 has been run successfully before this cell.")

else:
    # --- Dash App Definition ---
    app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

    # App Layout
    app.layout = dbc.Container([
        html.H4("Interactive Sample Review", className="mt-3 mb-3"),
        dbc.Alert(id='status-message-cell5', color="info", className="mt-2", is_open=False), # Unique ID for status
        html.Div(id='interactive-sample-list-container-cell5'), # Unique ID for list container
    ], fluid=True)

    # --- Generate Sample List Layout ---
    interactive_sample_rows = []
    if not final_samples.empty:
        # Using index from final_samples (which was reset in Cell 3)
        for index in final_samples.index:
            row = final_samples.loc[index]
            rating_val = row.get('rating', float('nan'))
            rating_text_val = row.get('rating_text', 'N/A')

            # Display individual ratings if available
            try:
                rating_cols = [f'rating_{r}' for r in rater_names if f'rating_{r}' in row]
                individual_ratings_str = ", ".join([f"{r.split('_')[1]}: {row[r]:.1f}" for r in rating_cols if pd.notna(row[r])])
            except NameError: 
                individual_ratings_str = "N/A"


            interactive_sample_rows.append(
                dbc.Card(dbc.CardBody([
                    dbc.Row([
                        dbc.Col(f"Index: {index}", width=1, className="fw-bold"),
                        dbc.Col(f"Avg Rating: {rating_val:.2f} ({rating_text_val})", width=3),
                        dbc.Col(f"Individual: [{individual_ratings_str}]", width=4, style={'fontSize': 'small'}),
                        dbc.Col(
                            dbc.Button(
                                "Open in ITK-SNAP",
                                id={'type': 'interactive-snap-button-cell5', 'index': index}, # Unique type ID
                                n_clicks=0,
                                size="sm",
                                className="w-100"
                            ),
                            width=4
                        )
                    ], className="mb-1 align-items-center"),
                     dbc.Row([
                        dbc.Col(html.Code(f"IMG: {os.path.basename(row.get('im', 'N/A'))}"), width=6),
                    
                        dbc.Col(html.Code(f"SEG: {os.path.basename(row.get('mask', 'N/A'))}"), width=6),
                     ], className="mt-1", style={'fontSize': 'x-small'})
                ]), className="mb-2")
            )
        # Assign children
        app.layout['interactive-sample-list-container-cell5'].children = interactive_sample_rows
    else:
         app.layout['interactive-sample-list-container-cell5'].children = html.P("No samples were selected in Cell 3 to display.")


    # --- Callback to Launch ITK-SNAP ---
    @app.callback(
        Output('status-message-cell5', 'children'),
        Output('status-message-cell5', 'is_open'),
        Input({'type': 'interactive-snap-button-cell5', 'index': dash.ALL}, 'n_clicks'),
        prevent_initial_call=True
    )
    def launch_itksnap_interactive_cell5(n_clicks):
        # Determine which button was clicked
        ctx = dash.callback_context
        if not ctx.triggered_id:
            return dash.no_update, False # No update if no trigger

        # Assume final_samples exists globally in the notebook scope for the callback
        if 'final_samples' not in globals() or not isinstance(final_samples, pd.DataFrame):
             return "Error: Sample data (final_samples) not found in global scope.", True

        button_id = ctx.triggered_id
        if not isinstance(button_id, dict) or 'index' not in button_id:
            return f"Error: Invalid trigger ID: {button_id}", True

        sample_index = button_id['index']

        if sample_index not in final_samples.index:
            return f"Error: Index {sample_index} not found in current samples.", True

        selected_row = final_samples.loc[sample_index]
        image_file_path = selected_row.get('im', None)
        mask_file_path = selected_row.get('mask', None) # Use 'seg' column

        # Check file existence
        img_exists = image_file_path and os.path.exists(image_file_path)
        seg_exists = mask_file_path and os.path.exists(mask_file_path)

        if img_exists and seg_exists:
            try:
                # Construct command using -g and -s, quote paths
                command = [
                    itksnap_command,
                    "-g", shlex.quote(image_file_path),
                    "-s", shlex.quote(mask_file_path)
                ]
                subprocess.Popen(command)
                status = f"Launched ITK-SNAP for sample index {sample_index}."
                return status, True # Return status and make alert visible
            except FileNotFoundError:
                status = f"Error: ITK-SNAP command ('{itksnap_command}') not found. Check path."
                return status, True
            except Exception as e:
                status = f"Error launching ITK-SNAP for index {sample_index}: {e}"
                return status, True
        else:
            # Provide more specific feedback on which file is missing
            missing = []
            if not img_exists: missing.append(f"Image ('{image_file_path}')")
            if not seg_exists: missing.append(f"mask ('{mask_file_path}')")
            status = f"Error: Cannot launch for index {sample_index}. File(s) not found: {', '.join(missing)}."
            return status, True


    # --- Run the App Inline ---
    print("\nStarting Dash app for interactive viewing...")
    app.run(port=8052, debug=False) 


Starting Dash app for interactive viewing...



JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



Launching ITK-SNAP


qt.qpa.fonts: Populating font family aliases took 124 ms. Replace uses of missing font family ".AppleSystemUIFont" with one that exists to avoid this cost. 
qt.pointer.dispatch: delivering touch release to same window QWindow(0x0) not QWidgetWindow(0x11cb157b0, name="MainImageWindowWindow")
qt.pointer.dispatch: skipping QEventPoint(id=1 ts=0 pos=0,0 scn=589.558,357.55 gbl=589.558,357.55 Released ellipse=(1x1 ∡ 0) vel=0,0 press=-589.558,-357.55 last=-589.558,-357.55 Δ 589.558,357.55) : no target window
qt.pointer.dispatch: delivering touch release to same window QWindow(0x0) not QWidgetWindow(0x11cb157b0, name="MainImageWindowWindow")
qt.pointer.dispatch: skipping QEventPoint(id=1 ts=0 pos=0,0 scn=363.824,191.554 gbl=363.824,191.554 Released ellipse=(1x1 ∡ 0) vel=0,0 press=-363.824,-191.554 last=-363.824,-191.554 Δ 363.824,191.554) : no target window
qt.pointer.dispatch: delivering touch release to same window QWindow(0x0) not QWidgetWindow(0x11cb157b0, name="MainImageWindowWindow")
qt.