In [1]:
import pandas as pd
from pathlib import Path
from IPython.display import display, Markdown

# Directory containing the CSVs
csv_dir = Path('analysis')

# List of files you want to load
files = ['summary.csv', 'outliers.csv', 'label_accuracy.csv']

for fname in files:
    path = csv_dir / fname
    if path.exists():
        df = pd.read_csv(path)
        display(Markdown(f"### `{fname}`"))
        display(df)
    else:
        display(Markdown(f"**Warning:** `{fname}` not found in {csv_dir}"))

### `summary.csv`

Unnamed: 0,model,dataset,n_examples,accuracy,over_rate,under_rate,mean_error,MAE,MAPE (%),sMAPE (%)
0,GPT-4.1,FSC-147,6146.0,0.168565,0.137651,0.693785,-17.633583,20.102506,22.667659,30.256929
1,claude-3.5-haiku,FSC-147,6146.0,0.110316,0.216564,0.673121,-19.170029,23.412463,29.082914,38.899164
2,gemini-2.5-flash-preview-04-17,FSC-147,6146.0,0.145298,0.393264,0.461438,37.450212,51.66824,42.502475,32.201666
3,gemini-2.5-pro-preview-05-06,FSC-147,6146.0,0.177839,0.190205,0.631956,4.368044,27.926782,22.550368,25.840446
4,gemma-3-12b-it,FSC-147,6146.0,0.120078,0.34315,0.536772,-12.718842,21.614709,25.422048,30.754934
5,gemma-3-27b-it,FSC-147,6146.0,0.136349,0.19053,0.673121,-15.057273,27.18451,26.942381,33.258917
6,gemma-3-4b-it,FSC-147,6146.0,0.112919,0.373902,0.513179,24.904816,61.427758,53.609235,28.127849
7,grok-2-vision,FSC-147,6146.0,0.14481,0.173609,0.681582,-18.847055,22.561992,25.39284,32.366165
8,o4-mini,FSC-147,6146.0,0.15083,0.254312,0.594858,-7.561503,20.146274,23.460326,28.024115
9,GPT-4.1,GeckoNum,22256.0,0.699991,0.214549,0.08546,1.17411,1.512356,21.814915,39.394083


### `outliers.csv`

Unnamed: 0,idx,idx.1,truth,result,abs_err,model,dataset
0,780,780,3701,1000,2701,GPT-4.1,FSC-147
1,6003,6003,2560,1,2559,GPT-4.1,FSC-147
2,1176,1176,1912,0,1912,GPT-4.1,FSC-147
3,731,731,2092,200,1892,GPT-4.1,FSC-147
4,1487,1487,1672,0,1672,GPT-4.1,FSC-147
...,...,...,...,...,...,...,...
205,3767,3767,11,150,139,o4-mini,GeckoNum
206,4999,4999,11,150,139,o4-mini,GeckoNum
207,12574,12574,11,144,133,o4-mini,GeckoNum
208,12017,12017,0,120,120,o4-mini,GeckoNum


### `label_accuracy.csv`

Unnamed: 0,label,accuracy,count,rank,model,dataset
0,lighters,1.000000,1,top,GPT-4.1,FSC-147
1,screws,1.000000,1,top,GPT-4.1,FSC-147
2,birthday candles,0.666667,3,top,GPT-4.1,FSC-147
3,meat skewers,0.666667,3,top,GPT-4.1,FSC-147
4,oyster shells,0.666667,9,top,GPT-4.1,FSC-147
...,...,...,...,...,...,...
415,flutes,0.422492,329,worst,o4-mini,GeckoNum
416,paperclips,0.463889,360,worst,o4-mini,GeckoNum
417,pencils,0.507538,398,worst,o4-mini,GeckoNum
418,trees,0.514950,301,worst,o4-mini,GeckoNum


In [2]:
import pandas as pd
import glob
import os

def find_missing_and_negative(results_filename="FSC-147_results.csv", root_dir="valid_results", recursive=True):
    """
    Scans for all CSV files named `results_filename` under `root_dir` (optionally recursively),
    and reports missing idx values in the overall range and idxs with result == -1.
    """
    # Construct glob pattern
    pattern = os.path.join(root_dir, "**", results_filename) if recursive else os.path.join(root_dir, "*", results_filename)
    file_paths = glob.glob(pattern, recursive=recursive)
    
    if not file_paths:
        print(f"No files named '{results_filename}' found under {root_dir}")
        return
    
    # Load dataframes
    dfs = {os.path.relpath(fp, start=root_dir): pd.read_csv(fp) for fp in file_paths}
    
    # Determine overall idx range
    all_mins = [df['idx'].min() for df in dfs.values()]
    all_maxs = [df['idx'].max() for df in dfs.values()]
    min_idx, max_idx = min(all_mins), max(all_maxs)
    full_range = set(range(min_idx, max_idx + 1))
    
    # Analyze each file
    for rel_path, df in dfs.items():
        present_idx = set(df['idx'])
        missing_idx = sorted(full_range - present_idx)
        negative_idx = sorted(df.loc[df['result'] == -1, 'idx'])
        
        print(f"\nFile: {rel_path}")
        print(f"  Missing idx (from {min_idx} to {max_idx}): {missing_idx}")
        print(f"  idx with result == -1: {negative_idx}")

# Example usage:
find_missing_and_negative()




File: GPT-4.1/FSC-147_results.csv
  Missing idx (from 0 to 6145): []
  idx with result == -1: []

File: gemma-3-4b-it/FSC-147_results.csv
  Missing idx (from 0 to 6145): []
  idx with result == -1: []

File: o4-mini/FSC-147_results.csv
  Missing idx (from 0 to 6145): []
  idx with result == -1: []

File: gemma-3-12b-it/FSC-147_results.csv
  Missing idx (from 0 to 6145): []
  idx with result == -1: []

File: gemini-2.5-flash-preview-04-17/FSC-147_results.csv
  Missing idx (from 0 to 6145): []
  idx with result == -1: []

File: grok-2-vision/FSC-147_results.csv
  Missing idx (from 0 to 6145): []
  idx with result == -1: []

File: gemma-3-27b-it/FSC-147_results.csv
  Missing idx (from 0 to 6145): []
  idx with result == -1: []

File: claude-3.5-haiku/FSC-147_results.csv
  Missing idx (from 0 to 6145): []
  idx with result == -1: []

File: gemini-2.5-pro-preview-05-06/FSC-147_results.csv
  Missing idx (from 0 to 6145): []
  idx with result == -1: []


In [3]:
import pandas as pd
import glob
import os

def find_inconsistent_idxs(results_filename="TallyQA_results.csv", root_dir="valid_results", recursive=True):
    """
    Scans for all CSV files named `results_filename` under `root_dir` (optionally recursively),
    and reports idx values that are inconsistently missing or -1 across files.
    
    For each idx in the global range:
    - It's 'good' if present with result != -1
    - It's 'bad' if missing or result == -1
    - It's flagged if it's 'good' in at least one file AND 'bad' in at least one file
    """
    # Construct glob pattern
    pattern = os.path.join(root_dir, "**", results_filename) if recursive else os.path.join(root_dir, "*", results_filename)
    file_paths = glob.glob(pattern, recursive=recursive)
    
    if not file_paths:
        print(f"No files named '{results_filename}' found under {root_dir}")
        return
    
    # Load dataframes
    dfs = {os.path.relpath(fp, start=root_dir): pd.read_csv(fp) for fp in file_paths}
    file_names = list(dfs.keys())
    n_files = len(file_names)
    
    # Determine overall idx range
    all_mins = [df['idx'].min() for df in dfs.values()]
    all_maxs = [df['idx'].max() for df in dfs.values()]
    min_idx, max_idx = min(all_mins), max(all_maxs)
    full_range = range(min_idx, max_idx + 1)
    
    # Build status map: idx -> list of statuses per file
    # status: True = good (present & result != -1), False = bad (missing or result == -1)
    status = {idx: [] for idx in full_range}
    for df in dfs.values():
        present = set(df['idx'])
        neg = set(df.loc[df['result'] == -1, 'idx'])
        for idx in full_range:
            if idx in present and idx not in neg:
                status[idx].append(True)
            else:
                status[idx].append(False)
    
    # Identify inconsistent idx: mix of True and False
    inconsistent = [idx for idx, vals in status.items() if any(vals) and not all(vals)]
    if not inconsistent:
        print("All idx values are either consistently present or consistently missing/negative across all files.")
        return
    
    print(f"Inconsistent idx values (present/good in some files, missing/neg in others): {inconsistent}\n")
    
    # Report for each file which inconsistent idx are bad in that file
    for fname, df in dfs.items():
        present = set(df['idx'])
        neg = set(df.loc[df['result'] == -1, 'idx'])
        bad_in_file = [idx for idx in inconsistent if (idx not in present) or (idx in neg)]
        print(f"File: {fname}")
        print(f"  idx that are bad in this file: {bad_in_file}\n")

# Example usage:
find_inconsistent_idxs()
# find_inconsistent_idxs(recursive=False)


All idx values are either consistently present or consistently missing/negative across all files.


In [4]:
def extract_rows_by_idx(idx_list, source_file, output_file=None, idx_col_name='idx'):
    """
    Reads `source_file` (CSV without an 'idx' column, using the default integer row index),
    filters rows whose DataFrame index is in `idx_list` (preserving list order),
    adds the index as a column, and writes them to `output_file`.

    Parameters:
    - idx_list: list of integer row indices to extract.
    - source_file: path to the CSV file to read.
    - output_file: path to the CSV file to write. 
                   Defaults to prefixing 'filtered_' to the source filename.
    - idx_col_name: name for the new index column (defaults to 'idx').
    """
    df = pd.read_csv(source_file)
    
    # Determine which indices exist
    present = set(df.index)
    valid_idxs = [i for i in idx_list if i in present]
    missing = [i for i in idx_list if i not in present]
    if missing:
        print(f"Warning: these indices were not found and will be skipped: {missing}")
    
    # Select rows in the order of idx_list
    filtered_df = df.loc[valid_idxs].copy()
    
    # Insert the index as a new column with the exact values from idx_list
    filtered_df.insert(0, idx_col_name, valid_idxs)
    
    # Determine output file path
    if output_file is None:
        dir_name, base_name = os.path.split(source_file)
        output_file = os.path.join(dir_name, f"filtered_{base_name}")
    
    # Write filtered DataFrame to CSV without the pandas index
    filtered_df.to_csv(output_file, index=False)
    
    print(f"Wrote {len(filtered_df)} rows for indices {valid_idxs} to {output_file}")

idxs = [17608, 22000]
extract_rows_by_idx(idxs, source_file="/cluster/project/sachan/pmlr/grounding-vlms/eval/datasets/GeckoNum/dataset.csv", output_file="/cluster/project/sachan/pmlr/grounding-vlms/eval/datasets/Missing_next/dataset.csv")



OSError: Cannot save file into a non-existent directory: '/cluster/project/sachan/pmlr/grounding-vlms/eval/datasets/Missing_next'