# Perception Accuracy Scores Calculator

This notebook calculates perception accuracy scores for all challenges and modalities, providing detailed reports and a summary table.


In [16]:
import sys
from pathlib import Path

# Make the notebook's parent directory importable so that `src` resolves.
# Path() is the current working directory, so this assumes the kernel was
# started in the notebook's own folder (one level below the directory that
# contains `src`) — TODO confirm for other launch setups.
notebook_dir = Path().resolve()
parent_dir = notebook_dir.parent
if str(parent_dir) not in sys.path:  # guard: re-running the cell must not stack duplicate entries
    sys.path.insert(0, str(parent_dir))

import pandas as pd
from IPython.display import display, Markdown
# NOTE(review): the wildcard import hides where names come from. The helper
# cells in this notebook call parse_range, parse_divider_claims,
# format_feature_score_md and calculate_diff, which are presumably provided by
# this module — consider importing them explicitly once it is confirmed that
# nothing else from score_utils is needed.
from src.utils.score_utils import *

# Import challenge functions
# from calculate_scores_13e47133 import challenge_13e47133
# from calculate_scores_135a2760 import challenge_135a2760
# from calculate_scores_136b0064 import challenge_136b0064
# from calculate_scores_142ca369 import challenge_142ca369
# from calculate_scores_0934a4d8 import challenge_0934a4d8


## Helper Functions

Functions to calculate scores programmatically for each challenge.


In [17]:
def calculate_challenge_13e47133_scores():
    """Score each modality's perception claims for challenge 13e47133.

    Seven features are scored per modality with ``format_feature_score_md``:
    the five ground-truth dots (15 points each), the "Divider" (20 points)
    and the "Background" (5 points, passed with ``is_binary=True``). For a
    dot, the claimed set handed to the scorer contains the cell only when the
    modality claimed that exact cell with the ground-truth colour; any other
    claimed dots are ignored.

    Returns:
        dict: modality name -> ``100 - total_penalty``, in table order.
    """
    dot_points = 15
    divider_points = 20
    background_points = 5

    # Ground truth: dot cell -> colour, and the divider as the union (`|`)
    # of four parse_range results.
    expected_dots = {
        'A1': 'orange',
        'B2': 'teal',
        'T1': 'teal',
        'S2': 'yellow',
        'P11': 'green',
    }
    expected_divider = (
        parse_range('I1', 'I20')
        | parse_range('I10', 'Q10')
        | parse_range('I16', 'Q16')
        | parse_range('Q11', 'Q15')
    )

    # One row per modality:
    # (name, claimed dots, claimed divider segments, background claimed?)
    claim_rows = [
        (
            'Row-only',
            {'A1': 'orange', 'B2': 'teal', 'T1': 'teal', 'S2': 'yellow', 'Q11': 'green'},
            'J1-J20; J10-R10; J16-R16; R11-R15',
            True,
        ),
        (
            'Column-only',
            {'A1': 'orange', 'B1': 'teal', 'T1': 'teal', 'S1': 'yellow', 'P11': 'green'},
            'I1-I20; I10-Q10; I16-Q16; Q10-Q16',
            True,
        ),
        (
            'ASCII',
            {'A1': 'orange', 'B2': 'teal', 'T1': 'teal', 'S2': 'yellow', 'P11': 'green'},
            'I1-I20; J9-J17; P9-P17; Q11-Q15',
            True,
        ),
        (
            'JSON',
            {'A1': 'orange', 'B2': 'teal', 'T1': 'teal', 'S2': 'yellow', 'P11': 'green'},
            'I1-I20; I10-Q10; I16-Q16; Q11-Q15',
            True,
        ),
        (
            'Image 14×14',
            {'P11': 'green', 'B1': 'teal', 'C1': 'teal', 'C2': 'teal',
             'S1': 'yellow', 'T1': 'yellow', 'S2': 'yellow'},
            'I1-I20; I10-R10; R10-R16; R16-I16; I16-I10',
            True,
        ),
        (
            'Image 15×15',
            {'A1': 'orange', 'B1': 'teal', 'P11': 'green', 'T1': 'yellow'},
            'I2-I9; I10-Q10; I16-Q16; I10-I16; Q10-Q16',
            True,
        ),
        (
            'Image 16×16',
            {'A1': 'orange', 'B1': 'teal', 'T1': 'teal', 'S1': 'yellow', 'Q11': 'green'},
            'J2-J9; J10-Q10; I10-I15; Q12-Q15; I16-Q16; I17-I19',
            True,
        ),
        (
            'Image 17×17',
            {'A1': 'orange', 'B2': 'teal', 'R2': 'yellow', 'P11': 'green', 'P12': 'green'},
            'I1-I16; J10-Q10; J16-Q16; Q11-Q15',
            True,
        ),
        (
            '24×24-1148',
            {'A1': 'orange', 'T1': 'teal', 'A2': 'teal', 'B2': 'teal',
             'S2': 'yellow', 'T2': 'yellow', 'S3': 'yellow', 'T3': 'yellow',
             'P11': 'green', 'Q11': 'green', 'P12': 'green', 'Q12': 'green'},
            'I1-I20; I10-Q10; I16-Q16; I10-I16; Q10-Q16',
            True,
        ),
        (
            '24×24-1205',
            {'A1': 'orange', 'B2': 'teal', 'S2': 'yellow', 'P11': 'green'},
            'I1-I20; J10-Q10; J16-Q16; Q11-Q15',
            True,
        ),
        (
            'Image 768×768',
            {'A1': 'orange', 'B2': 'teal', 'P11': 'green', 'S1': 'yellow', 'T1': 'yellow'},
            'I1-I20; J10-Q10; Q11-Q16; J16-P16',
            True,
        ),
    ]

    results = {}
    for modality, dot_claims, divider_text, background_seen in claim_rows:
        # Penalties are added one at a time, in feature order, starting from 0.0.
        lost = 0.0

        for cell, colour in expected_dots.items():
            # Direct lookup: the cell counts only with the matching colour.
            matched = {cell} if dot_claims.get(cell) == colour else set()
            _, cost = format_feature_score_md(f"Dot {cell}", dot_points, {cell}, matched)
            lost += cost

        divider_claim = parse_divider_claims(divider_text)
        _, cost = format_feature_score_md("Divider", divider_points, expected_divider, divider_claim)
        lost += cost

        background_claim = {'background'} if background_seen else set()
        _, cost = format_feature_score_md(
            "Background", background_points, {'background'}, background_claim, is_binary=True
        )
        lost += cost

        results[modality] = 100 - lost
    return results


In [18]:
def calculate_challenge_135a2760_scores():
    """Score each modality's perception claims for challenge 135a2760.

    Six features are scored per modality with ``format_feature_score_md``:
    the "Green frame" (10 points), the "Red frame" (10 points) and four blue
    dots at C3, E3, G3 and K3 (20 points each). For a blue dot only the
    presence of the cell among the modality's claimed dots is checked; extra
    claimed dots are ignored.

    Returns:
        dict: modality name -> ``100 - total_penalty``, in table order.
    """
    green_points, red_points, blue_dot_points = 10, 10, 20

    # Ground truth frames: each is the union (`|`) of four parse_range results.
    expected_green = (
        parse_range('A1', 'M1')
        | parse_range('A5', 'M5')
        | parse_range('A1', 'A5')
        | parse_range('M1', 'M5')
    )
    expected_red = (
        parse_range('B2', 'L2')
        | parse_range('B4', 'L4')
        | parse_range('B2', 'B4')
        | parse_range('L2', 'L4')
    )
    expected_blue_cells = ('C3', 'E3', 'G3', 'K3')

    # Claim texts / dot lists that recur across modalities.
    green_outline = 'A1-M1; A5-M5; A1-A5; M1-M5'
    red_outline = 'B2-L2; B4-L4; B2-B4; L2-L4'
    red_rows_and_ends = 'B2-L2; B4-L4; B3; L3'
    red_single_range = 'B2-L4'
    four_dots = ('C3', 'E3', 'G3', 'K3')
    five_dots = ('C3', 'E3', 'G3', 'I3', 'K3')

    # One row per modality:
    # (name, green frame text, red frame text, cells claimed as blue dots)
    claim_rows = [
        ('Row-only', green_outline, red_outline, five_dots),
        ('Column-only', green_outline, red_outline, four_dots),
        ('ASCII', green_outline, red_outline, four_dots),
        ('JSON', green_outline, red_outline, four_dots),
        ('Image 14×14', green_outline, red_rows_and_ends, ('C3', 'D3', 'E3', 'G3', 'H3', 'K3')),
        ('Image 15×15', green_outline, red_single_range, ('C3', 'G3', 'K3')),
        ('Image 16×16', green_outline, red_single_range, five_dots),
        ('Image 17×17', green_outline, red_single_range, five_dots),
        ('24×24-1148', green_outline, red_rows_and_ends, four_dots),
        ('24×24-1205', green_outline, red_single_range, four_dots),
        ('Image 768×768', green_outline, red_rows_and_ends, four_dots),
    ]

    results = {}
    for modality, green_text, red_text, blue_cells in claim_rows:
        lost = 0.0

        green_claim = parse_divider_claims(green_text)
        _, cost = format_feature_score_md("Green frame", green_points, expected_green, green_claim)
        lost += cost

        red_claim = parse_divider_claims(red_text)
        # A red-frame text that mentions 'B2-L4' and has no ';' separator is
        # replaced by parse_range('B2', 'L4') — presumably so the claim is read
        # as one corner-to-corner range; confirm against score_utils.
        if ';' not in red_text and 'B2-L4' in red_text:
            red_claim = parse_range('B2', 'L4')
        _, cost = format_feature_score_md("Red frame", red_points, expected_red, red_claim)
        lost += cost

        for cell in expected_blue_cells:
            found = {cell} if cell in blue_cells else set()
            _, cost = format_feature_score_md(f"Blue dot {cell}", blue_dot_points, {cell}, found)
            lost += cost

        results[modality] = 100 - lost
    return results


In [19]:
def calculate_challenge_136b0064_scores():
    """Score each modality's perception claims for challenge 136b0064.

    Ten features are scored per modality with ``format_feature_score_md`` at
    10 points apiece: eight shapes (each a set of cells, looked up by shape
    key), the "Yellow divider" and the "Grey dot".

    Returns:
        dict: modality name -> ``100 - total_penalty``, in table order.
    """
    shape_points, divider_points, grey_dot_points = 10, 10, 10

    # Ground truth.
    expected_shapes = {
        'red_u_row1': {'A1', 'C1', 'A2', 'C2', 'A3', 'B3', 'C3'},
        'pink_y_row1': {'E1', 'G1', 'F2', 'F3'},
        'blue_q_row2': {'A5', 'B5', 'A6', 'C6', 'B7'},
        'green_m_row2': {'E5', 'F5', 'G5', 'F6', 'E7', 'G7'},
        'blue_q_row3': {'A9', 'B9', 'A10', 'C10', 'B11'},
        'pink_y_row3': {'E9', 'G9', 'F10', 'F11'},
        'pink_y_row4': {'A13', 'C13', 'B14', 'B15'},
        'blue_q_row4': {'E13', 'F13', 'E14', 'G14', 'F15'},
    }
    expected_divider = parse_range('H1', 'H15')
    expected_grey_dot = {'J1'}

    modality_names = (
        'Row-only', 'Column-only', 'ASCII', 'JSON',
        'Image 14×14', 'Image 15×15', 'Image 16×16', 'Image 17×17',
        '24×24-1148', '24×24-1205', 'Image 768×768',
    )

    # Baseline: every modality starts with a claim equal to the ground truth
    # (a shallow copy of the shape table, the full divider, the single grey dot).
    claims_by_modality = {
        name: {'shapes': dict(expected_shapes), 'divider': 'H1-H15', 'grey_dot': {'J1'}}
        for name in modality_names
    }

    # Overrides: only the recorded claims that were written out separately.
    # Column-only lists the red U cells in a different order (same cells).
    claims_by_modality['Column-only']['shapes']['red_u_row1'] = {
        'A1', 'A2', 'A3', 'B3', 'C1', 'C2', 'C3',
    }
    claims_by_modality['Image 14×14']['grey_dot'] = {'J1', 'K1'}
    claims_by_modality['24×24-1148']['grey_dot'] = {'J1', 'J2'}
    claims_by_modality['24×24-1205']['shapes'] = {
        'red_u_row1': {'A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'B5', 'B6',
                       'C5', 'C6', 'C1', 'C2', 'C3', 'C4'},
        'pink_y_row1': {'E1', 'G1', 'E2', 'E3', 'F1', 'G3', 'F2', 'F3'},
        'blue_q_row2': {'A6', 'B6', 'C6', 'B7', 'A7'},
        'blue_q_row3': {'A10', 'B10', 'C10', 'B11', 'A11'},
        'green_m_row2': {'E5', 'F5', 'G5', 'F6', 'F7', 'E7', 'G7', 'E6'},
        'pink_y_row3': {'F10', 'E9', 'G9', 'E11', 'G11', 'F11'},
        'pink_y_row4': {'A13', 'C13', 'B13', 'B14', 'B15'},
        'blue_q_row4': {'E13', 'F13', 'E14', 'G14', 'F15'},
    }

    results = {}
    for modality, claim in claims_by_modality.items():
        lost = 0.0

        for shape_name, expected_cells in expected_shapes.items():
            # A shape the modality did not report is scored against an empty set.
            reported_cells = claim['shapes'].get(shape_name, set())
            _, cost = format_feature_score_md(
                f"Shape {shape_name}", shape_points, expected_cells, reported_cells
            )
            lost += cost

        divider_claim = parse_divider_claims(claim['divider'])
        _, cost = format_feature_score_md(
            "Yellow divider", divider_points, expected_divider, divider_claim
        )
        lost += cost

        grey_claim = claim.get('grey_dot', set())
        _, cost = format_feature_score_md("Grey dot", grey_dot_points, expected_grey_dot, grey_claim)
        lost += cost

        results[modality] = 100 - lost
    return results


In [20]:
def calculate_challenge_142ca369_scores():
    """Score each modality's perception claims for challenge 142ca369.

    Eight features are scored per modality with ``format_feature_score_md``
    at 12.5 points apiece: four L-shapes keyed by colour (each a set of cells)
    and four dots. For a dot, the claimed set handed to the scorer contains
    the cell only when the modality claimed that exact cell with the
    ground-truth colour; a colour with no claimed L-shape is scored against
    an empty set.

    Returns:
        dict: modality name -> ``100 - total_penalty``, in table order.
    """
    lshape_points = 12.5
    dot_points = 12.5

    # Ground truth.
    expected_lshapes = {
        'green': {'A9', 'B9', 'B8'},
        'blue': {'C7', 'D7', 'D6'},
        'red': {'E5', 'F5', 'F4'},
        'grey': {'G3', 'H3', 'H2'},
    }
    expected_dots = {'K7': 'grey', 'K11': 'red', 'K15': 'blue', 'K19': 'green'}

    # Claim literals that recur unchanged across several modalities. They are
    # only read below, so sharing one object between rows is safe.
    recurring_lshapes = {
        'grey': {'H2', 'G3', 'H3'},
        'red': {'F4', 'E5', 'F5'},
        'blue': {'D6', 'C7', 'D7'},
        'green': {'B8', 'A9', 'B9'},
    }
    recurring_dots = {'K7': 'grey', 'K11': 'red', 'K15': 'blue', 'K19': 'green'}

    # One row per modality: (name, claimed L-shapes by colour, claimed dots)
    claim_rows = [
        ('Row-only', recurring_lshapes,
         {'K11': 'red', 'K15': 'blue', 'K19': 'green', 'L7': 'grey'}),
        ('Column-only', recurring_lshapes, recurring_dots),
        ('ASCII', {**recurring_lshapes, 'grey': {'G2', 'G3', 'H3'}}, recurring_dots),
        ('JSON', recurring_lshapes, recurring_dots),
        ('Image 14×14', {}, {'K5': 'grey', 'K11': 'red', 'K15': 'blue'}),
        ('Image 15×15', {}, {'J9': 'grey'}),
        ('Image 16×16', {}, {'K7': 'grey', 'K11': 'red', 'K15': 'blue'}),
        ('Image 17×17', recurring_lshapes, recurring_dots),
        ('24×24-1148', {}, {'M7': 'grey', 'M11': 'red', 'M15': 'blue', 'M19': 'green'}),
        ('24×24-1205', {}, {'M9': 'grey', 'M13': 'red', 'M17': 'blue', 'M21': 'green'}),
        ('Image 768×768',
         {
             'green': {'A5', 'B5', 'B6', 'B7'},
             'blue': {'C4', 'D4', 'D5', 'D6'},
             'red': {'E3', 'F3', 'F4', 'F5'},
             'grey': {'G2', 'H2', 'H3', 'H4'},
         },
         recurring_dots),
    ]

    results = {}
    for modality, lshape_claims, dot_claims in claim_rows:
        lost = 0.0

        for colour, expected_cells in expected_lshapes.items():
            reported_cells = lshape_claims.get(colour, set())
            _, cost = format_feature_score_md(
                f"{colour.capitalize()} L-shape", lshape_points, expected_cells, reported_cells
            )
            lost += cost

        for cell, colour in expected_dots.items():
            # Direct lookup: the cell counts only with the matching colour.
            matched = {cell} if dot_claims.get(cell) == colour else set()
            _, cost = format_feature_score_md(f"Dot {cell}", dot_points, {cell}, matched)
            lost += cost

        results[modality] = 100 - lost
    return results


In [21]:
def calculate_challenge_0934a4d8_scores():
    """Score each modality's perception claims for challenge 0934a4d8.

    Unlike a 100-minus-penalties scheme, the score here is built up from
    three parts, added in this order starting from 0.0:

    * block: ``50 - 100 * calculate_diff(ground_truth, claimed)``, or 0.0 when
      the claimed block is empty (a claim text mentioning ``'R15-Z26'`` is
      forced to empty);
    * dots: ``-5.0`` when the modality's ``dots_enum`` flag is truthy;
    * symmetry: a bonus looked up from the claimed symmetry label (0 if the
      label is unknown).

    The result is not clamped, so it can be negative.

    Returns:
        dict: modality name -> score, in table order.
    """
    base_points = 50
    diff_weight = 100

    bonus_by_symmetry = {
        'bilateral': 10,
        'vertical': 10,
        'horizontal': 10,
        'horizontal_local': 10,
        'partial': 5,
        None: 0,
    }

    expected_block = parse_range('Z15', 'AC23')

    # One row per modality: (name, claimed block text, dots_enum flag, symmetry label)
    claim_rows = [
        ('Row-only', 'R15-Z26 to R23-AC29', True, 'bilateral'),
        ('Column-only', 'Z17:AC24', True, 'vertical'),
        ('ASCII', 'AA19:AD19; AA20:AD20; Z21:AD21; Z22:AD22; AA23:AD23', True, 'horizontal_local'),
        ('JSON', 'Z15:AD23', True, 'horizontal'),
        ('Image 14×14', 'Y15:AD24', True, None),
        ('Image 15×15', 'Z15:AC23', True, 'partial'),
        ('Image 16×16', 'Z15:AD18; Z21:AD24; Z27:AD30', True, None),
        ('Image 17×17', 'U15:Z21', True, None),
        ('24×24-1148', 'Z15:Z24', True, None),
        ('24×24-1205', 'Z15:AC23', True, None),
        ('Image 768×768', 'Y15:AC24', True, None),
    ]

    results = {}
    for modality, block_text, dots_enumerated, symmetry in claim_rows:
        block_claim = parse_divider_claims(block_text)
        # This claim text is discarded after parsing and scored as "no block".
        if 'R15-Z26' in block_text:
            block_claim = set()

        if block_claim:
            block_points = base_points - diff_weight * calculate_diff(expected_block, block_claim)
        else:
            block_points = 0.0

        # NOTE(review): 5 points are subtracted when dots_enum is True (it is
        # True for every modality here) — confirm that polarity is intended.
        dots_adjustment = -5.0 if dots_enumerated else 0.0
        symmetry_points = bonus_by_symmetry.get(symmetry, 0)

        # Left-to-right addition from 0.0 keeps the float result unchanged.
        results[modality] = 0.0 + block_points + dots_adjustment + symmetry_points
    return results


## Calculate Scores for All Challenges

Run the scoring functions to compute scores for all challenges and modalities.


In [22]:
# Run each per-challenge scorer once, in a fixed order. Every call returns a
# {modality name: score} dict; the results are bound to one name per challenge
# so later cells can refer to them individually.
(
    scores_13e47133,
    scores_135a2760,
    scores_136b0064,
    scores_142ca369,
    scores_0934a4d8,
) = (
    calculate_challenge_13e47133_scores(),
    calculate_challenge_135a2760_scores(),
    calculate_challenge_136b0064_scores(),
    calculate_challenge_142ca369_scores(),
    calculate_challenge_0934a4d8_scores(),
)

print("Scores calculated successfully!")


Scores calculated successfully!


## Summary Table

Create and display a summary table matching the LaTeX format, with modalities as rows and challenges as columns.


In [23]:
# Row order of the summary table (kept consistent with the LaTeX table).
modality_order = [
    'Row-only', 'Column-only', 'ASCII', 'JSON',
    'Image 14×14', 'Image 15×15', 'Image 16×16', 'Image 17×17',
    '24×24-1148', '24×24-1205',
    'Image 768×768',
]

# Column data: the modality labels, then one score column per challenge id.
# A modality missing from a challenge's result dict yields None in that cell.
data = {
    'Modality': modality_order,
    **{
        challenge_id: [challenge_scores.get(modality) for modality in modality_order]
        for challenge_id, challenge_scores in (
            ('13e47133', scores_13e47133),
            ('135a2760', scores_135a2760),
            ('136b0064', scores_136b0064),
            ('142ca369', scores_142ca369),
            ('0934a4d8', scores_0934a4d8),
        )
    },
}

# Modalities become the index (rows); scores are rounded to 2 decimal places.
df = pd.DataFrame(data).set_index('Modality').round(2)

# Text banner on stdout, followed by the rich rendering of the table.
print(
    "\n" + "=" * 80,
    "PERCEPTION ACCURACY SCORES SUMMARY TABLE",
    "=" * 80,
    sep="\n",
)
display(df)



PERCEPTION ACCURACY SCORES SUMMARY TABLE


Unnamed: 0_level_0,13e47133,135a2760,136b0064,142ca369,0934a4d8
Modality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Row-only,60.61,100.0,100.0,87.5,5.0
Column-only,70.0,100.0,100.0,100.0,21.67
ASCII,87.32,100.0,100.0,91.67,-11.67
JSON,100.0,100.0,100.0,100.0,30.0
Image 14×14,49.15,100.0,90.0,25.0,-21.67
Image 15×15,52.56,76.25,100.0,0.0,50.0
Image 16×16,45.73,96.25,100.0,37.5,-66.11
Image 17×17,68.05,96.25,100.0,100.0,-132.78
24×24-1148,100.0,100.0,90.0,0.0,-32.78
24×24-1205,85.0,96.25,50.31,0.0,45.0
