In [6]:
"""
Style Accessibility Ranking Analysis
===================================

This script analyzes which visual styles are most suitable for accessibility
by ranking styles based on expert scores with weighted focus on accessibility dimensions.

Weighting Strategy (Option A):
- Text-Image Alignment: 60% (primary accessibility factor)
- Image Simplicity: 25% (cognitive accessibility)
- Image Quality: 15% (visual clarity)

Key Features:
- Handles both _IAA (multi-expert) and single-expert images
- Averages scores when multiple experts rate the same image
- Maps images to styles using CSV mapping file
- Provides statistical confidence measures
- Separate overall score analysis (includes text dimensions)

Author: Accessibility Analysis
Date: June 2025
"""

import json
import pandas as pd
import numpy as np
from pathlib import Path
from tabulate import tabulate
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# CONFIGURATION
# ============================================================================

# File paths (update these to match your system).
# ANNOTATION_DIR holds the per-expert JSON exports named in ANNOTATION_FILES;
# MAPPING_DIR holds the CSV named by MAPPING_FILE.
ANNOTATION_DIR = r"C:\Users\SouayedBelkiss\OneDrive - gae\Desktop\Thesis\annotation_analysis\expert work"
MAPPING_DIR = r"C:\Users\SouayedBelkiss\OneDrive - gae\Desktop\Thesis\annotation_analysis\required files"

# Expert label -> annotation file name (resolved relative to ANNOTATION_DIR).
# The label ends up in the 'expert' column of the loaded data.
ANNOTATION_FILES = {
    'Expert_A': 'alexa_annotations.json',
    'Expert_K': 'katrin_annotations.json', 
    'Expert_L': 'luisa_annotations.json',
    'Expert_M': 'martin_annotations.json'
}

# CSV with (at least) 'new_filename' and 'style' columns, read by
# load_style_mapping().
MAPPING_FILE = 'renamed_images_mapping.csv'

# The 10 styles we're ranking.
# Spelled in lower case because load_style_mapping() lower-cases and strips the
# style labels before they are compared against this list.
KNOWN_STYLES = [
    '3d rendered', 'artistic', 'cartoon', 'digital art', 'geometric',
    'minimalistic', 'realistic', 'retro', 'storybook', 'technical'
]

# Dimension configuration for accessibility analysis.
# 'scale_max' is the divisor used to normalise a raw score to 0-1 and 'weight'
# is that dimension's share of the accessibility score; the three weights sum
# to 1.0 (0.60 + 0.25 + 0.15).
ACCESSIBILITY_DIMENSIONS = {
    'txt_img_align': {'scale_max': 20, 'display_name': 'Text-Image Alignment', 'weight': 0.60},
    'img_simplicity': {'scale_max': 15, 'display_name': 'Image Simplicity', 'weight': 0.25},
    'img_quality': {'scale_max': 15, 'display_name': 'Image Quality', 'weight': 0.15}
}

# All dimensions for reference.
# The keys are matched against the 'from_name' of each annotation result item,
# and every key contributes to 'total_score'. The scale maxima sum to 100
# (4 x 15 + 2 x 20).
ALL_DIMENSIONS = {
    'img_simplicity': {'scale_max': 15, 'display_name': 'Image Simplicity'},
    'img_quality': {'scale_max': 15, 'display_name': 'Image Quality'},
    'txt_simplicity': {'scale_max': 15, 'display_name': 'Text Simplicity'},
    'txt_quality': {'scale_max': 15, 'display_name': 'Text Quality'},
    'ethics': {'scale_max': 20, 'display_name': 'Ethics'},
    'txt_img_align': {'scale_max': 20, 'display_name': 'Text-Image Alignment'}
}

# ============================================================================
# DATA LOADING FUNCTIONS
# ============================================================================

def load_style_mapping():
    """Read the image-to-style CSV and build a filename -> style lookup.

    The CSV at ``MAPPING_DIR / MAPPING_FILE`` must provide ``new_filename``
    and ``style`` columns. Style labels are lower-cased and stripped before
    they are stored. A per-style image count, taken from the raw ``style``
    column, is printed as a sanity check.

    Returns:
        dict mapping each ``new_filename`` to its normalised style label, or
        ``None`` when the file is missing or cannot be processed (the error
        is printed instead of raised).
    """
    print("Loading style mapping...")

    csv_path = Path(MAPPING_DIR) / MAPPING_FILE

    try:
        mapping_table = pd.read_csv(csv_path)
        print(f"✓ Loaded mapping for {len(mapping_table)} images")

        # If a filename occurs more than once, the last row wins.
        lookup = {
            entry['new_filename']: entry['style'].lower().strip()
            for _, entry in mapping_table.iterrows()
        }

        # Report how many images each style has in the mapping file.
        print(f"Style distribution in mapping:")
        for label, n_images in mapping_table['style'].value_counts().items():
            print(f"  {label}: {n_images} images")

        return lookup

    except FileNotFoundError:
        print(f"✗ ERROR: Could not find {MAPPING_FILE}")
        return None
    except Exception as e:
        # Anything else (bad CSV, missing column, non-string style) is
        # reported and signalled to the caller with None.
        print(f"✗ ERROR loading mapping: {e}")
        return None

def load_expert_annotations(expert_name, filename, annotation_dir=None, dimensions=None):
    """Load one expert's annotation export and flatten it to one row per record.

    Each record of the JSON export is expected to carry a ``data`` dict
    (optionally with ``original_filename``) and an ``annotations`` list. Only
    the first annotation of a record is read; records whose ``annotations``
    list is empty are skipped.

    Args:
        expert_name: Label written to the ``expert`` column of every row.
        filename: Name of the JSON file inside the annotation directory.
        annotation_dir: Directory containing ``filename``. Defaults to the
            module-level ``ANNOTATION_DIR``.
        dimensions: Iterable of dimension names (``from_name`` values) to
            extract. Defaults to the keys of ``ALL_DIMENSIONS``.

    Returns:
        DataFrame with ``expert``, ``image``, one column per dimension that
        was rated at least once, and ``total_score``. ``total_score`` is the
        sum over all requested dimensions, or NaN when at least one of them
        was not rated in that annotation.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    base_dir = ANNOTATION_DIR if annotation_dir is None else annotation_dir
    dimension_names = tuple(ALL_DIMENSIONS if dimensions is None else dimensions)
    wanted = set(dimension_names)

    with open(Path(base_dir) / filename, 'r', encoding='utf-8') as f:
        data = json.load(f)

    annotations = []

    for record in data:
        if not record['annotations']:
            continue  # nothing was annotated for this record

        result = record['annotations'][0]['result']

        scores = {
            'expert': expert_name,
            'image': record['data'].get('original_filename', 'unknown'),
        }

        # Keep only numeric ratings that belong to a known dimension.
        for item in result:
            if item['type'] == 'number' and item['from_name'] in wanted:
                scores[item['from_name']] = item['value']['number']

        # An unrated dimension must not be counted as a score of 0: that would
        # deflate the total. Mark the total as missing instead so that the
        # downstream averages (which drop/skip NaN) ignore it.
        if all(dim in scores for dim in dimension_names):
            scores['total_score'] = sum(scores[dim] for dim in dimension_names)
        else:
            scores['total_score'] = float('nan')

        annotations.append(scores)

    return pd.DataFrame(annotations)

def load_all_expert_data():
    """Load and concatenate the annotations of every expert in ANNOTATION_FILES.

    Progress is printed per expert. If any expert file is missing or is not
    valid JSON, the problem is reported and the whole load is abandoned, so
    that no ranking is ever computed from a partial set of experts.

    Returns:
        A single DataFrame with all experts' rows (fresh 0..n-1 index), or
        ``None`` when a file could not be loaded or no files are configured.
    """
    print("Loading expert annotations...")

    all_dataframes = []

    for expert_name, filename in ANNOTATION_FILES.items():
        print(f"Loading {expert_name}...")
        try:
            df = load_expert_annotations(expert_name, filename)
        except FileNotFoundError:
            # Same reporting style as load_style_mapping(); the caller checks
            # for None instead of receiving a traceback.
            print(f"✗ ERROR: Could not find {filename}")
            return None
        except json.JSONDecodeError as e:
            print(f"✗ ERROR: {filename} is not valid JSON: {e}")
            return None
        all_dataframes.append(df)
        print(f"  - Loaded {len(df)} annotations")

    # pd.concat raises on an empty list, so signal "nothing loaded" explicitly.
    if not all_dataframes:
        return None

    # Combine all data
    all_data = pd.concat(all_dataframes, ignore_index=True)
    print(f"\nTotal annotations loaded: {len(all_data)}")

    return all_data

# ============================================================================
# STYLE MAPPING AND AGGREGATION
# ============================================================================

def map_images_to_styles(all_data, style_mapping, dimensions=None):
    """Attach a style to every annotation and average ratings per image.

    The style of an image is looked up in ``style_mapping`` after removing
    any ``_IAA`` marker from the file name. Annotations whose image has no
    entry in the mapping are dropped. Rows are then grouped by the image name
    exactly as it appears in ``all_data`` (the ``_IAA`` marker is NOT removed
    for grouping), and every score column is averaged over the group.

    Args:
        all_data: DataFrame with at least ``expert`` and ``image`` columns plus
            the score columns. It is not modified.
        style_mapping: dict mapping a base file name to its style label.
        dimensions: Iterable of score column names to aggregate. Defaults to
            the keys of ``ALL_DIMENSIONS``. ``total_score`` is always
            aggregated as well when present.

    Returns:
        DataFrame with one row per image: ``image``, ``style``,
        ``num_experts`` (number of rows in the group) and, for every
        aggregated column with at least one non-missing value, ``<col>`` (mean)
        and ``<col>_std`` (sample standard deviation, 0 for a single rating).
        The frame is empty (no columns) when nothing could be mapped.
    """
    print("\nMapping images to styles and aggregating multi-expert ratings...")

    score_columns = list(ALL_DIMENSIONS if dimensions is None else dimensions)

    def get_style_for_image(image_filename):
        # Handle _IAA images by removing suffix
        base_filename = image_filename.replace('_IAA', '')
        return style_mapping.get(base_filename, 'unknown')

    # Work on a copy so the caller's DataFrame does not silently gain a column.
    styled = all_data.copy()
    styled['style'] = styled['image'].apply(get_style_for_image)

    # Filter out unknown styles
    valid_data = styled[styled['style'] != 'unknown']
    print(f"Images successfully mapped to styles: {len(valid_data)}")
    print(f"Images with unknown styles: {len(styled) - len(valid_data)}")

    # Identify multi-expert images
    image_expert_counts = valid_data.groupby('image')['expert'].count()
    multi_expert_images = image_expert_counts[image_expert_counts > 1]

    print(f"\nMulti-expert annotation summary:")
    print(f"  Total unique images: {len(image_expert_counts)}")
    print(f"  Images with multiple experts: {len(multi_expert_images)}")
    print(f"  Images with single expert: {len(image_expert_counts) - len(multi_expert_images)}")

    # Aggregate scores (average when multiple experts rate same image)
    aggregated_data = []

    for image_name, group in valid_data.groupby('image'):
        agg_scores = {
            'image': image_name,
            # Every row of the group shares the file name, hence the style.
            'style': group['style'].iloc[0],
            'num_experts': len(group),
        }

        # Dimensions first, total last: this fixes the output column order.
        for column in (*score_columns, 'total_score'):
            if column not in group.columns:
                continue
            values = group[column].dropna()
            if len(values) == 0:
                continue  # nobody rated this column for this image
            agg_scores[column] = values.mean()
            # std() of a single value is NaN; report 0 spread instead.
            agg_scores[f'{column}_std'] = values.std() if len(values) > 1 else 0

        aggregated_data.append(agg_scores)

    aggregated_df = pd.DataFrame(aggregated_data)
    print(f"Aggregated dataset: {len(aggregated_df)} unique images")

    return aggregated_df

# ============================================================================
# ACCESSIBILITY SCORE CALCULATION
# ============================================================================

def calculate_accessibility_scores(aggregated_df, dimensions=None):
    """Compute a weighted 0-100 accessibility score for every image.

    Each available dimension is normalised by its ``scale_max`` and multiplied
    by its ``weight``. The weighted sum is divided by the total weight of the
    dimensions that were actually available for the image, so a missing
    dimension re-distributes its weight over the remaining ones.

    Args:
        aggregated_df: One row per image with ``image``, ``style``,
            ``num_experts`` and the (averaged) dimension columns.
        dimensions: Mapping ``dimension -> {'scale_max': ..., 'weight': ...}``.
            Defaults to ``ACCESSIBILITY_DIMENSIONS``.

    Returns:
        DataFrame with ``image``, ``style``, ``num_experts``,
        ``accessibility_score``, ``total_score`` and ``dimension_scores`` (a
        dict with the per-dimension breakdown). ``accessibility_score`` is NaN
        for an image that has none of the dimensions rated.
    """
    print("\nCalculating accessibility scores...")

    dimension_config = ACCESSIBILITY_DIMENSIONS if dimensions is None else dimensions

    accessibility_scores = []

    for _, row in aggregated_df.iterrows():
        weighted_score = 0
        available_weight = 0
        dimension_scores = {}

        for dim, config in dimension_config.items():
            if dim not in row or pd.isna(row[dim]):
                continue  # dimension not rated for this image

            score = row[dim]
            max_score = config['scale_max']
            weight = config['weight']

            # Normalize to 0-1 scale, then apply weight
            normalized_score = score / max_score
            weighted_contribution = normalized_score * weight

            weighted_score += weighted_contribution
            available_weight += weight

            dimension_scores[dim] = {
                'raw_score': score,
                'max_score': max_score,
                'normalized': normalized_score,
                'weighted_contribution': weighted_contribution
            }

        if available_weight > 0:
            # Convert to 0-100 scale
            accessibility_score = (weighted_score / available_weight) * 100
        else:
            # No rated dimension means "unknown", not "worst possible": NaN
            # keeps the image out of the per-style means instead of pulling
            # them towards 0.
            accessibility_score = float('nan')

        accessibility_scores.append({
            'image': row['image'],
            'style': row['style'],
            'num_experts': row['num_experts'],
            'accessibility_score': accessibility_score,
            'total_score': row.get('total_score', 0),
            'dimension_scores': dimension_scores
        })

    return pd.DataFrame(accessibility_scores)

# ============================================================================
# STYLE RANKING ANALYSIS
# ============================================================================

def rank_styles_by_accessibility(accessibility_df, styles=None):
    """Summarise accessibility scores per style and rank the styles.

    Args:
        accessibility_df: One row per image with ``style``,
            ``accessibility_score``, ``total_score`` and ``num_experts``.
        styles: Iterable of style labels to rank. Defaults to
            ``KNOWN_STYLES``. Styles present in the data but absent from this
            list are not reported.

    Returns:
        DataFrame with one row per requested style, sorted by
        ``Mean_Accessibility_Score`` (descending) and numbered in ``Rank``.
        Styles without any image get zeros and ``Confidence == 'No Data'``.
        Styles with equal means keep the order in which they were requested.
        Score columns are rounded to two decimals.
    """
    print(f"\n{'='*80}")
    print("STYLE ACCESSIBILITY RANKING ANALYSIS")
    print(f"{'='*80}")
    print("Weighting: Text-Image Alignment (60%), Image Simplicity (25%), Image Quality (15%)")

    style_names = list(KNOWN_STYLES if styles is None else styles)

    # Calculate style statistics
    style_stats = []

    for style in style_names:
        style_data = accessibility_df[accessibility_df['style'] == style]

        if len(style_data) == 0:
            # No data for this style
            style_stats.append({
                'Style': style,
                'N_Images': 0,
                'Mean_Accessibility_Score': 0,
                'Std_Accessibility_Score': 0,
                'Mean_Total_Score': 0,
                'Std_Total_Score': 0,
                'Multi_Expert_Images': 0,
                'Confidence': 'No Data'
            })
            continue

        acc_scores = style_data['accessibility_score']
        total_scores = style_data['total_score']
        multi_expert_count = (style_data['num_experts'] > 1).sum()

        # Confidence level based on sample size and multi-expert coverage
        n_images = len(style_data)
        if n_images >= 10 and multi_expert_count >= 3:
            confidence = 'High'
        elif n_images >= 5 and multi_expert_count >= 1:
            confidence = 'Medium'
        elif n_images >= 3:
            confidence = 'Low'
        else:
            confidence = 'Very Low'

        style_stats.append({
            'Style': style,
            'N_Images': n_images,
            'Mean_Accessibility_Score': acc_scores.mean(),
            # Sample standard deviation: NaN for a style with a single image.
            'Std_Accessibility_Score': acc_scores.std(),
            'Mean_Total_Score': total_scores.mean(),
            'Std_Total_Score': total_scores.std(),
            'Multi_Expert_Images': multi_expert_count,
            'Confidence': confidence
        })

    # Create results dataframe
    results_df = pd.DataFrame(style_stats)

    # Sort by accessibility score (descending). A stable sort makes the rank
    # of tied styles (e.g. several "No Data" styles at 0) deterministic: they
    # stay in the order of `style_names`.
    results_df = results_df.sort_values(
        'Mean_Accessibility_Score', ascending=False, kind='mergesort'
    )

    # Add ranking
    results_df['Rank'] = range(1, len(results_df) + 1)

    # Round numerical columns
    numeric_cols = ['Mean_Accessibility_Score', 'Std_Accessibility_Score',
                    'Mean_Total_Score', 'Std_Total_Score']
    results_df[numeric_cols] = results_df[numeric_cols].round(2)

    return results_df

def print_detailed_ranking(results_df):
    """Print the ranking tables, a top-3 summary and an interpretation guide.

    Args:
        results_df: Per-style ranking table with the columns ``Rank``,
            ``Style``, ``Mean_Accessibility_Score``,
            ``Std_Accessibility_Score``, ``Mean_Total_Score``,
            ``Std_Total_Score``, ``N_Images``, ``Multi_Expert_Images`` and
            ``Confidence``. Its row order is used as the accessibility ranking.
    """
    banner = '=' * 80
    print(f"\n{banner}")
    print("STYLE ACCESSIBILITY RANKING RESULTS")
    print(banner)

    # Table 1: ranking by the weighted accessibility score.
    print("\nACCESSIBILITY SCORE RANKING:")
    accessibility_view = results_df[[
        'Rank', 'Style', 'Mean_Accessibility_Score', 'Std_Accessibility_Score',
        'N_Images', 'Multi_Expert_Images', 'Confidence',
    ]]
    print(tabulate(accessibility_view, headers='keys', tablefmt='grid', showindex=False))

    # Table 2: the same styles re-ranked by total score, for comparison.
    by_total = results_df.sort_values('Mean_Total_Score', ascending=False)
    by_total = by_total.assign(Total_Score_Rank=range(1, len(by_total) + 1))

    print(f"\nTOTAL SCORE RANKING (includes text dimensions - for reference):")
    total_view = by_total[['Total_Score_Rank', 'Style', 'Mean_Total_Score', 'Std_Total_Score']]
    print(tabulate(total_view, headers='keys', tablefmt='grid', showindex=False))

    # Highlight the first three rows of the accessibility ranking.
    print(f"\n🏆 TOP 3 STYLES FOR ACCESSIBILITY:")
    print("=" * 50)

    for place, (_, entry) in enumerate(results_df.head(3).iterrows(), start=1):
        summary_lines = (
            f"\n#{place}. {entry['Style'].upper()}",
            f"   📊 Accessibility Score: {entry['Mean_Accessibility_Score']:.1f}/100 (±{entry['Std_Accessibility_Score']:.1f})",
            f"   📈 Total Score: {entry['Mean_Total_Score']:.1f}/100 (±{entry['Std_Total_Score']:.1f})",
            f"   🔍 Sample Size: {entry['N_Images']} images ({entry['Multi_Expert_Images']} multi-expert)",
            f"   ✅ Confidence: {entry['Confidence']}",
        )
        for line in summary_lines:
            print(line)

    # Closing notes on how to read the numbers above.
    print(f"\n📋 INTERPRETATION GUIDE:")
    guide = (
        "• Accessibility Score: Weighted combination of Text-Image Alignment (60%), Image Simplicity (25%), Image Quality (15%)",
        "• Total Score: All 6 dimensions combined (includes text dimensions that don't vary by style)",
        "• Confidence levels based on sample size and multi-expert validation",
        "• Higher scores indicate better accessibility performance",
    )
    for note in guide:
        print(note)

def analyze_dimension_performance_by_style(aggregated_df):
    """Print, per style, the mean and scale utilisation of each accessibility dimension.

    For every style in ``KNOWN_STYLES`` that has at least one image, the mean
    raw score of each dimension in ``ACCESSIBILITY_DIMENSIONS`` is computed
    together with its utilisation (mean as a percentage of ``scale_max``).
    The table is sorted by mean Text-Image Alignment when that column exists.
    Nothing is printed after the header if no style has data.

    Args:
        aggregated_df: One row per image with a ``style`` column and the
            dimension score columns.
    """
    rule = '=' * 80
    print(f"\n{rule}")
    print("DIMENSION-BY-STYLE PERFORMANCE ANALYSIS")
    print(rule)

    per_style_rows = []

    for style_name in KNOWN_STYLES:
        subset = aggregated_df[aggregated_df['style'] == style_name]
        if not len(subset):
            continue  # style has no images; leave it out of the table

        summary = {'Style': style_name, 'N_Images': len(subset)}

        for dim, config in ACCESSIBILITY_DIMENSIONS.items():
            if dim not in subset.columns:
                continue
            observed = subset[dim].dropna()
            if len(observed) == 0:
                continue
            average = observed.mean()
            share_of_scale = (average / config['scale_max']) * 100
            label = config['display_name']
            summary[f"{label}_Mean"] = average
            summary[f"{label}_Util%"] = share_of_scale

        per_style_rows.append(summary)

    dim_df = pd.DataFrame(per_style_rows)
    if len(dim_df) == 0:
        return

    # Round every statistic column to two decimals.
    stat_columns = [name for name in dim_df.columns if 'Mean' in name or 'Util%' in name]
    dim_df[stat_columns] = dim_df[stat_columns].round(2)

    # Text-Image Alignment carries the largest weight, so order by it.
    sort_key = "Text-Image Alignment_Mean"
    if sort_key in dim_df.columns:
        dim_df = dim_df.sort_values(sort_key, ascending=False)

    print("\nDIMENSION PERFORMANCE BY STYLE:")
    print(tabulate(dim_df, headers='keys', tablefmt='grid', showindex=False))

# ============================================================================
# MAIN EXECUTION
# ============================================================================

def main():
    """Run the whole pipeline: load, map, score, rank and report.

    Returns:
        dict with the keys ``results`` (per-style ranking),
        ``accessibility_scores`` (per-image scores), ``aggregated_data``
        (per-image averaged annotations) and ``style_mapping``; or ``None``
        when the mapping or the annotations could not be loaded, or no image
        could be mapped to a style.
    """
    rule = "=" * 80
    for line in (
        rule,
        "STYLE ACCESSIBILITY RANKING ANALYSIS",
        rule,
        "Finding the most suitable visual style for accessibility",
        "Focus: Text-Image Alignment (60%), Image Simplicity (25%), Image Quality (15%)",
    ):
        print(line)

    # Step 1: inputs. Each loader reports its own problems.
    style_mapping = load_style_mapping()
    if style_mapping is None:
        return None

    all_data = load_all_expert_data()
    if all_data is None or len(all_data) == 0:
        print("ERROR: No annotation data loaded")
        return None

    # Step 2: one row per image, averaged over the experts who rated it.
    aggregated_df = map_images_to_styles(all_data, style_mapping)
    if len(aggregated_df) == 0:
        print("ERROR: No images successfully mapped to styles")
        return None

    # Step 3: per-image scores, then the per-style ranking.
    accessibility_df = calculate_accessibility_scores(aggregated_df)
    results_df = rank_styles_by_accessibility(accessibility_df)

    # Step 4: reports.
    print_detailed_ranking(results_df)
    analyze_dimension_performance_by_style(aggregated_df)

    print(f"\n{rule}")
    print("ANALYSIS COMPLETE!")
    print(rule)
    print("Variables created:")
    for note in (
        "• results_df: Main ranking results",
        "• accessibility_df: Individual image accessibility scores",
        "• aggregated_df: Aggregated annotation data",
    ):
        print(note)

    return {
        'results': results_df,
        'accessibility_scores': accessibility_df,
        'aggregated_data': aggregated_df,
        'style_mapping': style_mapping,
    }

# ============================================================================
# EXECUTION
# ============================================================================

# Run the full analysis only when executed as a script. The value returned by
# main() (a dict of result tables, or None if the analysis stopped early) is
# kept in a module-level name so it can be inspected afterwards.
if __name__ == "__main__":
    analysis_results = main()

STYLE ACCESSIBILITY RANKING ANALYSIS
Finding the most suitable visual style for accessibility
Focus: Text-Image Alignment (60%), Image Simplicity (25%), Image Quality (15%)
Loading style mapping...
✓ Loaded mapping for 2000 images
Style distribution in mapping:
  artistic: 200 images
  technical: 200 images
  cartoon: 200 images
  realistic: 200 images
  storybook: 200 images
  3d rendered: 200 images
  geometric: 200 images
  retro: 200 images
  minimalistic: 200 images
  digital art: 200 images
Loading expert annotations...
Loading Expert_A...
  - Loaded 250 annotations
Loading Expert_K...
  - Loaded 250 annotations
Loading Expert_L...
  - Loaded 200 annotations
Loading Expert_M...
  - Loaded 276 annotations

Total annotations loaded: 976

Mapping images to styles and aggregating multi-expert ratings...
Images successfully mapped to styles: 976
Images with unknown styles: 0

Multi-expert annotation summary:
  Total unique images: 814
  Images with multiple experts: 113
  Images with 