In [14]:
import pandas as pd
import numpy as np
from scipy.stats import friedmanchisquare, kruskal, mannwhitneyu, wilcoxon
from statsmodels.stats.multitest import multipletests
import os
import warnings
import matplotlib.pyplot as plt
import seaborn as sns

def calculate_metrics(*, test_csv=None, practice_csv=None, base_dir=None):
    """
    Compute per-group, per-task mean tables from the ranking CSVs and save them.

    For each dataset (test and practice) this writes, into ``<base_dir>/results``:

    * one wide table per metric (``<prefix>_tasks_<metric>_means.csv``) with
      groups as rows and tasks as columns, and
    * one long-format table (``<prefix>_tasks_all_metrics.csv``) with one row
      per Group/Task pair and one column per metric.

    Parameters:
    -----------
    test_csv : str, optional
        Path of the test-task CSV. Defaults to the original hard-coded
        ``RankingTest_with_numeric.csv`` location.
    practice_csv : str, optional
        Path of the practice-task CSV. Defaults to the original hard-coded
        ``RankingPractice_with_numeric.csv`` location.
    base_dir : str, optional
        Directory under which the ``results`` subdirectory is created.
        Defaults to ``"../data/perceived_ranking_results/"``.

    Returns:
    --------
    str
        The base directory path where files are saved.
    """
    if test_csv is None:
        test_csv = '../data/perceived_ranking_results/raw_data/RankingTest_with_numeric.csv'
    if practice_csv is None:
        practice_csv = '../data/perceived_ranking_results/raw_data/RankingPractice_with_numeric.csv'
    if base_dir is None:
        base_dir = "../data/perceived_ranking_results/"

    # Read the CSV files
    test_df = pd.read_csv(test_csv)
    practice_df = pd.read_csv(practice_csv)

    # Metrics available in each dataset
    test_metrics = ['Difficulty', 'Satisfaction', 'Stress']
    practice_metrics = ['Difficulty', 'Satisfaction', 'Stress', 'Helpfulness', 'Adherence']

    # Create the results subdirectory if it doesn't exist
    results_dir = os.path.join(base_dir, "results")
    os.makedirs(results_dir, exist_ok=True)

    _save_mean_tables(test_df, test_metrics, "test", results_dir)
    _save_mean_tables(practice_df, practice_metrics, "practice", results_dir)

    return base_dir


def _save_mean_tables(df, metrics, prefix, results_dir):
    """Write the per-metric mean tables and the combined long table for one dataset."""
    combined = None  # explicit "nothing merged yet" sentinel (an empty table is still data)
    for metric in metrics:
        pivot = pd.pivot_table(df,
                               values=metric,
                               index='Group_ID',
                               columns='Task_ID',
                               aggfunc='mean').round(2)
        pivot.index = [f'Group {i}' for i in pivot.index]
        pivot.to_csv(os.path.join(results_dir, f"{prefix}_tasks_{metric.lower()}_means.csv"))

        # Wide -> long: one row per (Group, Task_ID) holding this metric's mean
        long_df = (pivot.rename_axis('Group')
                        .reset_index()
                        .melt(id_vars='Group', var_name='Task_ID', value_name=metric))

        if combined is None:
            combined = long_df
        else:
            # Default inner join: a Group/Task pair is kept only if it appears
            # in every metric's table.
            combined = pd.merge(combined, long_df, on=['Group', 'Task_ID'])

    if combined is None:
        combined = pd.DataFrame()
    combined.to_csv(os.path.join(results_dir, f"{prefix}_tasks_all_metrics.csv"), index=False)


In [15]:
def visualize_metrics(base_dir=None):
    """
    Create line-chart visualizations of the ranking metrics and save them as PNGs.

    The raw ranking CSVs are re-read from their fixed location under
    ``../data/perceived_ranking_results/raw_data/`` (this location does not
    depend on ``base_dir``). For each dataset (test and practice) the function
    saves one figure per metric plus one combined panel figure into
    ``<base_dir>/figures``. Every chart shows, per group, the mean score per
    task and a horizontal line at that group's overall average. The averages
    and the path of every saved figure are printed.

    Note: ``plt.rcParams`` is updated globally and is not restored afterwards.

    Parameters:
    -----------
    base_dir : str
        Base directory where the figures will be saved.
        If None, uses the default directory.
    """
    if base_dir is None:
        base_dir = "../data/perceived_ranking_results/"

    # Create figures directory if it doesn't exist
    figures_dir = os.path.join(base_dir, "figures")
    os.makedirs(figures_dir, exist_ok=True)

    # Load data from the original files
    test_df = pd.read_csv('../data/perceived_ranking_results/raw_data/RankingTest_with_numeric.csv')
    practice_df = pd.read_csv('../data/perceived_ranking_results/raw_data/RankingPractice_with_numeric.csv')

    # Define metrics for each dataset
    test_metrics = ['Difficulty', 'Satisfaction', 'Stress']
    practice_metrics = ['Difficulty', 'Satisfaction', 'Stress', 'Helpfulness', 'Adherence']

    # Test averages cover T1-T5 only (T0 is excluded); practice averages use all tasks
    scored_test_df = test_df[test_df['Task_ID'].isin(['T1', 'T2', 'T3', 'T4', 'T5'])]
    test_avg_by_group = _print_group_averages(scored_test_df, test_metrics, 'T1-T5')
    practice_avg_by_group = _print_group_averages(practice_df, practice_metrics, 'All Practice')

    # Set consistent figure style
    plt.rcParams.update({
        'figure.facecolor': 'white',
        'axes.facecolor': 'white',
        'savefig.facecolor': 'white',
        'font.size': 12,
        'axes.labelsize': 12,
        'axes.titlesize': 14,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'legend.fontsize': 10
    })

    datasets = [
        ('Test', test_df, test_metrics, test_avg_by_group),
        ('Practice', practice_df, practice_metrics, practice_avg_by_group),
    ]

    # One figure per metric, test tasks first and then practice tasks
    for kind, df, metrics, avg_by_group in datasets:
        for metric in metrics:
            _save_individual_figure(df, metric, avg_by_group[metric], kind, figures_dir)

    # Combined panels (one subplot per metric)
    for kind, df, metrics, avg_by_group in datasets:
        _save_panel_figure(df, metrics, avg_by_group, kind, figures_dir)

    print("\nAll visualizations have been saved to the directory:", figures_dir)


# Per-group plot styling; position i in every tuple belongs to group _GROUP_IDS[i].
_GROUP_IDS = (1, 2, 3, 4)
_GROUP_NAMES = ('G1', 'G2', 'G3', 'G4')
# Colorblind-friendly colors
_GROUP_COLORS = ('#E69F00',   # orange
                 '#56B4E9',   # light blue
                 '#009E73',   # green
                 '#CC79A7')   # pink
_GROUP_MARKERS = ('o',   # circle
                  '^',   # triangle up
                  's',   # square
                  'D')   # diamond
# Distinct dash patterns so the average lines can be told apart without color
_AVG_LINESTYLES = ((0, (1, 1)),         # dotted
                   (0, (5, 5)),         # dashed
                   (0, (3, 1, 1, 1)),   # dash-dot
                   (0, (8, 5)))         # long dash
_AVG_LINEWIDTH = 3.5
_AVG_ALPHA = 0.8
# Whether a group's point labels go above (True) or below (False) the marker,
# alternated between groups to reduce overlap.
_LABEL_ABOVE = (True, False, True, False)


def _print_group_averages(df, metrics, scope_label):
    """Return ``{metric: {group_id: mean rounded to 3 decimals}}``, printing each value."""
    averages = {}
    for metric in metrics:
        averages[metric] = {}
        for group_id in _GROUP_IDS:
            group_rows = df[df['Group_ID'] == group_id]
            avg_value = group_rows[metric].mean().round(3)
            averages[metric][group_id] = avg_value
            print(f"Average {metric} for Group {group_id} ({scope_label}): {avg_value}")
    return averages


def _draw_metric_axes(ax, df, metric, avg_by_group, *, markersize, avg_fontsize,
                      annotate_points, label_avg_lines, y_limits, y_ticks=None):
    """Draw per-task group means and group average lines for one metric on ``ax``.

    ``avg_by_group`` maps group id -> average value for this metric.
    Returns ``(group_handles, avg_handles)``: the plotted group lines and the
    average lines, each in group order.
    """
    # Path effects give the point labels a white outline
    import matplotlib.patheffects as pe

    # Task IDs - sorted by their string form to handle mixed types
    task_ids = sorted(df['Task_ID'].unique(), key=str)
    # Evenly spaced x position for every task
    positions = {task: pos for pos, task in enumerate(task_ids)}

    group_handles = []
    avg_handles = []
    for group_idx, group_id in enumerate(_GROUP_IDS):
        group_name = _GROUP_NAMES[group_idx]
        color = _GROUP_COLORS[group_idx]

        # Mean of the metric per task for this group
        means = df[df['Group_ID'] == group_id].groupby('Task_ID')[metric].mean()
        x_values = [positions[task] for task in means.index]
        group_line = ax.plot(x_values, means.values,
                             marker=_GROUP_MARKERS[group_idx],
                             linewidth=2,
                             label=group_name,
                             color=color,
                             markersize=markersize)[0]
        group_handles.append(group_line)

        if annotate_points:
            above = _LABEL_ABOVE[group_idx]
            for x, y in zip(x_values, means.values):
                ax.annotate(f'{y:.1f}',
                            (x, y),
                            textcoords="offset points",
                            xytext=(0, 15 if above else -15),
                            ha='center',
                            va='bottom' if above else 'top',
                            color=color,
                            fontsize=9,
                            fontweight='bold',
                            path_effects=[pe.withStroke(linewidth=2, foreground='white')])

        # Horizontal line at the group's overall average
        avg_value = avg_by_group[group_id]
        avg_kwargs = {
            'color': color,
            'linestyle': _AVG_LINESTYLES[group_idx],
            'alpha': _AVG_ALPHA,
            'linewidth': _AVG_LINEWIDTH,
        }
        if label_avg_lines:
            # Only labelled lines show up in an automatically built legend
            avg_kwargs['label'] = f"{group_name} Avg"
        avg_handles.append(ax.axhline(y=avg_value, **avg_kwargs))

        # Average value as text just outside the right edge of the axes
        ax.annotate(f'{avg_value:.1f}',
                    xy=(1.01, avg_value),
                    xycoords=('axes fraction', 'data'),
                    fontsize=avg_fontsize,
                    color=color,
                    fontweight='bold',
                    va='center')

    # Faint vertical guide line at every task position
    for pos in positions.values():
        ax.axvline(x=pos, color='gray', linestyle='-', alpha=0.1)

    # Set x-axis ticks and labels
    ax.set_xticks(list(positions.values()))
    ax.set_xticklabels(list(positions.keys()))

    ax.set_ylim(*y_limits)
    if y_ticks is not None:
        ax.set_yticks(y_ticks)

    # Only show horizontal grid lines
    ax.yaxis.grid(True, linestyle='-', alpha=0.2)
    ax.xaxis.grid(False)

    return group_handles, avg_handles


def _save_individual_figure(df, metric, avg_by_group, kind, figures_dir):
    """Save the single-metric figure for one dataset (``kind`` is 'Test' or 'Practice')."""
    fig, ax = plt.subplots(figsize=(10, 7))

    group_handles, avg_handles = _draw_metric_axes(
        ax, df, metric, avg_by_group,
        markersize=8,
        avg_fontsize=10,
        annotate_points=True,
        label_avg_lines=True,
        y_limits=(1, 5.2),          # y-axis starts at 1
        y_ticks=[1, 2, 3, 4, 5],    # whole-number ticks only
    )

    ax.set_title(f'{kind} Tasks: {metric} Scores by Group', pad=20, fontsize=14, fontweight='bold')
    ax.set_xlabel('Task ID', fontsize=12)
    ax.set_ylabel(f'{metric} Score', fontsize=12)

    # Legend lists all group lines first, then all average lines. The handles
    # are collected explicitly because the axes return them interleaved
    # (G1, G1 Avg, G2, ...), so slicing that list in half does not separate them.
    legend_handles = group_handles + avg_handles
    ax.legend(legend_handles, [handle.get_label() for handle in legend_handles],
              fontsize=10,
              ncol=1,
              loc='center left',
              bbox_to_anchor=(1.05, 0.5),
              handlelength=3,
              borderaxespad=0.)

    fig.tight_layout()
    fig_path = os.path.join(figures_dir, f'{kind.lower()}_tasks_{metric.lower()}.png')
    fig.savefig(fig_path, bbox_inches='tight', dpi=300, facecolor='white')
    print(f"Saved individual {metric} {kind.lower()} task visualization to {fig_path}")
    plt.close(fig)


def _save_panel_figure(df, metrics, avg_by_group, kind, figures_dir):
    """Save the combined panel (one subplot per metric) for one dataset."""
    # 5 inches of width per metric; squeeze=False keeps `axes` 2-D even for one metric
    fig, axes = plt.subplots(1, len(metrics), figsize=(5 * len(metrics), 5), squeeze=False)
    fig.suptitle(f'{kind} Tasks Metrics by Group', fontsize=16, fontweight='bold', y=1.05)

    for col, metric in enumerate(metrics):
        ax = axes[0, col]
        # NOTE(review): panels start the y-axis at 0 while the individual
        # figures start at 1 - confirm whether this difference is intended.
        _draw_metric_axes(
            ax, df, metric, avg_by_group[metric],
            markersize=6,            # smaller markers for panel
            avg_fontsize=8,
            annotate_points=False,
            label_avg_lines=False,
            y_limits=(0, 5.2),
        )

        ax.set_title(f'{metric} Scores', fontsize=12, fontweight='bold')
        ax.set_xlabel('Task ID', fontsize=10)
        ax.set_ylabel(f'{metric} Score', fontsize=10)

        # Only add legend to the first subplot
        if col == 0:
            ax.legend(fontsize=9, loc='upper right')

    fig.tight_layout()
    fig_path = os.path.join(figures_dir, f'{kind.lower()}_tasks_metrics.png')
    fig.savefig(fig_path, bbox_inches='tight', dpi=300, facecolor='white')
    print(f"Saved {kind.lower()} tasks visualization to {fig_path}")
    plt.close(fig)

In [16]:
def run_analysis():
    """
    Run the whole workflow: compute and save the metric tables, then
    render the figures into the directory that step returns.
    """
    print("Step 1: Calculating and saving metrics...")
    output_root = calculate_metrics()

    print("\nStep 2: Creating visualizations...")
    visualize_metrics(output_root)

    print("\nAnalysis complete.")

# Run the full analysis only when this code is executed directly
# (i.e. __name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    run_analysis()

Step 1: Calculating and saving metrics...

Step 2: Creating visualizations...
Average Difficulty for Group 1 (T1-T5): 2.979
Average Difficulty for Group 2 (T1-T5): 3.156
Average Difficulty for Group 3 (T1-T5): 3.205
Average Difficulty for Group 4 (T1-T5): 2.408
Average Satisfaction for Group 1 (T1-T5): 3.447
Average Satisfaction for Group 2 (T1-T5): 2.822
Average Satisfaction for Group 3 (T1-T5): 3.14
Average Satisfaction for Group 4 (T1-T5): 3.755
Average Stress for Group 1 (T1-T5): 2.818
Average Stress for Group 2 (T1-T5): 2.837
Average Stress for Group 3 (T1-T5): 2.273
Average Stress for Group 4 (T1-T5): 1.959
Average Difficulty for Group 1 (All Practice): 2.771
Average Difficulty for Group 2 (All Practice): 3.354
Average Difficulty for Group 3 (All Practice): 3.304
Average Difficulty for Group 4 (All Practice): 3.02
Average Satisfaction for Group 1 (All Practice): 3.625
Average Satisfaction for Group 2 (All Practice): 3.327
Average Satisfaction for Group 3 (All Practice): 3.217
Ave