# JND Frequency Analysis: Age Effect

This notebook analyzes the effect of Age Group (Old vs. Young) on JND frequency under four different conditions:
1. Frequency 500Hz, ISI 1000ms
2. Frequency 500Hz, ISI 100ms
3. Frequency 3000Hz, ISI 1000ms
4. Frequency 3000Hz, ISI 100ms

## Methodology
1. **Data Loading**: Load the summary data processed from individual CSV files.
2. **Preprocessing**: Remove outliers separately within each age group using the 1.5 × IQR (interquartile range) rule.
3. **Statistical Analysis**:
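    - Check each group for normality (Shapiro-Wilk test, α = 0.05).
    - If both groups are consistent with normality: independent-samples t-test.
    - Otherwise (either group is non-normal or too small to test): Mann-Whitney U test.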
4. **Visualization**: Boxplots with stripplots to show individual data points.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Plot appearance. The seaborn style is applied first so that the rcParams
# overrides below are set after it.
sns.set_style("whitegrid")
plt.rcParams.update({
    # Sans-serif preference list: SimHei (for Chinese characters if needed), then Arial
    'font.sans-serif': ['SimHei', 'Arial'],
    # Render minus signs with the ASCII hyphen instead of the Unicode minus glyph
    'axes.unicode_minus': False,
})

In [None]:
# Load the JND summary table.
# NOTE(review): this is an absolute, machine-specific path; point it at your own
# copy of jnd_summary.csv before running on another machine.
file_path = r'c:\mpc_new_data\jnd_summary.csv'
df = pd.read_csv(file_path)

print("Data Head:")
display(df.head())
print("\nData Info:")
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() would emit an extra "None" line after the report.
df.info()
print("\nGroup Counts:")
print(df['Group'].value_counts())

In [None]:
def remove_outliers_iqr(df_in, column, group_col):
    """
    Remove outliers from ``column`` using the 1.5 * IQR rule, applied per group.

    For every distinct non-null value of ``group_col`` the first and third
    quartiles of ``column`` are computed on that group's rows only, and rows
    whose value lies outside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` are removed.
    A one-line summary is printed for each group that loses rows.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input table. It is not modified.
    column : str
        Name of the numeric column to screen for outliers.
    group_col : str
        Name of the column holding the group labels.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_in`` without the outlier rows; the surviving rows keep
        their original index labels. Rows with a missing group label or a
        missing value in ``column`` are never flagged and are returned as-is.
    """
    # Outliers are tracked by row position, not by index label: dropping by
    # label would also delete non-outlier rows whenever the index contains
    # duplicate labels (e.g. after concatenating several files).
    outlier_mask = np.zeros(len(df_in), dtype=bool)

    for group in df_in[group_col].dropna().unique():
        group_mask = (df_in[group_col] == group).fillna(False).to_numpy(dtype=bool)
        group_data = df_in.loc[group_mask, column]

        # Skip outlier detection for very small samples (counting only
        # non-missing observations).
        if group_data.count() < 4:
            continue

        q1 = group_data.quantile(0.25)
        q3 = group_data.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

        # Comparisons against NaN are False, so missing values are not flagged.
        flagged = (
            ((group_data < lower_bound) | (group_data > upper_bound))
            .fillna(False)
            .to_numpy(dtype=bool)
        )
        # group_mask selects this group's rows in order, so the per-group flags
        # line up one-to-one with the selected positions.
        outlier_mask[group_mask] = flagged

        if flagged.any():
            outliers = group_data[flagged]
            current_indices = outliers.index.tolist()
            print(f"[{group}] Column '{column}': Dropped {len(outliers)} outliers (Indices: {current_indices}, Values: {outliers.values})")

    return df_in.loc[~outlier_mask].copy()

def analyze_and_plot(df_input, value_col, group_col='Group'):
    """
    Compare ``value_col`` between the two groups in ``group_col`` and plot it.

    Steps:
      1. Remove per-group IQR outliers with ``remove_outliers_iqr``.
      2. Drop rows with a missing group label or a missing value.
      3. Run a Shapiro-Wilk normality test on each group (alpha = 0.05).
      4. Run an independent t-test if every group is consistent with
         normality, otherwise a two-sided Mann-Whitney U test.
      5. Draw a box plot with the individual observations overlaid.

    All results are printed; nothing is returned. Failures in the statistical
    test or in plotting are reported with a printed message instead of being
    raised, so a loop over several columns keeps going.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Table containing ``value_col`` and ``group_col``. It is not modified.
    value_col : str
        Name of the numeric column to compare.
    group_col : str, default 'Group'
        Name of the column holding the group labels; exactly two distinct
        labels are required for the statistical test.
    """
    alpha = 0.05  # significance level used for the normality decision

    print(f"\n{'='*20}\nAnalyzing: {value_col}\n{'='*20}")

    # 1. Remove Outliers
    df_clean = remove_outliers_iqr(df_input, value_col, group_col)

    # 2. Drop incomplete rows. Missing values would propagate through
    # shapiro/ttest_ind as NaN results, and a NaN group label cannot be sorted
    # together with string labels.
    df_clean = df_clean.dropna(subset=[group_col, value_col])

    # Sorted so the group order is the same in the printout and in the plot.
    group_labels = sorted(df_clean[group_col].unique())
    groups = [df_clean.loc[df_clean[group_col] == g, value_col] for g in group_labels]

    # 3. Normality Test (Shapiro-Wilk)
    print("\nNormality Test (Shapiro-Wilk):")
    is_normal = True
    for label, g_data in zip(group_labels, groups):
        if len(g_data) < 3:
            print(f"  {label}: Sample size too small for Shapiro ({len(g_data)})")
            is_normal = False  # Fallback to non-parametric if small
            continue
        stat, p = stats.shapiro(g_data)
        # One boolean drives both the printed label and the decision, so the
        # two cannot disagree (including at p == alpha or for a NaN p-value).
        group_is_normal = p >= alpha
        print(f"  {label}: p={p:.4f} ({'Normal' if group_is_normal else 'Not Normal'})")
        if not group_is_normal:
            is_normal = False

    # 4. Statistical Test
    print("\nStatistical Test:")
    try:
        # Both tests are two-sample tests; passing any other number of groups
        # positionally would be misread as an option argument.
        if len(groups) != 2:
            raise ValueError(
                f"expected exactly 2 groups in '{group_col}', found {len(groups)}: {group_labels}"
            )
        group_a, group_b = groups

        if is_normal:
            stat, p = stats.ttest_ind(group_a, group_b)
            test_name = "Independent T-test"
        else:
            # Explicitly two-sided so the result does not depend on the
            # default of the installed SciPy version.
            stat, p = stats.mannwhitneyu(group_a, group_b, alternative='two-sided')
            test_name = "Mann-Whitney U Test"

        print(f"  {test_name}: statistic={stat:.4f}, p-value={p:.4f}")

        if p < 0.001:
            significance = "***"
        elif p < 0.01:
            significance = "**"
        elif p < 0.05:
            significance = "*"
        else:
            significance = "ns"

        print(f"  Result: {significance} (p < 0.05 is significant)")

    except Exception as e:
        # Best effort: report the problem and still draw the plot.
        print(f"  Error performing statistical test: {e}")
        # NaN rather than 1.0, so the plot title does not present a failed
        # test as a genuine non-significant result.
        p = float('nan')
        test_name = "Error"

    # 5. Plotting
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 6))

        # Boxplot
        sns.boxplot(x=group_col, y=value_col, data=df_clean, palette="Set2",
                    showfliers=False, order=group_labels, ax=ax)
        # Strip plot for individual points
        sns.stripplot(x=group_col, y=value_col, data=df_clean, color='black',
                      alpha=0.5, jitter=True, order=group_labels, ax=ax)

        ax.set_title(f'{value_col} by {group_col}\n({test_name}, p={p:.4f})')
        ax.set_ylabel('JND (Hz)')
        ax.grid(True, axis='y', alpha=0.3)
        plt.show()
    except Exception as e:
        print(f"Error plotting: {e}")
        # Discard the half-drawn figure so it is neither leaked nor shown later.
        if fig is not None:
            plt.close(fig)

In [None]:
# Run the Old-vs-Young comparison once per condition column; the four columns
# correspond to the frequency / ISI combinations listed at the top of the notebook.
conditions = [
    'JND_500_1000',
    'JND_500_100',
    'JND_3000_1000',
    'JND_3000_100',
]

for cond in conditions:
    analyze_and_plot(df, cond)