In [0]:
%pip install pyyaml>=6.0 -q

In [0]:
# Import libraries
import sys
import os
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'src'))

import numpy as np
import pandas as pd

# Import yaml with error handling
try:
    import yaml
except ImportError as exc:
    # Re-raise with install instructions. The requirement is shown quoted so
    # that copying the command does not let a shell interpret ">" as a
    # redirection; `from exc` keeps the original failure in the traceback.
    raise ImportError(
        "PyYAML is not installed. Please run the previous cell to install it, "
        'or run: %pip install "pyyaml>=6.0"'
    ) from exc

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Import our modular functions
from eda.feature_engineering import (
    create_time_features,
    create_task_speed_features,
    create_respondent_behavioral_features,
    add_wonky_features,
    create_fraud_risk_score,
    create_wonky_risk_score
)
from eda.statistical_tests import (
    compare_groups_statistically,
    compare_groups_with_both_tests,
    analyze_thresholds,
    perform_chi_square_tests,
    compare_demographic_groups
)
from eda.visualizations import (
    create_histogram,
    create_box_plot,
    create_scatter_plot,
    create_bar_plot,
    create_temporal_breakdown_summary,
    create_chi_squared_bar_chart,
    create_dual_axis_statistical_chart,
    create_feature_breakdown_table,
    create_distribution_comparison
)

# Load configuration files
def _read_yaml_config(config_path):
    """Open one YAML file in text mode and return its yaml.safe_load result."""
    with open(config_path, 'r') as config_file:
        return yaml.safe_load(config_file)


# Paths are relative to the notebook's working directory.
feature_config = _read_yaml_config('../configs/feature_engineering.yaml')
stats_config = _read_yaml_config('../configs/statistical_tests.yaml')
paths_config = _read_yaml_config('../configs/data_paths.yaml')

# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
print("✓ Imports and configs loaded successfully")


### File Definitions

- **user_info_df**: DataFrame of respondent x task level data for all users (not just wonky studies)
- **wonky_studies_df**: DataFrame of respondents involved in studies with unexpected outcomes (negative impacts when positive expected)

A study is "wonky" if its outcome is unexpected (e.g., an advertisement appears to have had a negative media impact, which is counter-intuitive).


### Load

In [0]:
# Load data files
# Why: reads the two parquet files from the repo's misc folder
# (the folder is created if it does not exist yet)

notebook_path = os.getcwd()
repo_root = os.path.abspath(os.path.join(notebook_path, ".."))
misc_dir = os.path.join(repo_root, "misc")
os.makedirs(misc_dir, exist_ok=True)

# Start from the default filenames; override them only when the config has a
# non-empty `output_files` section (it may be missing or commented out).
user_info_filename = "user_info_df_pullcomplete.parquet"
wonky_filename = "wonky_user_counts_pullcomplete.parquet"
if 'output_files' in paths_config and paths_config['output_files']:
    configured_files = paths_config['output_files']
    # Only the file name from the config is used; the directory is always misc_dir.
    user_info_filename = os.path.basename(configured_files['user_info_df'])
    wonky_filename = os.path.basename(configured_files['wonky_user_counts'])

output_path = os.path.join(misc_dir, user_info_filename)
wonky_path = os.path.join(misc_dir, wonky_filename)

user_info_df = pd.read_parquet(output_path)
wonky_studies_df = pd.read_parquet(wonky_path)

print("Files loaded successfully:")
for loaded_path in (output_path, wonky_path):
    print(f"  - {loaded_path}")
print(f"\nData shapes:")
print(f"  - user_info_df: {user_info_df.shape}")
print(f"  - wonky_studies_df: {wonky_studies_df.shape}")

### Feature Engineering

In [0]:
# Add the time-derived columns via the shared feature-engineering helper
user_info_df = create_time_features(user_info_df, date_col="date_completed")

# Mean of each flag column, reported as a percentage of all rows.
night_pct = user_info_df['is_night'].mean() * 100
weekend_pct = user_info_df['is_weekend'].mean() * 100
print(f"Night tasks: {night_pct:.1f}%")
print(f"Weekend tasks: {weekend_pct:.1f}%")

### Temporal Feature Analysis & Breakdowns

Analyzing temporal patterns to identify differences between wonky and non-wonky study tasks.


In [None]:
# Temporal feature breakdown summary
# Why: prints the summary returned by create_temporal_breakdown_summary for the
# temporal flags, grouped on wonky_study_count with a threshold of 0

temporal_features = [
    'is_weekend',
    'is_night',
    'is_business_hour',
    'is_business_hour_weekday',
    'is_business_hour_weekend',
]

# The grouping column is derived here only when it is absent and both frames
# carry the respondentPk key needed to look the counts up.
missing_wonky_count = 'wonky_study_count' not in user_info_df.columns
both_have_respondent_key = (
    'respondentPk' in user_info_df.columns
    and 'respondentPk' in wonky_studies_df.columns
)
if missing_wonky_count and both_have_respondent_key:
    wonky_mapping = wonky_studies_df.set_index('respondentPk')['wonky_task_instances'].to_dict()
    mapped_counts = user_info_df['respondentPk'].map(wonky_mapping)
    # Respondents without an entry in the mapping get a count of 0.
    user_info_df['wonky_study_count'] = mapped_counts.fillna(0)

# Print formatted breakdown
temporal_breakdown_text = create_temporal_breakdown_summary(
    user_info_df,
    temporal_features=temporal_features,
    group_col='wonky_study_count',
    group_threshold=0
)
print(temporal_breakdown_text)


#### Chi-Squared Tests for Temporal Features

Testing independence between temporal features and wonky study participation.
Chi-squared test determines if temporal patterns differ significantly between wonky and non-wonky groups.


In [None]:
# Chi-squared tests for the temporal features
# Why: checks each temporal flag against wonky_study_count at the 0.01 level
# and lists the features flagged as significant

chi_square_results = perform_chi_square_tests(
    user_info_df,
    temporal_features=temporal_features,
    group_var='wonky_study_count',
    significance_level=0.01
)

print("Chi-Squared Test Results:")
print("=" * 80)
display(chi_square_results)

# Summary (only when the test returned at least one row)
has_chi_square_rows = len(chi_square_results) > 0
if has_chi_square_rows:
    significant_mask = chi_square_results['significant'] == True
    significant_features = chi_square_results[significant_mask]
    n_significant = len(significant_features)
    print(f"\nSignificant temporal features (p < 0.01): {n_significant}")
    if n_significant > 0:
        # Feature names are read from the result's index.
        significant_names = significant_features.index.tolist()
        print("Features:", ', '.join(significant_names))


In [None]:
# Bar chart of the chi-squared statistics
# Why: makes it easy to compare the chi-squared value of each temporal feature

if len(chi_square_results) == 0:
    # Nothing to plot when the test produced no rows.
    print("No chi-squared test results available for visualization")
else:
    chi2_chart_options = {
        'chi2_col': 'chi2',
        'p_value_col': 'chi_p_value',
        'significance_level': 0.01,
        'title': "Chi-Squared Statistic by Temporal Feature",
    }
    fig_chi2 = create_chi_squared_bar_chart(chi_square_results, **chi2_chart_options)
    fig_chi2.show()


In [0]:
# Add the task-speed columns via the shared feature-engineering helper
time_thresholds = feature_config['time_thresholds']
suspicious_fast_seconds = time_thresholds['suspicious_fast_seconds']

user_info_df = create_task_speed_features(
    user_info_df,
    task_time_col="task_time_taken_s",
    suspicious_threshold=suspicious_fast_seconds,
    very_fast_threshold=time_thresholds['very_fast_seconds'],
    # The config stores this threshold in minutes; the helper receives seconds.
    very_slow_threshold=time_thresholds['very_slow_minutes'] * 60
)

suspicious_flags = user_info_df['is_suspiciously_fast']
print(
    f"Suspiciously fast (<{suspicious_fast_seconds}s): "
    f"{suspicious_flags.sum():,} ({suspicious_flags.mean()*100:.2f}%)"
)


In [0]:
# Build the respondent-level behavioural feature table
volume_cfg = feature_config['volume_thresholds']
behavioral_config = {
    'high_volume_percentile': volume_cfg['high_volume_percentile'],
    'extreme_volume_percentile': volume_cfg['extreme_volume_percentile'],
    'velocity_bins': feature_config['velocity_bins'],
    'velocity_labels': feature_config['velocity_labels'],
}

respondent_features = create_respondent_behavioral_features(
    user_info_df,
    respondent_id_col="respondentPk",
    date_col="date_completed",
    config=behavioral_config
)

n_respondents = respondent_features.shape[0]
mean_tasks = respondent_features['total_tasks'].mean()
mean_fast_rate_pct = respondent_features['suspicious_fast_rate'].mean() * 100
print(f"Aggregated to {n_respondents:,} respondents")
print(f"Avg tasks per respondent: {mean_tasks:.2f}")
print(f"Avg suspicious fast rate: {mean_fast_rate_pct:.2f}%")

In [0]:
# Attach the wonky-study columns to the respondent-level table
respondent_features = add_wonky_features(
    respondent_features,
    wonky_studies_df,
    respondent_id_col="respondentPk"
)

# Counts reported below: rows with a positive wonky_task_ratio, and rows
# where the is_high_wonky flag is set.
n_with_wonky = (respondent_features['wonky_task_ratio'] > 0).sum()
n_high_wonky = respondent_features['is_high_wonky'].sum()

print(f"Wonky features added")
print(f"Respondents with wonky tasks: {n_with_wonky:,}")
print(f"High wonky concentration (>50%): {n_high_wonky:,}")


### Statistical Test Results Visualization

Visualizing statistical test results to identify features with strongest discrimination between wonky and non-wonky groups.


In [None]:
# Dual-axis chart: normalised mean difference alongside Welch's t-statistic
# Why: shows, per metric, how far the wonky mean sits from the non-wonky mean
# (relative to the non-wonky mean) next to the Welch statistic for that metric

# `statistical_results` is created by the Hypothesis Testing cells, which sit
# further down this notebook. Look the name up instead of referencing it
# directly so that a top-to-bottom run reaches the fallback message below
# rather than stopping with a NameError.
stats_for_chart = globals().get('statistical_results')

if (
    stats_for_chart is not None
    and len(stats_for_chart) > 0
    and 'welch_statistic' in stats_for_chart.columns
):
    # Work on a copy so the shared results table is not modified.
    testdf_2 = stats_for_chart.copy()
    testdf_2['count_difference'] = testdf_2['wonky_mean'] - testdf_2['non_wonky_mean']
    # A zero baseline is replaced with NaN so the ratio is NaN rather than inf.
    testdf_2['count_difference_nrm'] = testdf_2['count_difference'] / (testdf_2['non_wonky_mean'].replace(0, np.nan))

    # Set index to metric name for plotting
    testdf_2 = testdf_2.set_index('metric')

    # Create dual-axis chart
    fig_dual = create_dual_axis_statistical_chart(
        testdf_2,
        count_diff_col='count_difference_nrm',
        t_stat_col='welch_statistic',
        title="Ave Wonky Count Difference & Welch's t-statistic by Feature"
    )
    fig_dual.show()
else:
    print("Statistical results not available for dual-axis chart")
    if stats_for_chart is None:
        print("Run the Hypothesis Testing cells first, then re-run this cell.")


### Distribution Comparisons

Comparing distributions of key features between wonky and non-wonky groups to visualize differences.


In [None]:
# Distribution comparisons for key features
# Why: draws a histogram and a box plot per feature, split into respondents
# with and without wonky tasks

key_features_for_dist = ['total_tasks', 'suspicious_fast_rate', 'days_active',
                         'avg_task_time', 'wonky_task_ratio']

# The grouping flag is otherwise only created in the Hypothesis Testing
# section further down. Derive it here (same definition: ratio > 0) when it
# is missing so this cell also works on a fresh top-to-bottom run.
if 'has_wonky_tasks' not in respondent_features.columns:
    respondent_features['has_wonky_tasks'] = (respondent_features['wonky_task_ratio'] > 0).astype(int)

for feature in key_features_for_dist:
    # Features absent from the table are skipped.
    if feature not in respondent_features.columns:
        continue

    pretty_feature = feature.replace("_", " ").title()
    # Arguments shared by both plot types, so the two calls cannot drift apart.
    comparison_kwargs = dict(
        feature=feature,
        group_col='has_wonky_tasks',
        group1_value=1,
        group2_value=0,
        group1_name='Wonky Users',
        group2_name='Non-Wonky Users',
        title=f'{pretty_feature} Distribution: Wonky vs Non-Wonky Users',
    )

    # Create histogram comparison
    fig_dist = create_distribution_comparison(
        respondent_features, plot_type='histogram', **comparison_kwargs
    )
    fig_dist.show()

    # Create box plot comparison
    fig_box = create_distribution_comparison(
        respondent_features, plot_type='box', **comparison_kwargs
    )
    fig_box.show()


In [None]:
# Feature summary tables
# Why: tabulates the key features for respondents with and without wonky
# tasks, using the shared create_feature_breakdown_table helper

# The grouping flag is otherwise only created in the Hypothesis Testing
# section further down. Derive it here (same definition: ratio > 0) when it
# is missing so this cell also works on a fresh top-to-bottom run.
if 'has_wonky_tasks' not in respondent_features.columns:
    respondent_features['has_wonky_tasks'] = (respondent_features['wonky_task_ratio'] > 0).astype(int)

# NOTE(review): feature_col and group_col are both 'has_wonky_tasks' — confirm
# against create_feature_breakdown_table's signature that this is intended.
feature_breakdown = create_feature_breakdown_table(
    respondent_features,
    feature_col='has_wonky_tasks',
    group_col='has_wonky_tasks',
    group1_value=1,
    group2_value=0,
    metrics=key_features_for_dist
)

print("Feature Breakdown: Wonky vs Non-Wonky Users")
print("=" * 100)
display(feature_breakdown)


### Demographic Group Comparisons

Comparing wonky task rates across demographic groups (platforms, hardware versions, locales) using statistical tests.
This helps identify if certain demographics are more associated with wonky study participation.


In [None]:
# Compare wonky task rates across demographic groups
# Why: for each demographic column present, runs compare_demographic_groups on
# wonky_task_ratio, prints a per-group summary, and lists the comparisons
# flagged significant by the Mann-Whitney test

demographic_cols = ['platform_name', 'hardware_version', 'survey_locale']

# Single source for the significance level so the value passed to the test
# and the value printed in the summary cannot diverge.
DEMO_SIGNIFICANCE_LEVEL = 0.05

# The flag aggregated below is otherwise only created in the Hypothesis
# Testing section further down. Derive it here (same definition: ratio > 0)
# when it is missing so this cell also works on a fresh top-to-bottom run.
if 'has_wonky_tasks' not in respondent_features.columns:
    respondent_features['has_wonky_tasks'] = (respondent_features['wonky_task_ratio'] > 0).astype(int)

# Maps demographic column name -> comparison table (only non-empty results).
demographic_results = {}

for demo_col in demographic_cols:
    # Demographic columns absent from the table are skipped.
    if demo_col not in respondent_features.columns:
        continue

    print(f"\n{'=' * 80}")
    print(f"Demographic Comparison: {demo_col}")
    print('=' * 80)

    # Perform comparisons
    demo_comparisons = compare_demographic_groups(
        respondent_features,
        demographic_col=demo_col,
        target_col='wonky_task_ratio',
        min_group_size=10,
        significance_level=DEMO_SIGNIFICANCE_LEVEL
    )

    if len(demo_comparisons) == 0:
        print(f"Insufficient data for {demo_col} comparisons")
        continue

    demographic_results[demo_col] = demo_comparisons

    # Show summary statistics by group
    group_summary = respondent_features.groupby(demo_col).agg({
        'wonky_task_ratio': ['mean', 'median', 'count'],
        'has_wonky_tasks': 'sum'
    }).round(4)
    # Flatten the two-level column index produced by the dict aggregation.
    group_summary.columns = ['mean_wonky_ratio', 'median_wonky_ratio', 'total_count', 'wonky_count']
    group_summary['wonky_rate'] = (group_summary['wonky_count'] / group_summary['total_count'] * 100).round(2)
    group_summary = group_summary.sort_values('mean_wonky_ratio', ascending=False)

    print(f"\nSummary by {demo_col}:")
    display(group_summary)

    # Show significant comparisons
    significant_comps = demo_comparisons[demo_comparisons['mw_significant'] == True]
    if len(significant_comps) > 0:
        print(f"\nSignificant differences (p < {DEMO_SIGNIFICANCE_LEVEL}): {len(significant_comps)}")
        display(significant_comps[['group1', 'group2', 'mean_difference', 'mw_p_value', 'welch_p_value', 'tests_agree']])
    else:
        print("\nNo significant differences found between groups")


In [None]:
# Charts for the demographic comparisons
# Why: one bar chart of wonky rate and one box plot of wonky_task_ratio for
# every demographic column that produced comparison results

for demo_col in demographic_cols:
    column_present = demo_col in respondent_features.columns
    has_results = demo_col in demographic_results
    if not (column_present and has_results):
        continue

    axis_label = demo_col.replace("_", " ").title()

    # Per-group mean ratio, number of flagged respondents, and group size.
    demo_summary = (
        respondent_features
        .groupby(demo_col)
        .agg(
            mean_wonky_ratio=('wonky_task_ratio', 'mean'),
            wonky_count=('has_wonky_tasks', 'sum'),
            total_count=('has_wonky_tasks', 'count'),
        )
        .reset_index()
    )
    demo_summary['wonky_rate'] = (demo_summary['wonky_count'] / demo_summary['total_count'] * 100)
    demo_summary = demo_summary.sort_values('mean_wonky_ratio', ascending=False)

    # Bar chart of wonky rates by demographic group
    bar_labels = {demo_col: axis_label, 'wonky_rate': 'Wonky Task Rate (%)'}
    fig_demo = create_bar_plot(
        demo_summary,
        x=demo_col,
        y='wonky_rate',
        title=f'Wonky Task Rate by {axis_label}',
        labels=bar_labels,
        color='wonky_rate',
        color_continuous_scale='Reds',
        text='wonky_rate',
        texttemplate='%{text:.1f}%',
        textposition='outside',
        tickangle=45
    )
    fig_demo.show()

    # Box plot comparing wonky task ratios across groups
    box_labels = {demo_col: axis_label, 'wonky_task_ratio': 'Wonky Task Ratio'}
    fig_demo_box = create_box_plot(
        respondent_features,
        x=demo_col,
        y='wonky_task_ratio',
        title=f'Wonky Task Ratio Distribution by {axis_label}',
        labels=box_labels,
        tickangle=45
    )
    fig_demo_box.show()


### Feature Selection Guidance

Based on EDA results, identify which features show strongest discrimination and should be prioritized for modeling.


In [None]:
# Feature importance summary
# Why: ranks metrics by a 50/50 blend of (1 - Mann-Whitney p-value) and a
# normalised effect size, then lists the top-ranked metrics

# Weights of the two components of ranking_score.
SIGNIFICANCE_WEIGHT = 0.5
EFFECT_SIZE_WEIGHT = 0.5

# `statistical_results` is created by the Hypothesis Testing cells, which sit
# further down this notebook. Look the name up instead of referencing it
# directly so that a top-to-bottom run reaches the fallback message below
# rather than stopping with a NameError.
ranking_source = globals().get('statistical_results')

if ranking_source is not None and len(ranking_source) > 0:
    # Work on a copy so the shared results table is not modified.
    feature_ranking = ranking_source.copy()

    # Effect-size proxy: |Welch t-statistic| when available, otherwise |mean difference|.
    if 'welch_statistic' in feature_ranking.columns:
        feature_ranking['effect_size'] = feature_ranking['welch_statistic'].abs()
    else:
        feature_ranking['effect_size'] = feature_ranking['mean_difference'].abs()

    # Normalise by the largest effect size. When that maximum is 0 or NaN the
    # division would turn every score into NaN, so the effect component is
    # set to 0 and the ranking falls back to significance alone.
    max_effect = feature_ranking['effect_size'].max()
    if pd.notna(max_effect) and max_effect > 0:
        effect_component = feature_ranking['effect_size'] / max_effect
    else:
        effect_component = pd.Series(0.0, index=feature_ranking.index)

    # Lower p-value + higher effect size = higher score.
    significance_component = 1 - feature_ranking['mw_p_value'].clip(0, 1)
    feature_ranking['ranking_score'] = (
        significance_component * SIGNIFICANCE_WEIGHT
        + effect_component * EFFECT_SIZE_WEIGHT
    )

    # Add test agreement indicator (assumed True when the column is absent).
    if 'tests_agree' in feature_ranking.columns:
        feature_ranking['both_tests_agree'] = feature_ranking['tests_agree']
    else:
        feature_ranking['both_tests_agree'] = True

    # Sort by ranking score
    feature_ranking = feature_ranking.sort_values('ranking_score', ascending=False)

    # Select key columns for display; both_tests_agree always exists at this point.
    display_cols = ['metric', 'mean_difference', 'mw_p_value', 'welch_p_value']
    if 'welch_statistic' in feature_ranking.columns:
        display_cols.append('welch_statistic')
    display_cols.append('both_tests_agree')
    display_cols.append('ranking_score')

    # Drop any requested column the results table does not carry.
    available_cols = [col for col in display_cols if col in feature_ranking.columns]

    print("Feature Ranking for Modeling (Top Features)")
    print("=" * 100)
    print("Ranked by: Statistical significance + Effect size")
    print("Higher ranking_score = better feature for modeling")
    print()
    display(feature_ranking[available_cols].head(20))

    # Identify top features
    top_features = feature_ranking.head(10)['metric'].tolist()
    print(f"\nTop 10 Features Recommended for Modeling:")
    for i, feat in enumerate(top_features, 1):
        print(f"  {i}. {feat}")
else:
    print("Statistical results not available for feature ranking")
    if ranking_source is None:
        print("Run the Hypothesis Testing cells first, then re-run this cell.")


#### Modeling Recommendations

Based on EDA findings, recommendations for feature selection and modeling approach.


In [None]:
# Generate modeling recommendations
# Why: turns the statistical, temporal and demographic results computed in
# this notebook into a printed list of recommendations

recommendations = []

# `statistical_results` is created by the Hypothesis Testing cells, which sit
# further down this notebook. Look the name up instead of referencing it
# directly so that a top-to-bottom run does not stop with a NameError.
stats_for_recs = globals().get('statistical_results')

# Check statistical test results
if stats_for_recs is not None and len(stats_for_recs) > 0:
    # When the Welch flag column is absent, treat every row as passing. The
    # fallback is built on the table's own index so that the boolean AND
    # below aligns row-for-row whatever index the table has.
    if 'welch_significant' in stats_for_recs.columns:
        welch_flags = stats_for_recs['welch_significant']
    else:
        welch_flags = pd.Series(True, index=stats_for_recs.index)

    significant_features = stats_for_recs[
        (stats_for_recs['mw_significant'] == True) & (welch_flags == True)
    ]

    if len(significant_features) > 0:
        recommendations.append(f"✓ {len(significant_features)} features show significant differences (both tests agree)")
        recommendations.append(f"  → Prioritize these features: {', '.join(significant_features['metric'].head(5).tolist())}")

    # Check effect sizes: "large" means above the 75th percentile of |mean_difference|.
    if 'mean_difference' in stats_for_recs.columns:
        abs_mean_difference = stats_for_recs['mean_difference'].abs()
        large_effect = stats_for_recs[abs_mean_difference > abs_mean_difference.quantile(0.75)]
        if len(large_effect) > 0:
            recommendations.append(f"✓ {len(large_effect)} features show large effect sizes")
            recommendations.append(f"  → These features have practical significance: {', '.join(large_effect['metric'].head(3).tolist())}")
elif stats_for_recs is None:
    print("Statistical results not available for recommendations")
    print("Run the Hypothesis Testing cells first, then re-run this cell.")

# Check temporal features
if len(chi_square_results) > 0:
    sig_temporal = chi_square_results[chi_square_results['significant'] == True]
    if len(sig_temporal) > 0:
        recommendations.append(f"✓ {len(sig_temporal)} temporal features are significantly associated with wonky studies")
        recommendations.append(f"  → Include temporal features: {', '.join(sig_temporal.index.tolist())}")

# Check demographic differences (summed over every demographic column tested)
if demographic_results:
    total_sig_demo = sum(len(df[df['mw_significant'] == True]) for df in demographic_results.values())
    if total_sig_demo > 0:
        recommendations.append(f"✓ Found {total_sig_demo} significant demographic group differences")
        recommendations.append(f"  → Consider including demographic features as interaction terms")

print("=" * 80)
print("MODELING RECOMMENDATIONS")
print("=" * 80)
print()
for rec in recommendations:
    print(rec)
print()
print("=" * 80)
print("NEXT STEPS:")
print("=" * 80)
print("1. Use top-ranked features from feature ranking for initial model")
print("2. Include temporal features that show significant associations")
print("3. Consider demographic interactions if significant differences found")
print("4. Validate feature importance using model-based methods (e.g., Random Forest feature importance)")
print("5. Monitor model performance on features identified as significant in EDA")


In [0]:
# Fraud risk score: pass the relevant config entries through to the helper
fraud_config_keys = (
    'fraud_score_weights',
    'fraud_score_thresholds',
    'fraud_score_bins',
    'fraud_score_labels',
    'suspected_fraud_threshold',
)
respondent_features = create_fraud_risk_score(
    respondent_features,
    config={key: feature_config[key] for key in fraud_config_keys}
)

# Wonky risk score: same pattern with the wonky_* config entries
wonky_config_keys = (
    'wonky_score_weights',
    'wonky_score_thresholds',
    'wonky_score_bins',
    'wonky_score_labels',
)
respondent_features = create_wonky_risk_score(
    respondent_features,
    config={key: feature_config[key] for key in wonky_config_keys}
)

# Tier counts are listed in index order rather than by frequency.
fraud_tier_counts = respondent_features['fraud_risk_tier'].value_counts().sort_index()
suspected_fraud_pct = respondent_features['suspected_fraud'].mean() * 100

print("Fraud risk distribution:")
print(fraud_tier_counts)
print(f"Suspected fraud rate: {suspected_fraud_pct:.2f}%")


### Hypothesis Testing

Running Mann-Whitney U test and Welch's t-test for validation.

Check if Mann-Whitney U and Welch's t-test agree on significance.
When both tests agree, we have higher confidence in the results.
When they disagree, it signals a need for further investigation.

Compare wonky vs non-wonky users using statistical tests.
- Mann-Whitney U test (non-parametric, primary test)
- Welch's t-test (parametric validator/sense check)
Uses `compare_groups_with_both_tests()` to run both tests simultaneously.

In [0]:
# Binary flag: 1 when the respondent's wonky_task_ratio is positive, else 0
respondent_features['has_wonky_tasks'] = (respondent_features['wonky_task_ratio'] > 0).astype(int)

wonky_flag = respondent_features['has_wonky_tasks']
non_wonky_mask = wonky_flag == 0

# Count and share of each group.
print(f"Wonky users: {wonky_flag.sum():,} ({wonky_flag.mean()*100:.2f}%)")
print(f"Non-wonky users: {non_wonky_mask.sum():,} ({non_wonky_mask.mean()*100:.2f}%)")

In [0]:
# Run both tests for every configured metric; all settings come from stats_config
group_settings = stats_config['group_comparison']

statistical_results = compare_groups_with_both_tests(
    respondent_features,
    group_col=group_settings['group_col'],
    metrics=stats_config['test_metrics'],
    group1_value=group_settings['group1_value'],
    group2_value=group_settings['group2_value'],
    significance_level=stats_config['significance_level']
)

results_banner = "=" * 100
print("Statistical Test Results (Mann-Whitney U + Welch's t-test):")
print(results_banner)
display(statistical_results)


In [0]:
# Summarise how often the two tests reach the same verdict
if 'tests_agree' in statistical_results.columns:
    agreement_stats = statistical_results['tests_agree'].value_counts()
    # A value that never occurs is reported as 0.
    n_agree = agreement_stats.get(True, 0)
    n_disagree = agreement_stats.get(False, 0)

    print("Test Agreement Analysis:")
    print(f"Metrics where tests agree: {n_agree}")
    print(f"Metrics where tests disagree: {n_disagree}")

    # List the metrics on which the tests disagree, with both p-values and flags
    disagree_mask = statistical_results['tests_agree'] == False
    disagree = statistical_results[disagree_mask]
    if len(disagree) > 0:
        disagreement_cols = ['metric', 'mw_p_value', 'welch_p_value', 'mw_significant', 'welch_significant']
        print("\nMetrics where tests disagree (need investigation):")
        display(disagree[disagreement_cols])


In [0]:
# Threshold analysis for two key features against the has_wonky_tasks flag
# Both analyses use the same default bin setting from the config.
threshold_bins = stats_config['threshold_analysis']['default_bins']
section_rule = "=" * 80

print("Threshold Analysis: Task Volume")
print(section_rule)
volume_thresholds = analyze_thresholds(
    respondent_features,
    feature='total_tasks',
    bins=threshold_bins,
    target_col='has_wonky_tasks'
)
display(volume_thresholds)

print("\nThreshold Analysis: Suspicious Fast Rate")
print(section_rule)
speed_thresholds = analyze_thresholds(
    respondent_features,
    feature='suspicious_fast_rate',
    bins=threshold_bins,
    target_col='has_wonky_tasks'
)
display(speed_thresholds)

In [0]:
# Example charts built with the shared visualization helpers

# Histogram of the fraud risk score, coloured by risk tier
fraud_hist_labels = {'fraud_risk_score': 'Fraud Risk Score (0-10)'}
fig_fraud_risk = create_histogram(
    respondent_features,
    x='fraud_risk_score',
    color='fraud_risk_tier',
    nbins=11,
    title='Fraud Risk Score Distribution',
    labels=fraud_hist_labels
)
fig_fraud_risk.show()

# Box plot of average task time, split by the has_wonky_tasks flag
speed_box_labels = {
    'has_wonky_tasks': 'Has Wonky Tasks (1=Yes, 0=No)',
    'avg_task_time': 'Average Task Time (seconds)',
}
# Fix the category order so group 0 is drawn before group 1.
speed_box_order = {'has_wonky_tasks': [0, 1]}
fig_speed = create_box_plot(
    respondent_features,
    x='has_wonky_tasks',
    y='avg_task_time',
    color='has_wonky_tasks',
    title='Average Task Time: Wonky vs Non-Wonky Users',
    labels=speed_box_labels,
    category_orders=speed_box_order,
    boxmean='sd'
)
fig_speed.show()

## Summary

**Key Findings:**
- Statistical tests (Mann-Whitney U + Welch's t-test) identify significant differences between wonky and non-wonky users
- Temporal features show strong associations with wonky study participation (chi-squared tests)
- Feature engineering creates actionable behavioral indicators (task speed, volume, velocity)
- Demographic groups (platforms, hardware, locales) show varying wonky task rates
- Both statistical tests provide validation - agreement increases confidence in findings
- Visualizations highlight features with strongest discrimination power

**Feature Selection Insights:**
- Top-ranked features combine statistical significance with practical effect sizes
- Temporal patterns (business hours, night tasks) are strong discriminators
- Behavioral features (task volume, speed) show consistent differences
- Demographic features may be useful as interaction terms

**Next Steps:**
- Use top-ranked features from feature ranking for model training
- Include temporal features that show significant chi-squared associations
- Consider demographic interactions based on group comparison results
- Apply fraud risk thresholds to filter suspicious users
- Build models using features identified as significant in EDA
- Validate feature importance using model-based methods (Random Forest feature importance)
