# Confirmatory analysis walkthrough

This notebook demonstrates how to move from exploratory calculations to a confirmatory analysis workflow using the new reusable modules in the `analysis_pipeline` package.

In [None]:
from pathlib import Path

import pandas as pd

from analysis_pipeline.data_loading import build_metrics_table
from analysis_pipeline.hierarchy import build_hierarchical_summary
from analysis_pipeline.statistics import run_confirmatory_tests, summarize_pairwise_results
from analysis_pipeline.clustering import compute_cluster_profiles

## Load metrics and labels

Metrics are aggregated per participant and image, then normalised by the full participant pool (49 viewers).

In [None]:
# Input locations. Both are relative paths, so they resolve against the
# directory the notebook kernel is running in.
FIXATIONS_DIR = Path('fixations')
LABELS_CSV = Path('labels_per_id.csv')

# Build the metrics table from the fixation directory and the labels CSV.
# The resulting object is reused by every later cell in this notebook.
metrics_table = build_metrics_table(FIXATIONS_DIR, LABELS_CSV)
# Last expression of the cell, so the notebook renders it: a preview of the
# leading rows (presumably a pandas DataFrame - confirm against build_metrics_table).
metrics_table.head()

## Hierarchical summaries

Start from the baseline over all images, then drill down into single labels and label combinations.

In [None]:
# Build every level of the hierarchical summary from the metrics table.
summary = build_hierarchical_summary(metrics_table)

# Pull the `.summary` attribute of each level into its own notebook variable;
# the right-hand side is evaluated left to right, i.e. baseline first.
baseline, single_labels, label_combinations = (
    summary['baseline'].summary,
    summary['single_labels'].summary,
    summary['label_combinations'].summary,
)

# Last expression of the cell: render the baseline level.
baseline

In [None]:
# Render the summary taken from the 'single_labels' level in full.
single_labels

In [None]:
# Preview only the leading rows of the 'label_combinations' summary
# (presumably a pandas DataFrame - confirm).
label_combinations.head()

## Confirmatory statistics

Run ANOVA/Kruskal-Wallis tests across label combinations, then inspect the post-hoc pairwise comparisons. The cells below show the results for `fixation_count`.

In [None]:
# Run the confirmatory tests on the metrics table, grouping observations by
# the 'label_combo' column.
tests = run_confirmatory_tests(metrics_table, group_column='label_combo')
# The result is a nested mapping: the first key is 'fixation_count' (presumably
# a metric name - confirm), the second selects the test. Show the ANOVA entry.
tests['fixation_count']['anova']

In [None]:
# Summarise the 'pairwise_parametric' results stored for fixation_count.
pairwise = summarize_pairwise_results(tests['fixation_count']['pairwise_parametric'])
# Order by the 'pvalue_adj' column and preview the leading rows; assuming a
# pandas DataFrame, sort_values is ascending by default, so the smallest
# adjusted p-values come first.
pairwise.sort_values('pvalue_adj').head()

## Clustering

Use PCA + k-means to see whether label combinations form metric-based clusters.

In [None]:
# Compute cluster profiles from the metrics table with n_components=2 and
# n_clusters=3 (per the notebook text above: PCA followed by k-means).
cluster_output = compute_cluster_profiles(metrics_table, n_components=2, n_clusters=3)
# Preview the leading rows of the result's `pca_components` attribute.
cluster_output.pca_components.head()