In [None]:
import re
from pathlib import Path

import numpy as np
import pandas as pd
from nilearn import plotting

from dd_kable_analysis.config_loader import load_config

# Basic QA: verifying completion of the traditional model analysis

## Load Configuration and Paths for QA Output
Import required libraries, load configuration, and define output/log directories for QA checks.

### Note: The log files will be deleted once this post-analysis QA has been run.

In [None]:
# Load the project configuration once; later cells read paths from `cfg`.
cfg = load_config()

# First-level outputs of the traditional model, under the configured output root.
output_root = Path(cfg.output_root).joinpath('traditional_model', 'first_level')

# Directory holding the batch-job log files inspected by the QA cells.
logs_dir = Path(
    '/oak/stanford/groups/russpold/users/buckholtz/DD_Kable/scripts/dd-kable-analysis/logs'
)

# Echo both locations, one per line, so a wrong path is obvious before any check runs.
print(output_root, logs_dir, sep='\n')

# Note: log files will be deleted after they are checked

In [None]:
# Spot-check a single subject/run by plotting its "larger later - smaller sooner"
# effect-size map on a fixed set of axial slices.
sub_id = 'dmp0011'
run = '1'

# Build the directory from `output_root` (derived from the loaded config) instead of
# repeating the absolute path, so this cell looks in the same first-level tree as
# the batch-verification check.
# NOTE(review): assumes cfg.output_root points at the analysis_output directory
# that was previously hard-coded here -- confirm.
output_dir = output_root / f'sub-{sub_id}' / 'contrast_estimates'

# Define consistent z-slice coordinates
z_coords = np.linspace(-40, 70, 8)

# Check ll - ss contrast
ll_minus_ss = (
    output_dir
    / f'sub-{sub_id}_ses-scan1_task-itc_run-{run}_contrast-ll_minus_ss_output-effectsize.nii.gz'
)

# Fail early with the full path when the map is missing, rather than relying on
# the plotting call to report it.
if not ll_minus_ss.is_file():
    raise FileNotFoundError(f'Contrast map not found: {ll_minus_ss}')

plotting.plot_stat_map(
    ll_minus_ss,
    display_mode='z',
    cut_coords=z_coords,
    title='Larger Later - Smaller Sooner',
)

In [None]:
# --- Batch-run verification (run after jobs finish) ---
# (logs may have been deleted)
# Subject/run pairs expected to have first-level outputs.
good_subs = pd.read_csv(cfg.subject_lists / 'initial_qa_pass_subjects_runs.csv')

output_root = Path(cfg.output_root) / 'traditional_model' / 'first_level'
logs_dir = Path(
    '/oak/stanford/groups/russpold/users/buckholtz/DD_Kable/scripts/dd-kable-analysis/logs',
)

# Two completion-message formats are accepted; each alternative captures the
# subject id and run number under its own pair of group names.
success_pattern = re.compile(
    r'Analysis complete for sub-(?P<sub_id>\w+), run (?P<run>\d+)'
    r'|SUCCESS: Analysis completed for sub-(?P<sub_id2>\w+), run (?P<run2>\d+)'
)

out_logs = sorted(logs_dir.glob('traditional_model_*.out'))
if not out_logs:
    # Without logs no pair can be confirmed, so say so explicitly instead of
    # letting every run silently look like it never logged success.
    print(
        f'WARNING: no traditional_model_*.out logs found in {logs_dir}; '
        'log_success will be False for every subject/run.'
    )

# (sub_id, run) pairs for which a completion message was found in some log.
success_pairs = set()
unreadable_logs = []
for log_file in out_logs:
    try:
        # errors='replace' keeps a log with stray undecodable bytes usable:
        # the completion lines can still be matched.
        text = log_file.read_text(errors='replace')
    except OSError as exc:
        # Record the failure so it can be reported below rather than dropped.
        unreadable_logs.append((log_file, exc))
        continue
    for match in success_pattern.finditer(text):
        # Only one alternative matches, so the other pair of groups is None.
        sub_id = match.group('sub_id') or match.group('sub_id2')
        run = match.group('run') or match.group('run2')
        success_pairs.add((sub_id, int(run)))

if unreadable_logs:
    print(f'WARNING: could not read {len(unreadable_logs)} log file(s):')
    for log_file, exc in unreadable_logs:
        print(f'  {log_file}: {exc}')


def count_maps(sub_id: str, run: int):
    """Count the effect-size contrast maps written for one subject and run.

    Looks in ``output_root / 'sub-<sub_id>' / 'contrast_estimates'`` for files
    named ``sub-<sub_id>_ses-*_task-*_run-<run>_contrast-*_output-effectsize.nii.gz``.

    Returns a ``(n_files, directory)`` tuple. The count is 0 when the directory
    does not exist; the directory path is returned in either case.
    """
    contrast_dir = output_root.joinpath(f'sub-{sub_id}', 'contrast_estimates')
    map_pattern = (
        f'sub-{sub_id}_ses-*_task-*_run-{run}_contrast-*_output-effectsize.nii.gz'
    )
    n_found = 0
    if contrast_dir.exists():
        n_found = sum(1 for _ in contrast_dir.glob(map_pattern))
    return n_found, contrast_dir


# Build one QA record per expected (subject, run): how many effect-size maps exist
# on disk and whether a completion message for that pair was found in the logs.
rows = []
for sub_id, run_value in good_subs[['sub_id', 'run']].itertuples(index=False, name=None):
    run = int(run_value)
    n_maps, out_dir = count_maps(sub_id, run)
    rows.append(
        {
            'sub_id': sub_id,
            'run': run,
            'output_dir': str(out_dir),
            'n_maps': n_maps,
            'has_maps': n_maps > 0,
            'log_success': (sub_id, run) in success_pairs,
        }
    )

# Explicit columns keep the frame well-formed even when the subject list is empty.
summary = pd.DataFrame(
    rows,
    columns=['sub_id', 'run', 'output_dir', 'n_maps', 'has_maps', 'log_success'],
)

# Status precedence: 'missing' (no maps on disk) overrides 'no_log_success'
# (maps exist but no completion message was found); everything else is 'ok'.
has_maps = summary['has_maps'].astype(bool)
log_success = summary['log_success'].astype(bool)
summary['status'] = 'ok'
summary.loc[~log_success, 'status'] = 'no_log_success'
summary.loc[~has_maps, 'status'] = 'missing'

print(summary['status'].value_counts())

# Non-empty stderr logs usually deserve a manual look; list the first few.
err_logs = sorted(
    p for p in logs_dir.glob('traditional_model_*.err') if p.stat().st_size > 0
)
print(f'Non-empty error logs: {len(err_logs)}')
for err_log in err_logs[:5]:
    print(f'  {err_log}')

# A bare expression is rendered only when it is the last statement of a cell,
# so the preview of failing rows goes last; the error-log paths are printed above.
failed = summary[summary['status'] != 'ok']
failed.head()

In [None]:
# --- Batch-run verification: runs needing attention ---
# The batch-verification cell above already scans the logs, defines `count_maps`,
# and builds `summary` and `err_logs`. This cell reuses those results instead of
# repeating the scan and redefining the same function.
failed = summary[summary['status'] != 'ok']

print(f'Subject/run pairs not ok: {len(failed)} of {len(summary)}')
print(f'Non-empty error logs: {len(err_logs)}')

# Last expression of the cell, so the failing rows are rendered as a table.
failed.head()