# In [None]:
"""
Post-processing analysis script for cortical layer intensity data.

Loads, processes, and visualizes the results produced by
quality_control_native_surface_slurm.py.

Run inside the conda environment: niwrap3912
"""
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import zscore, gaussian_kde

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Directory whose entries are used as the list of ex-vivo subject names.
base_path = "/Users/dennis.jungchildmind.org/Desktop/exvivo"
# Surface label and sampling resolution; both are embedded in the file names
# of the intensity archives.
LAYER_TYPE = 'inf'
RESOLUTION = '120um'  # 120um or 240um
# Root folder holding one sub-folder per subject with the .npz intensity files.
data_base_path = f"/Users/dennis.jungchildmind.org/Desktop/exvivo_postslurm/output_{RESOLUTION}_max_960um_dist_method0"
# Folder holding the BigBrain reference intensity files.
bigbrain_base_path = '/Users/dennis.jungchildmind.org/Desktop/BigBrain/PlosBiology2020gii'

# ---------------------------------------------------------------------------
# Analysis parameters
# ---------------------------------------------------------------------------
# "diff" -> profiles are differenced along axis 0 before any z-scoring.
data_type = "diff"
# True -> profiles are z-scored along axis 0.
do_zscore = True

# ---------------------------------------------------------------------------
# Data containers: one independent pair of lists per hemisphere
# ---------------------------------------------------------------------------
all_data = {
    hemisphere: {'intensity_data': [], 'subject_names': []}
    for hemisphere in ('lh', 'rh')
}

def process_intensity_data(data_array, data_type, do_zscore):
    """Apply the configured transforms to an intensity array.

    Args:
        data_array: Array of intensity values; both transforms operate along
            axis 0.
        data_type: When equal to ``'diff'``, consecutive entries along axis 0
            are differenced, so the result has one fewer entry on that axis.
            Any other value leaves the array untouched at this step.
        do_zscore: When truthy, the (possibly differenced) array is z-scored
            along axis 0.

    Returns:
        The transformed array, or ``data_array`` itself when neither
        transform is requested.
    """
    processed = np.diff(data_array, axis=0) if data_type == 'diff' else data_array
    return zscore(processed, axis=0) if do_zscore else processed

# Process ex-vivo subjects.
# Subject names are taken from the entries of base_path, while the intensity
# files themselves are looked up under data_base_path. Entries that have no
# matching file (including stray non-subject entries) are skipped.
dist_array = None  # distance samples; taken from the most recently loaded file
for subject_dir in sorted(os.listdir(base_path)):  # sorted -> reproducible subject order
    for hemi in ('lh', 'rh'):
        intensity_file_path = os.path.join(
            data_base_path, subject_dir,
            f"{hemi}/{LAYER_TYPE}_{RESOLUTION}_method0_manual_raw_intensity.npz"
        )
        if not os.path.exists(intensity_file_path):
            continue

        # NOTE(review): allow_pickle=True lets np.load unpickle object arrays,
        # which can execute arbitrary code. Only use it on trusted files; drop
        # it if the archives contain plain numeric arrays.
        with np.load(intensity_file_path, allow_pickle=True) as data:
            dist_array = data['dist_array']
            tmp_dat = process_intensity_data(data['all_values'], data_type, do_zscore)

        # Store data by hemisphere
        clean_subject_name = subject_dir.replace('_new_confidence', '')
        print(f"Loaded ex-vivo subject: {clean_subject_name}, hemisphere: {hemi}")

        all_data[hemi]['intensity_data'].append(tmp_dat)
        all_data[hemi]['subject_names'].append(clean_subject_name)

# Everything downstream needs dist_array, so fail here with a clear message
# instead of a NameError further down the script.
if dist_array is None:
    raise FileNotFoundError(
        f"No ex-vivo intensity files found under {data_base_path} "
        f"for the subjects listed in {base_path}"
    )

# Load BigBrain data (one file per hemisphere); missing files are skipped.
# NOTE(review): the file name hard-codes 'layer3' instead of using LAYER_TYPE --
# confirm this is the intended BigBrain surface.
for hemi in ['lh', 'rh']:
    bb_file_path = os.path.join(bigbrain_base_path, f'bigbrain_{hemi}_layer3_{RESOLUTION}_method0_manual_raw_intensity.npz')

    if not os.path.exists(bb_file_path):
        continue

    # Use the archive as a context manager so its file handle is closed
    # as soon as the array has been read.
    with np.load(bb_file_path) as bb_npz:
        bb_data = process_intensity_data(bb_npz['all_values'], data_type, do_zscore)

    all_data[hemi]['intensity_data'].append(bb_data)
    all_data[hemi]['subject_names'].append('bigbrain')
    print(f"Loaded BigBrain data for hemisphere: {hemi}")

# Locate the sample of the distance array that sits at exactly 0;
# when there is none, fall back to the sample closest to 0 and report it.
exact_zero_hits = np.where(dist_array == 0)[0]
if len(exact_zero_hits) == 0:
    zero_index = np.argmin(np.abs(dist_array))
    print(f"No exact zero found, using closest value at index {zero_index} with value {dist_array[zero_index]}")
else:
    zero_index = exact_zero_hits[0]

# Create convenient access variables (aliases of the lists held in all_data)
lh_intensity_data_uncut = all_data['lh']['intensity_data']
rh_intensity_data_uncut = all_data['rh']['intensity_data']
lh_subject_names = all_data['lh']['subject_names']
rh_subject_names = all_data['rh']['subject_names']

# Print summary
print("\nData loading summary:")
print(f"Left hemisphere: {len(lh_subject_names)} subjects - {lh_subject_names}")
print(f"Right hemisphere: {len(rh_subject_names)} subjects - {rh_subject_names}")

# Depth axis matching the rows of the processed profiles.
# Differencing along axis 0 yields one value per pair of consecutive samples,
# so each difference is placed at the midpoint of its pair. Without
# differencing the profiles keep one row per original sample, so the distance
# array is used unchanged (midpoints would be one element short).
if data_type == 'diff':
    dist_array_avg = (dist_array[:-1] + dist_array[1:]) / 2
else:
    dist_array_avg = dist_array
print(f"Distance array average: {dist_array_avg}")

# Plot depth profiles for both hemispheres (one subplot per hemisphere).
# Note: hemi / subject_names / subject_data stay bound to the last ('rh')
# entry after this loop and are read again further down the script.
plt.figure(figsize=(8, 4))
xlim_max = 1

for subplot_idx, (hemi, subject_names, subject_data) in enumerate([
    ('lh', lh_subject_names, lh_intensity_data_uncut),
    ('rh', rh_subject_names, rh_intensity_data_uncut)
], 1):

    plt.subplot(1, 2, subplot_idx)

    # Plot each subject: mean across axis 1 with SEM error bars
    grand_average_intensity = []
    for subject_label, tmp in zip(subject_names, subject_data):
        # NaN-aware statistics: NaN entries are ignored instead of turning the
        # whole profile into NaN (the grand average below already uses nanmean).
        n_valid = np.sum(~np.isnan(tmp), axis=1)
        mean_intensity = np.nanmean(tmp, axis=1)
        # std(ddof=0) / sqrt(n - 1) is the standard error of the mean,
        # i.e. the same quantity as std(ddof=1) / sqrt(n).
        sem_intensity = np.nanstd(tmp, axis=1) / np.sqrt(n_valid - 1)

        plt.errorbar(mean_intensity, dist_array_avg, xerr=sem_intensity,
                     fmt='-o', linewidth=0.75, markersize=3, alpha=0.5,
                     label=subject_label)
        grand_average_intensity.append(mean_intensity)

    # Plot grand average across subjects; skipped when the hemisphere has no
    # data, because nanmean of an empty list is a scalar NaN that cannot be
    # plotted against the depth axis.
    if grand_average_intensity:
        grand_average_intensity = np.nanmean(grand_average_intensity, axis=0)
        plt.plot(grand_average_intensity, dist_array_avg, 'k-', linewidth=4,
                 alpha=1, label='Grand Average', zorder=0)

    # Formatting
    plt.xlim(-xlim_max, xlim_max)
    plt.axvline(x=0, color='black', linestyle='-', linewidth=0.5)
    plt.xlabel('Mean Intensity Difference (Z-score)')
    plt.ylabel('Distance from Inf Surface (mm)')
    plt.title(f'{hemi.upper()}')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small', frameon=False)

    # Remove spines
    for spine in plt.gca().spines.values():
        spine.set_visible(False)

plt.tight_layout()
plt.show()

# Analyze intensity data at specific depth positions
# subject_data is still bound to the last hemisphere handled by the
# depth-profile plotting loop; it may be empty if nothing was loaded for it.
if subject_data:
    print(f"Data shape: {subject_data[0].shape}")
print(f"Distance array: {dist_array_avg}")

# Find key depth positions.
# Prefer an exact 0 on the depth axis; otherwise use the sample closest to 0
# (midpoints of a grid that contains 0 never hit 0 exactly), mirroring the
# fallback used for the raw distance array.
exact_zero_hits = np.where(dist_array_avg == 0)[0]
if exact_zero_hits.size:
    zero_index = exact_zero_hits[0]
else:
    zero_index = np.argmin(np.abs(dist_array_avg))
    print(f"No exact zero found, using closest value at index {zero_index} with value {dist_array_avg[zero_index]}")

# NOTE(review): an offset of 4 samples is labelled "one voxel" -- presumably
# 4 depth samples span one voxel at this resolution; confirm.
depth_offset = 4
one_voxel_up = zero_index + depth_offset
one_voxel_down = zero_index - depth_offset

# A negative index would silently wrap around to the far end of the array,
# so reject out-of-range positions explicitly.
if one_voxel_down < 0 or one_voxel_up >= len(dist_array_avg):
    raise IndexError(
        f"Depth positions {one_voxel_down} and {one_voxel_up} fall outside the "
        f"depth axis of length {len(dist_array_avg)}"
    )

print(f"Zero index: {zero_index}")
print(f"Depths - Zero: {dist_array_avg[zero_index]:.3f}, Up: {dist_array_avg[one_voxel_up]:.3f}, Down: {dist_array_avg[one_voxel_down]:.3f}")

# Plot KDE distributions for each subject
hemi = 'rh'  # hemisphere shown in the KDE panels and the scatter plot below
subject_data = lh_intensity_data_uncut if hemi == 'lh' else rh_intensity_data_uncut
subject_names = lh_subject_names if hemi == 'lh' else rh_subject_names
n_subjects = len(subject_data)

# One panel per subject plus one spare panel for the shared legend.
# The grid keeps the 2 x 7 layout for up to 13 subjects and adds rows beyond
# that, so indexing the flattened axes by subject never runs out of panels.
n_cols = 7
n_rows = max(2, int(np.ceil((n_subjects + 1) / n_cols)))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(14, 2 * n_rows))
axes = axes.flatten()
x_range = np.linspace(-10, 10, 500)  # grid on which each KDE is evaluated

# Define positions for analysis: (row index into the data, plot label, storage key)
positions = [
    (zero_index, 'at inf surface', 'at_inf_surface'),
    (one_voxel_up, '1 voxel up', '1_voxel_up'),
    (one_voxel_down, '1 voxel down', '1_voxel_down')
]

# Storage for peak distances: one list per position, filled in subject order.
# Built from `positions` so the keys cannot drift apart.
peak_distances_all_subjects = {key: [] for _, _, key in positions}

# One panel per subject: KDE of the values at each analysed depth position,
# with a dotted marker at the location of the density peak.
for i, tmp in enumerate(subject_data):
    ax = axes[i]

    for pos_idx, label, key in positions:
        # Remove NaN values. They are dropped rather than replaced by 0:
        # zero-filling would add artificial samples at 0 and pull the
        # estimated density (and its peak) toward 0.
        row = tmp[pos_idx, :]
        row = row[~np.isnan(row)]

        # gaussian_kde needs at least two samples with non-zero spread.
        if row.size < 2 or np.all(row == row[0]):
            # Keep one entry per subject so the lists stay aligned with
            # subject_names, and draw an empty line so the colour cycle
            # stays in step with the other panels and the legend.
            peak_distances_all_subjects[key].append(np.nan)
            ax.plot([], [], alpha=1, linewidth=1.5, label=label)
            continue

        kde = gaussian_kde(row)
        kde_vals = kde(x_range)
        line = ax.plot(x_range, kde_vals, alpha=1, linewidth=1.5, label=label)

        # Add peak indicator and store peak distance
        peak_idx = np.argmax(kde_vals)
        peak_x, peak_y = x_range[peak_idx], kde_vals[peak_idx]
        ax.plot([peak_x, peak_x], [0, peak_y], ':', alpha=0.5,
                linewidth=1.25, color=line[0].get_color())

        peak_distances_all_subjects[key].append(peak_x)

    ax.set_xlim(-1, 1)
    ax.set_ylim(bottom=0)
    ax.set_title(f'{subject_names[i]}')

# Add legend in the first unused panel (empty proxy lines carry the labels)
if n_subjects < len(axes):
    legend_ax = axes[n_subjects]
    legend_ax.axis('off')
    for pos_idx, label, key in positions:
        legend_ax.plot([], [], alpha=1, linewidth=1.5, label=label)
    legend_ax.legend(loc='center', frameon=False)

# Hide remaining empty subplots
for j in range(n_subjects + 1, len(axes)):
    axes[j].set_visible(False)

plt.tight_layout()
plt.show()

# Report the stored KDE peak locations per depth position, with mean and std
print("\nPeak distances for all subjects:")
for position_key in peak_distances_all_subjects:
    peak_list = peak_distances_all_subjects[position_key]
    peak_mean = np.mean(peak_list)
    peak_std = np.std(peak_list)
    print(f"{position_key}: {peak_list}")
    print(f"  Mean: {peak_mean:.3f}, Std: {peak_std:.3f}")

# Numpy views of the three peak lists (one entry per subject) for analysis
at_inf_surface, voxel_up, voxel_down = (
    np.array(peak_distances_all_subjects[position_key])
    for position_key in ('at_inf_surface', '1_voxel_up', '1_voxel_down')
)

# Scatter plot: per subject, the absolute peak shift between the surface
# position and the position above (x) versus the position below (y).
n_subjects = len(subject_names)

# One distinguishable colour per subject: tab10 first, Set3 for any beyond 10
if n_subjects <= 10:
    colors = plt.cm.tab10(np.arange(n_subjects))
else:
    colors = np.vstack([plt.cm.tab10(np.arange(10)), plt.cm.Set3(np.arange(n_subjects - 10))])

shift_to_up = np.abs(voxel_up - at_inf_surface)
shift_to_down = np.abs(voxel_down - at_inf_surface)

plt.figure(figsize=(8, 6))
for subject_name, marker_color, x_val, y_val in zip(subject_names, colors, shift_to_up, shift_to_down):
    plt.plot(x_val, y_val, 'o',
             color=marker_color, label=subject_name, markersize=14,
             markeredgecolor='black')

plt.legend(subject_names, bbox_to_anchor=(1.05, 1), loc='upper left',
           frameon=False, fontsize=14)

scatter_ax = plt.gca()
scatter_ax.set_aspect('equal', adjustable='box')
plt.xlabel('Distance to Voxel Up', fontsize=14)
plt.ylabel('Distance to Voxel Down', fontsize=14)
plt.tick_params(axis='both', which='major', labelsize=12)

# Identity line (x = y) drawn behind the markers
plt.plot([0, 1], [0, 1], 'k-', linewidth=0.5, zorder=0)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.tight_layout()
plt.show()
