Plots saved at /home/localadmin/hpc_mount/Cortical_Microstructure_Changes_in_Schizophrenia/new_group_results/combined

Each dot in a box plot is one subject's value, averaged over the vertices of the significant clusters.

In [None]:
import os
import numpy as np
import nibabel as nib
import pandas as pd
from mayavi import mlab
from surfer import Brain
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
import subprocess
from pathlib import Path
from scipy import stats
from matplotlib.colors import LinearSegmentedColormap

def setup_freesurfer():
    """Initialize the FreeSurfer environment for this Python process.

    Sets ``FREESURFER_HOME`` and ``SUBJECTS_DIR``, sources FreeSurfer's
    ``SetUpFreeSurfer.sh`` in a bash sub-shell and copies the environment of
    that shell into ``os.environ``.

    Raises:
        FileNotFoundError: if ``SetUpFreeSurfer.sh`` does not exist.
        subprocess.CalledProcessError: if the sub-shell exits with an error.
        OSError: if bash cannot be started.
    """
    import shlex  # only needed here; keeps the file-level imports untouched

    freesurfer_home = '/home/localadmin/freesurfer'
    subjects_dir = "/home/localadmin/hpc_mount/Cortical_Microstructure_Changes_in_Schizophrenia"
    os.environ['FREESURFER_HOME'] = freesurfer_home
    os.environ['SUBJECTS_DIR'] = subjects_dir

    setup_script = os.path.join(freesurfer_home, 'SetUpFreeSurfer.sh')
    # `env -0` separates entries with NUL bytes so values that contain
    # newlines (e.g. exported shell functions) are not split into bogus keys.
    shell_cmd = f"source {shlex.quote(setup_script)}; env -0"
    try:
        # Without this check a missing script would still be reported as a
        # successful initialization, because `env` runs regardless.
        if not os.path.isfile(setup_script):
            raise FileNotFoundError(
                f"FreeSurfer setup script not found: {setup_script}")

        completed = subprocess.run(['bash', '-c', shell_cmd],
                                   stdout=subprocess.PIPE, check=True)
        for entry in completed.stdout.split(b'\0'):
            key, sep, value = os.fsdecode(entry).partition('=')
            # Skip the trailing empty entry and anything without a name.
            if sep and key:
                os.environ[key] = value
        print("FreeSurfer environment initialized successfully")
    except Exception as e:
        print(f"Error setting up FreeSurfer: {e}")
        raise

def find_cluster_file(glmdir_path, for_visualization=True):
    """Find the cluster map to use inside a GLM contrast directory.

    Parameters:
    glmdir_path (str): Directory to search.
    for_visualization (bool): When True, candidates are the ``.mgh`` files
        whose name contains both 'perm' and 'cluster'. No selection rule is
        defined for False, so nothing is selected in that case.

    Returns:
    str or None: File name (not the full path) of the alphabetically first
    candidate, or None when the directory does not exist or nothing matches.
    """
    # isdir (rather than exists) so that a plain file at this path is treated
    # like a missing directory instead of making os.listdir raise.
    if not os.path.isdir(glmdir_path):
        return None

    print(f"\nLooking in directory: {glmdir_path}")
    all_files = os.listdir(glmdir_path)
    print("All files:", all_files)

    # Always bound, so the for_visualization=False path cannot hit an
    # UnboundLocalError below.
    files = []
    if for_visualization:
        # os.listdir order is arbitrary; sorting makes the pick reproducible.
        files = sorted(f for f in all_files
                       if 'perm' in f and 'cluster' in f and f.endswith('.mgh'))

    selected_file = files[0] if files else None
    print(f"Selected file: {selected_file}")
    return selected_file

def check_significance(base_path, param, contrast):
    """Tell whether either hemisphere has a cluster-map value above 1.3.

    The left hemisphere is inspected first; the search stops at the first
    hemisphere whose cluster map has a maximum greater than 1.3.
    Hemispheres whose GLM directory or cluster file is missing are skipped.

    Returns:
    bool: True as soon as one hemisphere qualifies, otherwise False.
    """
    results_root = os.path.join(base_path, 'new_group_results')

    for hemi in ('lh', 'rh'):
        glmdir = os.path.join(
            results_root,
            f'group_{hemi}_{param}_fwhm6_demeaned_{contrast}.glmdir',
            f'group_contrast_{contrast}')
        if not os.path.exists(glmdir):
            continue

        cluster_file = find_cluster_file(glmdir, for_visualization=True)
        if not cluster_file:
            continue

        values = nib.load(os.path.join(glmdir, cluster_file)).get_fdata().squeeze()
        if np.max(values) > 1.3:
            print(f"Found significant cluster in {hemi} {param} {contrast}")
            return True

    return False

def calculate_pvalue(group1, group2, alternative='two-sided'):
    """
    Run an independent two-sample t-test and report only its p-value.

    Parameters:
    group1 (array-like): Observations of the first group
    group2 (array-like): Observations of the second group
    alternative (str): Alternative hypothesis handed to scipy:
        'two-sided', 'less' or 'greater'

    Returns:
    float: p-value of the test
    """
    # The t statistic is not needed by callers, so only the p-value is kept.
    test_result = stats.ttest_ind(group1, group2, alternative=alternative)
    return test_result.pvalue

def create_boxplot(data_dict, fig, ax):
    """Draw an EP-vs-HC box plot with per-subject dots on a black background.

    When both groups have data, the p-value of an independent t-test and
    Cohen's d are written inside the axes.

    Parameters:
    data_dict (dict): Maps 'EP' and/or 'HC' to array-like values. A group
        that is missing, None or empty is left out of the plot.
    fig (matplotlib Figure or None): Figure that owns ``ax``; its background
        is painted black when given.
    ax (matplotlib Axes): Axes to draw on.

    Returns:
    The Axes returned by ``sns.boxplot``.
    """
    sns.set_style(rc={'axes.facecolor': 'black',
                     'figure.facecolor': 'black',
                     'xtick.color': 'white',
                     'ytick.color': 'white',
                     'axes.edgecolor': 'white',
                     'axes.labelcolor': 'white',
                     'text.color': 'white'})

    # Keep only groups that actually carry values; callers may pass None for
    # a group that could not be computed.
    group_values = {}
    for group in ['EP', 'HC']:
        values = data_dict.get(group)
        if values is None:
            continue
        values = np.asarray(values).flatten()
        if values.size > 0:
            group_values[group] = values

    all_data = []
    labels = []
    for group, values in group_values.items():
        all_data.extend(values)
        labels.extend([group] * values.size)

    plot_data = pd.DataFrame({
        'Group': labels,
        'Value': all_data
    })

    palette = {'EP': '#E41A1C', 'HC': '#377EB8'}
    bp = sns.boxplot(data=plot_data, x='Group', y='Value',
                    palette=palette, ax=ax)

    sns.swarmplot(data=plot_data, x='Group', y='Value',
                  color='white', alpha=0.65, ax=ax)

    # Effect size and p-value need both groups.
    if 'EP' in group_values and 'HC' in group_values:
        cohens_d = calculate_cohens_d(group_values['EP'], group_values['HC'])
        pvalue = calculate_pvalue(group_values['EP'], group_values['HC'])

        # Write on `ax` itself (not on whatever axes pyplot considers
        # current); p-value on top, Cohen's d below it.
        ax.text(0.5, 0.90, f"p = {pvalue:.2g}",
                transform=ax.transAxes, color='white',
                horizontalalignment='center', fontsize=20)
        ax.text(0.5, 0.80, f"Cohen's d = {cohens_d:.2g}",
                transform=ax.transAxes, color='white',
                horizontalalignment='center', fontsize=20)

    ax.set_xlabel('')
    ax.set_title('')

    # The rc values set above only apply to artists created afterwards, and
    # `fig`/`ax` already exist, so the dark theme is also applied explicitly.
    ax.set_facecolor('black')
    if fig is not None:
        fig.patch.set_facecolor('black')
    plt.setp(ax.spines.values(), color='white')
    ax.tick_params(axis='both', which='major', labelsize=18, colors='white')

    # Only build a legend when there is something to show in it.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend(fontsize=16)

    ax.set_ylabel(ax.get_ylabel(), fontsize=20, color='white')

    return bp

def load_freesurfer_lut(filename):
    """Build a matplotlib colormap from a whitespace-separated numeric table.

    The first three columns of the file are used as red, green and blue.
    Tables whose largest value is above 1 are taken to be on a 0-255 scale
    and are rescaled to the 0-1 range matplotlib requires; tables already in
    0-1 are used unchanged.

    Parameters:
    filename (str): Path of the text file readable by ``np.loadtxt``.

    Returns:
    LinearSegmentedColormap: Colormap named 'custom' interpolating the rows.
    """
    data = np.loadtxt(filename)
    rgb_data = data[:, :3]  # Take only RGB columns
    # Normalize RGB values to 0-1 range when the file stores 0-255 values.
    if rgb_data.size and rgb_data.max() > 1.0:
        rgb_data = rgb_data / 255.0
    return LinearSegmentedColormap.from_list('custom', rgb_data)

def create_brain_views(subject_id, hemi, sig_file, temp_file_base, output_path):
    """Render lateral and medial views of a surface map of -log10(p-values).

    For each view the rendering is written to ``<temp_file_base>_<view>.png``
    and again under ``<output_path>/individual``; a standalone horizontal
    colorbar image is saved in that same folder.

    Parameters:
    subject_id (str): Subject handed to ``Brain`` (e.g. 'fsaverage').
    hemi (str): Hemisphere handed to ``Brain``.
    sig_file (str): Path of the surface overlay, loaded with nibabel.
    temp_file_base (str): Path prefix of the temporary view images.
    output_path (str): Directory whose 'individual' sub-folder receives the
        per-view images and colorbars.

    Returns:
    tuple: ``(temp_files, is_significant)`` where ``temp_files`` lists the
    temporary images in [lateral, medial] order and ``is_significant`` tells
    whether any value exceeds 1.3. On any error a message is printed and
    ``([], False)`` is returned.
    """
    try:
        # Load the cluster data (-log10(p-values))
        cluster_data = nib.load(sig_file).get_fdata().squeeze()
        thresh = 1.3  # Threshold for significance

        # Everything below is identical for both views, so compute it once.
        max_value = np.max(cluster_data)
        print(f"Maximum -log10(p-value): {max_value}")  # For debugging
        # Keep the colour range non-degenerate when nothing exceeds the
        # threshold (the upper bound would otherwise fall below the lower).
        display_max = max_value if max_value > thresh else thresh + 1.0

        # NOTE(review): the surface is coloured with 'YlOrRd' while the
        # standalone colorbar uses this custom map, so the two do not match.
        # Left as is because the intended colormap cannot be told from here.
        custom_cmap = load_freesurfer_lut('/home/localadmin/hpc_mount/Cortical_Microstructure_Changes_in_Schizophrenia/results/nih_iso.cmap')

        individual_dir = os.path.join(output_path, 'individual')
        os.makedirs(individual_dir, exist_ok=True)
        base_name = os.path.basename(temp_file_base)

        temp_files = []
        for view in ['lateral', 'medial']:
            temp_file = f"{temp_file_base}_{view}.png"

            brain = Brain(subject_id, hemi, 'inflated',
                         background="black",
                         cortex="low_contrast",
                         size=(800, 600))
            try:
                # Display the actual -log10(p-values)
                brain.add_data(cluster_data,
                             min=thresh,  # Minimum threshold for significance
                             max=display_max,
                             mid=(thresh + display_max)/2,
                             thresh=thresh,
                             colormap='YlOrRd',
                             alpha=1.0,
                             smoothing_steps=0,
                             colorbar=True,
                             remove_existing=True)

                brain.show_view(view, distance=350)

                # Temporary file for the combined plot, plus a kept copy.
                brain.save_image(temp_file)
                brain.save_image(os.path.join(individual_dir,
                                              f'{base_name}_{view}.png'))
            finally:
                # Release the render window even when rendering failed.
                mlab.close(brain._figures[0])
            temp_files.append(temp_file)

            # Save the colorbar separately, next to the individual views.
            colorbar_file = os.path.join(individual_dir,
                                         f"{base_name}_{view}_colorbar.png")
            fig, ax = plt.subplots(figsize=(4, 0.5))  # Adjust size as needed
            try:
                norm = plt.Normalize(vmin=thresh, vmax=display_max)
                sm = plt.cm.ScalarMappable(cmap=custom_cmap, norm=norm)
                sm.set_array([])
                cbar = plt.colorbar(sm, cax=ax, orientation='horizontal')
                cbar.set_label('-log10(p-value)', color='white')
                # Covers both axes: a horizontal colorbar has its ticks on x.
                cbar.ax.tick_params(color='white', labelcolor='white')
                fig.savefig(colorbar_file, bbox_inches='tight', dpi=300,
                            facecolor='black')
            finally:
                plt.close(fig)

        return temp_files, bool(np.any(cluster_data > thresh))
    except Exception as e:
        print(f"Error creating brain views: {e}")
        return [], False

def _cleanup_temp_dir(temp_dir):
    """Delete every file in *temp_dir*, then the directory, printing failures."""
    if not os.path.exists(temp_dir):
        return
    for file in os.listdir(temp_dir):
        try:
            os.remove(os.path.join(temp_dir, file))
        except Exception as e:
            print(f"Error removing temp file {file}: {e}")
    try:
        os.rmdir(temp_dir)
    except Exception as e:
        print(f"Error removing temp directory: {e}")


def _boxplot_image(data_by_group, ylabel, save_path):
    """Render one group box plot, save it to *save_path* and return its pixels."""
    # The figure is created black explicitly: create_boxplot changes the
    # default style only after this figure already exists.
    box_fig = plt.figure(figsize=(8, 6), facecolor='black')
    try:
        box_ax = box_fig.add_subplot(111)
        create_boxplot(data_by_group, box_fig, box_ax)
        box_ax.set_ylabel(ylabel, color='white')
        box_fig.savefig(save_path, facecolor='black', bbox_inches='tight',
                        dpi=300, pad_inches=0.1)
        # Same figure is reused for the combined plot instead of redrawing it.
        return fig_to_array(box_fig)
    finally:
        plt.close(box_fig)


def _cluster_means_by_group(significant_data, group_indices):
    """Average each map over its significant vertices and keep one group's subjects.

    Returns the mean across the supplied (mask, data) pairs of the per-subject
    cluster averages.
    """
    per_hemi = [np.mean(data[sig_vertices], axis=0)[group_indices]
                for sig_vertices, data in significant_data]
    return np.mean(per_hemi, axis=0)


def process_parameter(base_path, param, contrast, output_path, groups_data, counter):
    """Create all figures for one parameter and contrast.

    Steps: locate the cluster map of each hemisphere, render its lateral and
    medial views, average the parameter and the cortical thickness inside
    the significant vertices per subject, draw EP/HC box plots of both, and
    assemble everything into ``<output_path>/combined/<contrast>_<param>_combined.png``.
    Individual box plots go to ``<output_path>/individual``.

    Parameters:
    base_path (str): Root folder holding 'new_group_results' and
        'new_group_analysis'.
    param (str): Parameter name used in the file names (e.g. 'md').
    contrast (str): Contrast name used in the file names (e.g. 'EPHC').
    output_path (str): Folder with the 'combined' and 'individual' outputs;
        a 'temp' sub-folder is created here and removed again at the end.
    groups_data: Table whose 'group' column holds 'EP' / 'HC' per subject.
        # assumes row order matches the subject order of the .mgh stacks —
        # TODO confirm against how the stacks were built
    counter (int): Zero-based panel index; shown as a letter (0 -> 'A').

    Returns:
    None. Errors are printed, not raised; temporary files are always removed.
    """
    hemis = ['lh', 'rh']
    temp_dir = os.path.join(output_path, 'temp')
    os.makedirs(temp_dir, exist_ok=True)

    try:
        # Locate the input files of each hemisphere.
        brain_data = {hemi: {} for hemi in hemis}
        for hemi in hemis:
            glmdir = os.path.join(base_path, 'new_group_results',
                               f'group_{hemi}_{param}_fwhm6_demeaned_{contrast}.glmdir',
                               f'group_contrast_{contrast}')

            cluster_file_name = find_cluster_file(glmdir, for_visualization=True)

            if cluster_file_name:
                brain_data[hemi] = {
                    'cluster': os.path.join(glmdir, cluster_file_name),
                    'param': os.path.join(base_path, 'new_group_analysis', f'group_{hemi}_{param}.mgh'),
                    'thickness': os.path.join(base_path, 'new_group_analysis', f'group_{hemi}_thickness.mgh')
                }

        # Render each hemisphere once; one call already yields both views.
        rendered = {}
        for hemi in hemis:
            if 'cluster' in brain_data[hemi]:
                temp_file_base = os.path.join(temp_dir, f'{contrast}_{param}_{hemi}')
                rendered[hemi] = create_brain_views('fsaverage', hemi,
                                                    brain_data[hemi]['cluster'],
                                                    temp_file_base,
                                                    output_path)

        # Panel order: left lateral, left medial, right medial, right lateral.
        view_index = {'lateral': 0, 'medial': 1}
        view_order = [('lh', 'lateral'), ('lh', 'medial'),
                      ('rh', 'medial'), ('rh', 'lateral')]
        brain_images = []
        for hemi, view in view_order:
            temp_files, _ = rendered.get(hemi, ([], False))
            if temp_files:
                brain_images.append(plt.imread(temp_files[view_index[view]]))

        # Collect (significant-vertex mask, data) pairs per hemisphere.
        significant_data = []
        significant_thickness = []
        for hemi in hemis:
            _, is_sig = rendered.get(hemi, ([], False))
            if is_sig:
                cluster_data = nib.load(brain_data[hemi]['cluster']).get_fdata()
                sig_vertices = cluster_data > 1.3
                param_data = nib.load(brain_data[hemi]['param']).get_fdata()
                thickness_data = nib.load(brain_data[hemi]['thickness']).get_fdata()
                significant_data.append((sig_vertices, param_data))
                significant_thickness.append((sig_vertices, thickness_data))

        # Per-subject cluster averages, split by group.
        data_by_group = {'EP': None, 'HC': None}
        thickness_by_group = {'EP': None, 'HC': None}
        if significant_data:
            for group in data_by_group:
                group_indices = np.asarray(groups_data['group'] == group)
                data_by_group[group] = _cluster_means_by_group(
                    significant_data, group_indices)
                thickness_by_group[group] = _cluster_means_by_group(
                    significant_thickness, group_indices)

        # Box plots: parameter first, thickness second.
        if any(v is not None for v in data_by_group.values()):
            individual_dir = os.path.join(output_path, 'individual')
            brain_images.append(_boxplot_image(
                data_by_group, param.upper(),
                os.path.join(individual_dir, f'{contrast}_{param}_boxplot.png')))
            brain_images.append(_boxplot_image(
                thickness_by_group, 'Thickness (mm)',
                os.path.join(individual_dir, f'{contrast}_{param}_thickness_boxplot.png')))

        # Combined figure with 6 panels (4 brain views + 2 boxplots).
        fig, axes = plt.subplots(1, 6, figsize=(38, 8))
        try:
            fig.subplots_adjust(wspace=0, hspace=0)

            # Hide every panel's frame, including panels that stay empty.
            for ax in axes:
                ax.axis('off')

            # Wraps around instead of failing when there are many parameters.
            panel_letter = chr(ord('A') + counter % 26)

            for idx, (ax, img) in enumerate(zip(axes, brain_images)):
                ax.imshow(img)

                if idx == 3:
                    ax.text(0.8, 0.9, 'R', color='white', fontsize=18,
                            transform=ax.transAxes)
                elif idx == 0:
                    ax.text(0.1, 0.92, f'{panel_letter}) {param.upper()}: EP>HC',
                            color='black', fontsize=18,
                            transform=ax.transAxes,
                            bbox=dict(facecolor='white',
                                      alpha=1.0,
                                      edgecolor='none',
                                      pad=3))

            fig.savefig(os.path.join(output_path, 'combined', f'{contrast}_{param}_combined.png'),
                        facecolor='black', bbox_inches='tight', dpi=300, pad_inches=0.1)
        finally:
            plt.close(fig)

    except Exception as e:
        print(f"Error processing {param} - {contrast}: {e}")
    finally:
        _cleanup_temp_dir(temp_dir)

def fig_to_array(fig):
    """Render *fig* and return its pixels as an RGB array.

    Parameters:
    fig: Figure whose canvas provides ``draw()`` and ``buffer_rgba()``
        (Matplotlib's Agg-based canvases do).

    Returns:
    numpy.ndarray: ``uint8`` array of shape ``(height, width, 3)``.
    """
    fig.canvas.draw()
    # buffer_rgba replaces tostring_rgb, which Matplotlib deprecated in 3.8
    # and later removed. The buffer already carries its own height and
    # width, so no separate reshape from the canvas size is needed.
    rgba = np.asarray(fig.canvas.buffer_rgba())
    # Drop the alpha channel; copy so the result outlives the canvas buffer.
    return rgba[..., :3].copy()
    
def calculate_cohens_d(group1, group2):
    """Return Cohen's d: the difference of the group means (group1 minus
    group2) divided by the pooled standard deviation of the two groups."""
    size_a = len(group1)
    size_b = len(group2)

    # Sample variances (ddof=1), each weighted by its degrees of freedom.
    weighted_var_a = (size_a - 1) * np.var(group1, ddof=1)
    weighted_var_b = (size_b - 1) * np.var(group2, ddof=1)
    pooled_sd = np.sqrt((weighted_var_a + weighted_var_b) / (size_a + size_b - 2))

    mean_gap = np.mean(group1) - np.mean(group2)
    return mean_gap / pooled_sd

def main():
    """Run the pipeline: prepare output folders, set up FreeSurfer, load the
    group table, then create figures for every parameter that shows
    significant clusters in each contrast."""
    base_path = '/home/localadmin/hpc_mount/Cortical_Microstructure_Changes_in_Schizophrenia'
    output_path = os.path.join(base_path, 'new_group_results')

    # Output folders
    for folder in (output_path,
                   os.path.join(output_path, 'combined'),
                   os.path.join(output_path, 'individual')):
        os.makedirs(folder, exist_ok=True)

    setup_freesurfer()

    # Group membership of every subject
    csv_path = os.path.join(base_path,
                            'MRS_data_curated_+paths+avg_skeleton+registration.csv')
    groups_data = pd.read_csv(csv_path)

    parameters = ['md']
    contrasts = ['EPHC']

    for contrast in contrasts:
        print(f"\nProcessing {contrast} contrast...")

        # First pass: keep only the parameters with significant clusters.
        significant_params = []
        for param in parameters:
            if not check_significance(base_path, param, contrast):
                continue
            significant_params.append(param)
            print(f"Found significant clusters for {param}")

        # Second pass: one set of figures per kept parameter; the running
        # index is passed on as the panel counter.
        for counter, param in enumerate(significant_params):
            print(f"Creating visualizations for {param}")
            process_parameter(base_path, param, contrast, output_path,
                              groups_data, counter)

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()