Plots assessing the relationship between structural connectivity (SC) and functional connectivity (FC)

In [1]:
%cd ../spreading_dynamics_clinical/
!pwd

/home/gabridele/Desktop/irbio_folder/spreading_dynamics_clinical
/home/gabridele/Desktop/irbio_folder/spreading_dynamics_clinical


In [3]:
import os
import re
import glob
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr
import matplotlib.pyplot as plt

In [5]:
# Glob pattern for the per-subject 'processed_*' connectome CSVs under
# <cwd>/derivatives/sub*/dwi/.
# NOTE(review): csv_pattern is reassigned (with a 'mod_' file prefix) in a later
# cell before any glob call is made, so this value appears to be unused — confirm
# which prefix is the intended one.
csv_pattern = os.path.join(
    os.getcwd(),
    'derivatives',
    'sub*',
    'dwi',
    'processed_sub*_Schaefer2018_400Parcels_Tian_Subcortex_S4_1mm_5000000mio_connectome.csv',
)

In [None]:
def load_matrices(sc_csv, fc_npy):
    """Read one subject's SC and FC matrices from disk.

    Parameters
    ----------
    sc_csv : path to a header-less CSV file holding the SC matrix.
    fc_npy : path to a ``.npy`` file holding the FC matrix.

    Returns
    -------
    tuple of (sc, fc), both as NumPy arrays.
    """
    # header=None: the first CSV row is data, not column names.
    sc_frame = pd.read_csv(sc_csv, header=None)
    return sc_frame.values, np.load(fc_npy)

def correlate_sc_fc(sc, fc):
    """Correlate SC with FC over the entries where SC is non-zero.

    Pearson's r is computed twice: once on the raw SC values and once on
    their base-10 logarithm.

    Parameters
    ----------
    sc : array holding the SC matrix.
    fc : array holding the FC matrix; it is multiplied element-wise with the
        SC mask, so the two must be broadcast-compatible.

    Returns
    -------
    tuple
        ``(r, p, r_log, p_log, sc_values, fc_values, log_sc)`` where the last
        three items are the 1-D vectors the correlations were computed on.
    """
    # Entries of SC that are exactly zero are excluded from the analysis.
    has_edge = sc != 0
    edge_indicator = has_edge.astype(int)

    # Zero FC outside the SC support, then keep only the supported entries.
    fc_on_support = fc * edge_indicator
    sc_values = sc[has_edge]
    fc_values = fc_on_support[has_edge]

    # Raw-weight correlation.
    raw_result = pearsonr(sc_values, fc_values)

    # Same correlation after log10-transforming the SC weights.
    log_sc = np.log10(sc_values)
    log_result = pearsonr(log_sc, fc_values)

    return (
        raw_result[0],
        raw_result[1],
        log_result[0],
        log_result[1],
        sc_values,
        fc_values,
        log_sc,
    )

def plot_scatter(sc_values, fc_values, correlation, subject_id, log=False, avg=False):
    """Save a hexbin density plot of FC against SC as a PNG in the working directory.

    Parameters
    ----------
    sc_values, fc_values : paired x / y values to bin.
    correlation : number shown in the title, formatted to two decimals.
    subject_id : identifier inserted into the title and the file name.
    log : if True, label the x axis and the output as log-transformed.
    avg : if True, use the "Average"/"average_" prefix instead of
        "Subject"/"subject_".
    """
    fig, ax = plt.subplots()

    # Hexagonal 2-D histogram; bins with no points are left blank (mincnt=1).
    hexes = ax.hexbin(sc_values, fc_values, gridsize=50, cmap='viridis', mincnt=1)
    fig.colorbar(hexes, ax=ax, label='Counts')

    # Title and axis labels
    scope_label = "Average" if avg else "Subject"
    log_tag = "(Log-Transformed)" if log else ""
    ax.set_title(f'{scope_label} {subject_id} {log_tag}\nCorrelation: {correlation:.2f}')
    x_label = 'SC Values'
    if log:
        x_label += ' (Log-Transformed)'
    ax.set_xlabel(x_label)
    ax.set_ylabel('FC Values')

    # Tick label sizes for major and minor ticks
    ax.tick_params(axis='both', which='major', labelsize=14)
    ax.tick_params(axis='both', which='minor', labelsize=12)
    ax.grid(True)

    # Write the figure to disk and release it
    file_prefix = "average_" if avg else "subject_"
    file_log_tag = "log_" if log else ""
    plot_filename = f'{file_prefix}{subject_id}_{file_log_tag}scatter_plot.png'
    fig.savefig(plot_filename, bbox_inches='tight')
    plt.close(fig)

    print(f'Saved scatter plot for {"average" if avg else "subject"} {subject_id} as {plot_filename}')

# Define the pattern for the per-subject SC connectome CSV files
csv_pattern = os.path.join(os.getcwd(), 'derivatives', 'sub*', 'dwi', 'mod_sub*_Schaefer2018_400Parcels_Tian_Subcortex_S4_1mm_5000000mio_connectome.csv')

# Lists to store subject IDs and correlation results
subject_ids = []
correlations = []
log_correlations = []

# Per-subject arrays of log-transformed SC values and FC values. They are
# concatenated once after the loop; extending a Python list element by element
# would box every edge value into a separate Python object.
log_sc_chunks = []
fc_chunks = []

# Loop through CSV files and process them (sorted so the order is deterministic)
csv_files = sorted(glob.glob(csv_pattern))
for csv_file in csv_files:
    # Parse the ID from the file name only, so a parent directory that happens
    # to contain "sub-<digits>_" cannot be picked up instead.
    subject_id_match = re.search(r"sub-(\d+)_", os.path.basename(csv_file))
    if not subject_id_match:
        print(f"Could not parse a subject ID from {csv_file}")
        continue

    subject_id = subject_id_match.group(1)
    npy_file_name = f"mod_sub-{subject_id}_rs_correlation_matrix.npy"
    npy_path = os.path.join(os.getcwd(), 'derivatives', f'sub-{subject_id}', 'func', npy_file_name)
    if not os.path.exists(npy_path):
        print(f"Matching NPY file not found for {csv_file}")
        continue

    sc, fc = load_matrices(csv_file, npy_path)
    correlation, p_value, log_correlation, p_log_val, sc_values, fc_values, log_sc_values = correlate_sc_fc(sc, fc)

    print(f'Correlation for subject {subject_id}:', correlation, p_value, log_correlation, p_log_val)
    subject_ids.append(subject_id)
    correlations.append(correlation)
    log_correlations.append(log_correlation)

    # Keep this subject's vectors for the pooled plot
    log_sc_chunks.append(np.asarray(log_sc_values))
    fc_chunks.append(np.asarray(fc_values))

    # Plot scatter plot for original SC values
    plot_scatter(sc_values, fc_values, correlation, subject_id)

    # Plot scatter plot for log-transformed SC values
    plot_scatter(log_sc_values, fc_values, log_correlation, subject_id, log=True)

# Stop with a clear message instead of writing NaN averages and then failing
# inside pearsonr on empty input.
if not subject_ids:
    raise RuntimeError(f'No subject with both an SC CSV and an FC NPY file was found for pattern {csv_pattern}')

# Pooled edge values across all processed subjects
all_log_sc_values = np.concatenate(log_sc_chunks)
all_fc_values = np.concatenate(fc_chunks)

# Calculate average correlations (mean of the per-subject coefficients)
avg_correlation = np.mean(correlations)
avg_log_correlation = np.mean(log_correlations)
print(f'Average correlation: {avg_correlation}')
print(f'Average log-transformed correlation: {avg_log_correlation}')

# Save results to a text file. The AW-FC cell further down writes
# 'correlation_results.txt'; a distinct name here keeps the SC-FC results from
# being overwritten when the whole notebook is run.
results_file = 'sc_fc_correlation_results.txt'
with open(results_file, 'w') as f:
    for subject_id, subject_corr, subject_log_corr in zip(subject_ids, correlations, log_correlations):
        f.write(f'Subject {subject_id}: correlation = {subject_corr}, log-transformed correlation = {subject_log_corr}\n')
    f.write(f'\nAverage correlation: {avg_correlation}\n')
    f.write(f'Average log-transformed correlation: {avg_log_correlation}\n')

print(f'Results saved to {results_file}')

# Plot the pooled scatter plot for log-transformed SC values. The coefficient is
# computed on the edges pooled over all subjects, not as the mean of the
# per-subject coefficients.
average_log_correlation, _ = pearsonr(all_log_sc_values, all_fc_values)
# NOTE(review): avg is left at its default (False), so this plot is titled
# "Subject average" and saved with the "subject_" prefix. Passing avg=True would
# match the AW-FC cell but changes the output file name — confirm before changing.
plot_scatter(all_log_sc_values, all_fc_values, average_log_correlation, "average", log=True)


Scatter plots for the relationship between association weight (AW) and functional connectivity (FC)

In [None]:
def load_matrices(aw_csv, fc_npy):
    """Read one subject's AW and FC matrices from disk.

    Parameters
    ----------
    aw_csv : path to a header-less CSV file holding the AW matrix.
    fc_npy : path to a ``.npy`` file holding the FC matrix.

    Returns
    -------
    tuple of (aw, fc), both as NumPy arrays.
    """
    # NOTE(review): a function with this same name is defined in an earlier cell;
    # running this cell replaces it.
    aw_frame = pd.read_csv(aw_csv, header=None)  # first row is data, not a header
    aw_matrix = aw_frame.to_numpy()
    fc_matrix = np.load(fc_npy)
    return aw_matrix, fc_matrix

def correlate_aw_fc(aw, fc):
    """Rank-correlate AW with FC over the entries where AW is non-zero.

    Parameters
    ----------
    aw : array holding the AW matrix.
    fc : array holding the FC matrix; it is multiplied element-wise with the
        AW mask, so the two must be broadcast-compatible.

    Returns
    -------
    tuple
        ``(rho, p_value, aw_values, fc_values)`` — the Spearman coefficient,
        its p-value, and the two 1-D vectors it was computed on.
    """
    # Entries of AW that are exactly zero are excluded from the analysis.
    is_nonzero = aw != 0
    indicator = is_nonzero.astype(int)

    # Zero FC outside the AW support, then keep only the supported entries.
    fc_on_support = fc * indicator
    aw_values = aw[is_nonzero]
    fc_values = fc_on_support[is_nonzero]

    result = spearmanr(aw_values, fc_values)
    return result[0], result[1], aw_values, fc_values

def plot_scatter(aw_values, fc_values, correlation, subject_id, avg=False):
    """Save a scatter plot of FC against AW, with a least-squares line, as a PNG.

    The file is written to the current working directory as
    ``{average_|subject_}{subject_id}_scatter_rho20.png``.

    Parameters
    ----------
    aw_values, fc_values : paired x / y values (lists or arrays).
    correlation : coefficient shown in the title, formatted to two decimals.
        The callers in this notebook pass Spearman's rho, so the title is
        labelled accordingly.
    subject_id : identifier inserted into the title and the file name.
    avg : if True, use the "Average"/"average_" prefix instead of
        "Subject"/"subject_".
    """
    # NOTE(review): a function with this same name is defined in an earlier cell;
    # running this cell replaces it.
    # Callers pass both arrays and plain lists; normalise once so the arithmetic
    # below does not depend on the container type.
    aw_values = np.asarray(aw_values)
    fc_values = np.asarray(fc_values)

    fig, ax = plt.subplots()

    # Scatter plot with tiny dark blue dots (marker size s=4)
    ax.scatter(aw_values, fc_values, color='#0047AB', s=4)

    # Least-squares line of best fit in dark red. A straight line is fully
    # determined by its two end points, so there is no need to draw it through
    # every (unsorted) x value.
    m, b = np.polyfit(aw_values, fc_values, 1)
    x_ends = np.array([aw_values.min(), aw_values.max()])
    ax.plot(x_ends, m * x_ends + b, color='#D2042D')

    # Title and labels. The coefficient is a Spearman rank correlation, not
    # Pearson's r; $\rho$ is rendered by matplotlib's mathtext.
    title = f'{"Average" if avg else "Subject"} {subject_id}\nSpearman\'s $\\rho$: {correlation:.2f}'
    ax.set_title(title)
    ax.set_xlabel('Association Weight')
    ax.set_ylabel('Functional Connectivity')

    ax.tick_params(axis='both', which='major', labelsize=14)  # Major ticks
    ax.tick_params(axis='both', which='minor', labelsize=12)  # Minor ticks
    ax.grid(False)

    # Save the plot and close this specific figure so figures do not accumulate
    # when the function is called in a loop.
    plot_filename = f'{"average_" if avg else "subject_"}{subject_id}_scatter_rho20.png'
    fig.savefig(plot_filename, bbox_inches='tight')
    plt.close(fig)

    print(f'Saved scatter plot for {"average" if avg else "subject"} {subject_id} as {plot_filename}')

# Define the pattern for the per-subject association-weight (AW) CSV files
csv_pattern = os.path.join(os.getcwd(), 'derivatives', 'sub*', 'dwi', 'processed_association_matrix_sub-*_20seeds.csv')

# Lists to store subject IDs and correlation results
subject_ids = []
correlations = []

# Per-subject arrays of AW values and FC values. They are concatenated once
# after the loop; extending a Python list element by element would box every
# value into a separate Python object.
aw_chunks = []
fc_chunks = []

# Loop through CSV files and process them (sorted so the order is deterministic)
csv_files = sorted(glob.glob(csv_pattern))
for csv_file in csv_files:
    # Parse the ID from the file name only, so a parent directory that happens
    # to contain "sub-<digits>_" cannot be picked up instead.
    subject_id_match = re.search(r"sub-(\d+)_", os.path.basename(csv_file))
    if not subject_id_match:
        print(f"Could not parse a subject ID from {csv_file}")
        continue

    subject_id = subject_id_match.group(1)
    npy_file_name = f"processed_functional_connectivity_sub-{subject_id}.npy"
    npy_path = os.path.join(os.getcwd(), 'derivatives', f'sub-{subject_id}', 'func', npy_file_name)
    if not os.path.exists(npy_path):
        print(f"Matching NPY file not found for {csv_file}")
        continue

    aw, fc = load_matrices(csv_file, npy_path)
    correlation, p_value, aw_values, fc_values = correlate_aw_fc(aw, fc)

    print(f'Correlation for subject {subject_id}:', correlation, p_value)
    subject_ids.append(subject_id)
    correlations.append(correlation)

    # Keep this subject's vectors for the pooled plot
    aw_chunks.append(np.asarray(aw_values))
    fc_chunks.append(np.asarray(fc_values))

    # Plot scatter plot for this subject's AW values
    plot_scatter(aw_values, fc_values, correlation, subject_id)

# Stop with a clear message instead of writing a NaN average and then failing
# inside the pooled plot on empty input.
if not subject_ids:
    raise RuntimeError(f'No subject with both an AW CSV and an FC NPY file was found for pattern {csv_pattern}')

# Pooled values across all processed subjects
all_aw_values = np.concatenate(aw_chunks)
all_fc_values = np.concatenate(fc_chunks)

# Calculate average correlation (mean of the per-subject coefficients)
avg_correlation = np.mean(correlations)
print(f'Average correlation: {avg_correlation}')

# Save results to a text file
results_file = 'correlation_results.txt'
with open(results_file, 'w') as f:
    for subject_id, subject_corr in zip(subject_ids, correlations):
        f.write(f'Subject {subject_id}: Spearman\'s $\\rho$ correlation = {subject_corr}\n')
    f.write(f'\nAverage correlation: {avg_correlation}\n')

print(f'Results saved to {results_file}')

# Plot the pooled scatter plot. The value shown in the title is the mean of the
# per-subject coefficients, not a coefficient recomputed on the pooled points.
plot_scatter(all_aw_values, all_fc_values, avg_correlation, 'average', avg=True)
