In [1]:
import nibabel as nib
import numpy as np
import networkx as nx
import os
import math
import pandas as pd
from tqdm.notebook import tqdm  # Import tqdm for notebooks
import matplotlib.pyplot as plt
import subprocess
import pickle

In [8]:
def create_threshold_graph(fc_matrix, std_multiplier=2):
    """
    Build an undirected graph from a functional connectivity matrix.

    Every row/column index becomes a node. Two distinct nodes are connected
    when the absolute connection strength between them exceeds
    ``std_multiplier`` times the standard deviation of the absolute values of
    the *whole* matrix (diagonal included).

    Parameters
    ----------
    fc_matrix : array-like, shape (n, n)
        Square connectivity matrix.
    std_multiplier : float, default 2
        Multiplier applied to the standard deviation to obtain the threshold.

    Returns
    -------
    networkx.Graph
        Graph with nodes ``0 .. n-1`` and a ``weight`` attribute on each edge
        holding the (signed) matrix entry.

    Raises
    ------
    ValueError
        If ``fc_matrix`` is not a square 2-D array.

    Notes
    -----
    For a symmetric matrix the two entries of a pair agree. If the matrix is
    not symmetric, an edge is created when either ``[i, j]`` or ``[j, i]``
    passes the threshold, and the lower-triangle entry ``[j, i]`` (i < j)
    supplies the weight whenever it passes on its own.
    """
    fc = np.asarray(fc_matrix)
    if fc.ndim != 2 or fc.shape[0] != fc.shape[1]:
        raise ValueError(
            f"fc_matrix must be a square 2-D array, got shape {fc.shape}"
        )

    n = fc.shape[0]  # Number of nodes
    strength = np.abs(fc)

    # Threshold: std_multiplier times the standard deviation of the absolute values
    threshold = std_multiplier * np.std(strength)

    G = nx.Graph()
    G.add_nodes_from(range(n))

    # Each unordered pair is examined once through the strict upper triangle
    # (k=1 skips the diagonal, so no self-loops), instead of looping over all
    # n*n ordered pairs in Python.
    rows, cols = np.triu_indices(n, k=1)
    upper_hit = strength[rows, cols] > threshold
    lower_hit = strength[cols, rows] > threshold
    keep = upper_hit | lower_hit
    weights = np.where(lower_hit, fc[cols, rows], fc[rows, cols])

    # .tolist() turns the indices into plain Python ints so the node keys match
    # the ones inserted by add_nodes_from; weights stay as matrix scalars.
    for u, v, w in zip(rows[keep].tolist(), cols[keep].tolist(), weights[keep]):
        G.add_edge(u, v, weight=w)

    return G
def load_fc_matrix(file_path):
    """
    Read a functional connectivity matrix from a .pconn.nii file.

    The file at ``file_path`` is opened with nibabel and the array returned
    by the image's ``get_fdata()`` is handed back unchanged.
    """
    return nib.load(file_path).get_fdata()

In [9]:
# Directory containing the pconn files
directory = "/home/tico/Desktop/master_classes/project/BSNIP/pconn"
# Sorted because os.listdir gives no ordering guarantee; this keeps the row
# order of the output CSV identical from run to run.
pconn_files = sorted(f for f in os.listdir(directory) if f.endswith('.pconn.nii'))
std = 2
behavior_path = '/home/tico/Desktop/master_classes/project/behavior/'
behavior_files = sorted(os.listdir(behavior_path))
if not behavior_files:
    raise FileNotFoundError(f"No behavior files found in {behavior_path}")

# Load behavior data: read every tab-separated file and stack them once
behavior_source = pd.concat(
    [pd.read_csv(os.path.join(behavior_path, behavior_file), sep='\t')
     for behavior_file in behavior_files],
    axis=0,
)
behavior_source = behavior_source[["session_id", "Group"]]

# session_id -> Group lookup, built once instead of masking the whole table
# for every file. keep="first" matches taking the first hit of a mask.
group_by_session = (
    behavior_source
    .drop_duplicates(subset="session_id", keep="first")
    .set_index("session_id")["Group"]
)

# Prepare a list to store the results
results = []
# Sessions with a pconn file but no row in the behavior data
unlabeled_sessions = []

for file_name in tqdm(pconn_files, desc="Processing .pconn.nii files"):
    fc_file_path = os.path.join(directory, file_name)
    session_id = file_name[:-len('.pconn.nii')]

    # A missing session must not abort the whole run: keep the graph
    # statistics, leave the label empty and report the session afterwards.
    label = group_by_session.get(session_id)
    if label is None:
        unlabeled_sessions.append(session_id)

    fc_matrix = load_fc_matrix(fc_file_path)
    graph = create_threshold_graph(fc_matrix, std_multiplier=std)
    n = graph.number_of_nodes()
    m = graph.number_of_edges()
    if n > 1:  # To avoid division by zero in calculations
        average_degree = sum(deg for _, deg in graph.degree()) / n
        # Reference values derived only from n and the average degree
        theoretical_avg_c = average_degree / (n - 1)
        theoretical_avg_d = math.log(n) / math.log(average_degree) if average_degree > 1 else 0

        # Clustering is averaged over the WHOLE graph, so it is comparable to
        # theoretical_avg_c above. The path length is restricted to the
        # largest connected component, as required on a disconnected graph.
        largest_cc = max(nx.connected_components(graph), key=len)
        subgraph = graph.subgraph(largest_cc)
        size_of_largest_cc = len(largest_cc)
        avg_clustering = nx.average_clustering(graph)
        avg_path_length = nx.average_shortest_path_length(subgraph) if size_of_largest_cc > 1 else 0

        row = [
            file_name, label, n, m, average_degree, theoretical_avg_c, avg_clustering,
            theoretical_avg_d, avg_path_length, math.log(n), math.log(math.log(n)),
            size_of_largest_cc
        ]
    else:
        # Degenerate graph (0 or 1 node): every derived statistic is reported as 0
        row = [file_name, label, n, m, 0, 0, 0, 0, 0, 0, 0, 0]

    results.append(row)

# Create a DataFrame
df = pd.DataFrame(results, columns=[
    'File Name', 'label', 'Number of Nodes', 'Number of Edges', 'Average Degree', 'Theoretical Avg Clustering',
    'Average Clustering', 'Theoretical Avg Path Length', 'Average Path Length',
    'Log of Nodes', 'Log Log of Nodes', 'Size of Largest CC'
])

# Save to CSV
df.to_csv('graph_statistics.csv', index=False)
print("Data saved to 'graph_statistics.csv'.")
if unlabeled_sessions:
    print(f"Warning: no Group label found for {len(unlabeled_sessions)} session(s): {unlabeled_sessions}")

Processing .pconn.nii files:   0%|          | 0/638 [00:00<?, ?it/s]