### Visualize NaNs from all subjects in a carpet plot and flat map

In [None]:
from dotenv import load_dotenv
load_dotenv()
import os
import sys
sys.path.append(os.getenv('PYTHONPATH')) 
import numpy as np
import matplotlib.pyplot as plt
import h5py
import hcp_utils as hcp
import matplotlib.pyplot as plt
from collections import defaultdict
from matplotlib.colors import LinearSegmentedColormap, Normalize
from nilearn import plotting
import pandas as pd

#local
from src.utils.transforms import SelectROIs

In [None]:
# Dataset and project locations are read from environment variables.
root = os.path.join(os.getenv("DATASETS_ROOT"), "MOSAIC")
project_root = os.path.join(os.getenv("PROJECT_ROOT"))
print(f"root: {root}")
print(f"project_root: {project_root}")

# Select every Glasser group, GlasserGroup_1 through GlasserGroup_22.
# Smaller selections tried here before: ["GlasserGroup_1", ..., "GlasserGroup_5"],
# ["LO1", "LO2"], ["V1"].
n_glasser_groups = 22
rois = [f"GlasserGroup_{group}" for group in range(1, n_glasser_groups + 1)]
ROI_selection = SelectROIs(selected_rois=rois)


In [None]:
# Tally, per subject/dataset, how often each vertex is listed as NaN in the
# beta responses, and collect every vertex index that is NaN at least once.
nan_indices_running = set()  # vertex indices listed as NaN in at least one beta response
numvertices = 59412  # length of each per-subject count vector (presumably the cortical vertex count)
# subject_dataset -> per-vertex count of responses in which that vertex was NaN
number_nans = defaultdict(lambda: np.zeros((numvertices,)))
# subject_dataset -> number of responses belonging to that subject
total_responses = defaultdict(lambda: 0)
with h5py.File(os.path.join(root, 'mosaic_version-1_0_0_new.hdf5'), 'r') as h5_file:
    print(f"Keys: {h5_file.keys()}")
    for subject_dataset in h5_file.keys():
        print(subject_dataset)
        betas = h5_file[subject_dataset]['betas']  # look the group up once per subject
        for response in betas.keys():
            # Flatten to a 1-D integer index array so it can be used for indexing.
            nan_indices = np.asarray(betas[response].attrs['nan_indices'], dtype=np.intp).ravel()
            total_responses[subject_dataset] += 1
            # np.add.at is unbuffered, so an index that appears more than once
            # is counted once per occurrence (same as incrementing in a loop).
            np.add.at(number_nans[subject_dataset], nan_indices, 1)
            # Adding an element that is already present is a no-op for a set.
            nan_indices_running.update(nan_indices.tolist())



In [None]:
# Same tally as for the betas, but over the noise-ceiling entries. Subjects
# without a 'noiseceilings' group are skipped.
nan_indices_running_noiseceiling = set()  # vertex indices listed as NaN in at least one noise ceiling
numvertices = 59412  # length of each per-subject count vector (presumably the cortical vertex count)
# subject_dataset -> per-vertex count of noise-ceiling entries in which that vertex was NaN
number_nans_noiseceiling = defaultdict(lambda: np.zeros((numvertices,)))
# subject_dataset -> number of noise-ceiling entries belonging to that subject
total_responses_noiseceiling = defaultdict(lambda: 0)
with h5py.File(os.path.join(root, 'mosaic_version-1_0_0_new.hdf5'), 'r') as h5_file:
    print(f"Keys: {h5_file.keys()}")
    for subject_dataset in h5_file.keys():
        print(subject_dataset)
        subject_group = h5_file[subject_dataset]
        if 'noiseceilings' not in subject_group:
            continue
        noiseceilings = subject_group['noiseceilings']  # look the group up once per subject
        for response in noiseceilings.keys():
            # Flatten to a 1-D integer index array so it can be used for indexing.
            nan_indices = np.asarray(noiseceilings[response].attrs['nan_indices'], dtype=np.intp).ravel()
            total_responses_noiseceiling[subject_dataset] += 1
            # np.add.at is unbuffered, so an index that appears more than once
            # is counted once per occurrence (same as incrementing in a loop).
            np.add.at(number_nans_noiseceiling[subject_dataset], nan_indices, 1)
            # Adding an element that is already present is a no-op for a set.
            nan_indices_running_noiseceiling.update(nan_indices.tolist())

In [None]:
# Number of distinct vertices that are NaN in the betas and/or the noise ceilings.
print(len(nan_indices_running_noiseceiling | nan_indices_running))
# Every NaN index found in the noise-ceiling data must also occur in the beta
# data. If this ever fails, the two index sets simply have to be combined.
assert nan_indices_running_noiseceiling <= nan_indices_running

In [None]:
# Persist every vertex index that was NaN at least once across the whole dataset.
nan_indices_path = os.path.join(root, "nan_indices_dataset_new.npy")
np.save(nan_indices_path, np.array(list(nan_indices_running)))

In [None]:
# Count, per ROI, how many of its vertices were NaN at least once.
rois_with_nans = defaultdict(int)
for vertex_idx in nan_indices_running:
    rois_with_nans[ROI_selection.index_to_roi[vertex_idx]] += 1

# Column-oriented summary table: one entry per ROI that has at least one NaN vertex.
cols = ['ROI', 'number_nans', 'total_vertices', 'percent_nans']
nan_data = {col: [] for col in cols}
for roi_name, nan_count in rois_with_nans.items():
    nan_data['ROI'].append(roi_name)
    nan_data['number_nans'].append(nan_count)
    roi_size = len(ROI_selection.roi_to_index[roi_name])
    nan_data['total_vertices'].append(roi_size)
    # Share of the ROI's vertices that were NaN at least once, in percent.
    percent = nan_count / roi_size * 100
    nan_data['percent_nans'].append(f"{percent:.2f}")
    print(f"ROI {roi_name}: {percent:.4f}% {nan_count}")


In [None]:
nan_df = pd.DataFrame(nan_data)

fig, ax = plt.subplots(figsize=(10, 6))

# Gray bars (total vertices per ROI) are drawn first; the red bars
# (undefined vertices per ROI) are then drawn over them.
bar_layers = (
    ('total_vertices', 'gray', 'Total Vertices'),
    ('number_nans', 'red', 'Undefined Vertices'),
)
for column, bar_color, legend_label in bar_layers:
    ax.bar(nan_df['ROI'], nan_df[column], color=bar_color, label=legend_label)

# Annotate each bar with its counts: the number of undefined vertices just
# below the top of the gray bar, and the total vertex count just above it.
text_style = dict(ha='center', va='bottom', fontsize=10, color='black')
for record in nan_df.itertuples():
    ax.text(record.Index, record.total_vertices - 60, f"{record.number_nans}", **text_style)
    ax.text(record.Index, record.total_vertices + 5, f"{record.total_vertices}", **text_style)

# Axis labels, title and legend
ax.set(
    xlabel='ROI',
    ylabel='Number of Vertices',
    title='Number of Undefined Vertices and Total Vertices by ROI',
)
ax.legend()
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()



In [None]:
def sort_key(item: str) -> tuple:
    """Return a sort key ordering "sub-XX_dataset" labels by dataset, then subject.

    Args:
        item: Label of the form "sub-<number>_<dataset>", e.g. "sub-01_NSD".

    Returns:
        A tuple ``(dataset_lowercased, subject_number)``. Subjects therefore
        sort numerically within a dataset, and datasets sort case-insensitively.

    Raises:
        ValueError: If ``item`` has no underscore or the subject number is not
            an integer.
        IndexError: If the subject part contains no "-".
    """
    # Split on the first underscore only, so a dataset name that itself
    # contains underscores stays intact instead of breaking the unpacking.
    subject, dataset = item.split("_", 1)
    subject_number = int(subject.split("-")[1])  # numeric part of "sub-XX"
    return (dataset.lower(), subject_number)

In [None]:
# Per subject: for every vertex, the fraction of that subject's responses in
# which the vertex was NaN (NaN count divided by the subject's response count).
percent_nans = {k: number_nans[k]/t for k, t in total_responses.items()}
#housekeeping
fs = 10 #fontsize

# One heatmap row per subject, ordered by dataset and then subject number.
y_labels = sorted(percent_nans, key=sort_key)
data_matrix = np.array([percent_nans[key] for key in y_labels])  # rows follow y_labels

# Colormap running white -> black -> red.
colors = [(1, 1, 1), (0, 0, 0), (1, 0, 0)]
n_bins = 256
custom_cmap = LinearSegmentedColormap.from_list("custom_white_to_red", colors, N=n_bins)

# Linear scaling: 0 maps to white, the largest value in the matrix maps to red.
norm = Normalize(vmin=0, vmax=np.max(data_matrix))

# Plot the heatmap
fig, ax = plt.subplots(figsize=(10, 14))
im = ax.imshow(data_matrix, aspect='auto', cmap=custom_cmap, norm=norm, origin='upper')

# No x ticks (one column per vertex); one y tick per subject.
ax.set_xticks([])
ax.set_yticks(np.arange(data_matrix.shape[0]))
ax.set_yticklabels(y_labels, fontsize=fs)

# Label the heatmap through its own Axes object. fig.add_axes below makes the
# colorbar axes the current axes, so pyplot-level xlabel/ylabel/title calls
# made after it would be applied to the colorbar instead of the heatmap.
ax.set_xlabel("Vertex", fontsize=fs)
ax.set_ylabel("Subject", fontsize=fs)
ax.set_title("Carpet Plot", fontsize=fs)

# Colorbar built from the image itself so that it shows the same colormap and
# value range as the heatmap.
cbar_ax = fig.add_axes([0.92, 0.15, 0.03, 0.7])  # [left, bottom, width, height]
cbar = fig.colorbar(im, cax=cbar_ax, orientation="vertical")
cbar.set_label("Fraction of responses with NaN", rotation=270, labelpad=15)

# Show the plot
plt.show()

### Plot NaNs on a flat map

In [None]:
ext_list = ['png']  # file extensions to write; only used when save_flag is True
save_flag = False  # True: save every figure to disk, False: only display

# Average the per-subject NaN fractions over subjects (one value per vertex).
data_matrix_mean = data_matrix.mean(axis=0)
print((data_matrix_mean > 0).sum())  # number of vertices that are NaN for at least one subject
print(data_matrix_mean.shape)
if save_flag:
    save_root = os.path.join(project_root, "src", "output_decoding")
    if not os.path.exists(save_root):
        os.makedirs(save_root)
    filename_core = "nan_indices_ROIs-all_target"


def _save_current_figure(name_suffix):
    """Save the current figure once per extension in ext_list; does nothing unless save_flag is set."""
    if not save_flag:
        return
    for ext in ext_list:
        target = os.path.join(save_root, f"{filename_core}{name_suffix}.{ext}")
        # Only PNG output is written with an explicit resolution.
        savefig_kwargs = {'dpi': 300} if ext == 'png' else {}
        plt.savefig(target, **savefig_kwargs)


# Inflated-surface views to render; empty means only the flat maps are drawn.
# Options: 'lateral', 'medial', 'dorsal', 'ventral', 'anterior', 'posterior'
views = []

# Place the per-vertex means at the start of a zero-filled vector of length
# 91282 (presumably the full grayordinate count expected by hcp_utils) and
# convert the fractions to percent.
stat = np.zeros(91282)
stat[:data_matrix_mean.shape[0]] = data_matrix_mean
stat = stat * 100
print(stat.shape)

# Per hemisphere: flat mesh, function extracting that hemisphere's data, background map.
flat_maps = {
    'left': (hcp.mesh.flat_left, hcp.left_cortex_data, hcp.mesh.sulc_left),
    'right': (hcp.mesh.flat_right, hcp.right_cortex_data, hcp.mesh.sulc_right),
}
for hemi in ['left', 'right']:
    # Inflated surface, one figure per requested view.
    mesh = hcp.mesh.inflated
    cortex_data = hcp.cortex_data(stat)
    bg = hcp.mesh.sulc
    for view in views:
        display = plotting.plot_surf_roi(
            mesh, cortex_data, hemi=hemi,
            threshold=1e-9, bg_map=bg, view=view, cmap='hot',
        )
        _save_current_figure(f"_mesh-inflated_view-{view}_hemi-{hemi}")

    # Flattened surface for this hemisphere.
    flat_mesh, extract_hemi, flat_bg = flat_maps[hemi]
    cortex_data = extract_hemi(stat)
    display = plotting.plot_surf_roi(
        flat_mesh, cortex_data,
        threshold=1e-9, bg_map=flat_bg, colorbar=True, cmap='hot',
    )
    _save_current_figure(f"_mesh-flat_hemi-{hemi}")
    plt.show()