### Background info

Followed the 3-year reliability paper: https://doi.org/10.1016/j.neuroimage.2021.118516
1. "Using these source estimates, we then estimated the power of cortical activity in the canonical frequency bands: delta (2–4 Hz), theta (4–8 Hz), alpha (8–12 Hz), beta (15–30 Hz), low gamma (30–80 Hz), and high gamma (80–150 Hz). We used Welch's method for estimating power spectrum densities (PSD) per four-second epoch across each MEG recording, with 1-second sliding Hamming windows overlapping at 50%. We then standardized the PSD values at each frequency bin to the total power across the frequency spectrum. We then averaged PSD maps (ie. source estimates) across epochs for each participant to obtain one set of six PSD maps (one per frequency band) per participant per visit."
2. "We projected these maps onto the MNI ICBM152 brain template (Fonov et al., 2009) and applied a 3 mm FWHM smoothing kernel."
3. "Used single rater two-way mixed-effects model and absolute agreement definition, or ICC(A,1)... ICC estimates and their 95% confidence intervals were calculated using the Matlab Central file-exchange ICC.m function. This ICC calculation was applied at every vertex in the PSD maps to obtain spatially specific reliability estimates at each of the frequency bands. This resulted in an ICC map per frequency band."
4. "To further visualize the reliability of source power in each frequency band, regions of interest (Brainstorm “scouts”) in the frontal, parietal, temporal, and occipital lobes were applied to each participant's PSD map. The average power (relative to total spectral power) across each lobe was extracted for each participant and each visit. ICCs of these values were then calculated using the same ICC(A,1) model."
- "ICC < .5 indicates poor reliability, values between .5 to .75 indicates moderate reliability, values between .75 and .9 indicates good reliability, and values greater than .9 indicate excellent reliability. Importantly, we evaluated the level of reliability based on the 95% confidence interval of the ICC estimate, not the estimate itself, since the interval reveals the chance that the true ICC value lands on any point between the bounds."

### Morph

Morph before sprint
- The 3yr reliability paper applied a 3mm smoothing kernel
Sources: 
- https://mne.tools/stable/auto_examples/inverse/morph_surface_stc.html

### ICC

https://github.com/raphaelvallat/pingouin/blob/dcfdc82bbc7f1ba5991b80717a5ca156617443e8/pingouin/reliability.py#L158
ICC2 (single random raters)

### Set up

In [None]:
# IMPORT PACKAGES
# Standard library
import os
import pickle
import re
from functools import partial
from multiprocessing import Pool

# Third-party
import numpy as np
import pandas as pd
import pingouin as pg

# NOTE(review): the morphing and plotting cells further down also call `mne` and
# `plt` (matplotlib.pyplot), which are never imported in this notebook; import
# them in the session before running those cells.

# Things to loop through
# dictionary = { 
#     "NVAR008": ["251016", "251017", "251023", "251113"], 
#     "NVAR010": ["251027", "251028", "251103", "251124"], 
#     "NVAR011": ["251030", "251031", "251106", "251127"]
# }
# runs = ["rest1", "rest2"]
# bands = ["delta", "theta", "alpha", "beta", "g_low", "g_high"]

# Root folder of the pipeline outputs; the per-vertex / per-window CSVs are read
# from <base_dir>/output and the ICC pickles are written directly into <base_dir>
base_dir = "/home/isw3/sprint/output_noSSP_nomorph_winL4"

# Constants
N_WINDOWS = 115 # number of windows; valid window indices are 0 to 114
N_VERTICES = 3 # reduced for testing; the trailing 8196 is presumably the full vertex count -- confirm  #8196
N_ITERATIONS = 3 # reduced for testing; the trailing 100 is presumably the intended number of random draws  #100






# WITHIN-SCAN

# One run per subject: day 1, rest 1
runnames = [
    "sub_NVAR008_251016_rest1",
    "sub_NVAR010_251027_rest1",
]

# Distance (in windows) between the two ratings of a pair
r = 2

# Draw one start window per iteration up front. The RNG is reseeded with the
# iteration number before each draw, so the sequence is reproducible.
windowAs = []
candidate_starts = range(N_WINDOWS - r)  # keeps windowA + r a valid window index
for seed in range(N_ITERATIONS):
    np.random.seed(seed)
    windowAs.append(np.random.choice(candidate_starts))


# Read every exponent series once, before any ICC work starts.
# Layout: data_cache[runname][vertex] -> array of exponents, indexed by window
print("Pre-loading files:")
output_dir = os.path.join(base_dir, "output")
data_cache = {
    name: {
        v: pd.read_csv(
            os.path.join(output_dir, name + "_fooof_vertex" + str(v) + ".csv"),
            usecols=["exponent"],
        )["exponent"].values
        for v in range(N_VERTICES)
    }
    for name in runnames
}
print("Done loading files.")


def compute_icc_for_vertex(vertex, windowA):
    """
    Assemble the rating pairs for one vertex and one window pair.

    Parameters
    ----------
    vertex : index of the vertex to process
    windowA : index of the first window; the second window is windowA + r

    Returns
    -------
    [vertex, icc] (one output row) on success, or None if anything raised
    while the ratings were being assembled (the error is printed).

    NOTE: the pingouin call is currently disabled, so the returned "icc" is
    only a placeholder equal to windowA.
    """
    print(f"Now working on vertex {vertex} windowA {windowA}")

    try:
        # Long-format ratings: one target per run, two "raters" (the two windows).
        # Using the run name as target only works while each subject contributes
        # a single run; this will need revisiting once that stops being true.
        records = []
        for name in runnames:
            series = data_cache[name][vertex]
            records.append((name, "valA", series[windowA]))
            records.append((name, "valB", series[windowA + r]))
        df_vertex = pd.DataFrame(records, columns=["TARGET", "RATER", "RATING"])

        # Disabled ICC computation, kept for reference:
        #   results = pg.intraclass_corr(data=df_vertex, targets='TARGET', raters='RATER', ratings='RATING')
        #   icc = results.loc[results['Type'] == 'ICC2', 'ICC'].values[0]
        icc = windowA

        return [vertex, icc]

    except Exception as e:
        print(f"compute_icc_for_vertex for {vertex} {windowA} returned error: {e}")
        return None


def compute_icc():
    """
    Run compute_icc_for_vertex for every (window, vertex) pair and save the result.

    Builds one row [WINDOW_A, VERTEX, ICC] per pair whose worker did not
    return None, adds the Fisher z-transform of the ICC as ICC_Z, and pickles
    the DataFrame to <base_dir>/ICC_<r>.pkl.
    """
    # Window-major task order, so output rows stay grouped by window
    tasks = [
        (vertex, windowA)
        for windowA in windowAs
        for vertex in range(N_VERTICES)
    ]

    # One pool for the whole job instead of a new pool per window
    with Pool() as pool:
        results = pool.starmap(compute_icc_for_vertex, tasks)

    # One list per row. (Extending `rows` with the row's elements would flatten
    # everything into a 1-D list that cannot be shaped into three columns.)
    rows = [
        [windowA, *result]
        for (_, windowA), result in zip(tasks, results)
        if result is not None
    ]

    df = pd.DataFrame(rows, columns=["WINDOW_A", "VERTEX", "ICC"])
    # Fisher z-transform; yields inf for |ICC| == 1 and NaN for |ICC| > 1
    df["ICC_Z"] = np.arctanh(df["ICC"].astype(float))

    with open(os.path.join(base_dir, "ICC_" + str(r) + ".pkl"), "wb") as f:
        pickle.dump(df, f)


# Run the within-scan ICC job using the windows and exponent cache prepared above
compute_icc()

Pre-loading files:
Done loading files.


In [None]:
# BETWEEN-SCANS:

# Visit dates per subject. "reference" is visit 1, whose rest1 scan is always
# the reference; the other keys give the visit compared at that time scale.
_VISIT_DATES = {
    "sub_NVAR008": {"reference": "251016", "day": "251017", "week": "251023", "month": "251113"},
    "sub_NVAR010": {"reference": "251027", "day": "251028", "week": "251103", "month": "251124"},
    "sub_NVAR999": {"reference": "251016", "day": "251017", "week": "251023", "month": "251113"},
}


def _scan_pair(subject, dates, scale):
    """Return [reference prefix, comparison prefix] for one subject and time scale."""
    reference = f"{subject}_{dates['reference']}_rest1_fooof_vertex"
    if scale == "40min":
        # Same visit as the reference, second resting run
        comparison = f"{subject}_{dates['reference']}_rest2_fooof_vertex"
    else:
        comparison = f"{subject}_{dates[scale]}_rest1_fooof_vertex"
    return [reference, comparison]


# comparison_dictionary[scale][subject] = [reference file prefix, comparison file prefix]
comparison_dictionary = {
    scale: {
        subject: _scan_pair(subject, dates, scale)
        for subject, dates in _VISIT_DATES.items()
    }
    for scale in ("40min", "day", "week", "month")
}

# Test with sparse r-values
r = 0 # Just the same window in both scans

scale = "40min"

# Exponent series already read from disk, keyed by (file prefix, vertex).
# The files do not depend on the iteration, so each one is read only once
# instead of once per iteration.
exponent_cache = {}


def load_exponents(file_prefix, vertex):
    """Return the "exponent" column of <file_prefix><vertex>.csv as an array (cached)."""
    key = (file_prefix, vertex)
    if key not in exponent_cache:
        csv_path = os.path.join(base_dir, "output", file_prefix + str(vertex) + ".csv")
        exponent_cache[key] = pd.read_csv(csv_path, usecols=["exponent"])["exponent"].values
    return exponent_cache[key]


# Initialize rows, which will turn into the dataframe that will contain ICC estimates for each vertex
rows = []

# 100 iterations
for i in range(100):

    # Randomly sample two time windows that are separated by a distance r
    # (reseeding with the iteration number keeps the draws reproducible)
    np.random.seed(i)
    windowA = np.random.choice(range(N_WINDOWS - r))
    windowB = windowA + r

    # ICC has to be computed separately for each vertex
    for vertex in range(N_VERTICES):

        # Long-format ratings for this vertex: per subject, valA comes from the
        # reference scan at windowA and valB from the comparison scan at windowB.
        # Building the frame from a list in one go keeps RATING numeric.
        ratings = []
        for subject, (prefixA, prefixB) in comparison_dictionary[scale].items():
            ratings.append((subject, "valA", load_exponents(prefixA, vertex)[windowA]))
            ratings.append((subject, "valB", load_exponents(prefixB, vertex)[windowB]))
        df_vertex = pd.DataFrame(ratings, columns=["TARGET", "RATER", "RATING"])

        # Compute ICC(A, 1) (single-rater) (ICC2)
        results = pg.intraclass_corr(data=df_vertex, targets='TARGET', raters='RATER', ratings='RATING')
        icc2 = results.loc[results['Type'] == 'ICC2', 'ICC'].values[0]

        # Add ICC for this vertex to the total output
        rows.append([vertex, icc2])

# Save the entire thing
# There will be 100 ICC estimates for each vertex, meaning that the output will have length N_VERTICES*100
df = pd.DataFrame(rows, columns=["VERTEX", "ICC"])
# Later, need to Fisher transform per vertex
with open(os.path.join(base_dir, "ICC_" + scale + str(r) + ".pkl"), "wb") as f:
    pickle.dump(df, f)

In [None]:
# Display the accumulated [vertex, ICC] rows from the between-scans cell
rows

In [None]:
# BETWEEN-RUN

# Filenames (one per-window CSV per window index is appended to these prefixes)
subject1_file = "sub_NVAR008_251016_rest1_fooof_window"
subject2_file = "sub_NVAR010_251027_rest1_fooof_window"

r = 2

# One [vertex, ICC2, CI] row per vertex per iteration, collected across ALL
# iterations (creating the output frame inside the loop kept only the last one)
icc_rows = []

# NOTE(review): two subjects x two windows gives only four ratings per vertex;
# check that pg.intraclass_corr accepts so few observations before relying on this.
for i in range(100):

    # Get indices of valA and valB. windowA stops at N_WINDOWS - r - 1 so that
    # windowB never exceeds the last window index (N_WINDOWS - 1).
    np.random.seed(i)
    windowA = np.random.choice(range(N_WINDOWS - r))
    windowB = windowA + r

    # Each per-window file is indexed by vertex, so read the two files of every
    # subject once per iteration rather than once per vertex
    window_tables = {}
    for subject_file in [subject1_file, subject2_file]:

        # Extract subject name
        subject = re.search(r'sub_[A-Z0-9]+', subject_file).group()

        # Load files
        valA_file = pd.read_csv(os.path.join(base_dir, "output", subject_file + str(windowA) + ".csv"))
        valB_file = pd.read_csv(os.path.join(base_dir, "output", subject_file + str(windowB) + ".csv"))
        window_tables[subject] = (valA_file, valB_file)

    # ICC has to be computed separately for each vertex
    for vertex in range(N_VERTICES):

        ratings = []
        for subject, (valA_file, valB_file) in window_tables.items():

            # Extract values
            valA = valA_file.loc[vertex, "exponent"]
            valB = valB_file.loc[vertex, "exponent"]

            print(vertex)
            print(subject)
            print("valA " + str(valA))
            print("valB " + str(valB))

            # Add the pair of values
            ratings.append((subject, "valA", valA))
            ratings.append((subject, "valB", valB))
        df_vertex = pd.DataFrame(ratings, columns=["TARGET", "RATER", "RATING"])

        # Compute ICC on this vertex; pick the ICC2 row by type, not by position
        results = pg.intraclass_corr(data=df_vertex, targets='TARGET', raters='RATER', ratings='RATING')
        icc2_row = results.loc[results['Type'] == 'ICC2'].iloc[0]
        icc_rows.append([vertex, icc2_row['ICC'], icc2_row["CI95%"]])

# Save the entire thing
# NOTE(review): this is the same file name that compute_icc() writes for the same r.
df = pd.DataFrame(icc_rows, columns=["VERTEX", "ICC2", "CI"])
with open(os.path.join(base_dir, "ICC_" + str(r) + ".pkl"), "wb") as f:
    pickle.dump(df, f)

# Fisher transform


In [None]:
# Scratch check: load a single per-window table.
# `windowA` is whatever value an earlier cell left behind.
subject_file = "sub_NVAR008_251016_rest1_fooof_window"
window_csv = os.path.join(base_dir, "output", f"{subject_file}{windowA}.csv")
valA_file = pd.read_csv(window_csv)

In [None]:
# Morph one subject-space source estimate onto fsaverage and save it next to the input
base_path = "C:/meg/0215_NVAR_sprint_fooof"
prefix = "sub_NVAR008_251016_rest1"
input_path = os.path.join(base_path, prefix)
morphed_path = os.path.join(base_path, prefix + "_morphed")
freesurfer_dir = "C:/meg/NVAR_ICC_day/MRI/freesurfer"

print(f"Path to stc: {input_path}")
stc = mne.read_source_estimate(input_path)
print("stc loaded")

print("Morphing to fsaverage:")
# The subject ID is the leading "sub_XXXX" part of the file prefix
subject = re.search(r'sub_[A-Z0-9]+', prefix).group()
morph = mne.compute_source_morph(
    stc,
    subject_from=subject,
    subject_to="fsaverage",
    subjects_dir=freesurfer_dir,
)
stc_morphed = morph.apply(stc)
stc_morphed.save(morphed_path)
print("Morph done")


In [None]:
# SPARSE ICC
# Reference point always visit1_scan1
# NOTE(review): unfinished draft. The deeply indented fragment inside the vertex
# loop does not parse as written, and `example_stc`, `window`, `subject`,
# `session` and `run` are not assigned anywhere in this cell.

base_filename = "sub_NVAR008_251016_rest1_fooof_window"

# Window pair: valA / valB are used here as window indices, r windows apart
r = 2
valA = 0
valB = valA + r

# Per-window tables for the two windows of the pair
valAfile = pd.read_csv(os.path.join(base_dir, "output", base_filename + str(valA) + ".csv"))
valBfile = pd.read_csv(os.path.join(base_dir, "output", base_filename + str(valB) + ".csv"))

# Loop through vertices
for vertex in range(len(example_stc.vertices)):

    # Since ICC has to be computed separately for each vertex, you need a dataframe for each vertex
    df_vertex = pd.DataFrame(columns=["TARGET", "RATER", "RATING"])

    # Get valA for this vertex

                            valA = stc.data[vertex, window]
                        print(valA)
                        valB = stc.data[vertex, window+2]
                        print(valB)

                        # Add the pair of values
                        df_vertex.loc[len(df_vertex)] = ["_".join([subject, session, run]), "valA", valA]
                        df_vertex.loc[len(df_vertex)] = ["_".join([subject, session, run]), "valB", valB]

    # Get valB for this vertex 










# Loop through band, subject, scan, and run
for band in bands:

    # One [VERTEX, ICC2, CI] row per vertex for this band
    band_rows = []

    # Iterate over the rows of stc.data (one per vertex). stc.vertices is a list
    # with one array per hemisphere, so its length is not the vertex count.
    for vertex in range(stc.data.shape[0]):

        # Long-format ratings for this vertex: (subject_session_run, valA/B, power).
        # The target is subject_session_run because the ICC is estimated WITHIN a run.
        ratings = []

        # Loop through all runs for all sessions for all subjects
        for subject in dictionary:
            for session in dictionary[subject]:
                for run in runs:

                    # TODO: read the source estimate for this band/subject/session/run.
                    # Until that is done, every target reuses whatever `stc` is in memory.
                    target = "_".join([subject, session, run])

                    # Loop through run windows; the run has 55 windows, numbered 0-54
                    for window in range(53): # Need to stop at 52 so that the final valB is = 54
                        print("Value A: " + str(window))
                        print("Value B: " + str(window+2))

                        valA = stc.data[vertex, window]
                        print(valA)
                        valB = stc.data[vertex, window+2]
                        print(valB)

                        # Add the pair of values
                        ratings.append((target, "valA", valA))
                        ratings.append((target, "valB", valB))

        df_vertex = pd.DataFrame(ratings, columns=["TARGET", "RATER", "RATING"])

        # Compute ICC on this vertex; pick the ICC2 row by type, not by position
        results = pg.intraclass_corr(data=df_vertex, targets='TARGET', raters='RATER', ratings='RATING')
        icc2_row = results.loc[results['Type'] == 'ICC2'].iloc[0]

        # Record this vertex (inside the vertex loop, so every vertex is kept)
        band_rows.append([vertex, icc2_row['ICC'], icc2_row["CI95%"]])

    # Save the entire thing
    df = pd.DataFrame(band_rows, columns=["VERTEX", "ICC2", "CI"])
    with open("C:/meg/NVAR_ICC/ICC_inrun_" + band + ".pkl", "wb") as f:
        pickle.dump(df, f)

In [None]:
# ACROSS RUNS: REST1 VS REST2
# Call values from rest1 "valA" and values from rest2 "valB"
# NOTE(review): unfinished draft. The `stc_rest1 =` / `stc_rest2 =` assignments
# below have no right-hand side yet, so this cell does not parse as written.

# Loop through band, subject, and session
for band in bands: 

    # Create a dataframe just for this band
    # The dataframe will have the ICC (and CI) at each vertex
    df = pd.DataFrame(columns=["VERTEX", "ICC2", "CI"])

    # NOTE(review): `stc` is not assigned in this cell. Elsewhere in this notebook
    # SourceEstimate vertices are a list of two per-hemisphere arrays, in which
    # case len(stc.vertices) is 2 rather than the number of vertices -- confirm.
    for vertex in range(len(stc.vertices)):

        # Create a dataframe just for this vertex
        df_vertex = pd.DataFrame(columns=["TARGET", "RATER", "RATING"]) # subject_scan, valA/B, power
        # The target here is subject_session: rest1 and rest2 of the same session are the two "raters"

        # Loop through all sessions for all subjects
        for subject in dictionary: 
            for session in dictionary[subject]:

                # Loop through run windows; the run has 55 windows, numbered 0-54
                for window in range(55): 

                    # Get file for runA
                    # Read data
                    # TODO
                    stc_rest1 = 
                    valA = stc_rest1.data[vertex, window]

                    # Get file for runB
                    stc_rest2 = 
                    valB = stc_rest2.data[vertex, window]

                    # Add the pair of values
                    df_vertex.loc[len(df_vertex)] = ["_".join([subject, session]), "valA", valA]
                    df_vertex.loc[len(df_vertex)] = ["_".join([subject, session]), "valB", valB]

        # Compute ICC on this vertex
        # (row 1 / 'ICC' and 'CI95%' are read positionally from the pingouin results table)
        results = pg.intraclass_corr(data=df_vertex, targets='TARGET', raters='RATER', ratings='RATING')
        icc2 = results.loc[1, 'ICC']
        ci = results.loc[1, "CI95%"]

    # Add to big df
    # NOTE(review): this line is outside the vertex loop, so only the last
    # vertex's result is added for each band -- confirm whether that is intended.
    df.loc[len(df)] = [vertex, icc2, ci]

    # Save the entire thing
    with open("C:/meg/NVAR_ICC/ICC_rest1-vs-rest2_" + band + ".pkl", "wb") as f: 
        pickle.dump(df, f)

In [None]:
# ACROSS DAYS: TODO!!!!!!!
# Call values from rest1 "valA" and values from rest2 "valB"
# NOTE(review): unfinished draft, currently a copy of the rest1-vs-rest2 cell.
# The `stc_rest1 =` / `stc_rest2 =` assignments below have no right-hand side
# yet, so this cell does not parse as written.

# Loop through band, subject, and session
for band in bands: 

    # Create a dataframe just for this band
    # The dataframe will have the ICC (and CI) at each vertex
    df = pd.DataFrame(columns=["VERTEX", "ICC2", "CI"])

    # NOTE(review): `stc` is not assigned in this cell. Elsewhere in this notebook
    # SourceEstimate vertices are a list of two per-hemisphere arrays, in which
    # case len(stc.vertices) is 2 rather than the number of vertices -- confirm.
    for vertex in range(len(stc.vertices)):

        # Create a dataframe just for this vertex
        df_vertex = pd.DataFrame(columns=["TARGET", "RATER", "RATING"]) # subject_scan, valA/B, power
        # NOTE(review): targets and raters still follow the rest1-vs-rest2 design;
        # they have not been adapted to an across-days comparison yet.

        # Loop through all sessions for all subjects
        for subject in dictionary: 


            # 
            for session in dictionary[subject]:

                # Loop through run windows; the run has 55 windows, numbered 0-54
                for window in range(55): 

                    # Get file for runA
                    # Read data
                    # TODO
                    stc_rest1 = 
                    valA = stc_rest1.data[vertex, window]

                    # Get file for runB
                    stc_rest2 = 
                    valB = stc_rest2.data[vertex, window]

                    # Add the pair of values
                    df_vertex.loc[len(df_vertex)] = ["_".join([subject, session]), "valA", valA]
                    df_vertex.loc[len(df_vertex)] = ["_".join([subject, session]), "valB", valB]

        # Compute ICC on this vertex
        # (row 1 / 'ICC' and 'CI95%' are read positionally from the pingouin results table)
        results = pg.intraclass_corr(data=df_vertex, targets='TARGET', raters='RATER', ratings='RATING')
        icc2 = results.loc[1, 'ICC']
        ci = results.loc[1, "CI95%"]

    # Add to big df
    # NOTE(review): this line is outside the vertex loop, so only the last
    # vertex's result is added for each band -- confirm whether that is intended.
    df.loc[len(df)] = [vertex, icc2, ci]

    # Save the entire thing
    # NOTE(review): same output file name as the rest1-vs-rest2 cell; running both
    # would overwrite one result with the other.
    with open("C:/meg/NVAR_ICC/ICC_rest1-vs-rest2_" + band + ".pkl", "wb") as f: 
        pickle.dump(df, f)

In [None]:
# DAYS AS RATERS
# Shape of stc_average (one power value per fsaverage vertex):
#   array([[0.0053271 ],
#          [0.00452035],
#          [0.005533  ],
#          ...,
#          [0.00313858],
#          [0.003465  ],
#          [0.00346861]], shape=(20484, 1), dtype=float32)

# Loop through bands
# for band in bands:
band = "alpha" # test

# Read every morphed average map once, up front (the files do not depend on the
# vertex). The rater label is the visit's position in the subject's session
# list, not the session date: dates differ between subjects, so using them as
# raters would leave no rater shared across targets.
visit_maps = []  # (subject, visit number, power per vertex)
for subject in dictionary:
    for visit, session in enumerate(dictionary[subject], start=1):

        stc_dir = os.path.join(base_dir, "sub_" + subject, session, "beamformer", "stc")
        stc_average_name = os.path.join(stc_dir, "sub_" + subject + "_average_raw_tsss_beamformer_" + band + "_stc_morphed")
        stc = mne.read_source_estimate(stc_average_name)
        visit_maps.append((subject, visit, stc.data[:, 0]))

# Vertex-wise ICC: one [VERTEX, ICC2, CI] row per vertex
icc_rows = []

# Loop through vertices
# There are 20484 vertices
for vertex in range(20484):

    # Long-format ratings for this vertex: subject, visit, power
    df_vertex = pd.DataFrame(
        [(subject, visit, power[vertex]) for subject, visit, power in visit_maps],
        columns=["TARGET", "RATER", "RATING"],
    )

    # Compute ICC on this; pick the ICC2 row by type, not by position
    results = pg.intraclass_corr(data=df_vertex, targets='TARGET', raters='RATER', ratings='RATING')
    icc2_row = results.loc[results['Type'] == 'ICC2'].iloc[0]
    icc_rows.append([vertex, icc2_row['ICC'], icc2_row["CI95%"]])

# Save the entire thing, once, after all vertices are done
df = pd.DataFrame(icc_rows, columns=["VERTEX", "ICC2", "CI"])
with open("C:/meg/NVAR_ICC/vertexwise_ICC_" + band + ".pkl", "wb") as f:
    pickle.dump(df, f)

In [None]:
# Plot whole-brain ICC maps

def plot_stc_grid(stcs, labels):
    """
    Plot a list of STCs in an n x 3 grid (right lateral, dorsal, left lateral),
    with a colorbar in a narrow fourth column.

    Parameters
    ----------
    stcs : list of SourceEstimate
        One STC per row.
    labels : list of str
        Row labels, one per STC.

    Returns
    -------
    fig : the matplotlib figure holding the grid
    """
    # Column layout; views, hemis and col_titles must stay aligned
    views = ['lateral', 'dorsal', 'lateral']
    hemis = ['rh', 'both', 'lh']
    col_titles = ['Right Lateral', 'Dorsal', 'Left Lateral']

    # Fixed color scale so all rows are comparable
    clim = dict(kind='value', lims=[0, 0.5, 1])
    colormap = 'viridis'

    n = len(stcs)
    # squeeze=False keeps `axes` two-dimensional even when there is a single row
    fig, axes = plt.subplots(n, 4, figsize=(13, 4 * n), squeeze=False,
                             gridspec_kw=dict(width_ratios=[1, 1, 1, 0.05]))

    for row, stc in enumerate(stcs):
        for col, (view, hemi) in enumerate(zip(views, hemis)):

            brain = stc.plot(
                subject='fsaverage',
                subjects_dir="C:/meg/NVAR_ICC_day/MRI/freesurfer/",
                hemi=hemi,
                views=view,
                clim=clim,
                colormap=colormap,
                background='white',
                #surface = "pial",
                colorbar=False
            )

            # Capture the rendering, then close the 3D window itself so windows
            # do not pile up. (plt.close('all') would not close it, and would
            # also unregister the grid figure from pyplot.)
            img = brain.screenshot()
            brain.close()

            axes[row, col].imshow(img)
            axes[row, col].axis('off')

        mne.viz.plot_brain_colorbar(axes[row, 3], clim, colormap, label="ICC")

        # Row label, placed to the left of the first panel
        axes[row, 0].text(-0.1, 0.5, labels[row], transform=axes[row, 0].transAxes,
                          fontsize=20, va='center', ha='right', rotation=0)

    for col, title in enumerate(col_titles):
        axes[0, col].set_title(title, fontsize=20)

    fig.tight_layout()
    return fig

def make_and_plot_stc(files, labels):
    """
    Given a list of paths to pickle files, makes a list of stcs, then passes them to a function for plotting

    Parameters
    ----------
    files : list of paths to pickle files; each file holds a dataframe with an
        "ICC2" column, expected to have one row per fsaverage vertex
        (10242 per hemisphere)
    labels : list of str, one row label per file

    Returns
    -------
    The figure returned by plot_stc_grid.
    """

    stcs = []

    for path in files:

        # Unpickling can run arbitrary code: only load files written by this notebook
        with open(path, "rb") as f:
            df = pickle.load(f)

        # Wrap the ICC values as a single-time-point source estimate on fsaverage
        stc = mne.SourceEstimate(
            data=df["ICC2"].to_numpy(),
            vertices=[np.arange(0, 10242), np.arange(0, 10242)],
            tmin=0,
            tstep=1,
            subject="fsaverage"
        )

        stcs.append(stc)

    return plot_stc_grid(stcs, labels)


# Plot the alpha / beta / low-gamma ICC maps as a three-row grid and save it.
# The function's parameter is named `files`.
fig = make_and_plot_stc(
    files=["C:/meg/NVAR_ICC_day/ICC_alpha.pkl", "C:/meg/NVAR_ICC_day/ICC_beta.pkl", "C:/meg/NVAR_ICC_day/ICC_g_low.pkl"],
    labels=['Alpha', 'Beta', 'Gamma']
)

fig.savefig('C:/meg/NVAR_ICC_day/brain_grid.png', dpi=300, bbox_inches='tight')