**Get Packages**

In [29]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot, plot
import colormaps as cmaps
import seaborn as sns
import matplotlib.colors as mc
import matplotlib.pyplot as plt

# include matplotlib widget
# (IPython magic selecting the interactive "widget" matplotlib backend for this kernel)
%matplotlib widget

# Run this line if you are in a Jupyter Notebook environment
# (initialises plotly's offline notebook mode so figures render inside the notebook)
init_notebook_mode(connected=True)

# define paths for data and output
# All entries are plain string fragments that get concatenated, e.g.
# paths["base"] + paths["data"] + "FreshData.csv" in the loading cell.
# NOTE(review): "base" is an absolute, machine-specific path; the notebook only
# runs unchanged on a machine with this directory layout.
# NOTE(review): "tempfigs" has no trailing separator, unlike the other entries.
# In the cells visible here the figures are written to the literal relative
# folder 'figs/' and only "base" and "data" are read from this dict -- confirm
# whether the remaining entries are still needed.
paths = {"base": "C://Users//avonl//OneDrive//Work//Research//projects//2023 - FRESH//code//FRESH//", 
         "alex": "alex//", 
         "data": "data//", 
         "tempfigs": "figs",
         "finalfigs": "figures//" 
        }

# set colormap name
colormap_name = "tofino"
# get color map
# (looked up by attribute name on the imported `colormaps` module)
cmap = getattr(cmaps, colormap_name)

#plot labels
# When False, every figure below is drawn with empty labels/text, and the flag
# is also embedded in the saved file names.
plot_labels = True

**Load CSV File and Clean Data**

In [30]:
# Load the tab-separated, UTF-16 encoded table into a pandas DataFrame.
df = pd.read_csv(paths["base"] + paths["data"] + "FreshData.csv", encoding='utf-16', delimiter='\t')

# Specific for this analysis: every missing cell is labelled "Unknown".
# fillna only touches cells that are actually missing. The earlier regex-based
# replacement of the substring "nan" also rewrote valid text that merely
# contains those letters (e.g. "Resonance" -> "ResoUnknownce").
df = df.fillna('Unknown')
# Cells that consist of exactly the text "nan" are treated as missing as well
# (whole-cell match, no substring/regex replacement).
df = df.replace('nan', 'Unknown')

**Select and Prepare Data for Signal Analysis**

In [31]:
# Columns that are kept for the signal-analysis overview.
columns_sig = [
    'Study', 'ID', 'Toolbox used',
    'Quality/Pruning: Method', 'Motion Artifact Method',
    'Resample/Downsample (Hz)', 'Filtering Coding',
    'Removal of the Global Signals during Preprocessing',
    'Non-GLM: Method', 'GLM: Method', 'GLM: HRF Regressor',
    'GLM: Other Regressors', 'Block Averaging: Modifiers',
]
df_siganalysis = df[columns_sig]

# IDs of the rows to pull out (high-Sorensen candidates).
# only high scores
ID = [4, 7, 18, 19, 20, 22, 25, 26, 31, 33, 34, 36, 37]
# all scores
# ID = [4,7,18,19,20,22,25,26,27,28,29,31,33,34,36,37]

# Two successive row filters:
#   1. drop every row whose "Study" entry contains "Study 2"
#   2. keep only the rows whose "ID" is listed in ID
# (to keep the complement instead, negate the second condition:
#  ~frame['ID'].isin(ID))
df_siganalysis = (
    df_siganalysis
    .loc[lambda frame: ~frame['Study'].str.contains('Study 2')]
    .loc[lambda frame: frame['ID'].isin(ID)]
)


# rename 'Non-GLM: Method' to 'Approach'
df_siganalysis = df_siganalysis.rename(columns={'Non-GLM: Method': 'Approach'})

# Remove rows that contain "Not investigated" in column "Toolbox used".
# The explicit .copy() makes the filtered frame independent of its parent, so
# the column assignments that follow operate on a real frame instead of a
# slice (avoids pandas' SettingWithCopyWarning).
df_siganalysis = df_siganalysis[~df_siganalysis['Toolbox used'].str.contains('Not investigated')].copy()

# Remove the "Custom:" marker together with any whitespace that follows it and
# keep only the rest of the entry. A single pattern is used because removing
# "Custom:" first made the former second replacement of "Custom: " a no-op and
# left a leading blank in front of the tool name.
df_siganalysis['Toolbox used'] = df_siganalysis['Toolbox used'].str.replace(r'Custom:\s*', '', regex=True)

# Columns in which an "Unknown" entry means that the step was not performed,
# and the label that replaces "Unknown" in each of them (same order).
column_none = ['Quality/Pruning: Method', 'Motion Artifact Method',
               'Resample/Downsample (Hz)', 'Filtering Coding']
none_txt = ['No Pruning', 'No Artifact Removal', 'No Resampling', 'No Filtering']

# Ordered list of (column, pattern, replacement) substitutions. Every step is a
# regex substitution, i.e. it also rewrites matches inside longer entries such
# as comma separated lists. The order matters and mirrors the processing order:
# steps for the same column are applied top to bottom.
# NOTE(review): because these are substring matches, short patterns such as
# 'No' or 'Other' also hit longer words containing them -- verify against the
# vocabulary actually used in the data.
relabel_steps = [(col, 'Unknown', txt) for col, txt in zip(column_none, none_txt)]
relabel_steps += [
    # pruning: generic "Other" label; visual inspection counts as manual selection
    ('Quality/Pruning: Method', 'Other', 'Other Pruning Method'),
    ('Quality/Pruning: Method', 'Visual Inspection of Time Domain', 'Manual Selection'),
    # motion artifacts: generic "Other" label; "Rejection" becomes "Trial Rejection"
    ('Motion Artifact Method', 'Other', 'Other Removal Method'),
    ('Motion Artifact Method', 'Rejection', 'Trial Rejection'),
    # global signal removal: "No" and "Unknown" both become "No Removal", "Other" becomes "Other Method"
    ('Removal of the Global Signals during Preprocessing', 'No', 'No Removal'),
    ('Removal of the Global Signals during Preprocessing', 'Unknown', 'No Removal'),
    ('Removal of the Global Signals during Preprocessing', 'Other', 'Other Method'),
    # GLM solver: label unknown/default, collapse every entry starting at "AR-IRLS" to just "AR-IRLS"
    ('GLM: Method', 'Unknown', 'Unknown Method'),
    ('GLM: Method', 'Default', 'Default Method'),
    ('GLM: Method', 'AR-IRLS.*', 'AR-IRLS'),
    ('GLM: Method', 'Other', 'Other Solver'),
    # HRF regressor: label unknown/default
    ('GLM: HRF Regressor', 'Unknown', 'Unknown HRF Regressor'),
    ('GLM: HRF Regressor', 'Default', 'Default HRF Regressor'),
    # additional regressors: "Unknown" and "Other" share one label
    ('GLM: Other Regressors', 'Unknown', 'Unknown Additional Regressors'),
    ('GLM: Other Regressors', 'Other', 'Unknown Additional Regressors'),
]

for column, pattern, replacement in relabel_steps:
    df_siganalysis[column] = df_siganalysis[column].replace(pattern, replacement, regex=True)


# Rows whose "Approach" contains "Block Averaging" or "Other" have no GLM
# solver; for them the "GLM: Method" column is reused to carry the block
# averaging modifiers.
# One boolean mask selects each matching row exactly once. Appending two
# separately built indexes listed a row twice when its entry contained both
# terms, and duplicate labels can make the label-aligned .loc assignment below
# fail.
non_glm_rows = df_siganalysis['Approach'].str.contains('Block Averaging|Other', regex=True)
rw_idx = df_siganalysis.index[non_glm_rows]

# replace the cell in column "GLM: Method" with the corresponding cell from "Block Averaging: Modifiers" for these rows
df_siganalysis.loc[rw_idx, 'GLM: Method'] = df_siganalysis.loc[rw_idx, 'Block Averaging: Modifiers']
# for these rows, replace "Unknown" with "No Correction"
df_siganalysis.loc[rw_idx, 'GLM: Method'] = df_siganalysis.loc[rw_idx, 'GLM: Method'].replace('Unknown', 'No Correction', regex=True)

# ...replace "Unknown HRF Regressor" in column "GLM: HRF Regressor" with an empty string
# (the empty string later marks the end of the pipeline for these rows)
df_siganalysis.loc[rw_idx, 'GLM: HRF Regressor'] = df_siganalysis.loc[rw_idx, 'GLM: HRF Regressor'].replace('Unknown HRF Regressor', '', regex=True)
# ...replace "Unknown Additional Regressors" in column "GLM: Other Regressors" with an empty string
df_siganalysis.loc[rw_idx, 'GLM: Other Regressors'] = df_siganalysis.loc[rw_idx, 'GLM: Other Regressors'].replace('Unknown Additional Regressors', '', regex=True)

# delete column "Block Averaging: Modifiers" (its content now lives in "GLM: Method")
df_siganalysis = df_siganalysis.drop(columns=['Block Averaging: Modifiers'])

**Plot Toolbox Statistics**

In [32]:
# Create one entry per toolbox: cells may list several toolboxes separated by commas.
df_sig_tb = df_siganalysis['Toolbox used']
# number of rows before splitting; used as denominator for the percentages
numentries = len(df_sig_tb)
# Series.explode takes no column name; its only argument is ignore_index.
# The string passed previously was interpreted as a truthy ignore_index, which
# is now spelled out explicitly.
df_sig_tb = df_sig_tb.str.split(',').explode(ignore_index=True)
# Remove surrounding blanks first, then drop empty entries. Filtering before
# stripping let blank-only entries through, which then showed up as an
# unnamed '' category.
df_sig_tb = df_sig_tb.str.strip()
df_sig_tb = df_sig_tb[df_sig_tb != '']

# Share of rows (in percent) that mention each toolbox. Rows listing several
# toolboxes count for each of them, so the shares can add up to more than 100.
df_pipe_tb_counts = df_sig_tb.value_counts() / numentries * 100
# Create custom text for each slice
custom_text_pipe_tb = [f'{label}<br>{value:.2f}%' for label, value in zip(df_pipe_tb_counts.index, df_pipe_tb_counts)]

# extract and assign colors from colormap: one discrete colour per category
num_categories = len(df_pipe_tb_counts)
colors = cmap.discrete(num_categories)
# Convert colors to hex format (RGB channels only, alpha is dropped)
plotly_colors = ['#' + ''.join([f'{int(c * 255):02x}' for c in colors(i)[:3]]) for i in range(num_categories)]


# plotting with or without labels
lb_tmp = df_pipe_tb_counts.index
txt_tmp = custom_text_pipe_tb
if not plot_labels:
    lb_tmp = [''] * len(df_pipe_tb_counts.index)
    txt_tmp = [''] * len(custom_text_pipe_tb)

# Create the pie chart
fig = go.Figure(data=go.Pie(
    labels=lb_tmp,
    values=df_pipe_tb_counts.values,
    text=txt_tmp,
    textposition='auto',
    textinfo='text',
    textfont=dict(size=16),
    marker=dict(colors=plotly_colors)
))


# Set the layout
fig.update_layout(
    title='Use of Tools and Toolboxes for Analysis',
    showlegend=False
)

# Show the chart
fig.show()
# Save the chart (relative to the current working directory; the 'figs' folder must exist)
plot(fig, filename='figs/pie_AnalysisToolboxes_highSorensen'+'_label_'+str(plot_labels)+'.html')

'figs/pie_AnalysisToolboxes_highSorensen_label_True.html'

**Prepare Data and Nodes for Analysis Sankey Diagram**

In [33]:

# save number of entries in df_siganalysis (denominator for all percentages below)
numentries_sa = len(df_siganalysis)
# save copy of df_siganalysis before the "Simplified" columns are added
df_siganalysis_notexp = df_siganalysis.copy()

# Disabled alternative, kept for reference: split entries and save relative
# weights in corresponding new columns.
# for columnName in column_list:
#     df_siganalysis['weight '+columnName] = df_siganalysis[columnName].apply(lambda x: 1 / len(x.split(',')) if x else 1)
#     df_siganalysis[columnName] = df_siganalysis[columnName].apply(lambda x: x.split(','))
#     df_siganalysis = df_siganalysis.explode(columnName)


## ---- dealing with multiple choice entries ---- ##

# 'Quality/Pruning: Simplified' sits directly behind 'Quality/Pruning: Method'.
# It copies the method when the cell holds a single entry and reads
# "Combination of Metrics" when the cell holds comma separated values.
quality_simplified = df_siganalysis['Quality/Pruning: Method'].apply(
    lambda x: x if ',' not in x else 'Combination of Metrics')
if 'Quality/Pruning: Simplified' in df_siganalysis.columns:
    # the cell was run before: refresh the column instead of failing on a second insert
    df_siganalysis['Quality/Pruning: Simplified'] = quality_simplified
else:
    df_siganalysis.insert(df_siganalysis.columns.get_loc('Quality/Pruning: Method') + 1,
                          'Quality/Pruning: Simplified', quality_simplified)

# Keep only the multi-entry cells of 'Quality/Pruning: Method' and break them up
# into one row per metric.
df_siganalysis_quality = df_siganalysis['Quality/Pruning: Method']
df_siganalysis_quality = df_siganalysis_quality[df_siganalysis_quality.str.contains(',')]
# NOTE(review): `b` is not read anywhere in the cells visible here; it looks
# like a leftover debugging handle on the unsplit multi-entry cells.
b = df_siganalysis_quality
# Series.explode has no column argument; the string passed previously acted as
# a truthy ignore_index, which is now explicit.
df_siganalysis_quality = df_siganalysis_quality.str.split(',').explode(ignore_index=True)
# removes all spaces from the beginning and end of each entry
df_siganalysis_quality = df_siganalysis_quality.str.strip()
# Relative frequencies in percent of all rows. The normalisation matches the
# other pie charts of this notebook; without it the slice labels below printed
# raw counts followed by a "%" sign.
df_siganalysis_quality_cts = df_siganalysis_quality.value_counts() / numentries_sa * 100
## plot pie chart
# Create custom text for each slice
df_siganalysis_quality_cts_tb = [f'{label}<br>{value:.2f}%' for label, value in zip(df_siganalysis_quality_cts.index, df_siganalysis_quality_cts)]
# plotting with or without labels
lb_tmp = df_siganalysis_quality_cts.index
txt_tmp = df_siganalysis_quality_cts_tb
if not plot_labels:
    lb_tmp = [''] * len(df_siganalysis_quality_cts.index)
    txt_tmp = [''] * len(df_siganalysis_quality_cts_tb)
# Create the pie chart
# (plotly_colors is the colour list built in the toolbox-statistics cell)
fig = go.Figure(data=go.Pie(
    labels=lb_tmp,
    values=df_siganalysis_quality_cts.values,
    text=txt_tmp,
    textposition='auto',
    textinfo='text',
    textfont=dict(size=16),
    marker=dict(colors=plotly_colors)
))
# Set the layout
fig.update_layout(
    title='Multiple Pruning Metrics',
    showlegend=False
)
# Show the chart
fig.show()
# Save the chart
plot(fig, filename='figs/pie_MultPruningMetrics_highSorensen'+'_label_'+str(plot_labels)+'.html')

# 'Motion Artifact Method: Simplified' sits directly behind 'Motion Artifact Method'.
# It copies the method when the cell holds a single entry and reads
# "Combination of Methods" when the cell holds comma separated values.
artefact_simplified = df_siganalysis['Motion Artifact Method'].apply(
    lambda x: x if ',' not in x else 'Combination of Methods')
if 'Motion Artifact Method: Simplified' in df_siganalysis.columns:
    # the cell was run before: refresh the column instead of failing on a second insert
    df_siganalysis['Motion Artifact Method: Simplified'] = artefact_simplified
else:
    df_siganalysis.insert(df_siganalysis.columns.get_loc('Motion Artifact Method') + 1,
                          'Motion Artifact Method: Simplified', artefact_simplified)

# Keep only the multi-entry cells of 'Motion Artifact Method' and break them up
# into one row per method.
df_siganalysis_artefact = df_siganalysis['Motion Artifact Method']
df_siganalysis_artefact = df_siganalysis_artefact[df_siganalysis_artefact.str.contains(',')]
# Series.explode has no column argument; the string passed previously acted as
# a truthy ignore_index, which is now explicit.
df_siganalysis_artefact = df_siganalysis_artefact.str.split(',').explode(ignore_index=True)
# removes all spaces from the beginning and end of each entry
df_siganalysis_artefact = df_siganalysis_artefact.str.strip()
# Relative frequencies in percent of all rows
df_siganalysis_artefact_cts = df_siganalysis_artefact.value_counts() / numentries_sa * 100
## plot pie chart
# Create custom text for each slice
df_siganalysis_artefact_cts_tb = [f'{label}<br>{value:.2f}%' for label, value in zip(df_siganalysis_artefact_cts.index, df_siganalysis_artefact_cts)]
# plotting with or without labels
lb_tmp = df_siganalysis_artefact_cts.index
txt_tmp = df_siganalysis_artefact_cts_tb
if not plot_labels:
    lb_tmp = [''] * len(df_siganalysis_artefact_cts.index)
    txt_tmp = [''] * len(df_siganalysis_artefact_cts_tb)
# Create the pie chart
# (plotly_colors is the colour list built in the toolbox-statistics cell)
fig = go.Figure(data=go.Pie(
    labels=lb_tmp,
    values=df_siganalysis_artefact_cts.values,
    text=txt_tmp,
    textposition='auto',
    textinfo='text',
    textfont=dict(size=16),
    marker=dict(colors=plotly_colors)
))
# Set the layout
fig.update_layout(
    title='Combination of Artifact Methods',
    showlegend=False
)
# Show the chart
fig.show()
# Save the chart
plot(fig, filename='figs/pie_MultArtifactMethods_highSorensen'+'_label_'+str(plot_labels)+'.html')

  
# 'GLM: Other Regressors: Simplified' sits directly behind 'GLM: Other Regressors'.
# It copies the regressor when the cell holds a single entry and reads
# "Multiple Regressors" when the cell holds comma separated values.
otherregress_simplified = df_siganalysis['GLM: Other Regressors'].apply(
    lambda x: x if ',' not in x else 'Multiple Regressors')
if 'GLM: Other Regressors: Simplified' in df_siganalysis.columns:
    # the cell was run before: refresh the column instead of failing on a second insert
    df_siganalysis['GLM: Other Regressors: Simplified'] = otherregress_simplified
else:
    df_siganalysis.insert(df_siganalysis.columns.get_loc('GLM: Other Regressors') + 1,
                          'GLM: Other Regressors: Simplified', otherregress_simplified)

# Keep only the multi-entry cells of 'GLM: Other Regressors' and break them up
# into one row per regressor.
df_siganalysis_otherregress = df_siganalysis['GLM: Other Regressors']
df_siganalysis_otherregress = df_siganalysis_otherregress[df_siganalysis_otherregress.str.contains(',')]
# Series.explode has no column argument; the string passed previously acted as
# a truthy ignore_index, which is now explicit.
df_siganalysis_otherregress = df_siganalysis_otherregress.str.split(',').explode(ignore_index=True)
# removes all spaces from the beginning and end of each entry
df_siganalysis_otherregress = df_siganalysis_otherregress.str.strip()
# Relative frequencies in percent of all rows
df_siganalysis_otherregress_cts = df_siganalysis_otherregress.value_counts() / numentries_sa * 100
## plot pie chart
# Create custom text for each slice
df_siganalysis_otherregress_cts_tb = [f'{label}<br>{value:.2f}%' for label, value in zip(df_siganalysis_otherregress_cts.index, df_siganalysis_otherregress_cts)]
# plotting with or without labels
lb_tmp = df_siganalysis_otherregress_cts.index
txt_tmp = df_siganalysis_otherregress_cts_tb
if not plot_labels:
    lb_tmp = [''] * len(df_siganalysis_otherregress_cts.index)
    txt_tmp = [''] * len(df_siganalysis_otherregress_cts_tb)
# Create the pie chart
# (plotly_colors is the colour list built in the toolbox-statistics cell)
fig = go.Figure(data=go.Pie(
    labels=lb_tmp,
    values=df_siganalysis_otherregress_cts.values,
    text=txt_tmp,
    textposition='auto',
    textinfo='text',
    textfont=dict(size=16),
    marker=dict(colors=plotly_colors)
))
# Set the layout
fig.update_layout(
    title='Combination of Other Regressors',
    showlegend=False
)
# Show the chart
fig.show()
# Save the chart
plot(fig, filename='figs/pie_MultRegressors_highSorensen'+'_label_'+str(plot_labels)+'.html')





## ---- creating nodes for sankey ---- ##

# Pipeline stages in the order in which they appear (left to right) in the
# Sankey diagram; each stage is one column of df_siganalysis.
column_list = [
    'Quality/Pruning: Simplified',
    'Motion Artifact Method: Simplified',
    'Resample/Downsample (Hz)',
    'Filtering Coding',
    'Removal of the Global Signals during Preprocessing',
    'Approach',
    'GLM: Method',
    'GLM: HRF Regressor',
    'GLM: Other Regressors: Simplified',
]

# Every distinct value found in any of the stage columns becomes one node.
stacked_stage_values = pd.concat([df_siganalysis[stage] for stage in column_list])
nodes = stacked_stage_values.unique()

# Lookup table: node value -> position of the node in `nodes`.
node_indices = dict(zip(nodes, range(len(nodes))))

# One link of weight 1 per row and per pair of neighbouring stages.
source, target, value = [], [], []

for _, record in df_siganalysis.iterrows():
    for left_col, right_col in zip(column_list[:-1], column_list[1:]):
        left_node = record[left_col]
        right_node = record[right_col]
        # An empty string ends the pipeline of this row: no further links are added.
        if left_node == '' or right_node == '':
            break

        source.append(node_indices[left_node])
        target.append(node_indices[right_node])
        value.append(1)


# Sample one colour per node at evenly spaced positions of the configured colormap.
num_colors = len(nodes)
sample_points = np.linspace(0, 1, num_colors)
palette = cmap(sample_points)
# Hex string for every sampled colour (same order as `nodes`).
node_colors_hex = list(map(mc.to_hex, palette))

# Function to convert hex color to RGBA with specified transparency
def hex_to_rgba(hex_color, alpha=0.5):
    """Return the colour as an ``"rgba(r,g,b, alpha)"`` string.

    Parameters
    ----------
    hex_color : str or colour specification
        A ``'#rrggbb'`` string is parsed directly, so its channel values are
        reproduced exactly. Anything else is handed to
        ``matplotlib.colors.to_rgb``.
    alpha : float, default 0.5
        Opacity that is written into the string unchanged.

    Returns
    -------
    str
        ``"rgba(r,g,b, alpha)"`` with integer channels in 0..255.
    """
    hex_digits = set('0123456789abcdefABCDEF')
    is_plain_hex = (
        isinstance(hex_color, str)
        and len(hex_color) == 7
        and hex_color[0] == '#'
        and set(hex_color[1:]) <= hex_digits
    )
    if is_plain_hex:
        # Integer parsing avoids the float round trip (value / 255 * 255) that,
        # combined with truncation, could come out one step too low.
        r, g, b = (int(hex_color[k:k + 2], 16) for k in (1, 3, 5))
    else:
        # Round instead of truncating when scaling the 0..1 floats to 0..255.
        r, g, b = (int(round(channel * 255)) for channel in mc.to_rgb(hex_color))
    return "rgba({r},{g},{b}, {alpha})".format(r=r, g=g, b=b, alpha=alpha)

# Node colours: one fully opaque rgba string per node, in the order of `nodes`.
node_colors = [hex_to_rgba(hex_code, alpha=1) for hex_code in node_colors_hex]

# Link colours: one entry per link, taken from the colour of the link's source
# node with 0.5 transparency.
# Each entry is addressed by the position of the link. The earlier loop used
# the source node index both as position in link_colors and as position in
# `source`, which coloured the wrong entries and left the others empty.
link_colors = [hex_to_rgba(node_colors_hex[src_node], alpha=0.5) for src_node in source]


**Calculate Statistics/Frequencies of Entries in Analysis**

Count how often each method/step was performed and report the relative frequencies in percent. (Sometimes several steps from the same stage are performed; to correct for this, use the weights in `weights_signalanalysis`.)

In [34]:
# Count for every node in how many rows it occurs and store it in a dictionary.
# Only the stage columns of the Sankey diagram are searched. Scanning the whole
# frame also counted a row when the same text appeared in an unrelated column
# (for example a tool name in "Toolbox used").
abs_node_count = {}
sankey_values = df_siganalysis[column_list]

for node in nodes:
    # a row counts once, no matter in how many stage columns the node appears
    abs_node_count[node] = int(sankey_values.eq(node).any(axis=1).sum())

print(abs_node_count)


# Relative node count in percent of all rows, rounded to 1 decimal.
rel_node_count = {node: round((count / numentries_sa) * 100, 1) for node, count in abs_node_count.items()}

print(rel_node_count)

# Make the label list for the nodes, adding the relative node count in % to the node name
node_labels = [f'{node} ({rel_node_count[node]}%)' for node in nodes]

{'Other Pruning Method': 1, 'No Pruning': 2, 'Combination of Metrics': 8, 'SCI (Scalp-Coupling Index)': 2, 'Trial Rejection': 1, 'TDDR': 1, 'Combination of Methods': 3, 'Targeted PCA': 2, 'No Artifact Removal': 3, 'Hybrid spline wavelet': 3, 'No Resampling': 11, '4': 1, '3': 1, 'BP': 8, 'LP': 4, 'No Filtering': 1, 'Other Method': 6, 'No Removal': 6, 'PCA': 1, 'Block Averaging': 1, 'GLM': 12, 'Detrending': 1, 'OLS   ': 6, 'AR-IRLS': 6, '': 1, 'SPM': 3, 'Canonical': 2, 'Glover': 1, 'Gamma': 4, 'Consecutive Gaussian': 2, 'Short Channels': 6, 'Multiple Regressors': 3, 'Unknown Additional Regressors': 2, 'PCA of the Short Channels': 1}
{'Other Pruning Method': 7.7, 'No Pruning': 15.4, 'Combination of Metrics': 61.5, 'SCI (Scalp-Coupling Index)': 15.4, 'Trial Rejection': 7.7, 'TDDR': 7.7, 'Combination of Methods': 23.1, 'Targeted PCA': 15.4, 'No Artifact Removal': 23.1, 'Hybrid spline wavelet': 23.1, 'No Resampling': 84.6, '4': 7.7, '3': 7.7, 'BP': 61.5, 'LP': 30.8, 'No Filtering': 7.7, 'Oth

**Plot Analysis Sankey Diagram**

In [35]:

# Node labels are blanked out when plotting without labels.
lb_tmp = node_labels if plot_labels else [''] * len(node_labels)

# Appearance and content of the nodes.
sankey_nodes = dict(
    pad=15,
    thickness=20,
    line=dict(color='black', width=0.5),
    label=lb_tmp,
    color=node_colors,  # one colour per node
)
# Links between the nodes.
sankey_links = dict(
    source=source,
    target=target,
    value=value,
    # color=link_colors,  # link colouring is currently switched off
)
fig = go.Figure(data=[go.Sankey(node=sankey_nodes, link=sankey_links)])

## Column headings shown above the diagram
# Substring substitutions, applied to every heading in exactly this order
# (the ':' is removed first, so later patterns are written without it).
label_substitutions = [
    (':', ''),
    ('Approach', 'HRF Estimation'),
    ('GLM Method', 'Solver / Modifiers'),
    ('Removal of the Global Signals during Preprocessing', 'Physiology Preprocessing'),
    ('Resample/Downsample (Hz)', 'Resampling (Hz)'),
    ('Filtering Coding', 'Filtering'),
]
column_labels = list(column_list)
for old_text, new_text in label_substitutions:
    column_labels = [heading.replace(old_text, new_text) for heading in column_labels]

# Spread the headings evenly between the left (0) and right (1) edge of the plot.
# This is a rough approximation and may need to be adjusted based on the exact
# look of the Sankey diagram.
last_position = len(column_labels) - 1
x_positions = [pos / last_position for pos in range(len(column_labels))]

annotations = []
for x_pos, heading in zip(x_positions, column_labels):
    annotations.append(dict(
        x=x_pos,
        y=1.05,  # slightly above the plotting area
        xref='paper',
        yref='paper',
        text=heading,
        showarrow=False,
        font=dict(size=16),
    ))

# Layout: headings, automatic sizing and the global font size.
fig.update_layout(
    annotations=annotations,
    autosize=True,
    font=dict(size=16),
)

fig.show()
plot(fig, filename='figs/sankey_analysispipeline_highSorensen'+'label_'+str(plot_labels)+'.html')

'figs/sankey_analysispipeline_highSorensenlabel_True.html'