# Mediators analysis

Imports

In [25]:
import os
import numpy as np
import pandas as pd
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from statsmodels.stats.multitest import multipletests

## Read data

### Read behavioral and questionnarie data

In [26]:
behavioral_data_df = pd.read_csv('../data/current_dataset/current_dataset_behavioral_data.csv')
questionnarie_data_df = pd.read_csv('../data/current_dataset/current_dataset_q_data.csv').rename(columns={'Demo_kod': 'ID'})

# re-code ID
behavioral_data_df['ID'] = behavioral_data_df['Demo_kod'].apply(lambda x: int(x.split('-')[-1]))

# merge behavioral and questionnarie data
data_df = pd.merge(
    behavioral_data_df, 
    questionnarie_data_df, 
    on='ID', 
    how='inner'
)

### Chose columns to analysis

In [27]:
columns_base = [
    'ID',
    'Intolerance of Uncertainty - Prospective Anxiety',
    'Intolerance of Uncertainty - Inhibitory Anxiety',
    'STAI Trait SUM',
    'BIS',
    'Obsessive-Compulsive FULL',
    'Zdecydowanie-FULL',
    'Number error congruent+incongruent',
    'PSWQ',
]

data_df = data_df[columns_base]
data_df.head()

Unnamed: 0,ID,Intolerance of Uncertainty - Prospective Anxiety,Intolerance of Uncertainty - Inhibitory Anxiety,STAI Trait SUM,BIS,Obsessive-Compulsive FULL,Zdecydowanie-FULL,Number error congruent+incongruent,PSWQ
0,0,2.0,1.6,40,3.3,1.555556,2.8,41,64
1,1,4.857143,4.2,52,3.6,2.222222,3.2,26,78
2,2,3.428571,3.0,50,2.7,3.5,3.2,12,60
3,3,2.857143,1.0,32,2.0,1.111111,4.8,14,23
4,4,3.857143,3.0,46,3.0,2.333333,3.8,57,67


## Read EEG data, number of trials in the analysis, and consistency data

In [28]:
d_df = pd.read_csv('../data/current_dataset/sonata_data_standardized.csv')
mapping_df = d_df.drop_duplicates(subset=['ID'])[['ID', 'participant_index']]

ern_consistency_df = pd.read_csv('../results/consistency_test_results/Main_ern_consistency.csv')
crn_consistency_df = pd.read_csv('../results/consistency_test_results/Main_crn_consistency.csv')

# add ID to columns
ern_consistency_df = ern_consistency_df.merge(mapping_df, on='participant_index', how='left').rename(columns={'Internal consistency': 'ERN consistency'})
crn_consistency_df = crn_consistency_df.merge(mapping_df, on='participant_index', how='left').rename(columns={'Internal consistency': 'CRN consistency'})

df_erp = pd.DataFrame()

df_erp[['ID', 'Peak_Ne']] = d_df[d_df['pre_acc'] == -1].groupby('ID')['pre_ne_FCz_standardized'].mean().reset_index()
df_erp[['ID', 'Peak_Nc']] = d_df[d_df['pre_acc'] == 1].groupby('ID')['pre_ne_FCz_standardized'].mean().reset_index()
df_erp[['ID', 'Mean_Ne']] = d_df[d_df['pre_acc'] == -1].groupby('ID')['pre_ne_mean_Fz'].mean().reset_index()
df_erp[['ID', 'Mean_Nc']] = d_df[d_df['pre_acc'] == 1].groupby('ID')['pre_ne_mean_Fz'].mean().reset_index()
df_erp[['ID', 'num_err_trials']] = d_df[d_df['acc'] == -1].groupby('ID')['acc'].count().reset_index()
df_erp[['ID', 'num_corr_trials']] = d_df[d_df['acc'] == 1].groupby('ID')['acc'].count().reset_index()

df_erp[['ID', 'num_err_congruent_trials']] = d_df[(d_df['acc'] == -1) & (d_df['condition'] == 1)].groupby('ID')['acc'].count().reset_index()
df_erp[['ID', 'num_corr_congruent_trials']] = d_df[(d_df['acc'] == 1) & (d_df['condition'] == 1)].groupby('ID')['acc'].count().reset_index()
df_erp[['ID', 'num_err_incongruent_trials']] = d_df[(d_df['acc'] == -1) & (d_df['condition'] == -1)].groupby('ID')['acc'].count().reset_index()
df_erp[['ID', 'num_corr_incongruent_trials']] = d_df[(d_df['acc'] == 1) & (d_df['condition'] == -1)].groupby('ID')['acc'].count().reset_index()

df_erp = df_erp.merge(ern_consistency_df[['ERN consistency', 'ID']], on='ID', how='left').merge(crn_consistency_df[['CRN consistency', 'ID']], on='ID', how='left')

df_erp.head()

Unnamed: 0,ID,Peak_Ne,Peak_Nc,Mean_Ne,Mean_Nc,num_err_trials,num_corr_trials,num_err_congruent_trials,num_corr_congruent_trials,num_err_incongruent_trials,num_corr_incongruent_trials,ERN consistency,CRN consistency
0,0,-1.512579,0.228717,-19.205271,-6.162771,32,187,5.0,148,32,39,0.918129,0.971903
1,1,-1.151616,0.115922,-15.218654,-5.723672,25,224,3.0,172,25,52,0.926275,0.973752
2,2,-0.834879,-0.024213,-1.794045,6.138015,9,200,4.0,137,4,63,0.702198,0.97332
3,3,-2.261904,0.129423,-12.632415,4.486968,13,257,4.0,189,13,68,0.893062,0.984998
4,4,-1.089271,0.18069,-9.309608,1.194268,36,158,1.0,134,33,24,0.961469,0.96675


### Read DDM results

In [29]:
path = '../results/models_results/cond_models/sonata'
model = 'drift_boundary_cond'
file_path = f'{path}/{model}/results/{model}_summary_with_bf_log_az.csv'

ddm_results_df = pd.read_csv(file_path).rename(columns={'Unnamed: 0': 'Variable'})

# remove fixed effects
random_effects_ddm_results_df = ddm_results_df[ddm_results_df['Variable'].str.contains('participant')]
# create index column
random_effects_ddm_results_df['participant_index'] = random_effects_ddm_results_df['Variable'].str.extract(r'\[(\d+)\]').astype('Int64')
# add ID column
random_effects_ddm_results_df = pd.merge(random_effects_ddm_results_df, mapping_df, on='participant_index', how='left')
# remove info on index from variable name
random_effects_ddm_results_df['Variable'] = random_effects_ddm_results_df['Variable'].str.replace(r'\[\d+\]', '', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  random_effects_ddm_results_df['participant_index'] = random_effects_ddm_results_df['Variable'].str.extract(r'\[(\d+)\]').astype('Int64')


In [30]:
filter = True

In [31]:
if filter:
    variables = [
        'participants_delta_ern',
        'participants_delta_crn',
        'participants_alpha_ern',
        'participants_alpha_crn',
    ]
    
    random_effects_ddm_results_df = random_effects_ddm_results_df[random_effects_ddm_results_df['Variable'].isin(variables)]

### Merge dataframes

In [32]:
results_df = pd.merge(random_effects_ddm_results_df, data_df, on='ID', how='left').merge(df_erp, on='ID',  how='left')

### Create data for correlation

In [33]:
def pivot_df(df, cols, save=None):
    df_pivoted = df.pivot_table(index=cols, columns='Variable', values='mean', aggfunc='mean').reset_index()
    
    final_data_df = df_pivoted.copy().drop(['ID', 'participant_index'], axis=1)
    final_data_df = final_data_df.astype(float)
    
    # save data
    if save is not None:
        final_data_df.to_csv(f'{save}.csv', index=False)
    
    return final_data_df

In [32]:
cols = [
    'participant_index', 
    'Peak_Ne', 
    'Peak_Nc', 
    'num_err_trials', 
    'num_corr_trials',
    'ERN consistency',
    'CRN consistency'
] + columns_base

final_data_df = pivot_df(results_df, cols, save = '../results/correlation_analysis/behavioral_results_ddm_for_correlation_main')

## Create a correlation matrix

In [33]:
def r_pvalues(df, method='fdr_by'):
    cols = pd.DataFrame(columns=df.columns)
    p = pd.DataFrame(np.ones((df.shape[1], df.shape[1])), columns=df.columns, index=df.columns)
    for r in df.columns:
        for c in df.columns:
            tmp = df[df[r].notnull() & df[c].notnull()]
            p[r][c] = round(pearsonr(tmp[r], tmp[c])[1], 4)

    # Flatten the p-values matrix for correction
    p_values_flat = p.values[np.triu_indices_from(p, k=1)]
    
    # FDR Correction (Benjamini-Hochberg)
    _, p_fdr, _, _ = multipletests(p_values_flat, alpha=0.05, method=method)
    
    # Reshape the corrected p-values back into a matrix
    p_fdr_matrix = np.ones_like(p)
    
    p_fdr_matrix[np.triu_indices_from(p, k=1)] = p_fdr
    p_fdr_matrix = p_fdr_matrix.T
    p_fdr_matrix[np.triu_indices_from(p, k=1)] = p_fdr
    
    # Create DataFrames for corrected p-values
    p_fdr_df = pd.DataFrame(p_fdr_matrix, columns=df.columns, index=df.columns)
    return p, p_fdr_df

Read data

In [34]:
dir_path = '../results/correlation_analysis'
dataset_name = 'behavioral_results_ddm_for_correlation_main'

final_data_df = pd.read_csv(f'{dir_path}/{dataset_name}.csv', index_col=False)
final_data_df["STAI Trait SUM"] = final_data_df["STAI Trait SUM"] / 20
final_data_df["PSWQ"] = final_data_df["PSWQ"] / 16

final_data_df

Unnamed: 0,Peak_Ne,Peak_Nc,num_err_trials,num_corr_trials,ERN consistency,CRN consistency,Intolerance of Uncertainty - Prospective Anxiety,Intolerance of Uncertainty - Inhibitory Anxiety,STAI Trait SUM,BIS,Obsessive-Compulsive FULL,Zdecydowanie-FULL,Number error congruent+incongruent,PSWQ,participants_alpha_crn,participants_alpha_ern,participants_delta_crn,participants_delta_ern
0,-0.896547,0.158610,27.0,177.0,0.971160,0.979344,4.428571,3.8,2.80,2.7,2.944444,3.2,43.0,4.4375,0.004721,-0.063852,-0.025219,-0.159412
1,0.148713,-0.033743,14.0,256.0,0.949579,0.979535,3.714286,3.0,2.90,3.4,1.888889,3.0,15.0,3.8750,0.003789,-0.041863,0.044176,-0.163234
2,-0.323790,0.128952,25.0,224.0,0.879399,0.967359,3.857143,4.2,2.55,3.0,1.277778,3.6,27.0,3.5000,0.005457,-0.070036,-0.012970,-0.156649
3,-0.100440,0.022905,39.0,123.0,0.970033,0.962147,2.428571,2.4,2.25,3.0,2.333333,2.8,69.0,3.1250,0.000460,-0.044970,-0.007375,-0.157360
4,-1.024305,0.112711,27.0,206.0,0.957898,0.989158,3.142857,2.2,2.30,3.7,2.166667,3.2,32.0,3.3125,0.032358,-0.072450,0.009420,-0.139907
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
217,-1.647572,0.065113,14.0,237.0,0.902648,0.983386,2.571429,2.4,2.35,3.0,1.166667,2.8,18.0,3.8750,0.016436,-0.067298,0.026766,-0.160974
218,-1.089271,0.180690,36.0,158.0,0.961469,0.966750,3.857143,3.0,2.30,3.0,2.333333,3.8,57.0,4.1875,-0.010296,-0.049454,0.003016,-0.164992
219,-0.717324,0.097323,23.0,199.0,0.978219,0.982859,4.142857,3.8,2.85,3.3,2.444444,3.0,33.0,4.3125,0.007812,-0.070896,0.003932,-0.148367
220,-0.255942,-0.049052,25.0,209.0,0.965618,0.980628,3.142857,1.8,2.10,2.9,1.944444,3.4,33.0,2.7500,-0.034762,-0.064369,0.003962,-0.158437


In [39]:
columns_order = [
    'participants_alpha_crn',
    'participants_alpha_ern',
    'participants_delta_crn',
    'participants_delta_ern',
    'ERN consistency',
    'CRN consistency',
    'num_err_trials',
    'num_corr_trials',
    'Number error congruent+incongruent',
    'Intolerance of Uncertainty - Prospective Anxiety',
    'Intolerance of Uncertainty - Inhibitory Anxiety',
    'STAI Trait SUM',
    'BIS',
    'Obsessive-Compulsive FULL',
    'PSWQ',
    'Zdecydowanie-FULL'
]

In [40]:
final_data_df[columns_order].describe().T.to_csv('../results/correlation_analysis/variables_description.csv')

In [41]:
# create correlation matrix
correlation_matrix = final_data_df[columns_order].corr()

# calculate p-vales: without and with correction for multipe comparisons
p_values, p_values_fdr_corrected = r_pvalues(final_data_df[columns_order], method='fdr_bh')

significant_correlations = correlation_matrix[(p_values <= 0.1)]
significant_correlations_corrected = correlation_matrix[(p_values_fdr_corrected <= 0.1)]

# Define significance levels for stars
def significance_stars(p):
    if p <= 0.001:
        return '***'
    elif p <= 0.01:
        return '**'
    elif p <= 0.05:
        return '*'
    else:
        return ''

# Create a DataFrame to hold the results
formatted_matrix = correlation_matrix.copy().astype(str)

# Populate the upper triangle with correlation coefficients and significance stars
for i in range(len(correlation_matrix)):
    for j in range(i, len(correlation_matrix)):
        corr_val = correlation_matrix.iloc[i, j]
        p_val = p_values_fdr_corrected.iloc[i, j]
        formatted_matrix.iloc[i, j] = f"{corr_val:.2f}{significance_stars(p_val)}"
        # Set lower triangle to empty strings
        if j < i:
            formatted_matrix.iloc[i, j] = ''

# Set lower triangle values to empty strings for a clean display
for i in range(len(formatted_matrix)):
    for j in range(i):
        formatted_matrix.iloc[i, j] = ''

# Show the formatted matrix
display(formatted_matrix)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  p[r][c] = round(pearsonr(tmp[r], tmp[c])[1], 4)


Unnamed: 0,participants_alpha_crn,participants_alpha_ern,participants_delta_crn,participants_delta_ern,ERN consistency,CRN consistency,num_err_trials,num_corr_trials,Number error congruent+incongruent,Intolerance of Uncertainty - Prospective Anxiety,Intolerance of Uncertainty - Inhibitory Anxiety,STAI Trait SUM,BIS,Obsessive-Compulsive FULL,PSWQ,Zdecydowanie-FULL
participants_alpha_crn,1.0,-0.01,0.21**,-0.05,0.08,-0.07,0.16*,-0.15,0.17*,0.03,0.05,0.00,-0.01,0.01,-0.04,-0.00
participants_alpha_ern,,1.0,-0.01,-0.25***,0.06,-0.14,0.21**,-0.24**,0.25***,0.09,0.09,0.05,-0.01,-0.01,-0.01,0.05
participants_delta_crn,,,1.00,-0.39***,-0.08,0.04,-0.05,0.05,-0.08,0.02,-0.09,0.03,-0.03,0.04,-0.05,0.02
participants_delta_ern,,,,1.00,0.0,0.05,-0.04,0.08,-0.05,0.1,0.16*,0.14,0.23**,0.05,0.17*,-0.16
ERN consistency,,,,,1.0,0.13,0.61***,-0.44***,0.52***,-0.13,-0.04,-0.13,-0.02,-0.02,-0.09,0.02
CRN consistency,,,,,,1.0,-0.37***,0.51***,-0.47***,-0.03,0.06,-0.03,-0.06,-0.05,0.01,0.03
num_err_trials,,,,,,,1.00,-0.86***,0.91***,-0.08,-0.06,-0.10,-0.02,0.03,-0.08,0.02
num_corr_trials,,,,,,,,1.00,-0.95***,0.08,0.09,0.10,0.13,-0.02,0.14,-0.09
Number error congruent+incongruent,,,,,,,,,1.00,-0.09,-0.09,-0.12,-0.09,0.01,-0.13,0.06
Intolerance of Uncertainty - Prospective Anxiety,,,,,,,,,,1.0,0.47***,0.35***,0.30***,0.24**,0.34***,-0.19*


In [42]:
correlation_matrix.to_csv('../results/correlation_analysis/results/full_correlation_matrix_main.csv')
p_values.to_csv('../results/correlation_analysis/results/full_correlation_matrix_main_p.csv')
p_values_fdr_corrected.to_csv('../results/correlation_analysis/results/full_correlation_matrix_main_p_corrected.csv')
significant_correlations.to_csv('../results/correlation_analysis/results/correlation_matrix_main.csv')
significant_correlations_corrected.to_csv('../results/correlation_analysis/results/correlation_matrix_main_corrected.csv')
formatted_matrix.to_csv('../results/correlation_analysis/results/correlation_matrix_main_corrected_with_stars.csv')

In [43]:
with pd.option_context('display.max_columns', None):
    display(formatted_matrix)

Unnamed: 0,participants_alpha_crn,participants_alpha_ern,participants_delta_crn,participants_delta_ern,ERN consistency,CRN consistency,num_err_trials,num_corr_trials,Number error congruent+incongruent,Intolerance of Uncertainty - Prospective Anxiety,Intolerance of Uncertainty - Inhibitory Anxiety,STAI Trait SUM,BIS,Obsessive-Compulsive FULL,PSWQ,Zdecydowanie-FULL
participants_alpha_crn,1.0,-0.01,0.21**,-0.05,0.08,-0.07,0.16*,-0.15,0.17*,0.03,0.05,0.00,-0.01,0.01,-0.04,-0.00
participants_alpha_ern,,1.0,-0.01,-0.25***,0.06,-0.14,0.21**,-0.24**,0.25***,0.09,0.09,0.05,-0.01,-0.01,-0.01,0.05
participants_delta_crn,,,1.00,-0.39***,-0.08,0.04,-0.05,0.05,-0.08,0.02,-0.09,0.03,-0.03,0.04,-0.05,0.02
participants_delta_ern,,,,1.00,0.0,0.05,-0.04,0.08,-0.05,0.1,0.16*,0.14,0.23**,0.05,0.17*,-0.16
ERN consistency,,,,,1.0,0.13,0.61***,-0.44***,0.52***,-0.13,-0.04,-0.13,-0.02,-0.02,-0.09,0.02
CRN consistency,,,,,,1.0,-0.37***,0.51***,-0.47***,-0.03,0.06,-0.03,-0.06,-0.05,0.01,0.03
num_err_trials,,,,,,,1.00,-0.86***,0.91***,-0.08,-0.06,-0.10,-0.02,0.03,-0.08,0.02
num_corr_trials,,,,,,,,1.00,-0.95***,0.08,0.09,0.10,0.13,-0.02,0.14,-0.09
Number error congruent+incongruent,,,,,,,,,1.00,-0.09,-0.09,-0.12,-0.09,0.01,-0.13,0.06
Intolerance of Uncertainty - Prospective Anxiety,,,,,,,,,,1.0,0.47***,0.35***,0.30***,0.24**,0.34***,-0.19*
