In [1]:
import os
import matplotlib.pyplot as plt
import pathlib
import numpy as np
import pandas as pd
import altair as alt

In [2]:
# Directory the result CSV files are read from (relative to the notebook).
res_path = pathlib.Path('../camcan')
# Display every CSV file found directly inside that directory.
[csv_file for csv_file in res_path.glob('*.csv')]

[PosixPath('../camcan/Result_single_FREQ_all_imp_outer_10_inner.csv')]

In [3]:
# Load the result table, then split every `variable` entry at its first
# underscore: the part before goes to `indice`, the part after to `frequency`.
result_csv = res_path / "Result_single_FREQ_all_imp_outer_10_inner.csv"
df = pd.read_csv(result_csv)
variable_parts = df['variable'].str.split("_", n=1, expand=True)
df[['indice', 'frequency']] = variable_parts
df

Unnamed: 0,method,variable,importance,p_value,score_MAE,score_R2,indice,frequency
0,CPI-DNN,0_alpha,0.009375,0.418111,7.888284,0.685297,0,alpha
1,CPI-DNN,1_alpha,-0.013247,0.679910,7.888284,0.685297,1,alpha
2,CPI-DNN,2_alpha,0.046841,0.184859,7.888284,0.685297,2,alpha
3,CPI-DNN,3_alpha,-0.004825,0.594019,7.888284,0.685297,3,alpha
4,CPI-DNN,4_alpha,-0.005616,0.569441,7.888284,0.685297,4,alpha
...,...,...,...,...,...,...,...,...
4027,CPI-DNN,443_theta,0.007424,0.405616,7.888284,0.685297,443,theta
4028,CPI-DNN,444_theta,-0.002824,0.524321,7.888284,0.685297,444,theta
4029,CPI-DNN,445_theta,0.000164,0.485027,7.888284,0.685297,445,theta
4030,CPI-DNN,446_theta,-0.003017,0.612250,7.888284,0.685297,446,theta


In [4]:
# Convert p-values to -log10(p) and keep the rows significant at p < 0.05.
#
# The transform overwrites `p_value`, so running this cell twice used to apply
# -log10 twice. The untransformed values are therefore stashed once in
# `p_value_raw`, and `p_value` is always derived from that copy, which makes
# the cell safe to re-run.
if 'p_value_raw' not in df.columns:
    df['p_value_raw'] = df['p_value']
df['p_value'] = -np.log10(df['p_value_raw'])

# p < 0.05 is equivalent to -log10(p) > -log10(0.05).
df_sig_05 = df[df['p_value'] > -np.log10(0.05)]
df_sig_05

Unnamed: 0,method,variable,importance,p_value,score_MAE,score_R2,indice,frequency
129,CPI-DNN,129_alpha,0.013340,1.304583,7.888284,0.685297,129,alpha
133,CPI-DNN,133_alpha,0.037675,1.311369,7.888284,0.685297,133,alpha
135,CPI-DNN,135_alpha,0.049327,1.780963,7.888284,0.685297,135,alpha
253,CPI-DNN,253_alpha,0.113187,1.684056,7.888284,0.685297,253,alpha
525,CPI-DNN,77_beta_high,0.120302,1.677077,7.888284,0.685297,77,beta_high
...,...,...,...,...,...,...,...,...
3765,CPI-DNN,181_theta,0.069703,1.634512,7.888284,0.685297,181,theta
3771,CPI-DNN,187_theta,0.088230,1.404526,7.888284,0.685297,187,theta
3779,CPI-DNN,195_theta,0.069688,1.370318,7.888284,0.685297,195,theta
3837,CPI-DNN,253_theta,0.252530,1.893633,7.888284,0.685297,253,theta


In [5]:
# Percentage of significant rows per frequency band.
#
# The denominator is the number of rows each band has in `df`, instead of a
# hard-coded 448, so the percentages stay correct if the input table changes.
rows_per_band = df.groupby('frequency')['indice'].count()

# Bands without any significant row are kept with a count of 0 rather than
# being dropped, so later lookups by band name do not fail.
significant_per_band = (
    df_sig_05.groupby('frequency')['indice'].count()
    .reindex(rows_per_band.index, fill_value=0)
)

# Result layout is unchanged: one row per band, columns `frequency` and
# `indice` (the percentage).
df_perc = (significant_per_band / rows_per_band * 100).rename('indice').reset_index()
df_perc

Unnamed: 0,frequency,indice
0,alpha,0.892857
1,beta_high,2.232143
2,beta_low,3.571429
3,delta,1.116071
4,gamma_high,1.339286
5,gamma_lo,1.116071
6,gamma_mid,1.116071
7,low,2.678571
8,theta,1.5625


___

In [6]:
# Band name = everything after the first underscore of `variable`.
# The pandas string accessor is used directly instead of round-tripping
# through a 2-D numpy array, which relied on every split having two parts.
df['band'] = df['variable'].str.split('_', n=1).str[1]
df.p_value

0       0.378709
1       0.167549
2       0.733160
3       0.226200
4       0.244551
          ...   
4027    0.391885
4028    0.280403
4029    0.314234
4030    0.213071
4031    0.222184
Name: p_value, Length: 4032, dtype: float64

In [7]:
# Flag the rows whose `p_value` exceeds the -log10(0.05) cut-off.
significance_cutoff = -np.log10(0.05)
df['significant'] = df['p_value'].gt(significance_cutoff)

In [8]:
# Band identifiers as they appear in the data (`bands`) and the labels used
# for display (`bands_names`); the two lists are aligned position by position
# and differ only in 'gamma_lo' -> 'gamma_low'.
bands = ['low', 'delta', 'theta', 'alpha', 'beta_low',
         'beta_high', 'gamma_lo', 'gamma_mid', 'gamma_high']
bands_names = ['low', 'delta', 'theta', 'alpha', 'beta_low',
               'beta_high', 'gamma_low', 'gamma_mid', 'gamma_high']
band_labels = dict(zip(bands, bands_names))

# A plain `.map(dict)` turns every value that is not a key into NaN, so a
# second run of this cell would wipe the already-renamed 'gamma_low'.
# Falling back to the current value keeps the renaming idempotent.
df['band'] = df['band'].map(band_labels).fillna(df['band'])

df_perc['frequency'] = (
    df_perc['frequency'].map(band_labels).fillna(df_perc['frequency'])
)

In [9]:
# Put the rows of `df_perc` in the order given by `bands_names`.
perc_by_band = df_perc.set_index('frequency')
df_perc = perc_by_band.loc[bands_names].reset_index()

In [10]:
# Chart title built from the scores stored in the first row of `df`.
mae = df['score_MAE'].iloc[0]
r2 = df['score_R2'].iloc[0]
title = f"Age-prediction (MAE={mae:0.2f}, R2={r2:0.2f})"
title

'Age-prediction (MAE=7.89, R2=0.69)'

In [11]:
# Layer 1: one jittered dot per row of `df`, placed at its -log10(p) value
# inside the row of its frequency band and coloured by significance.
fig_camcan2 = alt.Chart(
    df
).mark_circle(size=24).encode(
    x=alt.X('p_value:Q', title='-log10(p value)', scale=alt.Scale(domain=[0, 4])),
    yOffset="jitter:Q",
    y=alt.Y('band:N', title="frequency band", sort=bands_names,
            axis=alt.Axis(labels=False, title=None, tickSize=0)),
    color=alt.Color('significant:N', title="Significant"),
).transform_calculate(
    # Generate Gaussian jitter with a Box-Muller transform
    jitter="0.2 * sqrt(-2*log(random()))*cos(2*PI*random())"
)

# Layer 2: the percentage of significant rows per band, drawn as text at a
# fixed x position. This layer also supplies the visible band labels.
# The sort order must be `bands_names`: `df_perc['frequency']` already holds
# the display labels (e.g. 'gamma_low'), and because the y scales are
# independent, sorting by `bands` would push the unmatched label to the end
# and misalign the text with the dot rows of the other layer.
fig_camcan2b = alt.Chart(
    df_perc
).mark_text(size=18).encode(
    y=alt.Y('frequency:N', sort=bands_names, axis=alt.Axis(title='Frequency band')),
    x=alt.value(365),
    text=alt.Text('label:N')
).transform_calculate(label='format(datum.indice,".2f") + " %"')

# Layer 3: an empty text mark whose only purpose is to contribute an extra
# y-axis title.
text = alt.Chart({'values': [{'x': 0, 'y': 0}]}).mark_text(
    text='').encode(
    x='x:Q', y=alt.Y('y:Q', title='% of significant brain regions', axis=alt.Axis(labels=False, ticks=False))
)

my_font = 'Helvetica'
fig_fin = alt.layer(fig_camcan2b, fig_camcan2, text).resolve_scale(y='independent').configure_axis(
    grid=True,
    titleFont=my_font,
    titleFontWeight='normal',
    labelFont=my_font,
    labelFontSize=22,
    titleFontSize=26,
    labelLimit=0,
    titlePadding=20,
).configure_header(
    titleFont=my_font,
    titleFontWeight='normal',
    labelFont=my_font,
    titleFontSize=26,
    labelFontSize=22,
).configure_view(
    strokeWidth=0
).configure_title(
    font=my_font,
    fontSize=24
).properties(
    title=title,
    height=340,
    width=400,
).configure_legend(
    legendX=190,
    legendY=250,
    orient='none',
    titleFontSize=22,
    labelFontSize=18,
    titleLimit=0
)
# Uncomment to export the figure:
# fig_fin.save("fig_meg.svg")
# fig_fin.save("fig_meg.png", scale_factor=3)
fig_fin

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
