In [1]:
import sys
sys.path.append('/pl/active/banich/studies/Relevantstudies/abcd/data/clustering/analysis/')

from import_data import *
from import_subtypes import *

CBCL: cbcl_base_t, cbcl_base_factors 
fMRI nback: nback
SST: sst
NIH: pcs
Stroop Behavioral: stroop
Matrix: Reasoning
All demos: demos_baseline
UPPS Factors: upps_factors
Cognitive: cog_all
COG EF Factors: cog_ef_factors
Resting State Include: sample1_rest_include, sample2_rest_include, full_sample_rest_include /n sample1_rest_include_idsub, sample2_rest_include_idsub, sample2_rest_idsub, full_sample_rest_include_idsub
Resting State Combined: sample1_rest_combined, sample2_rest_combined, full_sample_rest_combined /n sample1_rest_combined_idsub, sample2_rest_combined_idsub, full_sample_rest_combined_idsub
Resting State Dont Include: full_sample_rest_dont_include /n full_sample_rest_dont_include_idsub


In [2]:
def std_data(data, std_vars):
    
    for col in data[std_vars].columns:
        data[col] = (data[col] - data[col].mean())/data[col].std(ddof=0)
        
    return data

def get_stds(data): 
    
    import numpy as np
    from scipy import stats

    def confidence_interval(data):
        """Return 95% confidence interval for mean."""
        mean = np.mean(data)
        ci_low, ci_high = stats.t.interval(0.95, len(data)-1, loc=mean, scale=stats.sem(data))
        return ci_low, ci_high 

    cis_low_list=[]
    cis_high_list=[]
    cis_dif_list=[]
    
    columns_to_check = data.columns.to_list()

    for i in data.Subtype.unique():

        # Filter out rows with NA values in specific columns for the specific Subtype
        filtered_data = data.query('Subtype == @i').dropna(subset=columns_to_check)

        # Check if filtered_data is not empty
        if not filtered_data.empty:

            cis = (filtered_data
                   .apply(lambda x: confidence_interval(x))
                   .reset_index()
                   .iloc[:, 2:].T.rename({0:'low', 1:'high'}, axis=1)
                  )

            cis['dif'] = (cis['high'] - cis['low'])/4

            cis_low_list.append(cis['low'].to_list())
            cis_high_list.append(cis['high'].to_list())
            cis_dif_list.append(cis['dif'].to_list())

    cis_low_dict = {i+1: cis_low_list[i] for i in range(len(cis_low_list))}
    cis_high_dict = {i+1: cis_high_list[i] for i in range(len(cis_high_list))}
    stds = {i+1: cis_dif_list[i] for i in range(len(cis_dif_list))}
    
    return stds


def get_means(data):
    
    means = data.groupby('Subtype').mean().reset_index().T.iloc[1:]
    means.columns = list(range(1, means.shape[1]+1))
    means= {col: means[col].tolist() for col in means.columns}
    
    return means

def get_means_stds(data):
    
    means = get_means(data)
    stds = get_stds(data)
    
    return means, stds

import numpy as np
import matplotlib.pyplot as plt


In [3]:
full_sample_all = (full_sample_rest_include_idsub
 .merge(cog_all, on='ID', how='outer')
 .merge(cog_ef_factors, on='ID', how='outer')
 .merge(cbcl_b_t, on='ID', how='outer')
 .merge(cbcl_base_factors, on='ID', how='outer')
 .merge(upps_factors, on='ID', how='outer')
 .merge(stroop_beh, on='ID', how='outer')
 .drop_duplicates('ID', keep='first')
 .dropna(subset=['Subtype'])
 #.query('abcd_site != "site22"')
)

sample1_all = (sample1_rest_include_idsub
 .merge(cog_all, on='ID', how='outer')
 .merge(cog_ef_factors, on='ID', how='outer')
 .merge(cbcl_b_t, on='ID', how='outer')
 .merge(cbcl_base_factors, on='ID', how='outer')
 .merge(upps_factors, on='ID', how='outer')
 .merge(stroop_beh, on='ID', how='outer')
 .drop_duplicates('ID', keep='first')
 .dropna(subset=['Subtype'])
 #.query('abcd_site != "site22"')
)


sample2_all = (sample2_rest_include_idsub
 .merge(cog_all, on='ID', how='outer')
 .merge(cog_ef_factors, on='ID', how='outer')
 .merge(cbcl_b_t, on='ID', how='outer')
 .merge(cbcl_base_factors, on='ID', how='outer')
 .merge(upps_factors, on='ID', how='outer')
 .merge(stroop_beh, on='ID', how='outer')
 .drop_duplicates('ID', keep='first')
 .dropna(subset=['Subtype'])
 #.query('abcd_site != "site22"')
)

In [4]:
def radar(plot_data, radar_var, min_val, max_val, num_breaks,  dont_include_x = None, save=None):
    
    means, stds = get_means_stds(plot_data)
    rest_colors = ['#f6511d', '#ffb400', '#7fb800', '#0d2c54']

    data = {
        'Subtype 1': {'scores': means[1], 'std': stds[1], 'color': rest_colors[0]},
        'Subtype 2': {'scores': means[2], 'std': stds[2], 'color': rest_colors[1]},
        'Subtype 3': {'scores': means[3], 'std': stds[3], 'color': rest_colors[2]},
        'Subtype 4': {'scores': means[4], 'std': stds[4], 'color': rest_colors[3]}
    }

    import matplotlib.colors

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, polar=True)

    angles = np.linspace(0, 2 * np.pi, len(radar_vars), endpoint=False)
    angles = np.concatenate((angles, [angles[0]]))
    #radar_vars.append(radar_vars[0])
    
    local_radar_vars = radar_var.copy()
    local_radar_vars.append(local_radar_vars[0])

    # Loop through the data dictionary to plot each student's data
    for sub, attributes in data.items():

        scores = attributes['scores'] + [attributes['scores'][0]]

        std_dev = attributes['std'] + [attributes['std'][0]/2]
        std_dev_pos = [sum(x) for x in zip(scores, std_dev)]
        std_dev_neg = [s - d for s, d in zip(scores, std_dev)]

        color = attributes['color']

        plot = ax.errorbar(angles, scores, 
                           yerr=std_dev, 
                           fmt='o-', 
                           color=color, 
                           linewidth=0, 
                           label=sub)

        ax.plot(angles, scores, 'o-', color=color, linewidth=2, label=sub, zorder=3, markersize=4)
        alpha_value = 0.1  # Adjust this value as needed. 1.0 is opaque, 0.0 is transparent.
        #ax.plot(angles, scores, 'o-', color=color, linewidth=2, label=sub)
        # Now, we will fill between the scores and std_dev_pos, and scores and std_dev_neg
        ax.fill_between(angles, scores, std_dev_pos, alpha=alpha_value, color=color, edgecolor='none')
        ax.fill_between(angles, scores, std_dev_neg, alpha=alpha_value, color=color, edgecolor='none')

        plot[-1][0].set_color(color)
        
    ax.set_facecolor(None)
    ax.set_thetagrids(angles * 180 / np.pi, local_radar_vars, zorder=1)

    ax.grid(axis='y', color='#CECDC7', linewidth=1,  zorder=3)
    ax.grid(axis='x', color='#CECDC7', linewidth=.5, zorder=2)
    # Remove the vertical lines on the radar plot
    #ax.spines["polar"].set_visible(False)
    
    for spine in ax.spines.values():
        spine.set_color('white')
        
     # Adjust y-ticks to spread them out
    #yticks = np.linspace(min_val, max_val, num_breaks)  # adjust min_val, max_val, num_breaks as needed
    yticks = np.arange(min_val, max_val + 0.1, num_breaks)  # Updated this line for 0.1 increments
    ax.set_yticks(yticks)
    
    #ax.xaxis.grid(False)
    # Adjust position of radial labels to ensure they don't touch the plot
    # Set the maximum radial value to push the labels outward
    #ax.set_rmax(max_val +.1)  # Adding an offset, adjust this as needed
    # Adjust position of radial labels to ensure they don't touch the plot
    ax.set_rlabel_position(36)  # Adjust the angle as needed. It's reduced to 15 degrees for better placement.
        
    #plt.tight_layout()
     # Instead of using min_val and max_val, use fixed_min and fixed_max
    ax.set_ylim(min_val-.1, max_val+.1)  # Set the fixed min and max for the radar plot

    if dont_include_x is not None:
        ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        ax.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)

    plt.tight_layout()
    
    if save is not None:
        
        #plt.figure(facecolor='none')
            
        plt.savefig(f'/pl/active/banich/studies/Relevantstudies/abcd/data/clustering/analysis/group_differences/pheno_figs/rest/{save}.png', 
                    #facecolor=None, 
                    dpi=300, 
                    transparent=True, 
                    #edgecolor='none'
                   )
        plt.clf()
        
    else:
        plt.show()

In [5]:
radar_vars = ['total_r', 'internalizing_r', 'anxious_depressed_r', 'withdrawn_depressed_r','social_problems_r','somatic_complaints_r',
              'thought_problems_r','attention_problems_r','rule_breaking_r','agressive_r', 'externalizing_r', 'negative_urgency', 
              'positive_urgency', 'predmeditation', 'perserverance', 'sensation_seeking', 'CommonEF', 'UpdatingSpecific', 'Intelligence', 
              'pc1_new_r','pc2_new_r','pc3_new_r', 'LMT_r', 'RAVLT_r','Stroop_interf_acc_all_r','Happy_Acc_Eq_r','Angry_Acc_Eq_r']
#std_vars_all = sample1_all.iloc[:, 2:].columns.to_list()

for i,j in zip([full_sample_all, sample1_all, sample2_all], 
               ['full_sample_all', 'sample1_all', 'sample2_all']):
    
    sample_all_sel = std_data(i[['Subtype'] + radar_vars], radar_vars)
    radar(sample_all_sel, radar_vars, min_val = -.4, max_val = .3, num_breaks = .2, save=j)

for i,j in zip([full_sample_all, sample1_all, sample2_all], 
           ['full_sample_all_nox', 'sample1_all_nox', 'sample2_all_nox']):

    sample_all_sel = std_data(i[['Subtype'] + radar_vars], radar_vars)
    radar(sample_all_sel, radar_vars, min_val = -.4, max_val = .3, num_breaks = .2, dont_include_x=True, save=j)

<Figure size 1000x1000 with 0 Axes>

<Figure size 1000x1000 with 0 Axes>

<Figure size 1000x1000 with 0 Axes>

<Figure size 1000x1000 with 0 Axes>

<Figure size 1000x1000 with 0 Axes>

<Figure size 1000x1000 with 0 Axes>

In [6]:
for i,j in zip([sample1_rest_include_idsub, sample2_rest_include_idsub],
               ['sample1_rest_include_radar_cbcl', 'sample2_rest_include_radar_cbcl']): 
    
    beh_all.iloc[:, 2:12].columns.to_list()
    data_subs = pd.merge(i, beh_all, on='ID')
    radar_vars = beh_all.iloc[:, 2:12].columns.to_list()

    radar_vars = [
         'internalizing_r',
         'withdrawn_depressed_r',
         'somatic_complaints_r',
         'social_problems_r',
         'thought_problems_r',
         'attention_problems_r',
         'rule_breaking_r',
         'agressive_r',
         'externalizing_r',
         'total_r']
    
    sample = std_data(data_subs[['Subtype'] + radar_vars], radar_vars)

    radar(sample, radar_vars, min_val = -.20, max_val = .20, num_breaks = .1, save=j)

<Figure size 1000x1000 with 0 Axes>

<Figure size 1000x1000 with 0 Axes>

In [None]:
min_val = -.60
max_val = .2
num_breaks = 5

np.linspace(min_val, max_val, num_breaks).round(3)

In [None]:
ef_vars = list(cog_ef_factors.iloc[:, 1:].columns) + ['LMT_r', 'RAVLT_r']
cog_ef_all = pd.merge(cog_ef_factors, cog_all, on = ['ID'])[['ID'] + ef_vars]

In [None]:
for i,j in zip([sample1_rest_include_idsub, sample2_rest_include_idsub],
               ['sample1_rest_include_radar_cogef', 'sample2_rest_include_radar_cogef']): 
    
    cog_plot_data = pd.merge(cog_ef_factors, cog_all, on ='ID')
    data_subs = pd.merge(i,cog_plot_data, on='ID')
    radar_vars = ['CommonEF', 'UpdatingSpecific', 'Intelligence', 'LMT_r', 'RAVLT_r']
    sample = std_data(data_subs[['Subtype'] + radar_vars], radar_vars)

    radar(sample, radar_vars, min_val = -.4, max_val = .2, num_breaks = .1, save=j)

In [None]:
radar(sample, radar_vars, min_val = -.4, max_val = .2, num_breaks = .1, save=None)

In [None]:
for i,j in zip([sample1_rest_include_idsub, sample2_rest_include_idsub],
               ['sample1_rest_include_radar_upps', 'sample2_rest_include_radar_upps']):
    
    data_subs = pd.merge(i,upps_factors, on='ID')
    radar_vars = ['predmeditation', 'perserverance', 'sensation_seeking','negative_urgency', 'positive_urgency']
    sample = std_data(data_subs[['Subtype'] + radar_vars], radar_vars)
    
    radar(sample, radar_vars, min_val = -.2, max_val = .2, num_breaks = .1, save=j)

In [None]:
radar(sample, radar_vars, min_val = -.2, max_val = .2, num_breaks = .1, save=None)

In [None]:
for i,j in zip([sample1_rest_include_idsub, sample2_rest_include_idsub],
               ['sample1_rest_include_radar_upps', 'sample2_rest_include_radar_upps']):
    
    data_subs = pd.merge(i,stroop_beh, on='ID')
    radar_vars = ['Stroop_interf_acc_all_r','Happy_Acc_Eq_r','Angry_Acc_Eq_r']
    sample = std_data(data_subs[['Subtype'] + radar_vars], radar_vars)

    radar(sample, radar_vars, min_val = -.2, max_val = .2, num_breaks = .1, save=j)

In [None]:
radar(sample, radar_vars, min_val = -.2, max_val = .2, num_breaks = .1, save=None)