In [None]:
import datasets

import nibabel as nib
import os
import pickle
import pandas as pd
import seaborn as sns
import numpy as np

import draw_results
import gene_analysis
import removing_confound
import meta_roi
import meta_voxel
import meta_vertex
import meta_confound
import correlation
from mask import Mask, NiiMask

# Site counts

In [None]:
# Count ADNI subjects per acquisition site and draw them as stacked bars.
import datasets
from collections import Counter
import seaborn as sns

fontsize = 14

# sites[site_id] == [count of label 0, count of label 1, count of label 2]
tmp_centers = datasets.load_centers_adni()
sites = {}
for center in tmp_centers:
    for person in center.persons:
        # The first three characters of the filename are used as the site id.
        site = person.filename[:3]
        # person.label is used directly as the index into the count list.
        sites.setdefault(site, [0, 0, 0])[person.label] += 1


import pandas as pd
df = pd.DataFrame.from_dict(sites, orient='index')
# Sort by site id so the bar order is reproducible.
sorted_df = df.sort_index()
nc = sorted_df.values.T[0]
mc = sorted_df.values.T[1]
ad = sorted_df.values.T[2]

import matplotlib.pyplot as plt
ticks = range(len(nc))
fig, ax = plt.subplots(1, 1)
# Stack the three groups: each `bottom` is the running total of the bars below.
ax.bar(ticks, nc, label='NC')
ax.bar(ticks, mc, bottom=nc, label='MCI')
ax.bar(ticks, ad, bottom=nc+mc, label='AD')
# Individual site ids are not shown on the x axis.
ax.set_xticklabels([])
ax.set_ylabel('Subject Counts', fontsize=fontsize)
ax.set_xlabel('ADNI sites', fontsize=fontsize)
# The bars are labelled, so show the legend (plot_center_counts does the same).
ax.legend()
plt.show()
plt.close(fig)

In [None]:
import numpy as np
def plot_center_counts(xlabels, centers, width=0.3, fontsize=14, rotate=False, rotation=45):
    """Draw a stacked bar chart of subject counts per center.

    Parameters
    ----------
    xlabels : sequence of str
        One tick label per center, in the same order as ``centers``.
    centers : iterable
        Objects exposing ``get_by_label(label)``; ``len()`` of the returned
        collection is taken as the number of subjects with that label.
    width : float
        Bar width passed to ``Axes.bar``.
    fontsize : int
        Font size of the tick labels and of the y-axis label.
    rotate : bool
        When true, rotate the x tick labels by ``rotation`` degrees.
    rotation : float
        Rotation angle used when ``rotate`` is true.

    Raises
    ------
    ValueError
        If ``xlabels`` and ``centers`` differ in length.
    """
    centers = list(centers)
    xlabels = list(xlabels)
    if len(xlabels) != len(centers):
        raise ValueError(
            'got {} labels for {} centers'.format(len(xlabels), len(centers)))

    # Labels 0 / 1 / 2 are drawn as NC / MCI / AD respectively.
    nc = np.array([len(center.get_by_label(0)) for center in centers])
    mc = np.array([len(center.get_by_label(1)) for center in centers])
    ad = np.array([len(center.get_by_label(2)) for center in centers])

    # Use the labels passed in, not a module-level variable.
    ticks = range(len(xlabels))
    fig, ax = plt.subplots(1, 1)
    ax.bar(ticks, nc, width=width, label='NC')
    ax.bar(ticks, mc, width=width, bottom=nc, label='MCI')
    ax.bar(ticks, ad, width=width, bottom=nc+mc, label='AD')
    ax.set_xticks(ticks)
    ax.set_xticklabels(xlabels, fontsize=fontsize)
    ax.set_ylabel('Subject Counts', fontsize=fontsize)
    ax.legend()
    if rotate:
        plt.xticks(rotation=rotation)
    plt.show()
    plt.close(fig)
# One stacked-bar figure per cohort: (tick labels, loader, rotate tick labels?).
# `sites` and `centers` are deliberately kept as module-level names, exactly
# as before, because other code in this notebook may read them.
cohort_plots = (
    (['ADNI1', 'ADNI2', 'ADNI3'],
     datasets.load_centers_adni, False),
    (['PL_G', 'PL_S', 'HH_Z', 'QL_W', 'XW_H', 'XW_Z', 'PL_S2', 'ZJ_L'],
     datasets.load_centers_mcad, False),
    (['AMS', 'BRE', 'CAM', 'DUB', 'FRA', 'FRE', 'MAI', 'MAN', 'MIL',
      'MUN', 'ROS', 'ROS_3T'],
     datasets.load_centers_edsd, True),
)
for sites, load_centers, rotate in cohort_plots:
    centers = load_centers()
    plot_center_counts(sites, centers, rotate=rotate)

# Check Image quality

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.ticker as mtick

centers = datasets.load_centers_all()
sns.set_theme(style='white')

# Plot the quality ratings of the preprocessed images.
# Collect the per-center rating arrays and join them once along axis 0.
quality_chunks = []
for center in centers:
    q, _ = center.get_image_quality()
    # A size-1 result is treated as "no ratings for this center" and skipped.
    if np.size(q) != 1:
        quality_chunks.append(q)
if not quality_chunks:
    raise ValueError('no image quality ratings found in any center')
# NOTE(review): the division by 10 presumably rescales the ratings to 0-1 so
# that PercentFormatter(1.0) below reads them as percentages - confirm.
image_qualities = np.concatenate(quality_chunks, axis=0) / 10

# One histogram per rating column; column i is titled titles[i].
titles = ['Resolution', 'Noise', 'Bias', 'IQR']
fontsize = 16
# Secondary (top) axis: letter grades placed at fixed rating positions.
grade_ticks = [0.25,0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
grade_labels = ['F', 'E', 'D-','D', 'C -','C', 'B -' ,'B',
                'A -' ,'A', 'A+']
for i, title in enumerate(titles):
    fig, ax = plt.subplots()

    ax = sns.histplot(image_qualities[:, i], ax=ax, stat="probability")
    ax.set_title(title, fontsize=fontsize)
    ax.set_xlabel('Percentage Rating Points', fontsize=fontsize)
    ax.xaxis.set_major_formatter(mtick.PercentFormatter(1.0))
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))

    ax2 = ax.twiny()
    ax2.set_xticks(grade_ticks)
    # Match the bounds of the bottom axis so the grades line up with the ratings.
    ax2.set_xbound(ax.get_xbound())
    ax2.set_xticklabels(grade_labels, fontsize=fontsize)
    plt.show()
    # Release the figure; four figures are created in this loop.
    plt.close(fig)

In [None]:
# Check image quality ttest
def load_values(centers, label, axis=0):
    """Concatenate the image-quality arrays of all centers for one label.

    Each center is queried with ``get_image_quality(label=label)``; the first
    element of the returned pair is kept unless ``np.size`` of it is 1, which
    is treated as "nothing to add". The kept arrays are joined along ``axis``.

    Returns the joined array, the single kept array unchanged when there is
    only one, or the integer 0 when no center contributed anything.
    """
    stacked = None
    for center in centers:
        quality, _ = center.get_image_quality(label=label)
        if np.size(quality) == 1:
            continue
        if stacked is None:
            stacked = quality
        else:
            stacked = np.concatenate((stacked, quality), axis=axis)
    return 0 if stacked is None else stacked

# Import the subpackage explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` is loaded on every SciPy version.
import scipy.stats

# Quality ratings of label-2 subjects vs label-0 subjects
# (labels 2 / 0 are plotted as AD / NC elsewhere in this notebook).
a = load_values(centers, 2, 0)
b = load_values(centers, 0, 0)

# Independent two-sample t-test on rating column 1.
# NOTE(review): column 1 is titled 'Noise' in the histogram cell - confirm
# that the column order is the same here.
t, p = scipy.stats.ttest_ind(a[:,1], b[:,1])
print(t, p)

# Meta by gender

In [None]:
# Define the atlas here instead of relying on a `mask` left behind by another
# cell (it was previously undefined when the notebook runs top to bottom).
# NOTE(review): the Brainnetome volume is what the other ROI GMV cells in this
# notebook use - confirm it is the intended atlas for this analysis.
mask = NiiMask('./data/mask/rBN_Atlas_246_1mm.nii')

gs = ['male', 'female']
cs = ['mcad', 'edsd', 'adni']
center_loaders = {
    'mcad': datasets.load_centers_mcad,
    'edsd': datasets.load_centers_edsd,
    'adni': datasets.load_centers_adni,
}
# Run the label 2 vs label 0 ROI GMV meta-analysis per dataset and per gender.
for center in cs:
    centers = center_loaders[center]()
    for gender in gs:
        # create_csv_for_meta takes the gender as a 1 (male) / 0 (female) flag.
        ismale = 1 if gender == 'male' else 0
        csv_dir = './data/meta_csv/{}/{}'.format(center, gender)
        meta_roi.create_csv_for_meta(centers, 2, 0,
                                    csv_prefix='roi_gmv',
                                    out_path=csv_dir,
                                    gender=ismale)
        meta_roi.meta_gmv(2, 0, mask,
                        csv_dir=csv_dir,
                        csv_prefix='roi_gmv',
                        out_dir='./results/meta/{}/{}'.format(center, gender))

# AAL and Schaefer atlases

In [None]:
from mask import GiiMask, NiiMask
import datasets
import meta_roi

# Load every center explicitly. This assignment used to live only inside the
# disabled block below, so the CSV loop silently used whatever `centers`
# an earlier cell had left in the kernel.
centers = datasets.load_centers_all()

# Disabled step, kept for reference. It is wrapped in a string so it does not
# run; presumably it was a one-off generation of the per-center CT CSVs.
"""
gii_mask = GiiMask('./data/mask/mesh.intensity_aal.Template_T1_IXI555_MNI152_GS.gii')
gii_mask2 = GiiMask('./data/mask/mesh.intensity_Schaefer_1000_1mm.Template_T1_IXI555_MNI152_GS.gii')

for center in centers:
    center.create_rct_csv_by_gii(gii_mask, csv_dirname='aal_ct')
    center.create_rct_csv_by_gii(gii_mask2, csv_dirname='schaefer_ct')
"""
# %%
# (experimental group label, control group label) pairs to compare.
label_pairs = [(2,0), (2,1), (1,0)]
csv_prefixs = ['aal', 'aal_ct', 'roi_gmv_schaefer', 'schaefer_ct']
out_prefix = './data/meta_csv/suppment'

## Create csv for meta analysis: one call per (csv prefix, label pair).
for csv_prefix in csv_prefixs:
    for label_pair in label_pairs:
        label_eg, label_cg = label_pair
        meta_roi.create_csv_for_meta(centers, label_eg, label_cg, csv_prefix,
                                        out_path=out_prefix)
# %%
# Atlas volumes handed to the meta-analysis routines.
mask1 = NiiMask('./data/mask/aal.nii')
mask2 = NiiMask('./data/mask/Schaefer_1000_1mm.nii')

# (atlas mask, csv prefix, value passed as `count`) per analysis, in run order.
gmv_jobs = ((mask1, 'aal', 116), (mask2, 'roi_gmv_schaefer', 1000))
ct_jobs = ((mask1, 'aal_ct', 116), (mask2, 'schaefer_ct', 1000))

for label_pair in label_pairs:
    label_eg, label_cg = label_pair
    # GMV meta-analysis on both atlases
    for atlas_mask, prefix, n_rois in gmv_jobs:
        meta_roi.meta_gmv(label_eg, label_cg, atlas_mask,
                          csv_prefix=prefix,
                          csv_dir='./data/meta_csv/suppment',
                          out_dir='./results/supp',
                          count=n_rois)
    # CT meta-analysis on both atlases (NIfTI output only, no GIfTI)
    for atlas_mask, prefix, n_rois in ct_jobs:
        meta_roi.meta_ct(label_eg, label_cg, mask=atlas_mask,
                         csv_prefix=prefix,
                         csv_dir_prefix='./data/meta_csv/suppment',
                         out_dir_prefix='./results/supp',
                         save_gii=False, save_nii=True, count=n_rois)

# Single Datasets

In [None]:
# %%
# Shared setup for the per-dataset (MCAD / EDSD / ADNI) ROI meta-analyses.
csv_prefixs = ['roi_gmv_removed' ,'roi_ct_removed']
label_pairs = [(2, 0)]
names = ['mcad', 'edsd', 'adni']
# `races` holds the loaded centers of each dataset, in the same order as `names`.
races = [
    datasets.load_centers_mcad(),
    datasets.load_centers_edsd(),
    datasets.load_centers_adni(),
]
mcad, edsd, adni = races
mask = NiiMask('./data/mask/rBN_Atlas_246_1mm.nii')

In [None]:
### Write the meta-analysis input CSVs, one folder per dataset.
for csv_prefix in csv_prefixs:
    for label_pair in label_pairs:
        label_eg, label_cg = label_pair
        for race, name in zip(races, names):
            dataset_csv_dir = './data/meta_csv/suppment/{}'.format(name)
            meta_roi.create_csv_for_meta(race, label_eg, label_cg, csv_prefix,
                                         out_path=dataset_csv_dir)

In [None]:
# ROI GMV meta-analysis models keyed by dataset name, plus the pooled 'main' one.
names = ['mcad', 'edsd', 'adni']
roi_models = {}
for label_pair in label_pairs:
    label_eg, label_cg = label_pair
    for race, name in zip(races, names):
        dataset_csv_dir = './data/meta_csv/suppment/{}'.format(name)
        dataset_out_dir = './results/supp/{}'.format(name)
        roi_models[name] = meta_roi.meta_gmv(
            label_eg, label_cg, mask,
            csv_prefix='roi_gmv_removed',
            csv_dir=dataset_csv_dir,
            out_dir=dataset_out_dir,
            save_nii=False)

# The pooled model reuses label_eg / label_cg from the last loop iteration.
csv_dir = './data/meta_csv'
out_dir = './results/meta'
roi_models['main'] = meta_roi.meta_gmv(
    label_eg, label_cg, mask,
    csv_prefix='roi_gmv_removed', csv_dir=csv_dir, out_dir=out_dir,
    save_nii=False)

In [None]:
# ROI CT meta-analysis models keyed by dataset name, plus the pooled 'main' one.
# This rebinds `roi_models`, replacing whatever an earlier cell stored there.
names = ['mcad', 'edsd', 'adni']
roi_models = {}
for label_pair in label_pairs:
    label_eg, label_cg = label_pair
    for race, name in zip(races, names):
        dataset_csv_dir = './data/meta_csv/suppment/{}'.format(name)
        dataset_out_dir = './results/supp/{}'.format(name)
        roi_models[name] = meta_roi.meta_ct(
            label_eg, label_cg, mask,
            csv_prefix='roi_ct_removed',
            csv_dir_prefix=dataset_csv_dir,
            out_dir_prefix=dataset_out_dir,
            save_nii=False, save_gii=False)

# The pooled model reuses label_eg / label_cg from the last loop iteration.
csv_dir = './data/meta_csv'
out_dir = './results/meta'
roi_models['main'] = meta_roi.meta_ct(
    label_eg, label_cg, mask,
    csv_prefix='roi_ct_removed', csv_dir_prefix=csv_dir,
    out_dir_prefix=out_dir, save_nii=False, save_gii=False)

In [None]:
# Touch the 'mcad' entry first: this raises immediately if the models cell
# has not been run (the length itself is not used).
len(roi_models['mcad'])
names = ['main', 'mcad', 'edsd', 'adni']
# Every unordered pair of indices into `names` (upper triangle of the matrix).
name_pairs = [(row, col) for row in range(4) for col in range(row + 1, 4)]
# Start from the identity: each dataset correlates perfectly with itself.
corr = np.eye(4)

def get_values(models_dict):
    """Return the ``total_effect_size`` of every model, ordered by dict key.

    Sorting by key (as the other cells do with ``sorted(models.items())``)
    makes the order independent of insertion order, so the value lists of two
    model dicts with the same keys line up element by element.

    Parameters
    ----------
    models_dict : dict
        Maps a sortable ROI key to an object with a ``total_effect_size``
        attribute.

    Returns
    -------
    list
        Effect sizes in ascending key order; empty for an empty dict.
    """
    return [models_dict[key].total_effect_size for key in sorted(models_dict)]
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

# Fill the symmetric matrix with the Pearson r between the ROI effect sizes
# of every pair of datasets.
for name_pair in name_pairs:
    i, j = name_pair
    values1 = get_values(roi_models[names[i]])
    values2 = get_values(roi_models[names[j]])
    r, p = pearsonr(values1, values2)
    corr[i,j] = r
    corr[j,i] = r

f, ax = plt.subplots(figsize=(9, 9))
# Draw the annotated heatmap on the axes created above. The unused
# "cubehelix_r" palette was dropped; the plot always used 'YlGnBu'.
ax = sns.heatmap(corr, cmap='YlGnBu', vmin=0, vmax=1,
            square=True, linewidths=.5, cbar_kws={"shrink": .8},
            annot_kws={'size':25},
            annot=True, fmt=".2f", ax=ax)
# Axis decorations are hidden; rows/columns follow the order of `names`.
ax.axis('off')
plt.show()

# PLSR Single dataset

In [None]:
# Number of permutations and of bootstrap resamples passed to the PLSR.
n_perm_boot = 5000
n_components = 5

label_pairs = [(2,0)]

# NOTE(review): the loaded centers are not used further down in this cell;
# they are kept so the module-level names stay defined as before.
mcad = datasets.load_centers_mcad()
edsd = datasets.load_centers_edsd()
adni = datasets.load_centers_adni()
races = [mcad, edsd, adni]
names = ['mcad', 'edsd', 'adni']
mask = NiiMask('./data/mask/rBN_Atlas_246_1mm.nii')

for name in names:
    for label_pair in label_pairs:
        label_eg, label_cg = label_pair
        csv_dir = './data/meta_csv/suppment/{}'.format(name)
        out_dir ='./results/supp/{}'.format(name)
        # The CSV and pickle outputs below are written straight into out_dir.
        os.makedirs(out_dir, exist_ok=True)

        # NOTE(review): no csv_prefix is passed, so the library defaults apply;
        # the CSVs in this folder were written with the '*_removed' prefixes -
        # confirm that the defaults match.
        roi_gmv_models = meta_roi.meta_gmv(label_eg, label_cg, mask,
                                           csv_dir=csv_dir,
                                           out_dir=out_dir,
                                           save_nii=False)
        # Pass the atlas explicitly, as every other meta_ct call here does.
        roi_ct_models = meta_roi.meta_ct(label_eg, label_cg, mask=mask,
                                         csv_dir_prefix=csv_dir,
                                         out_dir_prefix=out_dir,
                                         save_gii=False, save_nii=False)

        # ROI id (as int) -> pooled effect size, for each modality.
        gmv_es_dict = {int(k): v.total_effect_size
                       for k, v in sorted(roi_gmv_models.items())}
        ct_es_dict = {int(k): v.total_effect_size
                      for k, v in sorted(roi_ct_models.items())}
        print('model calculated')

        gmv_plsr = gene_analysis.plsr(gmv_es_dict, n_components=n_components,
                                    n_perm=n_perm_boot, n_boot=n_perm_boot,
                                    out_path=os.path.join(out_dir, 'plsr_gmv.csv'))
        # Persist the GMV result right away so it is not lost if the CT run fails.
        with open(os.path.join(out_dir, 'plsr_gmv.pickle'), 'wb') as f:
            pickle.dump(gmv_plsr, f)
        print('GMV Finished')

        ct_plsr = gene_analysis.plsr(ct_es_dict, n_components=n_components,
                                    n_perm=n_perm_boot, n_boot=n_perm_boot,
                                    out_path=os.path.join(out_dir, 'plsr_ct.csv'))
        with open(os.path.join(out_dir, 'plsr_ct.pickle'), 'wb') as f:
            pickle.dump(ct_plsr, f)

In [None]:
import pandas as pd
from scipy.stats import pearsonr
import numpy as np

# Compare the PLS1 gene weights of the pooled analysis with those of each
# single dataset. Rows are aligned on 'gene_name' before correlating (the
# same join the gene-loading correlation cell uses), so the result does not
# depend on the row order of the CSV files.
feature_names = ['gmv', 'ct']
names = ['mcad', 'edsd', 'adni']
for feature_name in feature_names:
    main_path = f'./results/gene/2_0/plsr_{feature_name}.csv'
    main_df = pd.read_csv(main_path)

    for name in names:
        sub_path = f'./results/supp/{name}/plsr_{feature_name}.csv'
        sub_df = pd.read_csv(sub_path)
        paired = main_df[['gene_name', 'pls1']].merge(
            sub_df[['gene_name', 'pls1']],
            on='gene_name', suffixes=('_main', '_sub'))
        r, p = pearsonr(paired['pls1_main'], paired['pls1_sub'])
        # Only the magnitude of r is reported.
        print(feature_name, name, np.abs(r))

# Left-right asymmetry

In [None]:
mask_path = './data/mask/rBN_Atlas_246_1mm.nii'
mask = NiiMask(mask_path)

label_pairs = [(2,0), (2,1), (1,0)]

centers = datasets.load_centers_all()

prefixs = ['roi_gmv', 'roi_gmv_removed', 'roi_ct', 'roi_ct_removed']
# NOTE(review): `i` runs over 0..2 and is passed through as-is; presumably it
# is the diagnosis label - confirm against meta_roi.
for i in range(3):
    for prefix in prefixs:
        # Only the GMV prefixes pass the atlas as `template`.
        asy_kwargs = {'template': mask} if 'gmv' in prefix else {}
        meta_roi.create_csv_for_meta_asy(centers, i, prefix, **asy_kwargs)
        meta_roi.meta_gmv_asy(i, mask, csv_prefix=prefix)

# MMSE result correlation with Meta

In [None]:
# Correlate the meta-analysis effect sizes with the MMSE mixed-model t-values
# (ROI GMV).
import meta_roi
import mixed_lm

mask_path = './data/mask/rBN_Atlas_246_1mm.nii'
mask = NiiMask(mask_path)

# Both value lists are ordered by integer ROI id, so the pairing is correct
# even if the two dicts use different key types (e.g. '10' vs 10).
# load META results
roi_models = meta_roi.meta_gmv(2, 0, mask, save_nii=False)
values1 = [v.total_effect_size
           for k, v in sorted(roi_models.items(), key=lambda kv: int(kv[0]))]

# load MMSE results
# ROI is model's eighth variable
stat_position = 8
gmv_models = mixed_lm.load_models('./results/mixedLM/gmv')
gmv_tvalues = mixed_lm.get_tvalues(gmv_models, i=stat_position)
values2 = [v for k, v in sorted(gmv_tvalues.items(), key=lambda kv: int(kv[0]))]

draw_results.plot_correlation_joint(values1, values2,
        'Effect sizes of ROI GMV', 't-map of ROI GMV MMSE LMM', fontsize=18, show=True,
        save=False)

In [None]:
# Correlate the meta-analysis effect sizes with the MMSE mixed-model t-values
# (ROI cortical thickness).
import meta_roi
import mixed_lm

mask_path = './data/mask/rBN_Atlas_246_1mm.nii'
mask = NiiMask(mask_path)

# Only ROI ids below 211 are compared (the CT mixed models elsewhere in this
# notebook are fitted with value_range=211). Both lists are ordered by integer
# ROI id so the pairing does not depend on the key type of either dict.
# load META results
roi_models = meta_roi.meta_ct(2, 0, mask, save_nii=False, save_gii=False)
values1 = [v.total_effect_size
           for k, v in sorted(roi_models.items(), key=lambda kv: int(kv[0]))
           if int(k) < 211]

# load MMSE results
# ROI is model's eighth variable
stat_position = 8
ct_models = mixed_lm.load_models('./results/mixedLM/ct')
ct_tvalues = mixed_lm.get_tvalues(ct_models, i=stat_position)
values2 = [v
           for k, v in sorted(ct_tvalues.items(), key=lambda kv: int(kv[0]))
           if int(k) < 211]

draw_results.plot_correlation_joint(values1, values2,
        'Effect sizes of ROI CT', 't-map of ROI CT MMSE LMM', fontsize=18, show=True,
        save=False)

In [None]:
# PLS1 gene weights: meta-analysis (df1) vs MMSE mixed model (df2), GMV.
csv_path_1 = r'./results/gene/2_0/plsr_gmv.csv'
csv_path_2 = r'./results/mixedLM/gene/plsr_gmv.csv'
df1 = pd.read_csv(csv_path_1)
df2 = pd.read_csv(csv_path_2)
# Inner join on the gene name; pandas suffixes the clashing columns with
# '_x' (left frame, df1) and '_y' (right frame, df2).
df = pd.merge(df1, df2, on='gene_name')
values1, values2 = df['pls1_x'], df['pls1_y']
draw_results.plot_correlation_joint(
    values1, values2,
    'GMV Meta gene loading', 'GMV MMSE gene loading',
    fontsize=18, show=True, save=False)

In [None]:
# Hexbin joint plot of the PLS1 gene weights from the CT meta-analysis against
# those from the CT MMSE mixed model, with a regression line and r / p title.
import draw_results
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
csv_path_1 = r'./results/gene/2_0/plsr_ct.csv'
csv_paht_2 = r'./results/mixedLM/gene/plsr_ct.csv'
df1 = pd.read_csv(csv_path_1)
df2 = pd.read_csv(csv_paht_2)
# Inner join on gene name; 'pls1_x' comes from df1 and 'pls1_y' from df2.
df = df1.merge(df2, on='gene_name')
values1 = df['pls1_x']
values2 = df['pls1_y']

x_label = 'CT Meta gene loading'
y_label = 'CT MMSE gene loading'

fontsize=18
r, p = pearsonr(values1, values2)
# Rebuild a two-column frame whose column names double as the axis labels.
df = pd.DataFrame(
                {x_label: values1,
                y_label: values2,
                })

g = sns.jointplot(x=x_label, y=y_label, data=df, kind="hex")
# Overlay the linear fit (no scatter points, no confidence band) on the joint axes.
sns.regplot(x=x_label, y=y_label, data=df, scatter=False,
            ci=None, ax=g.ax_joint)
# Drop the marginal histograms; only the joint panel is kept.
g.ax_marg_x.remove()
g.ax_marg_y.remove()

g.ax_joint.tick_params(axis='both', which='major', labelsize=fontsize)
g.set_axis_labels(x_label, y_label, fontsize=fontsize)
# The string below is disabled code (tick-label rotation), not a docstring.
"""
for item in g.ax_joint.get_xticklabels():
    item.set_rotation(45)
"""
# Hide every second x tick label to reduce clutter.
every_nth = 2
for n, label in enumerate(g.ax_joint.xaxis.get_ticklabels()):
    if n % every_nth != 0:
        label.set_visible(False)

# plt.title targets the current axes of the pyplot state machine.
plt.title('r={:.2f}, p={:.2e}'.format(r, p), 
          fontdict={'fontsize': fontsize})
plt.show()
plt.close()



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Forward slashes work on every platform (the old paths used backslashes).
df1 = pd.read_csv('./results/mixedLM/gene/webGestalt/GO_Bio_05_gmv/enrichment_results.csv', index_col=1)
df2 = pd.read_csv('./results/mixedLM/gene/webGestalt/GO_Bio_05_ct/enrichment_results.csv', index_col=1)
# Outer join keeps gene sets that appear in only one of the two analyses.
df = df1.merge(df2, how='outer', left_on='description', right_on='description',
          suffixes=('_gmv', '_ct'))
# Placeholder for "missing from this analysis": -log10(10) == -1, which is
# mapped to 0 below.
df = df.fillna(10)
df = df.sort_values('FDR_gmv', ascending=False)

fontsize=18
alpha=0.6
fig_width=10
fig_height=5

# x positions count down from len(df) to 1, following the sorted row order.
x_pos = range(len(df.index), 0, -1)
values = -np.log10(df['FDR_gmv'].to_numpy())
sub_values = -np.log10(df['FDR_ct'].to_numpy())
# An FDR of exactly 0 gives +inf; draw it at a fixed height of 4 instead.
values[values==np.inf] = 4
sub_values[sub_values==np.inf] = 4
# Gene sets absent from one analysis (placeholder above) are drawn at 0.
values[values==-1] = 0
sub_values[sub_values==-1] = 0
y_labels = df.index.to_numpy().tolist()

# A single figure: the extra empty figure from plt.subplots() was removed.
fig = plt.figure(figsize=(fig_width, fig_height))
ax = fig.add_axes([0, 0, 1, 1])

# Lollipop plot: one marker plus stem per gene set and modality.
ax.scatter(x=x_pos, y=values, label='GMV', alpha=alpha)
ax.vlines(x=x_pos, ymin=0, ymax=values)
ax.scatter(x=x_pos, y=sub_values, label='CT', alpha=alpha)
ax.vlines(x=x_pos, ymin=0, ymax=sub_values, color='C1')

ax.set_xticks(x_pos)
# Text properties go in as keywords; a positional fontdict is rejected by
# Matplotlib releases where it is keyword-only.
ax.set_xticklabels(y_labels, fontsize=14, ha='right', rotation=90)
ax.set_ylabel('-log10 FDR q-value', fontsize=fontsize)
ax.set_xlabel('Gene Sets', fontsize=fontsize)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

ax.legend()
plt.show()
plt.close(fig)

# LMM supplement

In [None]:
# Fit the supplementary mixed models on ROI GMV and ROI CT and write the
# results under ./results/supp/mixedLM/.
import mixed_lm
import datasets
import warnings
# Silences every warning for the rest of the kernel session.
warnings.filterwarnings('ignore')

centers = datasets.load_centers_all()
all_df_gmv = mixed_lm.load_data(centers, csv_prefix='roi_gmv/{}.csv')
# Variant 1 (age * diagnosis) - disabled for GMV
#mixed_lm.mixed_lm_supp1(all_df_gmv, out_dir='./results/supp/mixedLM/gmv_aged')
# Variant 2 (gender * diagnosis) - disabled for GMV
#mixed_lm.mixed_lm_supp2(all_df_gmv, out_dir='./results/supp/mixedLM/gmv_genderd')
# Variant 3 (age^2)
mixed_lm.mixed_lm_supp3(all_df_gmv, out_dir='./results/supp/mixedLM/gmv_age2')

# Same variants for CT; value_range=211 is passed for every CT call.
# NOTE(review): presumably this limits the fit to the first ROIs - confirm.
all_df_ct = mixed_lm.load_data(centers, csv_prefix='roi_ct/{}.csv')
mixed_lm.mixed_lm_supp1(all_df_ct, out_dir='./results/supp/mixedLM/ct_aged', value_range=211)
#mixed_lm.mixed_lm_supp2(all_df_ct, out_dir='./results/supp/mixedLM/ct_genderd', value_range=211)
mixed_lm.mixed_lm_supp3(all_df_ct, out_dir='./results/supp/mixedLM/ct_age2', value_range=211)

In [None]:
import mixed_lm
from mask import Mask, NiiMask
# Atlas used to write the per-ROI t-values into a volume.
mask_path = './data/mask/rBN_Atlas_246_1mm.nii'
mask = NiiMask(mask_path)
# ROI is model's eighth variable
stat_position = 8
# NOTE(review): p = 1 without correction presumably keeps every ROI - confirm.
p = 1
correction = False

# Save one t-map per supplementary model folder, in the original order.
for model_name in ('gmv_aged', 'ct_aged', 'gmv_age2', 'ct_age2'):
    model_dir = './results/supp/mixedLM/{}'.format(model_name)
    models = mixed_lm.load_models(model_dir)
    tvalues, pvalues = mixed_lm.get_stats(models, i=stat_position, p=p,
                                          correction=correction)
    mixed_lm.save_t_nii(tvalues, mask, '{}.nii'.format(model_dir))

# list Abeta/FDG detail

In [None]:
# Load the subject table of the FDG-PET sample.
import pet_fdg
import seaborn as sns
from mask import NiiMask

subject_df = pet_fdg.create_subject_df()
# The string below is disabled code, not a docstring; it never runs.
"""
mask_path = './data/mask/BN_Atlas_246_1mm.nii'
mask = NiiMask(mask_path)
pet_fdg.create_sum(subject_df, mask)
"""

def label_gender(row):
    """Translate the ``male`` flag of a row into 'Male' (1) or 'Female' (0).

    Any other value yields ``None``.
    """
    flag = row['male']
    if flag == 1:
        return 'Male'
    return 'Female' if flag == 0 else None

def label_label(row):
    """Translate the numeric ``Label`` of a row into 'NC', 'MCI' or 'AD'.

    0 -> 'NC', 1 -> 'MCI', 2 -> 'AD'; any other value yields ``None``.
    """
    code = row['Label']
    for value, text in ((0, 'NC'), (1, 'MCI'), (2, 'AD')):
        if code == value:
            return text
    return None

# Replace the numeric codes with display strings (row-wise) and add a
# capitalised copy of the center column.
subject_df['gender'] = subject_df.apply(label_gender, axis=1)
subject_df['Label'] = subject_df.apply(label_label, axis=1)
subject_df['Center'] = subject_df['center']

# Descending sort on the string labels.
subject_df = subject_df.sort_values("Label", ascending=False)

sns.set(font_scale = 1.4)
sns.set_style("white")
# MMSE distribution per diagnosis, split by gender.
ax = sns.boxenplot(data=subject_df, x='Label', y='MMSE', hue='gender',
                   palette="Set2")
# Place the legend just outside the right edge of the axes.
ax.legend(loc=(1.01,0))
plt.show()
plt.close()

sns.set_style("white")
# Subject counts per diagnosis, split by gender.
ax2 = sns.countplot(data=subject_df, x="Label", hue='gender', palette="Set2")
ax2.legend(loc=(1.01,0))
plt.show()
plt.close()

# Non-null counts of every column per diagnosis group.
print(subject_df.groupby(by=["Label"]).count())

In [None]:
# Preview the first rows only instead of dumping the whole subject table.
subject_df.head()

In [None]:
# Number of rows in the subject table currently bound to subject_df.
len(subject_df)

In [None]:
# Load the subject table of the amyloid-beta PET sample; this rebinds
# subject_df, replacing the table from the cells above.
import abeta_pet
import seaborn as sns
import matplotlib.pyplot as plt

subject_df = abeta_pet.create_subject_df()

def label_gender(row):
    """Return 'Male' when ``row['male']`` is 1, 'Female' when it is 0.

    Every other value gives ``None``.
    """
    is_male = row['male']
    if is_male == 1:
        return 'Male'
    elif is_male == 0:
        return 'Female'
    return None

def label_label(row):
    """Return the diagnosis name for ``row['Label']``.

    0 -> 'NC', 1 -> 'MCI', 2 -> 'AD'; every other value gives ``None``.
    """
    diagnosis = row['Label']
    if diagnosis == 0:
        name = 'NC'
    elif diagnosis == 1:
        name = 'MCI'
    elif diagnosis == 2:
        name = 'AD'
    else:
        name = None
    return name

# Sort on the numeric label first, then replace the codes with display strings.
subject_df = subject_df.sort_values(by='Label')
subject_df['gender'] = subject_df.apply(label_gender, axis=1)
subject_df['Label'] = subject_df.apply(label_label, axis=1)
subject_df['Center'] = subject_df['center']


sns.set(font_scale = 1.4)
sns.set_style("white")
# MMSE distribution per diagnosis, split by gender.
ax = sns.boxenplot(data=subject_df, x='Label', y='MMSE', hue='gender',
                   palette="Set2")
# Place the legend just outside the right edge of the axes.
ax.legend(loc=(1.01,0))
plt.show()
plt.close()

sns.set_style("white")
# Subject counts per diagnosis, split by gender.
ax2 = sns.countplot(data=subject_df, x="Label", hue='gender', palette="Set2")
ax2.legend(loc=(1.01,0))
plt.show()
plt.close()

# Non-null counts of every column per diagnosis group.
print(subject_df.groupby(by=["Label"]).count())

In [None]:
# Preview the first rows of the subject table.
subject_df.head()

In [None]:
# Number of rows in the subject table currently bound to subject_df.
len(subject_df)

In [None]:
# 5HT1B gene PLSR

In [None]:
# PLSR of the 5HT1B receptor map against gene expression.
# NOTE(review): a single permutation / bootstrap looks like a quick-run
# setting - confirm before relying on any significance output.
n_perm_boot = 1
n_components = 2
out_dir ='./results/gene/5HT1B'


df = pd.read_csv('./data/PET/masked_mean/5HT1b_P943_HC22.csv', index_col=0)
# Row index -> value of the 'Volume' column.
pet_values_dict = {index: row['Volume'] for index, row in df.iterrows()}

plsr_csv_path = os.path.join(out_dir, 'plsr.csv')
plsr = gene_analysis.plsr(pet_values_dict,
                          n_components=n_components,
                          n_perm=n_perm_boot,
                          n_boot=n_perm_boot,
                          out_path=plsr_csv_path)

In [None]:
# Joint correlation plot of the 5HT1b and 5HT1a masked-mean tables; all
# cells of each table are flattened and paired by position.
masked_mean_dir = './data/PET/masked_mean'
df = pd.read_csv(masked_mean_dir + '/5HT1b_P943_HC22.csv', index_col=0)
df2 = pd.read_csv(masked_mean_dir + '/5HT1a_WAY_HC36.csv', index_col=0)
values_5ht1b = df.values.flatten()
values_5ht1a = df2.values.flatten()
draw_results.plot_correlation_joint(values_5ht1b, values_5ht1a)


# Voxelwise correlation (for the supplementary material)

In [None]:
import nibabel as nib
import numpy as np
from scipy.stats import pearsonr
def voxel_corr(path1, path2):
    """Pearson correlation between all voxels of two image files.

    Both images are loaded with nibabel, flattened to 1-D, and NaN / +inf /
    -inf are replaced by 0 before the correlation is computed.

    NOTE(review): every voxel takes part, including any that are 0 in both
    images - confirm that is intended.

    Returns the ``(r, p)`` pair from ``scipy.stats.pearsonr``.
    """
    def _flat_finite(path):
        # Load the full data array, flatten it, and zero out non-finite values.
        voxels = np.array(nib.load(path).dataobj).flatten()
        return np.nan_to_num(voxels, nan=0, posinf=0, neginf=0)

    first = _flat_finite(path1)
    second = _flat_finite(path2)
    r, p = pearsonr(first, second)
    return r, p



## Other atlases

In [None]:
# Voxelwise agreement between the main effect-size maps and the maps from
# the alternative atlases, for each label pair.
labels = ['2_0', '2_1', '1_0']
# (main result folder, supplementary result folders): GMV first, then CT.
atlas_checks = [
    ('roi_gmv_removed', ['roi_gmv_schaefer', 'aal']),
    ('roi_ct_removed', ['schaefer_ct', 'aal_ct']),
]
for main_dir, supp_dirs in atlas_checks:
    for label in labels:
        print(label)
        p1 = f'./results/meta/{label}/{main_dir}/es.nii'
        sub_pathes = [f'./results/supp/{label}/{supp_dir}/es.nii'
                      for supp_dir in supp_dirs]
        for sub_path in sub_pathes:
            print(voxel_corr(p1, sub_path))

## Single datasets

In [None]:
# Voxelwise agreement between the pooled effect-size maps and the maps of
# each single dataset, for GMV and then CT.
labels = ['2_0', '2_1', '1_0']
ds = [ 'mcad', 'edsd', 'adni']
for heading, result_dir in (('GMV', 'roi_gmv_removed'), ('CT', 'roi_ct_removed')):
    print(heading)
    for label in labels:
        print(label)
        p1 = f'./results/meta/{label}/{result_dir}/es.nii'
        for d in ds:
            sub_path = f'./results/supp/{d}/{label}/{result_dir}/es.nii'
            print(d, voxel_corr(p1, sub_path))


# PLSR gene ranking VS 83 gene loci

In [None]:
# Load the GMV PLSR gene table and the list of gene loci to look up in it.
import pandas as pd

# The second CSV column becomes the index, so rows can be looked up by label
# with .loc below. NOTE(review): presumably this is the gene name - confirm.
plsr_gene_df = pd.read_csv('./results/gene/2_0/plsr_gmv.csv', index_col=1)
loci_df = pd.read_csv('./external/gene_83.csv')

# Values of the 'loci' column as an array.
locis = loci_df['loci'].values

In [None]:
# Show the loaded loci values.
locis

In [None]:
# Print every locus whose PLS1 weight exceeds 0.0002 in absolute value.
# Loci that raise KeyError on lookup are skipped silently.
for loci in locis:
    try:
        weight = plsr_gene_df.loc[loci]['pls1']
    except KeyError:
        continue
    if abs(weight) > 0.0002:
        print(loci, weight)

In [None]:
import os
import pandas as pd

# Project-relative folder (it sits next to the output file of the next cell)
# instead of a machine-specific absolute drive path.
_dir = './data/PET/abeta/info'
# Sorted so the row order of the combined table is reproducible.
files = sorted(os.listdir(_dir))

dfs = []
for f in files:
    # File name layout: the label is everything before the first '_'; the
    # visit code sits between the first '-' and the second '_'.
    first_ = f.find('_')
    label = f[:first_]
    vis = f[f.find('-')+1:f.find('_', first_+1)]
    df = pd.read_csv(os.path.join(_dir, f))
    # A scalar is broadcast to every row of the frame.
    df['Label'] = label
    df['VISCODE'] = vis
    dfs.append(df)

In [None]:
# Stack the per-file tables into one frame with a fresh 0..n-1 index and save
# it; to_csv also writes that index as the first (unnamed) column.
total = pd.concat(dfs, ignore_index=True)
total.to_csv('./data/PET/abeta/abeta_info.csv')