In [None]:
import numpy as np

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import pearsonr
from statsmodels.stats.multitest import multipletests
import statsmodels.api as sm

from pathlib import Path
# Make sure the output directory that later cells write their tables and figures into exists.
Path("../out/").mkdir(parents=True, exist_ok=True)

# Data loading

Load the participant master data file, link it to the food record data, and filter down to participants with a reported main diet and at least three food-record days.

## Participants master

In [None]:
# Participant-level master table, indexed by its `id` column.
participants_master = pd.read_excel('../data/main/participants_master.xlsx', index_col='id')

## Food intake records

In [None]:
# Intake table, indexed by its `id` column (the key later used to join it
# to the participant master table).
intakes_per_person = pd.read_excel('../data/main/intakes_detailed_with_asep.xlsx', index_col='id')

In [None]:
# Show the loaded intake table as a quick visual sanity check.
intakes_per_person

In [None]:
# Join the two tables on their `id` index. `how='inner'` is the pandas default
# for merge and is only spelled out here: ids missing from either table are dropped.
master_data = pd.merge(
    participants_master,
    intakes_per_person,
    how='inner',
    left_index=True,
    right_index=True,
)

## Filtering for valid data points

In [None]:
# Valid records: a reported main diet and at least three food-record days.
fltr = master_data['diet_main'].notna() & master_data['fr_days'].ge(3)
participants = master_data.loc[fltr]

# 3650 serves as a 10-year cutoff because the study is of preschool children
# (presumably age_q is expressed in days - confirm against the data dictionary).
children = participants.loc[participants['age_q'].lt(3650)]

# Analysis

In [None]:
# Diet groups in the fixed order shared by the plots below (hue order and legend labels).
diet_classes = ['vegan', 'vegetarian', 'mixed_diet']

In [None]:
# Pearson correlation between the animal source energy proportion (asep) and
# selected intake variables, followed by Benjamini-Hochberg FDR control at
# alpha = 0.05. The result is written to an Excel sheet.
#
# NOTE(review): this list originally contained 'e-FASAT'. Every other cell in
# this notebook refers to the variable as 'ep-FASAT', so the old spelling looks
# like a typo that would raise a KeyError - confirm against the intake file.
intake_columns = ['ep-FAPU', 'ep-FASAT', 'FIBC_per_e', 'CHOLE_per_e']

correlation_rows = []
for column in intake_columns:
    # pearsonr has no built-in handling of missing values, so correlate on
    # complete (asep, column) pairs only.
    pair = children[['asep', column]].dropna()
    pearson_r, pearson_p = pearsonr(pair['asep'], pair[column])
    correlation_rows.append(['asep', column, pearson_r, pearson_p])

correlations = pd.DataFrame(
    correlation_rows,
    columns=['x', 'y', 'pearson_r', 'pearson_p'],
)
# Element 0 of the multipletests result is the per-test "reject" flag.
correlations['fdr_bh_0_05'] = multipletests(
    correlations['pearson_p'], alpha=0.05, method='fdr_bh'
)[0]
correlations = correlations.sort_values(by='pearson_p')
correlations.to_excel('../out/correlation_macro_intakes_children.xlsx')

In [None]:
# Summary statistics of asep within each reported main diet; displayed and exported.
df = children.groupby('diet_main')['asep'].describe()
display(df)
df.to_excel('../out/asep_describe_by_diet_children.xlsx')

# Figures

In [None]:
# Shared plotting constants.
# `scale` is the base size used for subplot dimensions, marker area and displot height.
scale = 10
# One colour per entry of `diet_classes`, paired by position.
tricolor_palette = ['#55a868', '#dd8452', '#4c72b0']

# Global seaborn look.
# NOTE(review): set_context is called without a font_scale, which per the
# seaborn documentation defaults to 1 - so it likely undoes the font_scale=2
# set on the line before. Confirm which of the two is intended.
sns.set_theme(style='white', font_scale=2)
sns.set_context("paper")

In [None]:
from string import ascii_lowercase

# One marker shape per entry of `diet_classes`, paired by position.
markers=['o','s','^']

def regplot_asep(df,ax,y,y_label,set_xlabel=True):
    """Draw one panel: a linear fit of ``y`` on asep plus a scatter by diet.

    Parameters
    ----------
    df : DataFrame
        Must provide the columns ``asep``, ``diet_main`` and ``y``.
    ax : matplotlib Axes
        Axes to draw into.
    y : str
        Name of the column plotted on the y-axis.
    y_label : str
        Text for the y-axis label.
    set_xlabel : bool, default True
        If True the x-axis is labelled; otherwise the label is cleared.
    """
    ax.set_xlim(-0.01, df.asep.max()+0.01)
    # Regression line (and its confidence band) over all rows of df.
    sns.regplot(ax=ax, x='asep', y=y, scatter=False, data=df)

    # Only rows whose diet is one of `diet_classes` are scattered, because
    # those are the only levels the orders below know about.
    known_diets = df[df.diet_main.isin(diet_classes)]
    sns.scatterplot(
        ax=ax,
        x='asep',
        y=y,
        hue='diet_main',
        hue_order=diet_classes,
        style='diet_main',
        # Pin the marker order to the colour order. Without style_order,
        # seaborn assigns markers by order of appearance in the data, so the
        # marker/colour pairing could differ from the hand-built legend.
        style_order=diet_classes,
        markers=markers,
        data=known_diets,
        s=30*scale,
        palette=tricolor_palette,
        legend=False
    )
    if set_xlabel:
        ax.set_xlabel('Animal source energy proportion')
    else:
        ax.set_xlabel(None)
    ax.set_ylabel(y_label)


from matplotlib.lines import Line2D
def splats(n):
    """Return ``n`` legend proxy handles, one per marker/colour pair.

    Handle ``i`` uses ``markers[i]`` filled with ``tricolor_palette[i]``;
    the connecting line is drawn in white ('w').
    """
    handles = []
    for idx in range(n):
        handle = Line2D(
            [0],
            [0],
            marker=markers[idx],
            color='w',
            markerfacecolor=tricolor_palette[idx],
            markersize=15,
        )
        handles.append(handle)
    return handles


def fig_of_regplots(df,ys,cols=2):
    """Draw one asep regression panel per entry of ``ys`` on a single figure.

    Parameters
    ----------
    df : DataFrame
        Passed unchanged to ``regplot_asep`` for every panel.
    ys : dict
        Maps column name -> y-axis label. One panel is drawn per item, in
        dict order, filling the grid row by row.
    cols : int, default 2
        Number of panel columns in the grid.

    Returns
    -------
    (fig, axs)
        Exactly what ``plt.subplots`` returned for the grid. Axes removed from
        the figure because the grid had spare cells are still present in ``axs``.

    Raises
    ------
    ValueError
        If ``ys`` is empty.
    """
    if not ys:
        raise ValueError('ys must contain at least one variable to plot')

    plt.close()
    # NOTE(review): set_theme is called without `context`, so per the seaborn
    # documentation it falls back to its default context and likely overrides
    # any set_context() call made by the caller just before - confirm intent.
    sns.set_theme(style='white',font_scale=2)

    rows = -(-len(ys) // cols)  # ceiling division
    h = rows*scale
    if rows > 1: h+=2  # extra height for multi-row grids
    w = cols*scale+2   # extra width; the figure legend is placed at the right

    fig, axs = plt.subplots(rows,cols,figsize=(w,h))

    # plt.subplots returns a bare Axes, a 1-D array or a 2-D array depending
    # on the grid shape; normalise to a flat row-major sequence for iteration.
    axs_flat = np.atleast_1d(axs).ravel()

    for (y, y_label), ax in zip(ys.items(), axs_flat):
        regplot_asep(df,ax,y,y_label,set_xlabel=True)

    # Remove the unused cells of the grid.
    for ax in axs_flat[len(ys):]:
        fig.delaxes(ax)

    fig.legend(splats(len(diet_classes)),diet_classes,loc='right')

    return fig, axs

In [None]:
# Multi-panel figure: one linear-fit panel per macronutrient intake variable against asep.
sns.set_context("paper")

# column name -> y-axis label
ys = {
    'ep-FAT': 'Total Fat (E%)',
    'ep-FAPU': 'Polyunsaturated fatty acids (E%)',
    'ep-FASAT': 'Saturated fatty acids (E%)',
    'ep-CHOAVL': 'Carbohydrates (E%)',
    'ep-PROT': 'Protein (E%)',
    'FIBC_per_e': 'Fiber (g/MJ)',
}

fig, axs = fig_of_regplots(children, ys, cols=3)

# Save and release the figure that was just created.
fig.savefig('../out/macro_intakes_children.png')
plt.close(fig)

In [None]:
# Multi-panel figure: fat-related intakes and fibre, one linear-fit panel each against asep.
sns.set_context("paper")

# column name -> y-axis label
ys = {
    'ep-FASAT': 'Saturated fatty acids (E%)',
    'ep-FAPU': 'Polyunsaturated fatty acids (E%)',
    'CHOLE_per_e': 'Cholesterol (mg/MJ)',
    'FIBC_per_e': 'Fiber (g/MJ)',
}

fig, axs = fig_of_regplots(children, ys, cols=2)

# Save and release the figure that was just created.
fig.savefig('../out/fat_fibc_intakes_children.png')
plt.close(fig)

In [None]:
# Multi-panel figure: daily fatty-acid and micronutrient intakes, one
# linear-fit panel each against asep.
sns.set_context("paper")

# column name -> y-axis label
ys = {
    'F20D5N3_daily_mean': 'EPA (mg/d)',
    'F22D6N3_daily_mean': 'DHA (mg/d)',
    'F18D2CN6_daily_mean': 'Linoleic acid (mg/d)',
    # F18D3N3 is the 18:3 n-3 fatty acid, i.e. alpha-linolenic acid
    # (the label previously read "Alpha-linoleic acid").
    'F18D3N3_daily_mean': 'Alpha-linolenic acid (mg/d)',
    'FE_daily_mean': 'Iron (mg/d)',
    'CA_daily_mean': 'Calcium (mg/d)',
    'FOL_daily_mean': 'Folate (HPLC) (μg/d)',
    'VITC_daily_mean': 'Ascorbic acid (mg/d)',
}

fig, axs = fig_of_regplots(children, ys, cols=3)

# Save and release the figure that was just created.
fig.savefig('../out/micro_intakes_children.png')
plt.close(fig)

In [None]:
# Histogram of asep in bins of width 0.05, stacked and coloured by reported diet.

# Set the theme explicitly so this figure does not depend on which plotting
# helper happened to run last. These are the same settings fig_of_regplots
# applies, so the result of a top-to-bottom run is unchanged.
sns.set_theme(style='white', font_scale=2)

fg = sns.displot(
    children,
    x='asep',
    binwidth=0.05,
    hue='diet_main',
    hue_order=diet_classes,
    multiple='stack',
    height=scale,
    palette=tricolor_palette
)
fg.set_axis_labels('Animal source energy proportion', 'Number of participants')

plt.savefig('../out/displot_asep_diet_class_children.png')
plt.close()