# CFTR Modulators and Nutrition

In [351]:
import pandas as pd
import numpy as np
import plotnine as pn
from tableone import TableOne
# Root folder that every input path in this notebook is built from.
wd = '~/Dropbox/Work/CF/'
# Cleaned analysis dataset; later cells use its 'Patient ID', 'Modulator',
# age and lab-value columns.
analysis_csv = wd + 'Edith Zemanick/CF Nutrition/Data_Cleaned/analysis_dataset.csv'
df = pd.read_csv(analysis_csv)

In [352]:
# Cohort construction.
# Restricts `df` to patients who ever have a Kalydeco or Trikafta row, labels each
# row as before/after the modulator start age, applies the inclusion criteria,
# attaches per-patient summaries (measure counts, first modulator, genotype risk,
# follow-up span), and finally builds `demo` with one row per patient.
# NOTE: `df` is reassigned in place, so re-run the data-loading cell before
# re-running this one.

# Only those on effective modulators
ivcaftor = df.loc[df['Modulator'] == 'Kalydeco','Patient ID'].unique().tolist()
trikafta = df.loc[df['Modulator'] == 'Trikafta','Patient ID'].unique().tolist()
on_effective = set(ivcaftor + trikafta)
# .copy() so the assignments below write to an independent frame rather than a
# slice of the loaded data (avoids SettingWithCopy problems).
df = df.loc[df['Patient ID'].isin(on_effective),:].copy()
# Change others to "none"
df.loc[df['Modulator'].isin(['Orkambi','Symdeko']),'Modulator'] = "None"
# Before or after: a test counts as 'Before' only when its age is strictly below
# the start age; a missing age on either side compares False and so is 'After'.
# Assigning the column directly replaces the chained `replace(..., inplace=True)`,
# which does not update the frame under pandas Copy-on-Write.
df['Before/After'] = np.where(
    df['Age at Test (Years)'] < df['Age at Start (years)'], 'Before', 'After')

# Count number of non-missing vitamin D measures per patient and period
vit_counts = df.groupby(['Patient ID','Before/After'])['25OH-Vitamin D'].count()
num_values = (
    vit_counts
    # A patient with no rows in a period has zero measures there.
    .unstack('Before/After', fill_value=0)
    # Guarantee both periods exist and are labelled by name, not by position.
    .reindex(columns=['After','Before'], fill_value=0)
    .rename(columns={'After': 'Vit. Measures After', 'Before': 'Vit. Measures Before'})
    .rename_axis(columns=None)
    .reset_index()
)
df = df.merge(num_values,on=['Patient ID'])

# At least two annual nutritional measurements prior to first modulator start date
before_counts = vit_counts[vit_counts.index.get_level_values('Before/After') == 'Before']
two_measures = before_counts[before_counts >= 2].index.get_level_values('Patient ID')
# At least one measurement of nutritional markers >= three months post-modulator
# NOTE(review): the criterion as originally written also mentions a weight/height
# measurement, but only a non-missing vitamin D value is checked here -- confirm.
time_after = df.loc[
    (df['Age at Test (Years)'] >= (df['Age at Start (years)']+0.25))
    & (df['25OH-Vitamin D'].notnull()),
    'Patient ID']
# Intersection
keep = set(time_after.to_list()).intersection(two_measures.to_list())
df = df[df['Patient ID'].isin(keep)]

# Group by modulators: distinct non-"None" modulators per patient, in row order
mods = (
    df.loc[df['Modulator'] != "None"]
    .groupby("Patient ID")['Modulator']
    .unique()
    .to_frame('Group')
)
df = df.merge(mods,on=['Patient ID'])
# "First" means first in row order of `df`.
# NOTE(review): assumes rows are ordered by test age within patient -- confirm.
df['First Modulator'] = [group[0] for group in df['Group']]

# Genotype group
geno = pd.read_excel(wd+'CF Info/Sweat Tests from Internal Database_genotype class.xlsx',usecols=['PatientID','genoRisk'])
# Rename by name: `usecols` selects columns but does not control their order, so
# assigning a positional list of labels could swap the two columns.
geno = geno.rename(columns={'PatientID': 'Patient ID'})
# One row per patient (first non-missing genoRisk) so the merge cannot duplicate
# measurement rows if a patient appears more than once in the genotype file.
geno = geno.groupby('Patient ID', as_index=False).first()
df = df.merge(geno,on=['Patient ID'])

# Time prior to first modulator
df['Years to Start'] = df['Age at Test (Years)'] - df['Age at Start (years)']
# Largest absolute distance from the start age, separately before and after.
years = (
    df['Years to Start'].abs()
    .groupby([df['Patient ID'], df['Before/After']])
    .max()
    .unstack('Before/After')
    .reindex(columns=['After','Before'])
    .rename(columns={'After': 'Max. Years From Start', 'Before': 'Max. Years to Start'})
    .rename_axis(columns=None)
    .reset_index()
)
df = df.merge(years,on=['Patient ID'])

# Demographics: first non-missing value of every column per patient
demo = df.groupby('Patient ID').first()

# Table 1: Participant Characteristics

In [353]:
# Table 1 specification: one row per patient (`demo`), stratified by first modulator.
columns = [
    'Age at Start (years)', 'Sex', 'genoRisk',
    'Vit. Measures After', 'Vit. Measures Before',
    'Max. Years to Start', 'Max. Years From Start',
]
categorical = ['Sex','genoRisk']
groupby = 'First Modulator'
# Every continuous column is summarised as a median rather than a mean.
not_normal = [c for c in columns if c not in categorical]
# The follow-up span columns report [min,max] instead of [Q1,Q3].
min_max = [c for c in columns if "Max." in c]
# Keyword arguments keep the call independent of TableOne's positional parameter
# order, which is not guaranteed to have `groupby` in fourth position.
TableOne(
    demo,
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    nonnormal=not_normal,
    min_max=min_max,
)



Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by First Modulator,Grouped by First Modulator,Grouped by First Modulator,Grouped by First Modulator
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,Kalydeco,Trikafta
n,,,47,7,40
"Age at Start (years), median [Q1,Q3]",,0.0,"12.9 [12.1,13.9]","12.5 [12.4,13.2]","13.0 [12.1,14.1]"
"Sex, n (%)",Female,0.0,24 (51.1),3 (42.9),21 (52.5)
"Sex, n (%)",Male,,23 (48.9),4 (57.1),19 (47.5)
"genoRisk, n (%)",High,2.0,40 (88.9),4 (57.1),36 (94.7)
"genoRisk, n (%)",Low,,5 (11.1),3 (42.9),2 (5.3)
"Vit. Measures After, median [Q1,Q3]",,0.0,"2.0 [1.0,3.0]","3.0 [2.0,3.0]","2.0 [1.0,2.2]"
"Vit. Measures Before, median [Q1,Q3]",,0.0,"5.0 [3.0,6.0]","5.0 [4.0,5.0]","5.0 [3.0,6.0]"
"Max. Years to Start, median [min,max]",,0.0,"11.9 [4.8,14.0]","11.1 [9.5,13.2]","12.0 [4.8,14.0]"
"Max. Years From Start, median [min,max]",,0.0,"1.9 [0.3,3.9]","1.9 [0.5,3.4]","1.9 [0.3,3.9]"


# Table 2: Vitamin measures before and after modulator initiation

In [347]:
# For each marker, keep the single non-missing measurement closest to modulator
# start on each side, per patient. Results are collected in `closest_by_marker`
# (marker name -> DataFrame); previously each iteration overwrote `t`, so only
# the last marker's result survived the loop.
markers = ['VitaminA_Retinol', '25OH-Vitamin D','Vitamin E Alpha', 'Vitamin E Gamma','BMI','FEV1 % pred']
# Rows eligible for the comparison: anything before start, or at least three
# months (0.25 years) after start. The "after" bound is written exactly like the
# cohort inclusion criterion (>= start age + 0.25) so a measurement at exactly
# three months is not admitted there and dropped here.
eligible = (df['Years to Start'] < 0) | (
    df['Age at Test (Years)'] >= (df['Age at Start (years)'] + 0.25))
closest_by_marker = {}
for c in markers:
    t = df.loc[df[c].notnull() & eligible].copy()
    t['Years Abs.'] = t['Years to Start'].abs()
    # idxmin returns the row label of the smallest distance in each
    # (patient, period) group; .loc then pulls those rows.
    t = t.loc[t.groupby(['Patient ID','Before/After'])['Years Abs.'].idxmin()]
    closest_by_marker[c] = t

In [348]:
# Marker column examined by the next cell.
c = 'VitaminA_Retinol'

In [387]:
# Closest non-missing measurement of marker `c` before and after modulator start,
# one row per (patient, period).
# The "after" bound is written exactly like the cohort inclusion criterion
# (>= start age + 0.25 years) so a measurement taken at exactly three months
# counts in both places.
eligible = (df['Years to Start'] < 0) | (
    df['Age at Test (Years)'] >= (df['Age at Start (years)'] + 0.25))
t = df.loc[df[c].notnull() & eligible].copy()
t['Years Abs.'] = t['Years to Start'].abs()
# idxmin returns the row label of the smallest distance in each group.
t = t.loc[t.groupby(['Patient ID','Before/After'])['Years Abs.'].idxmin()]