# CFTR Modulators and Nutrition

In [227]:
import pandas as pd
import numpy as np
import plotnine as pn
from tableone import TableOne
# Root folder for the CF project files; every data path below is built from it.
wd = '~/Dropbox/Work/CF/'
# Cleaned analysis dataset for the CF nutrition project.
analysis_csv = wd + 'Edith Zemanick/CF Nutrition/Data_Cleaned/analysis_dataset.csv'
df = pd.read_csv(analysis_csv)

In [228]:
# Only those on effective modulators: keep every row belonging to a patient who has
# at least one Kalydeco or Trikafta row.
ivcaftor = df.loc[df['Modulator'] == 'Kalydeco','Patient ID'].unique().tolist()
trikafta = df.loc[df['Modulator'] == 'Trikafta','Patient ID'].unique().tolist()
on_effective = set(ivcaftor + trikafta)
# .copy() gives an independent frame, so the assignments below do not write into a
# slice of the original (avoids SettingWithCopyWarning and silent no-ops).
df = df.loc[df['Patient ID'].isin(on_effective),:].copy()
# Change others to "none": Orkambi and Symdeko rows are relabelled with the string "None"
df.loc[df['Modulator'].isin(['Orkambi','Symdeko']),'Modulator'] = "None"
# Before or after: a row is 'Before' when the test age is strictly below the start age.
# Rows where either age is missing compare False and are therefore labelled 'After'.
# Assigned in one step instead of `df[col].replace(..., inplace=True)`, which mutates a
# temporary and has no effect under pandas copy-on-write.
df['Before/After'] = (
    df['Age at Test (Years)'] < df['Age at Start (years)']
).map({True: 'Before', False: 'After'})
# Criterion 1: at least two non-missing 25OH-Vitamin D values in the 'Before' period.
# NOTE(review): the earlier comment here said "at least three annual nutritional
# measurements" (and was cut off mid-sentence), but the threshold in code is >= 2 —
# confirm which one the protocol requires.
# count() tallies non-null values per (patient, period) directly, rather than counting
# every column with groupby.apply and then discarding all but one.
num_values = df.groupby(['Patient ID','Before/After'])['25OH-Vitamin D'].count()
# Select the 'Before' rows by index level; a substring filter on the stringified
# (patient, period) tuple could also match a patient ID containing "Before".
before_rows = num_values.index.get_level_values('Before/After') == 'Before'
two_measures = num_values[before_rows]
two_measures = two_measures[two_measures >= 2].index.get_level_values(0)
# Criterion 2: patients with at least one row whose test age equals the start age.
# NOTE(review): the stated rule is "at least one measurement of nutritional markers and
# one weight/height measurement >= three months post-modulator", but this line only
# checks exact equality of the two ages — confirm the intended condition.
time_after = df.loc[df['Age at Test (Years)'] == df['Age at Start (years)'],'Patient ID']
# Intersection: keep only patients who satisfy both criteria
keep = set(time_after.to_list()).intersection(two_measures.to_list())
df = df[df['Patient ID'].isin(keep)]
# Group by modulators: for each patient, collect the distinct modulators recorded on
# rows whose Modulator is not "None", in order of first appearance in df.
mods = (
    df.loc[df['Modulator'] != "None"]
    .groupby("Patient ID")['Modulator']
    .unique()
    .to_frame('Group')
)
# Attach the per-patient modulator array to every row for that patient.
df = df.merge(mods, on=['Patient ID'])
# First entry of the array; "first" here means first encountered in row order.
df['First Modulator'] = df['Group'].map(lambda seq: seq.tolist()[0])
# Collapse the array into a single '/'-separated label, e.g. "Kalydeco/Trikafta".
df['Group'] = df['Group'].map(lambda seq: '/'.join(str(name) for name in seq))
# Genotype group: read the PatientID -> genoRisk lookup and join it onto the
# measurement rows. The join is inner, so patients absent from the spreadsheet
# drop out of df.
geno = pd.read_excel(
    wd + 'CF Info/Sweat Tests from Internal Database_genotype class.xlsx',
    usecols=['PatientID', 'genoRisk'],
)
df = df.merge(geno, how='inner', left_on='Patient ID', right_on='PatientID')
# Demographics: one row per patient, taking the first non-null value of each column.
demo = df.groupby('Patient ID').first()

# Table 1: Participant Characteristics

In [229]:
# Variables summarised in Table 1, and which of them are categorical.
columns = ['Age at Start (years)','Sex','genoRisk']
categorical = ['Sex','genoRisk']
# Stratify the table by each patient's first effective modulator.
groupby = 'First Modulator'
# Pass everything after the data frame by keyword: the positional order of TableOne's
# parameters is not stable across releases (newer versions place `continuous` before
# `groupby`), so a positional 4th argument can bind to the wrong parameter.
TableOne(demo, columns=columns, categorical=categorical, groupby=groupby)



Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by First Modulator,Grouped by First Modulator,Grouped by First Modulator,Grouped by First Modulator
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,Kalydeco,Trikafta
n,,,57,9,48
"Age at Start (years), mean (SD)",,0.0,10.3 (2.6),11.5 (3.4),10.1 (2.5)
"Sex, n (%)",Female,1.0,25 (44.6),3 (33.3),22 (46.8)
"Sex, n (%)",Male,,31 (55.4),6 (66.7),25 (53.2)
"genoRisk, n (%)",High,10.0,43 (91.5),4 (50.0),39 (100.0)
"genoRisk, n (%)",Low,,4 (8.5),4 (50.0),


In [217]:
# Inspect the per-row first-modulator label (the recorded output below is the
# 'First Modulator' column; the empty `df[]` subscript was a syntax error).
df['First Modulator']

0        Symdeko
1        Symdeko
2        Symdeko
3        Symdeko
4        Symdeko
          ...   
4347    Trikafta
4348    Trikafta
4349    Trikafta
4350    Trikafta
4351    Trikafta
Name: First Modulator, Length: 4352, dtype: object

In [189]:
# Inspect the per-row modulator group label.
df.loc[:, 'Group']

0       [Symdeko, Trikafta]
1       [Symdeko, Trikafta]
2       [Symdeko, Trikafta]
3       [Symdeko, Trikafta]
4       [Symdeko, Trikafta]
               ...         
4347             [Trikafta]
4348             [Trikafta]
4349             [Trikafta]
4350             [Trikafta]
4351             [Trikafta]
Name: Group, Length: 4352, dtype: object

In [204]:
# NOTE(review): `g` is not assigned in any cell visible in this notebook, so this
# cell presumably relies on leftover kernel state and would fail on Restart & Run All —
# define `g` explicitly or remove this scratch cell.
g.tolist()

"['Symdeko', 'Trikafta']"