In [42]:
import pandas as pd
import numpy as np
from types import NoneType
wd = '/Users/timvigers/Library/CloudStorage/OneDrive-TheUniversityofColoradoDenver/CF/Edith Zemanick/CF Nutrition/'

In [43]:
# Import
# Modulator dates
mods = pd.read_excel(
    wd+"Data_Raw/Report CFTR Modulators_throughDecember2021.xlsx")
mods.sort_values(by=["Patient ID", "Earliest Date in Registry"],
                 ascending=True, inplace=True)
mods = mods[mods['Earliest Date in Registry'] != mods['Last Date in Registry']]
# vitemins
vit = pd.read_excel(
    wd+"Data_Raw/Report Vitamins_throughDecember2021_revised20220712.xlsx")
# Add FEV information
fev = pd.read_excel(wd+"Data_Raw/Report_PFTs_ThroughDecember2021.xlsx")
fev = fev.rename(columns={'PatientID': 'Patient ID'})
df = pd.merge(vit, fev, how="outer", on=["Patient ID", "Date"])


In [44]:
df.sort_values(by=["Patient ID","Date"],ascending=True, inplace=True)
# Get modulator at each vitamin measure
m = []
p = []
for r in range(df.shape[0]):
    # Get ID
    id = df.iloc[r,0]
    # If no mod information, return NA
    if id not in mods["Patient ID"].unique():
        m.append(np.nan)
        p.append(np.nan)
        continue
    # Get row date
    date = df.iloc[r,2]
    # Get modulator dates by ID
    mod = mods.loc[mods["Patient ID"] == id,:]
    dates = mod.iloc[:,3]
    # Check which dates the row is after
    after = np.where(date > dates)[0]
    # If not after any, return no modulator
    if after.shape[0] == 0: 
        m.append("None")
    else:
        m.append(mod.iloc[max(after),1])
    # Check if pre-/post-effective modulator
    eff_mod = mod[(mod["Modulator"] == "Kalydeco") | (mod["Modulator"] == "Trikafta")]
    if eff_mod.shape[0] > 0: 
        start = eff_mod.iloc[0,3]
        if date <= start:
            p.append("Pre")
        else:
            p.append("Post")
    else:
        p.append(np.nan)
df["Modulator"] = m
df["Pre-/Post-Effective Modulator"] = p
# Get pre-/post- informat
# Drop those without modulator info
df.dropna(subset="Modulator",inplace=True)
# Write for checking
df.to_csv(wd+"Data_Cleaned/all.csv",index=False)

In [157]:
# Convert to numeric
cols = ["VitaminA_Retinol", "25OH-Vitamin D", "Vitamin E Alpha",
        "Vitamin E Gamma", "FEV1 % pred", "BMI percentile (CDC)"]
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
# For each person, average all their pre-effective mod values
# but only use the most recent vitamin measure.
# If they have FEV1 or BMI within a month of the vitamin measure,
# use that otherwise missing
# Place to store results
analysis = {"ID": [], "Timepoint": [], "Vitamin A": [], "Vitamin D": [],
            "Vitamin E Alpha": [], "Vitamin E Gamma": [], "ppFEV1": [], "BMI Percentile": []}
# Iterate through by ID
ids = df["Patient ID"].unique().tolist()
for id in ids:
    # Subset
    pre = df[(df["Patient ID"] == id) & (
        df["Pre-/Post-Effective Modulator"] == "Pre")]
    post = df[(df["Patient ID"] == id) & (
        df["Pre-/Post-Effective Modulator"] == "Post")]
    # Pre
    analysis["ID"].append(id)
    analysis["Timepoint"].append("Pre")
    analysis["Vitamin A"].append(pre["VitaminA_Retinol"].mean(skipna=True))
    analysis["Vitamin D"].append(pre["25OH-Vitamin D"].mean(skipna=True))
    analysis["Vitamin E Alpha"].append(
        pre["Vitamin E Alpha"].mean(skipna=True))
    analysis["Vitamin E Gamma"].append(
        pre["Vitamin E Gamma"].mean(skipna=True))
    analysis["ppFEV1"].append(pre["FEV1 % pred"].mean(skipna=True))
    analysis["BMI Percentile"].append(
        pre["BMI percentile (CDC)"].mean(skipna=True))
    # Post
    analysis["ID"].append(id)
    analysis["Timepoint"].append("Post")
    # Most recent vitamin A
    vita = post["VitaminA_Retinol"].last_valid_index()
    if (type(vita) == NoneType):
        vita_value = np.nan
        vita_date = np.nan
    else:
        vita_value = post["VitaminA_Retinol"].loc[vita]
        vita_date = post["Date"].loc[vita]
    # Vitamin D
    vitd = post["25OH-Vitamin D"].last_valid_index()
    if (type(vitd) == NoneType):
        vitd_value = np.nan
        vitd_date = np.nan
    else:
        vitd_value = post["25OH-Vitamin D"].loc[vitd]
        vitd_date = post["Date"].loc[vitd]
    # Vitamin e
    # Alpha
    vitea = post["Vitamin E Alpha"].last_valid_index()
    if (type(vitea) == NoneType):
        vitea_value = np.nan
        vitea_date = np.nan
    else:
        vitea_value = post["Vitamin E Alpha"].loc[vitea]
        vitea_date = post["Date"].loc[vitea]
    # Gamma
    viteg = post["Vitamin E Gamma"].last_valid_index()
    if (type(viteg) == NoneType):
        viteg_value = np.nan
        viteg_date = np.nan
    else:
        viteg_value = post["Vitamin E Gamma"].loc[viteg]
        viteg_date = post["Date"].loc[viteg]
    # Check if there is a BMI value within 1 month
    bmi = post.dropna(subset="BMI percentile (CDC)")
    if(bmi.shape[0] > 0):
        bmi = bmi[(abs((bmi["Date"] - vita_date).dt.days) <= 30) | (abs((bmi["Date"] - vitd_date).dt.days) <= 30) | (abs((bmi["Date"] - vitea_date).dt.days) <= 30)]
        bmi_ind = bmi["BMI percentile (CDC)"].last_valid_index()
        bmi_value = post["BMI percentile (CDC)"].loc[bmi_ind]
        bmi_date = post["Date"].loc[bmi_ind]
    else:
        bmi_value = np.nan
        bmi_date = np.nan
    # Check if there is a fev value within 1 month
    fev = post.dropna(subset='FEV1 % pred')
    if(fev.shape[0]>0):
        fev = fev[(abs((fev["Date"] - vita_date).dt.days) <= 30) | (abs((fev["Date"] - vitd_date).dt.days) <= 30) | (abs((fev["Date"] - vitea_date).dt.days) <= 30)]
        fev_ind = fev['FEV1 % pred'].last_valid_index()
        fev_value = post['FEV1 % pred'].loc[fev_ind]
        fev_date = post["Date"].loc[fev_ind]
    else:
        fev_value = np.nan
        fev_date = np.nan
    # Add to results
    analysis["Vitamin A"].append(vita_value)
    analysis["Vitamin D"].append(vitd_value)
    analysis["Vitamin E Alpha"].append(vitea_value)
    analysis["Vitamin E Gamma"].append(viteg_value)
    analysis["ppFEV1"].append(fev_value)
    analysis["BMI Percentile"].append(bmi_value)
# Convert to DF
analysis = pd.DataFrame(analysis)
analysis.to_csv(wd+"Data_Cleaned/analysis.csv", index=False)


4
13


KeyError: None

In [140]:
analysis = {"ID": [], "Timepoint": [], "Vitamin A": [], "Vitamin D": [],
            "Vitamin E Alpha": [], "Vitamin E Gamma": [], "ppFEV1": [], "BMI Percentile": []}
id = 13
pre = df[(df["Patient ID"] == id) & (df["Pre-/Post-Effective Modulator"] == "Pre")]
post = df[(df["Patient ID"] == id) & (df["Pre-/Post-Effective Modulator"] == "Post")]

In [141]:
analysis["ID"].append(id)
analysis["Timepoint"].append("Post")


In [153]:
# Most recent vitamin A
vita = post["VitaminA_Retinol"].last_valid_index()
if (type(vita) == NoneType):
    vita_value = np.nan
    vita_date = np.nan
else:
    vita_value = post["VitaminA_Retinol"].loc[vita]
    vita_date = post["Date"].loc[vita]
# Vitamin D
vitd = post["25OH-Vitamin D"].last_valid_index()
if (type(vitd) == NoneType):
    vitd_value = np.nan
    vitd_date = np.nan
else:
    vitd_value = post["25OH-Vitamin D"].loc[vitd]
    vitd_date = post["Date"].loc[vitd]
# Vitamin e
# Alpha
vitea = post["Vitamin E Alpha"].last_valid_index()
if (type(vitea) == NoneType):
    vitea_value = np.nan
    vitea_date = np.nan
else:
    vitea_value = post["Vitamin E Alpha"].loc[vitea]
    vitea_date = post["Date"].loc[vitea]
# Gamma
viteg = post["Vitamin E Gamma"].last_valid_index()
if (type(viteg) == NoneType):
    viteg_value = np.nan
    viteg_date = np.nan
else:
    viteg_value = post["Vitamin E Gamma"].loc[viteg]
    viteg_date = post["Date"].loc[viteg]
# Check if there is a BMI value within 1 month
bmi = post.dropna(subset="BMI percentile (CDC)")
if(bmi.shape[0] > 0):
    bmi = bmi[(abs((bmi["Date"] - vita_date).dt.days) <= 30) | (abs((bmi["Date"] - vitd_date).dt.days) <= 30) | (abs((bmi["Date"] - vitea_date).dt.days) <= 30)]
    bmi_ind = bmi["BMI percentile (CDC)"].last_valid_index()
    bmi_value = post["BMI percentile (CDC)"].loc[bmi_ind]
    bmi_date = post["Date"].loc[bmi_ind]
else:
    bmi_value = np.nan
    bmi_date = np.nan
# Check if there is a fev value within 1 month
fev = post.dropna(subset='FEV1 % pred')
if(fev.shape[0]>0):
    fev = fev[(abs((fev["Date"] - vita_date).dt.days) <= 30) | (abs((fev["Date"] - vitd_date).dt.days) <= 30) | (abs((fev["Date"] - vitea_date).dt.days) <= 30)]
    fev_ind = fev['FEV1 % pred'].last_valid_index()
    fev_value = post['FEV1 % pred'].loc[fev_ind]
    fev_date = post["Date"].loc[fev_ind]
else:
    fev_value = np.nan
    fev_date = np.nan
    

KeyError: None

In [147]:
post

Unnamed: 0,Patient ID,Age at Test (years),Date,VitaminA_Retinol,25OH-Vitamin D,Vitamin E Alpha,Vitamin E Gamma,Age At PFT (years),Sex,Location,...,BMI,BMI percentile (CDC),FVC,FVC % pred,FEV1,FEV1 % pred,FEF25-75,FEF25-75 % pred,Modulator,Pre-/Post-Effective Modulator
16,13,18.844627,2020-03-11 16:22:11,20.0,26.0,4.3,,,,,...,,,,,,,,,Trikafta,Post
17,13,19.206023,2020-07-21 14:39:25,,28.0,,,,,,...,,,,,,,,,Trikafta,Post
2796,13,,2020-10-14 00:00:00,,,,,19.452055,Female,Clinic,...,25.105832,79.86,4.9,119.0,3.5,97.0,2.53,64.0,Trikafta,Post


In [155]:
bmi.shape[0] > 0

False

In [119]:
fev_value

113.0

In [120]:
bmi_value

83.17

In [52]:
bmi = post[vita_date <= post["Date"]]

In [41]:
vita_date

Timestamp('2021-02-22 00:00:00')

In [53]:
bmi

Unnamed: 0,Patient ID,Age at Test (years),Date,VitaminA_Retinol,25OH-Vitamin D,Vitamin E Alpha,Vitamin E Gamma,Age At PFT (years),Sex,Location,...,BMI,BMI percentile (CDC),FVC,FVC % pred,FEV1,FEV1 % pred,FEF25-75,FEF25-75 % pred,Modulator,Pre-/Post-Effective Modulator
11,4,15.238877,2021-02-22,57.0,64.0,11.0,,15.249315,Male,Clinic,...,23.268954,83.17,4.66,108.0,4.21,113.0,4.85,119.0,Trikafta,Post
2725,4,,2021-06-28,,,,,15.594521,Male,Clinic,...,23.69123,84.07,5.05,117.0,4.3,115.0,4.44,109.0,Trikafta,Post


In [None]:
from datetime import timedelta


timedelta