In [1]:
import pandas as pd
import numpy as np
import re

# Import EAT Dataset

In [2]:
eat_working = pd.read_excel("Data/EAT_Study/EAT study data 2019_2022-10-12_13-13-35_working.xlsx")

In [3]:
eat_working.shape

(1307, 985)

In [4]:
eat_working.head()

Unnamed: 0,Participant ID,Visit,ethnicity,mother's age at enrolment (years),age mum left full time education,family history of asthma,mother has asthma,father has asthma,family history of eczema,mother has eczema,...,Wheat week 11 consumption in grams,Fish week 11 consumption in grams,Sesame week 11 consumption in grams,Egg week 12 consumption in grams,Milk week 12 consumption in grams,Peanut week 12 consumption in grams,Wheat week 12 consumption in grams,Fish week 12 consumption in grams,Sesame week 12 consumption in grams,case ID
0,1.0,,0.0,32.0,2.0,1.0,1.0,0.0,1.0,1.0,...,,,,,,,,,,1.0
1,2.0,,0.0,27.0,2.0,1.0,1.0,0.0,1.0,1.0,...,4.0,4.0,4.0,1.0,4.0,4.0,4.0,4.0,4.0,2.0
2,3.0,,0.0,32.0,2.0,1.0,0.0,0.0,1.0,0.0,...,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0
3,4.0,,3.0,36.0,2.0,1.0,0.0,0.0,1.0,0.0,...,,,,,,,,,,4.0
4,5.0,,0.0,31.0,3.0,0.0,0.0,0.0,0.0,0.0,...,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0


# List of column names to keep from full dataset 
# from "EAT study data 2019_2022-10-12_13-13-35_working"

### "study group"
- 0=Standard introduction, 1=Early introduction

### "Child's sex"
- 0=male, 1=female

### "ethnicity"
- 0/white, 
- 1/mixed, 
- 2/asian or asian british, 
- 3/black or black british, 
- 4/chinese or other ethnic group
- NA/unknown

### Wheal (mm)
- "skin prick test to peanut at 3 month clinic visit" 
- "skin prick test to peanut at 12 month clinic visit", 
- "skin prick test to peanut at 36 month clinic visit",


### Flare (mm): 
"No flare is available; we’ll have to use wheal for now."


### AGE 
AT ENROLLMENT: __3 months__ Source: "We recruited, from the general population, 1303 exclusively breast-fed infants who were 3 months of age" 

Age column will need to be assigned as data is parsed according to the clinic visit age groups.  
Infant's age is equal to the month of the visit, i.e. 3 month clinic visit = 3 month old infant

- 3 MONTH CLINIC VISIT 
- 12 MONTH CLINIC VISIT
- 36 MONTH CLINIC VISIT

Age will also need to be considered in the IgE response columns

- IgE symptoms to EIG food 4-6 months of age	     0/No, 1/Yes
- IgE symptoms to any food 4-6 months of age	     0/No, 1/Yes
- non-IgE symptoms to EIG food 4-6 months of age	 0/No, 1/Yes
- non-IgE symptoms to any food 4-6 months of age	 0/No, 1/Yes
- IgE symptoms to EIG food 4-12 months of age	     0/No, 1/Yes
- IgE symptoms to any food 4-12 months of age	     0/No, 1/Yes
- non-IgE symptoms to EIG food 4-12 months of age	 0/No, 1/Yes
- non-IgE symptoms to any food 4-12 months of age	 0/No, 1/Yes

### Peanut OFCs: 
need to figure out the age these OFCs were performed 
- "primary outcome peanut allergy (only those evaluable and within age range)" 0/no, 1/yes




In [5]:
cols_to_keep = ["case ID", 
                "study group", 
                "Child's sex",
                "ethnicity", 
                "skin prick test to peanut at 3 month clinic visit", 
                "skin prick test to peanut at 12 month clinic visit", 
                "skin prick test to peanut at 36 month clinic visit",
                "primary outcome peanut allergy (only those evaluable and within age range)"]


eat_relevant_cols_df = eat_working[cols_to_keep]



In [6]:
eat_relevant_cols_df = eat_working[cols_to_keep]

In [7]:
eat_relevant_cols_df.shape

(1307, 8)

In [8]:
eat_relevant_cols_df.head()

Unnamed: 0,case ID,study group,Child's sex,ethnicity,skin prick test to peanut at 3 month clinic visit,skin prick test to peanut at 12 month clinic visit,skin prick test to peanut at 36 month clinic visit,primary outcome peanut allergy (only those evaluable and within age range)
0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0
1,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,1.0,1.0,3.0,0.0,,0.0,0.0
4,5.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0


# Reshaping length of dataframe by creating a new row for each age clinic visit
- renaming "skin prick test to peanut at 3 month clinic visit" to be Wheal(mm) 
- renaming "primary outcome peanut allergy (only those evaluable and within age range)" to OFC Pass to match LEAP

In [9]:
# Define the columns to rename
to_rename = [
    "skin prick test to peanut at 3 month clinic visit", 
    "skin prick test to peanut at 12 month clinic visit", 
    "skin prick test to peanut at 36 month clinic visit"
]

# Create a list to hold the reshaped rows
reshaped_rows = []

# Iterate over each row in the original DataFrame
for index, row in eat_relevant_cols_df.iterrows():
    # Create a new row for each age column
    for column in to_rename:
        age = re.search(r"at (\d+)", column).group(1)
        new_column_name = f"Wheal (mm) {age}"

        reshaped_rows.append({
            "case ID": row["case ID"],
            "Age": age,
            "study group": row["study group"],
            "Child's sex": row["Child's sex"],
            "ethnicity": row["ethnicity"],
            "Wheal (mm)": row[column],
            "OFC Pass [need to determine age]": row["primary outcome peanut allergy (only those evaluable and within age range)"]
        })
        

# Create a new DataFrame from the reshaped rows
eat_reshaped_df = pd.DataFrame(reshaped_rows)

In [10]:
eat_reshaped_df.shape

(3921, 7)

In [11]:
eat_reshaped_df.head(15)

Unnamed: 0,case ID,Age,study group,Child's sex,ethnicity,Wheal (mm),OFC Pass [need to determine age]
0,1.0,3,0.0,0.0,0.0,,0.0
1,1.0,12,0.0,0.0,0.0,0.0,0.0
2,1.0,36,0.0,0.0,0.0,0.0,0.0
3,2.0,3,1.0,0.0,0.0,0.0,0.0
4,2.0,12,1.0,0.0,0.0,0.0,0.0
5,2.0,36,1.0,0.0,0.0,0.0,0.0
6,3.0,3,1.0,0.0,0.0,0.0,0.0
7,3.0,12,1.0,0.0,0.0,0.0,0.0
8,3.0,36,1.0,0.0,0.0,0.0,0.0
9,4.0,3,1.0,1.0,3.0,0.0,0.0


In [12]:
sum(eat_reshaped_df["OFC Pass [need to determine age]"].dropna())

#output is 66 which is correct if there are 22 OFC positive peanut results and 
#I duplicated the rows by 3

66.0

# Removing OFC pass value for 3months old 
since the OFC pass value was duplicated across all 3 age groups, however we know OFC was not done at the initial 3 month old intake clinic visit. Only at 12 months or 36 months. 

In [13]:
# Check if "Age" is equal to 3 and update "OFC Pass [need to determine age]" to a NaN value
# because OFCs were not done at the 3 month intake assessment
eat_reshaped_df.loc[eat_reshaped_df['Age'] == '3', 'OFC Pass [need to determine age]'] = np.nan


In [14]:
eat_reshaped_df.shape

(3921, 7)

In [15]:
eat_reshaped_df.head(15)

Unnamed: 0,case ID,Age,study group,Child's sex,ethnicity,Wheal (mm),OFC Pass [need to determine age]
0,1.0,3,0.0,0.0,0.0,,
1,1.0,12,0.0,0.0,0.0,0.0,0.0
2,1.0,36,0.0,0.0,0.0,0.0,0.0
3,2.0,3,1.0,0.0,0.0,0.0,
4,2.0,12,1.0,0.0,0.0,0.0,0.0
5,2.0,36,1.0,0.0,0.0,0.0,0.0
6,3.0,3,1.0,0.0,0.0,0.0,
7,3.0,12,1.0,0.0,0.0,0.0,0.0
8,3.0,36,1.0,0.0,0.0,0.0,0.0
9,4.0,3,1.0,1.0,3.0,0.0,


In [16]:
sum(eat_reshaped_df["OFC Pass [need to determine age]"].dropna())
# output is 44, which is correct if there are 22 peanut positive OFC results and 
# we've doubled them in the dataframe for the 12 month and 36 month rows

44.0

# One Hot Encoding for ethnicity 
note original encoding for "ethnicity" from the raw eat dataset is as follows
- 0/white, 
- 1/mixed, 
- 2/asian or asian british, 
- 3/black or black british, 
- 4/chinese or other ethnic group
- NA/unknown

In the encoding below, I set 4 to "other" instead of "asian". Thinking is to not mix the "other ethnic group" in with the "asian" category. Losing some information under "asian" for participants who identify as "chinese". To consult decision with Dr. Gryak

In [17]:
# Create dummy variables for "ethnicity"
ethnicity_dummies = pd.get_dummies(eat_reshaped_df['ethnicity'])

# Define the mapping between values and column names
ethnicity_mapping = {
    0: 'White',
    3: 'Black',
    2: 'Asian',
    4: 'Other',
    1: 'Mixed',
    np.nan: 'Unknown'
}

# Iterate over the mapping and update the DataFrame columns
for value, column_name in ethnicity_mapping.items():
    if value in ethnicity_dummies.columns:
        eat_reshaped_df[column_name] = ethnicity_dummies[value].fillna(0).astype(int)
    else:
        eat_reshaped_df[column_name] = 0

# Drop the original "ethnicity" column
eat_reshaped_df.drop('ethnicity', axis=1, inplace=True)


In [18]:
eat_reshaped_df.shape

(3921, 12)

In [19]:
eat_reshaped_df.head(15)

Unnamed: 0,case ID,Age,study group,Child's sex,Wheal (mm),OFC Pass [need to determine age],White,Black,Asian,Other,Mixed,Unknown
0,1.0,3,0.0,0.0,,,1,0,0,0,0,0
1,1.0,12,0.0,0.0,0.0,0.0,1,0,0,0,0,0
2,1.0,36,0.0,0.0,0.0,0.0,1,0,0,0,0,0
3,2.0,3,1.0,0.0,0.0,,1,0,0,0,0,0
4,2.0,12,1.0,0.0,0.0,0.0,1,0,0,0,0,0
5,2.0,36,1.0,0.0,0.0,0.0,1,0,0,0,0,0
6,3.0,3,1.0,0.0,0.0,,1,0,0,0,0,0
7,3.0,12,1.0,0.0,0.0,0.0,1,0,0,0,0,0
8,3.0,36,1.0,0.0,0.0,0.0,1,0,0,0,0,0
9,4.0,3,1.0,1.0,0.0,,0,1,0,0,0,0


# Creating Binary column from Child's Sex -> Male

note origial mapping in the raw eat dataset was:  
0=male, 1=female  

in this new encoding:  
1=male, 0=female

In [20]:
# Create "Male" column based on "Child's sex"
eat_reshaped_df['Male'] = (eat_reshaped_df["Child's sex"] == 0).astype(int)

# Drop the original "Child's sex" column
eat_reshaped_df.drop("Child's sex", axis=1, inplace=True)


In [21]:
eat_reshaped_df.shape

(3921, 12)

In [22]:
eat_reshaped_df.head(15)

Unnamed: 0,case ID,Age,study group,Wheal (mm),OFC Pass [need to determine age],White,Black,Asian,Other,Mixed,Unknown,Male
0,1.0,3,0.0,,,1,0,0,0,0,0,1
1,1.0,12,0.0,0.0,0.0,1,0,0,0,0,0,1
2,1.0,36,0.0,0.0,0.0,1,0,0,0,0,0,1
3,2.0,3,1.0,0.0,,1,0,0,0,0,0,1
4,2.0,12,1.0,0.0,0.0,1,0,0,0,0,0,1
5,2.0,36,1.0,0.0,0.0,1,0,0,0,0,0,1
6,3.0,3,1.0,0.0,,1,0,0,0,0,0,1
7,3.0,12,1.0,0.0,0.0,1,0,0,0,0,0,1
8,3.0,36,1.0,0.0,0.0,1,0,0,0,0,0,1
9,4.0,3,1.0,0.0,,0,1,0,0,0,0,0


# Exploring Wheal values and supporting documents to see if I can learn more about how to map the data accurately to the OFC pass column

page 29 from appendix:  
Participants who were skin-prick positive (greater than 0 mm) to peanut or sesame at the one year assessment had their challenge to this food deferred until the three year assessment __depending on their study group__ and consumption frequency (see Table S2 and Supplementary Methods). Participants with a double-blind, placebo- controlled positive food challenge fulfilled the primary outcome definition (Category 1 - see Supplementary Methods), regardless of whether they subsequently returned for the three year assessment. Participants who had negative challenges were non-allergic but not deemed primary outcome negative as an allergy could still develop between the one and three year assessments.  

According to the chart, the patients who did not "Meets EAT frequent consumption criteria" were deferred to taking the OFC test at the 36 month visit. "Defer challenge until 3 year assessment for infrequent/never consuming SIG and infrequent (UCV+ve) EIG"

page 8 from appendix:  
__Frequent consumption criteria__ (Figs. S2 & S3) were as follows: (1) Consuming at least one EAT portion (2 grams or more of food protein) of the food within the last month; and (2) History of ever having consumed more than three EAT portions (2 grams or more of food protein at a time) of the food.   
All other participants were __designated as infrequent__ or never consumers as appropriate. Further details are in Table S2 (scheduled challenges) and Fig. S2 (one year assessment) & Fig. S3 (three year assessment).  

Participants who were found to be skin-prick test positive to peanut or sesame at the one year assessment underwent assessment in accordance with Table S2. Skin-prick test positive frequent consumers of peanut or sesame were told to maintain their consumption at the same rate. Early introduction group participants were encouraged to consume peanut and sesame in the recommended quantities. Infrequent or never consumers of peanut or sesame were told to avoid the food until the three year assessment when their skin-prick test status was determined and challenges undertaken as designated in Table S2. The reason for deferring the peanut or sesame challenges was that there was a theoretical risk that undertaking a sesame or peanut challenge in a standard introduction infant who had been exposed to little or no sesame or peanut could induce tolerance.

# TO DO:
To determine if OFC was deferred to 3 year assessment, a patient must meet this criteria: 
- infrequent/never consuming SIG and infrequent (UCV+ve) EIG
- Participants who were skin-prick positive (greater than 0 mm) to peanut or sesame at the one year assessment 

## Names of the columns that contain the frequency of consumption:
freq of peanut butter consumed at 4 months
freq of peanut cereal consumed at 4 months
freq of peanut choc consumed at 4 months
freq of peanut butter consumed at 5 months
freq of peanut cereal consumed at 5 months
freq of peanut choc consumed at 5 months
freq of peanut butter consumed at 6 months
freq of peanut cereal consumed at 6 months
freq of peanut choc consumed at 6 months
freq of peanut butter consumed at 7 months
freq of peanut cereal consumed at 7 months
freq of peanut choc consumed at 7 months
freq of peanut butter consumed at 8 months
freq of peanut cereal consumed at 8 months
freq of peanut choc consumed at 8 months
freq of peanut butter consumed at 9 months
freq of peanut cereal consumed at 9 months
freq of peanut choc consumed at 9 months
freq of peanut butter consumed at 10 months
freq of peanut cereal consumed at 10 months
freq of peanut choc consumed at 10 months
freq of peanut butter consumed at 11 months
freq of peanut cereal consumed at 11 months
freq of peanut choc consumed at 11 months
freq of peanut butter consumed at 12 months
freq of peanut cereal consumed at 12 months
freq of peanut choc consumed at 12 months
freq of peanut butter consumed at 15 months
freq of peanut cereal consumed at 15 months
freq of peanut choc consumed at 15 months
freq of peanut butter consumed at 18 months
freq of peanut cereal consumed at 18 months
freq of peanut choc consumed at 18 months
freq of peanut butter consumed at 21 months
freq of peanut cereal consumed at 21 months
freq of peanut choc consumed at 21 months
freq of peanut butter consumed at 24 months
freq of peanut cereal consumed at 24 months
freq of peanut choc consumed at 24 months
freq of peanut butter consumed at 27 months
freq of peanut cereal consumed at 27 months
freq of peanut choc consumed at 27 months
freq of peanut butter consumed at 30 months
freq of peanut cereal consumed at 30 months
freq of peanut choc consumed at 30 months
freq of peanut butter consumed at 33 months
freq of peanut cereal consumed at 33 months
freq of peanut choc consumed at 33 months
freq of peanut butter consumed at 36 months
freq of peanut cereal consumed at 36 months
freq of peanut choc consumed at 36 months


In [23]:
eat_wheal_dropna = eat_reshaped_df["Wheal (mm)"].dropna()

In [24]:
eat_wheal_dropna.shape # dropped 934 participants who have NaN wheal values

(2987,)

In [25]:
len(eat_wheal_dropna.unique()) # there are 29 unique values for wheal mm

29

In [26]:
unique_wheal = eat_wheal_dropna.unique()

In [27]:
unique_wheal = np.sort(unique_wheal)

In [28]:
unique_wheal # wheal values range from 0 to 13.5

array([ 0.   ,  0.875,  1.   ,  1.5  ,  2.   ,  2.25 ,  2.5  ,  2.75 ,
        3.   ,  3.5  ,  4.   ,  4.5  ,  5.   ,  5.25 ,  5.5  ,  6.   ,
        6.5  ,  7.   ,  7.5  ,  8.   ,  8.5  ,  9.   ,  9.5  , 10.   ,
       10.5  , 11.5  , 12.   , 12.5  , 13.5  ])

# Creating a new dataframe for frequency testing

In [29]:
# Creating a new data frame to include the frequency information 

freq_consum_columns = ["case ID", 
                "study group", 
                "Child's sex",
                "ethnicity", 
                "skin prick test to peanut at 3 month clinic visit", 
                "skin prick test to peanut at 12 month clinic visit", 
                "skin prick test to peanut at 36 month clinic visit",
                "primary outcome peanut allergy (only those evaluable and within age range)", 
                "Peanut week 1 consumption in grams",
                "Peanut week 2 consumption in grams",
                "Peanut week 3 consumption in grams",
                "Peanut week 4 consumption in grams",
                "Peanut week 5 consumption in grams",
                "Peanut week 6 consumption in grams",
                "Peanut week 7 consumption in grams",
                "Peanut week 8 consumption in grams",
                "Peanut week 9 consumption in grams",
                "Peanut week 10 consumption in grams",
                "Peanut week 11 consumption in grams",
                "Peanut week 12 consumption in grams",
                "freq of peanut butter consumed at 4 months",
                "freq of peanut cereal consumed at 4 months",
                "freq of peanut choc consumed at 4 months",
                "freq of peanut butter consumed at 5 months",
                "freq of peanut cereal consumed at 5 months",
                "freq of peanut choc consumed at 5 months",
                "freq of peanut butter consumed at 6 months",
                "freq of peanut cereal consumed at 6 months",
                "freq of peanut choc consumed at 6 months",
                "freq of peanut butter consumed at 7 months",
                "freq of peanut cereal consumed at 7 months",
                "freq of peanut choc consumed at 7 months",
                "freq of peanut butter consumed at 8 months",
                "freq of peanut cereal consumed at 8 months",
                "freq of peanut choc consumed at 8 months",
                "freq of peanut butter consumed at 9 months",
                "freq of peanut cereal consumed at 9 months",
                "freq of peanut choc consumed at 9 months",
                "freq of peanut butter consumed at 10 months",
                "freq of peanut cereal consumed at 10 months",
                "freq of peanut choc consumed at 10 months",
                "freq of peanut butter consumed at 11 months",
                "freq of peanut cereal consumed at 11 months",
                "freq of peanut choc consumed at 11 months",
                "freq of peanut butter consumed at 12 months",
                "freq of peanut cereal consumed at 12 months",
                "freq of peanut choc consumed at 12 months",
                "freq of peanut butter consumed at 15 months",
                "freq of peanut cereal consumed at 15 months",
                "freq of peanut choc consumed at 15 months",
                "freq of peanut butter consumed at 18 months",
                "freq of peanut cereal consumed at 18 months",
                "freq of peanut choc consumed at 18 months",
                "freq of peanut butter consumed at 21 months",
                "freq of peanut cereal consumed at 21 months",
                "freq of peanut choc consumed at 21 months",
                "freq of peanut butter consumed at 24 months",
                "freq of peanut cereal consumed at 24 months",
                "freq of peanut choc consumed at 24 months",
                "freq of peanut butter consumed at 27 months",
                "freq of peanut cereal consumed at 27 months",
                "freq of peanut choc consumed at 27 months",
                "freq of peanut butter consumed at 30 months",
                "freq of peanut cereal consumed at 30 months",
                "freq of peanut choc consumed at 30 months",
                "freq of peanut butter consumed at 33 months",
                "freq of peanut cereal consumed at 33 months",
                "freq of peanut choc consumed at 33 months",
                "freq of peanut butter consumed at 36 months",
                "freq of peanut cereal consumed at 36 months",
                "freq of peanut choc consumed at 36 months"]



In [31]:
type(freq_consum_columns)

list

In [32]:
freq_consum_df = eat_working[freq_consum_columns]

In [33]:
freq_consum_df.head()

Unnamed: 0,case ID,study group,Child's sex,ethnicity,skin prick test to peanut at 3 month clinic visit,skin prick test to peanut at 12 month clinic visit,skin prick test to peanut at 36 month clinic visit,primary outcome peanut allergy (only those evaluable and within age range),freq of peanut butter consumed at 4 months,freq of peanut cereal consumed at 4 months,...,freq of peanut choc consumed at 27 months,freq of peanut butter consumed at 30 months,freq of peanut cereal consumed at 30 months,freq of peanut choc consumed at 30 months,freq of peanut butter consumed at 33 months,freq of peanut cereal consumed at 33 months,freq of peanut choc consumed at 33 months,freq of peanut butter consumed at 36 months,freq of peanut cereal consumed at 36 months,freq of peanut choc consumed at 36 months
0,1.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,4.0,3.0,0.0,4.0,3.0,2.0,4.0,3.0,0.0
2,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,,,,4.0,0.0,0.0,5.0,0.0,0.0
3,4.0,1.0,1.0,3.0,0.0,,0.0,0.0,0.0,0.0,...,,,,,,,,0.0,0.0,1.0
4,5.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,4.0,0.0,0.0,4.0,0.0,0.0,5.0,0.0,0.0


In [34]:
# "never consumed" is easier to calculate than determining what 
# constitutes "infrequent" so I'm going to filter those out first 

# Define the columns to exclude from the sum
exclude_cols = [
    "case ID",
    "study group",
    "Child's sex",
    "ethnicity",
    "skin prick test to peanut at 3 month clinic visit",
    "skin prick test to peanut at 12 month clinic visit",
    "skin prick test to peanut at 36 month clinic visit",
    "primary outcome peanut allergy (only those evaluable and within age range)",
    "Peanut week 1 consumption in grams",
    "Peanut week 2 consumption in grams",
    "Peanut week 3 consumption in grams",
    "Peanut week 4 consumption in grams",
    "Peanut week 5 consumption in grams",
    "Peanut week 6 consumption in grams",
    "Peanut week 7 consumption in grams",
    "Peanut week 8 consumption in grams",
    "Peanut week 9 consumption in grams",
    "Peanut week 10 consumption in grams",
    "Peanut week 11 consumption in grams",
    "Peanut week 12 consumption in grams"
]

# Create the "total frequency" column which will hold sum across the rows of all the frequency columns 
freq_consum_df.loc[:, 'total frequency'] = freq_consum_df.drop(exclude_cols, axis=1).sum(axis=1).copy()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


In [35]:
count_total_frequency_zero = len(freq_consum_df[freq_consum_df["total frequency"] == 0])
count_total_frequency_zero 
# output is 109, meaning at least 109 did not consume any peanut food by the 12 month clinic visit

109

In [37]:
# data frame containing just the zero freq consumption rows 
freq_consum_df_zero = freq_consum_df[freq_consum_df["total frequency"] == 0]

In [44]:
freq_consum_df_zero.shape

(109, 60)

---

# Combining above code to mix the one hot encoding etc with the freq rows 