# Re-encode the conditions

_Summary_:

Add one column for each relevant condition to the data frame.

Depending on whether we choose to label the data with {0,1} or {0,1,2}, the value in each condition column will be either 0 or 1, or one of 0, 1, or 2, respectively.

For each entry, at most one of the condition columns will have a non-zero value.

## Read data

In [1]:
import pandas as pd
# Load the dated autoimmune check-in export into a DataFrame.
# NOTE(review): the stray output line left under this cell looks like the tail
# of a pandas DtypeWarning (mixed-type column) -- confirm, and consider passing
# an explicit dtype= for the affected column(s).
df_dated = pd.read_csv(filepath_or_buffer="autoimmune_data_dated.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [2]:
# Display the frame exactly as loaded, before any re-encoding (notebook preview).
df_dated

Unnamed: 0.1,Unnamed: 0,user_id,age,sex,country,checkin_date,trackable_id,trackable_type,trackable_name,trackable_value
0,0,1,30.0,female,BE,1,1288,Condition,Leaky Gut,1
1,1,1,30.0,female,BE,1,649,Condition,Migraine,0
2,2,1,30.0,female,BE,1,1022,Condition,Thoracic outlet syndrome,2
3,3,1,30.0,female,BE,1,886,Condition,Rheumatoid Arthritis,2
4,4,1,30.0,female,BE,1,397,Condition,Fibromyalgia,3
...,...,...,...,...,...,...,...,...,...,...
3553751,3553751,11478,48.0,male,US,12,175150,Weather,temperature_min,42.0
3553752,3553752,11478,48.0,male,US,12,175150,Weather,temperature_max,78.0
3553753,3553753,11478,48.0,male,US,12,175150,Weather,precip_intensity,0.0001
3553754,3553754,11478,48.0,male,US,12,175150,Weather,pressure,1015.0


## Re-encode labels

In [12]:
# Work on an independent copy so the re-encoding below never touches df_dated
# (DataFrame.copy() makes a deep copy by default).
modify_df = df_dated.copy()

In [14]:
# For every entry that tracks a condition, change the trackable_value to '0' or '1'.
# Raw values '0' and '1' collapse to '0'; raw values '2', '3' and '4' collapse to '1'.
VALUE_TRANSLATION = {'0':'0', '1':'0', '2':'1', '3':'1', '4':'1'}

# Select the Condition rows once and translate them in a single vectorized
# pass; a Python-level iterrows() loop is very slow on a frame with millions
# of rows.
is_condition = modify_df['trackable_type'] == 'Condition'
cur_vals = modify_df.loc[is_condition, 'trackable_value']  # current values as they are in the dataset
new_vals = cur_vals.map(VALUE_TRANSLATION)

# Series.map() returns NaN for values that are not keys of the dict. Raise
# KeyError for those (the same error a plain dict lookup gives), but do it
# before anything is written so the frame is never left half re-encoded.
unmapped = cur_vals[new_vals.isna()]
if not unmapped.empty:
    raise KeyError(
        "trackable_value(s) missing from VALUE_TRANSLATION: {!r}".format(
            unmapped.unique().tolist()
        )
    )

# Rows with any other trackable_type keep their original trackable_value.
modify_df.loc[is_condition, 'trackable_value'] = new_vals

In [15]:
# Save the frame with binary condition labels to disk. The row index is
# written too, because to_csv() defaults to index=True.
# NOTE(review): the "Unnamed: 0" columns in the loaded data look like indexes
# written by earlier save/load round trips -- consider index=False if the
# downstream readers of this file allow it.
modify_df.to_csv(path_or_buf='binary_labels_data.csv')

In [17]:
# Insert a new column for each relevant condition (default value = 0).
# NOTE(review): "Chron's" may be a misspelling of "Crohn's" -- confirm against
# the trackable_name values in the dataset before changing it, since the name
# is matched verbatim and also becomes a column label.
our_cond_list = [
    "Scleroderma",
    "IBS",
    "IBD",
    "Chron's",
    "Lupus",
    "Rheumatoid Arthritis",
    "Fibromyalgia",
    "Dysautonomia",
    "Arthritis",
    "Psoriasis",
    "Raynaud's Phenomenon",
]
# Update this list to match the dict in Aleksandra's code

# One scalar assignment per condition creates the column filled with 0.
for cond in our_cond_list:
    modify_df[cond] = 0

In [19]:
# If a row corresponds to a relevant condition, update the value in the
# corresponding condition's column to reflect the trackable_value column.
#
# A row qualifies when it is a Condition entry whose (re-encoded)
# trackable_value is the string '1' and whose trackable_name is one of
# our_cond_list; only the column named after that condition is set.
is_positive_condition = (
    (modify_df['trackable_type'] == 'Condition')
    & (modify_df['trackable_value'] == '1')
)
trackable_name = modify_df['trackable_name']

# One boolean-mask assignment per condition replaces a row-by-row iterrows()
# pass over the whole frame.
for cond in our_cond_list:
    # Write the integer 1 (not the string '1'): the condition columns were
    # created with integer 0, so this keeps each column a single numeric dtype
    # instead of a mix of int and str.
    modify_df.loc[is_positive_condition & (trackable_name == cond), cond] = 1

In [20]:
# Display the frame after the condition columns have been filled in (notebook preview).
modify_df

Unnamed: 0.1,Unnamed: 0,user_id,age,sex,country,checkin_date,trackable_id,trackable_type,trackable_name,trackable_value,...,IBS,IBD,Chron's,Lupus,Rheumatoid Arthritis,Fibromyalgia,Dysautonomia,Arthritis,Psoriasis,Raynaud's Phenomenon
0,0,1,30.0,female,BE,1,1288,Condition,Leaky Gut,0,...,0,0,0,0,0,0,0,0,0,0
1,1,1,30.0,female,BE,1,649,Condition,Migraine,0,...,0,0,0,0,0,0,0,0,0,0
2,2,1,30.0,female,BE,1,1022,Condition,Thoracic outlet syndrome,1,...,0,0,0,0,0,0,0,0,0,0
3,3,1,30.0,female,BE,1,886,Condition,Rheumatoid Arthritis,1,...,0,0,0,0,1,0,0,0,0,0
4,4,1,30.0,female,BE,1,397,Condition,Fibromyalgia,1,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3553751,3553751,11478,48.0,male,US,12,175150,Weather,temperature_min,42.0,...,0,0,0,0,0,0,0,0,0,0
3553752,3553752,11478,48.0,male,US,12,175150,Weather,temperature_max,78.0,...,0,0,0,0,0,0,0,0,0,0
3553753,3553753,11478,48.0,male,US,12,175150,Weather,precip_intensity,0.0001,...,0,0,0,0,0,0,0,0,0,0
3553754,3553754,11478,48.0,male,US,12,175150,Weather,pressure,1015.0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
# Save the frame, now carrying one column per condition, to disk. The row
# index is written too, because to_csv() defaults to index=True.
modify_df.to_csv(path_or_buf='data_with_condition_columns.csv')

In [None]:
# For all entries of a given user on a given day (no matter what the trackable_type is)
# put the same value (0 or 1) in the condition columns
