- Handling warnings

In [47]:
import warnings
warnings.filterwarnings("ignore")

- Loading modules

In [48]:
import pandas as pd
import numpy as np
import json

- Implementation of Auxiliar functions

In [49]:
def get_average_measure(morning_d1, morning_d2):

    if not pd.isna(morning_d1) and not pd.isna(morning_d2):
        average_morning = np.mean([morning_d1, morning_d2])
    elif not pd.isna(morning_d1):
        average_morning = morning_d1
    elif not pd.isna(morning_d2):
        average_morning = morning_d2
    else:
        average_morning = np.nan
    
    return average_morning

In [50]:
def get_average_difference(row):
    diffs = [] 

    if pd.notna(row['IgA D1-Afternoon']) and pd.notna(row['IgA D1-Morning']):
        diffs.append(row['IgA D1-Afternoon'] - row['IgA D1-Morning'])
    if pd.notna(row['IgA D2-Afternoon']) and pd.notna(row['IgA D2-Morning']):
        diffs.append(row['IgA D2-Afternoon'] - row['IgA D2-Morning'])
    
    if len(diffs) > 0:
        return np.mean(diffs)
    else:
        return np.nan

- Reading raw data

In [51]:
df_data = pd.read_csv("../raw_data/sIgA_data/raw_data.csv")
df_data.head(5)

Unnamed: 0,m1,m2,m3,m4,N°_diada,BienestarV1,BienestarV2,BienestarAv,Género_bebé,Edad_bebé_1,...,ads_a_i_pv_2,ads_m_i_pv_2_r,ads_v_i_pv_r,ads_ta_i_pv_2_r,ads_sa_i_pv_r,ads_a_i_pv_2_r,ads_p_i_pv_r,countinsecurei_2020,countsecurei_2020,ADS2i_2020
0,20.076728,31.719207,23.453752,25.016903,1,5.0,4.0,4.5,1,5,...,3.0,0.0,1.0,0.0,1.0,1.0,0.0,3,3,0
1,28.58492,82.400761,28.361209,19.363161,4,,,,1,10,...,4.0,0.0,1.0,0.0,1.0,1.0,0.0,3,3,0
2,161.424693,46.867951,,,5,,,,0,5,...,3.0,0.0,1.0,0.0,1.0,1.0,0.0,3,3,0
3,26.747838,,21.599136,35.942378,7,4.0,4.0,4.0,0,12,...,3.0,0.0,1.0,0.0,0.0,1.0,0.0,4,2,0
4,118.901337,,37.125222,382.715268,11,4.0,4.0,4.0,1,8,...,,,,,,,,999,999,999


- Defining columns to use

In [52]:
columns_to_use = ["N°_diada", 'm1','m2','m3','m4', 'Género_bebé', "Apego_dic_SSP"]

- Selecting data from raw dataset

In [53]:
data_selection = df_data[columns_to_use]
data_selection.head(5)

Unnamed: 0,N°_diada,m1,m2,m3,m4,Género_bebé,Apego_dic_SSP
0,1,20.076728,31.719207,23.453752,25.016903,1,0.0
1,4,28.58492,82.400761,28.361209,19.363161,1,1.0
2,5,161.424693,46.867951,,,0,1.0
3,7,26.747838,,21.599136,35.942378,0,0.0
4,11,118.901337,,37.125222,382.715268,1,


- Change name columns

In [54]:
data_selection.columns = ["ID", 'IgA D1-Morning', 'IgA D1-Afternoon', 
                          'IgA D2-Morning', 'IgA D2-Afternoon', 'Gender', 'Attachment']
data_selection.head(5)

Unnamed: 0,ID,IgA D1-Morning,IgA D1-Afternoon,IgA D2-Morning,IgA D2-Afternoon,Gender,Attachment
0,1,20.076728,31.719207,23.453752,25.016903,1,0.0
1,4,28.58492,82.400761,28.361209,19.363161,1,1.0
2,5,161.424693,46.867951,,,0,1.0
3,7,26.747838,,21.599136,35.942378,0,0.0
4,11,118.901337,,37.125222,382.715268,1,


- Get average morning and afternoon

In [55]:
data_selection["IgA_Average_Morning"] = data_selection.apply(
    lambda row: get_average_measure(row["IgA D1-Morning"], row["IgA D2-Morning"]),
    axis=1
)

In [56]:
data_selection["IgA_Average_Afternoon"] = data_selection.apply(
    lambda row: get_average_measure(row["IgA D1-Afternoon"], row["IgA D2-Afternoon"]),
    axis=1
)

- Get average differences

In [57]:
data_selection["Average_Difference"] = data_selection.apply(get_average_difference, axis=1)

- Remove non necessary columns

In [58]:
data_selection = data_selection[['ID', 'Gender', 'IgA_Average_Morning', 
                                 'IgA_Average_Afternoon', 'Average_Difference', 'Attachment']]

- Working with Gender

In [59]:
with open("../raw_data/sIgA_data/desc_columns.json", 'r') as doc_open:
    definitions_variables_doc = json.load(doc_open)

In [69]:
definitions_variables_doc[1]['Género_bebé']

[{'nominal_value': '0', 'description': 'Femenino'},
 {'nominal_value': '1', 'description': 'Masculino'}]

In [70]:
data_selection["Gender"] = data_selection["Gender"].replace({1: 'Male', 0: 'Female'})
data_selection["Gender"].value_counts()

Gender
Female    20
Male      15
Name: count, dtype: int64

- Checking attachment

In [71]:
data_selection["Attachment"].value_counts()

Attachment
1.0    18
0.0     9
Name: count, dtype: int64

In [78]:
for index in range(len(definitions_variables_doc)):
    keys = list(definitions_variables_doc[index].keys())
    if keys[0] == "Apego_dic_SSP":
        break

definitions_variables_doc[index]

{'Apego_dic_SSP': [{'nominal_value': '0', 'description': 'insecure'},
  {'nominal_value': '1', 'description': 'secure'}]}

In [80]:
data_selection['Attachment'] = data_selection['Attachment'].replace(
    {1:'Secure', 0:'Insecure', np.nan:'Unknown'})
data_selection['Attachment'].value_counts()

Attachment
Secure      18
Insecure     9
Unknown      8
Name: count, dtype: int64

- Exporting processed data

In [82]:
data_selection.to_csv("../processed_data/1_processed_data_IgA.csv", index=False)