In [99]:
import warnings
import pandas as pd

warnings.filterwarnings('ignore')

## Cleaning up the Fetus dataset

In [100]:
fetus = pd.read_csv("Data/fetus-data.csv")
fetus.head(10)

Unnamed: 0,Litter_size,Fetus_genotype,Placenta_mass,Fetal_mass,Fetal_placental ratio,Fetal_sex,Maternal_genotype,Dam
0,8,WT,0.13,1.13,8.692308,M,fl/+,M1
1,8,WT,0.12,1.1,9.166667,M,fl/+,M1
2,8,WT,0.15,1.17,7.8,M,fl/+,M1
3,8,WT,0.15,1.1,7.333333,F,fl/+,M1
4,8,WT,0.16,1.16,7.25,M,fl/+,M1
5,8,WT,0.17,1.21,7.117647,M,fl/+,M1
6,8,WT,0.2,1.2,6.0,M,fl/+,M1
7,8,WT,0.16,1.21,7.5625,F,fl/+,M1
8,1,WT,0.14,0.89,6.357143,F,fl/fl,M2
9,8,HET,0.2,1.05,5.25,F,HET,M3


In [101]:
# Overwrite mother column to be numeric
fetus['Dam'] = fetus['Dam'].str.replace(r'^M(\d+)', r'\1').str.strip().astype(int)

In [102]:
# Make fetal sex binary number
fetus['Fetal_sex'] = fetus['Fetal_sex'].map({'M': 0, 'F': 1})

In [103]:
# Make values of genotype numeric and remove missing values (for now, maybe impute later)
fetus["Fetus_genotype"] = fetus["Fetus_genotype"].str.replace('\s+', '').str.lower()
fetus['Fetus_genotype'] = fetus['Fetus_genotype'].map({'wt': 0, 'het': 1, 'ko':2, 'resorp': 3})  # Key for fetus genotype
fetus = fetus[fetus["Dam"] != 16]

In [104]:
# Make maternal genotype numeric categorical
fetus['Maternal_genotype'] = fetus['Maternal_genotype'].map({'fl/+': 0, 'fl/fl': 1, 'HET':2})  # Key for maternal genotype

In [105]:
fetus.head(10)

Unnamed: 0,Litter_size,Fetus_genotype,Placenta_mass,Fetal_mass,Fetal_placental ratio,Fetal_sex,Maternal_genotype,Dam
0,8,0.0,0.13,1.13,8.692308,0.0,0,1
1,8,0.0,0.12,1.1,9.166667,0.0,0,1
2,8,0.0,0.15,1.17,7.8,0.0,0,1
3,8,0.0,0.15,1.1,7.333333,1.0,0,1
4,8,0.0,0.16,1.16,7.25,0.0,0,1
5,8,0.0,0.17,1.21,7.117647,0.0,0,1
6,8,0.0,0.2,1.2,6.0,0.0,0,1
7,8,0.0,0.16,1.21,7.5625,1.0,0,1
8,1,0.0,0.14,0.89,6.357143,1.0,1,2
9,8,1.0,0.2,1.05,5.25,1.0,2,3


## Cleaning up the Mother dataset

In [106]:
mother = pd.read_csv("Data/mother-data.csv")
mother.head(10)

Unnamed: 0,subject_label,group,Maternal_genotype,in_weight,fin_weight,gluc_base,gluc_15,gluc_30,gluc_60,gluc_120,...,fetal_placental_ratio_av,insulin_base,insulin_15,insulin_30,insulin_60,insulin_120,chng_insulin_base_15,chng_insulin_base_30,chng_insulin_base_60,chng_insulin_base_120
0,Lr,experimental,fl/fl,24,31,144,309,362,369,197,...,6.357143,0.34,0.238,0.228,0.303,0.451,-30.0,-32.941176,-10.882353,32.647059
1,L,experimental,HET,20,33,166,400,333,211,178,...,7.660194,,,,,,,,,
2,LR,experimental,fl/fl,22,35,128,325,433,363,179,...,7.3125,0.628,0.386,0.405,0.21,0.303,-38.535032,-35.509554,-66.56051,-51.751592
3,R,experimental,fl/fl,23,35,179,507,475,495,175,...,10.893939,0.525,0.535,0.507,0.702,0.507,1.904762,-3.428571,33.714286,-3.428571
4,L,experimental,fl/fl,28,43,125,306,318,302,178,...,6.652893,0.628,0.572,0.516,0.507,0.757,-8.917197,-17.834395,-19.267516,20.541401
5,R,experimental,fl/fl,24,37,181,381,398,285,145,...,14.830508,0.33,0.349,0.33,0.414,0.674,5.757576,0.0,25.454545,104.242424
6,Rr,experiemental,HET,30,39,217,435,328,195,173,...,11.04,0.925,1.23,0.609,0.535,0.86,32.972973,-34.162162,-42.162162,-7.027027
7,R,control,fl/+,19,37,183,316,229,185,170,...,7.483871,0.442,0.618,0.377,0.999,0.609,39.819005,-14.705882,126.0181,37.782805
8,Rl,control,fl/+,20,32,150,427,414,298,182,...,11.771429,0.312,0.423,0.247,0.646,0.275,35.576923,-20.833333,107.051282,-11.858974
9,Lr,experiemental,HET,24,39,173,347,175,164,138,...,15.407407,0.813,1.98,0.59,0.646,0.377,143.542435,-27.429274,-20.541205,-53.628536


In [107]:
# Make maternal genotype numeric categorical
mother['Maternal_genotype'] = mother['Maternal_genotype'].map({'fl/+': 0, 'fl/fl': 1, 'HET':2})  # Key for maternal genotype

In [108]:
mother.head(16)

Unnamed: 0,subject_label,group,Maternal_genotype,in_weight,fin_weight,gluc_base,gluc_15,gluc_30,gluc_60,gluc_120,...,fetal_placental_ratio_av,insulin_base,insulin_15,insulin_30,insulin_60,insulin_120,chng_insulin_base_15,chng_insulin_base_30,chng_insulin_base_60,chng_insulin_base_120
0,Lr,experimental,1,24,31,144,309,362,369,197,...,6.357143,0.34,0.238,0.228,0.303,0.451,-30.0,-32.941176,-10.882353,32.647059
1,L,experimental,2,20,33,166,400,333,211,178,...,7.660194,,,,,,,,,
2,LR,experimental,1,22,35,128,325,433,363,179,...,7.3125,0.628,0.386,0.405,0.21,0.303,-38.535032,-35.509554,-66.56051,-51.751592
3,R,experimental,1,23,35,179,507,475,495,175,...,10.893939,0.525,0.535,0.507,0.702,0.507,1.904762,-3.428571,33.714286,-3.428571
4,L,experimental,1,28,43,125,306,318,302,178,...,6.652893,0.628,0.572,0.516,0.507,0.757,-8.917197,-17.834395,-19.267516,20.541401
5,R,experimental,1,24,37,181,381,398,285,145,...,14.830508,0.33,0.349,0.33,0.414,0.674,5.757576,0.0,25.454545,104.242424
6,Rr,experiemental,2,30,39,217,435,328,195,173,...,11.04,0.925,1.23,0.609,0.535,0.86,32.972973,-34.162162,-42.162162,-7.027027
7,R,control,0,19,37,183,316,229,185,170,...,7.483871,0.442,0.618,0.377,0.999,0.609,39.819005,-14.705882,126.0181,37.782805
8,Rl,control,0,20,32,150,427,414,298,182,...,11.771429,0.312,0.423,0.247,0.646,0.275,35.576923,-20.833333,107.051282,-11.858974
9,Lr,experiemental,2,24,39,173,347,175,164,138,...,15.407407,0.813,1.98,0.59,0.646,0.377,143.542435,-27.429274,-20.541205,-53.628536


## Saving the cleaned data

In [109]:
fetus.to_csv("Data/fetus-cleaned.csv", index=False)
mother.to_csv("Data/mother-cleaned.csv", index=False)

## Considerations
- Resorp mice are missing data NOT at random
    - Perform analysis without resorp, then assess what might cause mice to die in the first place
- Two mothers are missing glucose data (why?)