Create figures for scoping review

Import packages

In [1]:
# Import packages
library(data.table)
library(ggplot2)
library(dplyr)
library(tidyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




Import all the data:

### META DATA


In [3]:
# Meta data
datMeta <- read.csv("tblStudyMeta.csv", header=T)
head(datMeta)

Unnamed: 0_level_0,studyRef,assessedBy,firstAuthor,year,species,countryStudy,mainOutcome,studyScore
Unnamed: 0_level_1,<int>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>,<chr>
1,1182475512,AS/SE,O'Higgins,2014,Human,Ireland,Both,II
2,1182475524,AS/SE,Cox,2019,Human,Belgium,Both,III
3,1182476286,AS/SE,Assaf-Balut,2019,Human,Spain,Both,II
4,1182476955,AS/SE,Assaf-Balut,2019,Human,Spain,Both,II
5,1182477024,AS/SE,Fondjo,2020,Human,Ghana,Mother,II
6,1182477025,AS/SE,Shamsi,2010,Human,Pakistan,Mother,III



### INFECTION DATA


In [4]:
# Infection data
datInf<-read.csv("tblStudyInfections.csv",header=T)
head(datInf)

Unnamed: 0_level_0,studyRef,infectionType,X,X.1,X.2,X.3,X.4,X.5,X.6,X.7,X.8
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,1182475512,Bacterial infection,,,,,,,,,
2,1182475524,Inflammatory/immune reponse,,,,,,,,,
3,1182476286,Urinary/Genital Tract Infection,,,,,,,,,
4,1182476955,Urinary/Genital Tract Infection,,,,,,,,,
5,1182477024,Malaria,,,,,,,,,
6,1182477025,Urinary/Genital Tract Infection,,,,,,,,,


In [5]:
# long format with multiple infection types per study shown in multiple rows
datInf_long <- datInf %>%
  pivot_longer(cols = -studyRef, values_to = "infectionType") %>%
  mutate(infectionType = na_if(infectionType, "")) %>%
  filter(!is.na(infectionType))

#trim white space
datInf_long$infectionType <-trimws(datInf_long$infectionType)

In [6]:
# Categorise Infection data

#What are all the Infection types?
unique(datInf_long$infectionType)

## Table catagorising infection types
infection_lookup <- data.frame(
  Infection = c("HPV", "Yeast infection", "Sepsis", "Systemic infection", "Tuberculosis",
                "Toxoplasmosis", "Zika virus", "Autoimmune disorder", "Rubella immune status",
                "Herpes virus (HSV)", "Influenza", "Pyelonephritis (Kidney infection)", "Fever or cold",
                "Periodontal", "Group B streptococcus", "H. pylori infection", "Vaccine response",
                "Bacterial infection", "LPS injection", "C-reactive protein",
                "Unspecified maternal infection", "Hepatitis B/C", "Intra-amniotic/intra-uterine infection (chorioamnionitis)",
                "STI", "Respiratory infection", "Other", "Malaria", 
                "Parasitic infection (nematode/hookworm etc)", "HIV", "Covid-19", "Urinary/Genital Tract Infection",
                "Inflammatory/immune reponse"),
  InfCat = c("Viral", "Fungal", "Bacterial", "General", "Bacterial",
             "Parasitic", "Viral", "Immune", "Viral",
             "Viral", "Viral", "Bacterial", "General",
             "General", "Bacterial", "Bacterial", "Immune",
             "Bacterial", "Immune", "Immune",
             "General", "Viral", "Bacterial",
             "Bacterial", "Bacterial", "General", "Parasitic",
             "Parasitic", "Viral", "Viral", "Bacterial", "Immune")
)

#Check that all the infection types have been categorised
setdiff(unique(datInf_long$infectionType), unique(infection_lookup$Infection))
setdiff(unique(infection_lookup$Infection), unique(datInf_long$infectionType))

#Create catagorised dataset
# Left join infection_lookup with datInf
datInfType_long <- merge(datInf_long, infection_lookup, by.x = "infectionType", by.y = "Infection", all.x = TRUE)
datInfType_long <- datInfType_long[,-3]
head(datInfType_long[order(datInfType_long$studyRef),])


Unnamed: 0_level_0,infectionType,studyRef,InfCat
Unnamed: 0_level_1,<chr>,<int>,<chr>
477,LPS injection,1182475095,Immune
275,Inflammatory/immune reponse,1182475123,Immune
630,Parasitic infection (nematode/hookworm etc),1182475134,Parasitic
262,HIV,1182475144,Viral
505,Malaria,1182475144,Parasitic
533,Malaria,1182475147,Parasitic


In [7]:

### NUTRITION DATA



In [8]:
datNut<-read.csv("tblStudyNutrition.csv",header=T)

# long format with multiple nutrient types per study shown in multiple rows
datNut_long <- datNut %>%
  pivot_longer(cols = -studyRef, values_to = "nutritionType") %>%
  mutate(nutritionType = na_if(nutritionType, "")) %>%
  filter(!is.na(nutritionType))

#trim white space
datNut_long$nutritionType <-trimws(datNut_long$nutritionType)



In [9]:
#What are all the nutritionTypes?
unique(datNut_long$nutritionType)

# add categorisation
# Lookup table with shortened category names

##Notes: 
#### Anaemia has been changed to a Clinical Indicator rather than Anthromometric
#### Minerals and Vitamins have been combined into Micronutrients
#### Because of the above combination we can now add Multivitamin/micronutrient supplement to Micronutrients. 

nutrient_lookup <- data.frame(
  Nutrition = c("Boron", "Phosphorus", "Magnesium", "Calcium", "Copper", "Selenium", "Zinc", "Iron",
                "Vitamin A", "Vitamin B2/B6/B12", "Vitamin C", "Vitamin D", "Vitamin E", "Folic acid",
                "Protein", "Fatty acids", "Fibre", "High fat diet",
                "Multivitamin/micronutrient supplement etc",
                "BMI", "Gestational weight gain", "Mid-upper arm circumference (MUAC)", "Anaemia",
                "Dietary diversity/change", "Other"),
  Category = c(
    "Micronutrients", "Micronutrients", "Micronutrients", "Micronutrients",
    "Micronutrients", "Micronutrients", "Micronutrients", "Micronutrients",
    
    "Micronutrients", "Micronutrients", "Micronutrients", "Micronutrients",
    "Micronutrients", "Micronutrients",
    
    "Macronutrients", "Macronutrients", "Macronutrients", "Macronutrients",
    
    "Micronutrients",
    
    "Anthropometrics", "Anthropometrics", "Anthropometrics", "Clinical Indicator",
    
    "Diet/Other", "Diet/Other"
  ),
  stringsAsFactors = FALSE
)



#Check that all the nutrition types have been categorised
setdiff(unique(datNut_long$nutritionType), unique(nutrient_lookup$Nutrition))
setdiff(unique(nutrient_lookup$Nutrition), unique(datNut_long$nutritionType))

#Create catagorised dataset
# Left join nutrient_lookup with datNut
datNutType_long <- merge(datNut_long, nutrient_lookup, by.x = "nutritionType", by.y = "Nutrition", all.x = TRUE)
datNutType_long <- datNutType_long[,-3] #remove "name" column
head(datNutType_long[order(datNutType_long$studyRef),])





Unnamed: 0_level_0,nutritionType,studyRef,Category
Unnamed: 0_level_1,<chr>,<int>,<chr>
683,Iron,1182475095,Micronutrients
82,BMI,1182475123,Anthropometrics
606,Folic acid,1182475134,Micronutrients
680,Iron,1182475134,Micronutrients
810,Other,1182475134,Diet/Other
898,Vitamin A,1182475134,Micronutrients



### PREGNANCY OUTCOMES



In [10]:
## This data was collected as MaternalOutcomes and OffspringOutcomes. We will combine these data into a single PregnancyOutcome column


######## Maternal data
datMat<-read.csv("tblStudyMaternalOutcomes.csv",header=T)

# long format with multiple nutrient types per study shown in multiple rows
datMat_long <- datMat %>%
  pivot_longer(cols = -studyRef, values_to = "maternalOutcomeType") %>%
  mutate(maternalOutcomeType = na_if(maternalOutcomeType, "")) %>%
  filter(!is.na(maternalOutcomeType))

#trim white space
datMat_long$maternalOutcomeType <-trimws(datMat_long$maternalOutcomeType)
datMat_long <- datMat_long[,-2]

#Remove all "None" entries (~NA in our data)
datMat_long[datMat_long == "None"] <- NA
datMat_long <- na.omit(datMat_long)

#View data
head(datMat_long[order(datMat_long$studyRef),])

###### Offspring data
datOff<-read.csv("tblStudyOffspringOutcomes.csv",header=T)

# long format with multiple nutrient types per study shown in multiple rows
datOff_long <- datOff %>%
  pivot_longer(cols = -studyRef, values_to = "offspringOutcomeType") %>%
  mutate(offspringOutcomeType = na_if(offspringOutcomeType, "")) %>%
  filter(!is.na(offspringOutcomeType))

#trim white space
datOff_long$offspringOutcomeType <-trimws(datOff_long$offspringOutcomeType)
datOff_long <- datOff_long[,-2]

#Remove all "None" entries (~NA in our data)
datOff_long[datOff_long == "None"] <- NA
datOff_long <- na.omit(datOff_long)


#View data
head(datOff_long[order(datOff_long$studyRef),])
unique(datOff_long$offspringOutcomeType)

studyRef,maternalOutcomeType
<int>,<chr>
1182475095,Biomarkers of immunity - maternal
1182475123,Placental inflammation
1182475123,Other - maternal
1182475134,Anaemia - maternal
1182475144,Anaemia - maternal
1182475147,Anaemia - maternal


studyRef,offspringOutcomeType
<int>,<chr>
1182475095,Behavioural change
1182475159,Birth weight
1182475165,Birth weight
1182475165,Gestational age (Large or Small)
1182475165,Neonate length
1182475165,Head circumference


In [11]:
# Combine the two datasets into a list of Pregnancy Outcomes. 
# use the PregnancyOutcomes lookup table to create a Pregnancy Outcomes variable df. 

#1. Rename columns to facilitate rbind
names(datOff_long)[2] <- "pregnancyOutcomeType"
names(datMat_long)[2] <- "pregnancyOutcomeType"

datPreg_long <- bind_rows(datOff_long, datMat_long)



#What are all the pregnancyOutcomeTypes?
unique(datPreg_long$pregnancyOutcomeType)

# add categorisation
# Lookup table with shortened category names

##Notes: 
#### None means no data/NA


pregnancy_lookup <- data.frame(Pregnancy = c("Mode of delivery / caesarean section rate", "Maternal mortality", "Hypertension (pregnancy-induced)",
"Pre-eclampsia", "Anaemia", "Gestational diabetes", "Haemorrhage", "Hospitalisation or ICU admission", "Postterm pregnancy",
"Placental abruption", "Maternal mental health", "Cancer risk", "Venous thromboembolism / pulmonary embolism / deep vein thrombosis",
"Prolonged labour (labour dystocia)", "Hyperemesis gravidarum", "Ectopic pregnancy", "COVID status", 
"Stillbirth", "Miscarriage / spontaneous abortion",
"Perinatal mortality", "Neonatal death", "NICU admission", "Birth weight", "Gestational age (Large or Small)", "Gestational age (Days/Weeks)",
"Apgar score", "Cord blood pH", "Respiratory distress", "Congenital malformations", "Congenital heart disease", "Foetal distress", "Macrosomia",
"Intrauterine growth restriction (IUGR)", "Neonatal encephalopathy", "Cleft palate", "Microcephaly", "Neonate length", "Head circumference",
"Pre-term birth", "Pre-term pre-labour rupture of membranes (PPROM)", "PROM", "Placental inflammation", "Placental size", "Placental lesions",
"Biomarkers of immunity - maternal", "Biomarkers of immunity - offspring", "Haemoglobin levels - maternal", "Haemoglobin levels - offspring", 
"Gene expression - maternal", "Gene expression - offspring", "DNA methylation - maternal", "DNA methylation - maternal - offspring", 
"Growth", "Language and motor development", "Motor development",
"Autism spectrum disorder", "Behavioural change", "Other", "None"),
Category = c(
rep("Maternal Outcomes", 17),
rep("Foetal and Neonatal Outcomes", 21),
rep("Pregnancy Complications", 3),
rep("Placental and Immunological Factors", 5),
rep("Molecular and Clinical Biomarkers", 6),
rep("Developmental and Long-term Child Outcomes", 5),
rep("Other", 1),
rep("None", 1)
),
    stringsAsFactors = FALSE
)





#Check that all the nutrition types have been categorised
setdiff(unique(datPreg_long$pregnancyOutcomeType), unique(pregnancy_lookup$Pregnancy))
setdiff(unique(pregnancy_lookup$Pregnancy), unique(datPreg_long$pregnancyOutcomeType))

#Create catagorised dataset
# Left join nutrient_lookup with datNut
datPregType_long <- merge(datPreg_long, pregnancy_lookup, by.x = "pregnancyOutcomeType", by.y = "Pregnancy", all.x = TRUE)

head(datPregType_long[order(datPregType_long$studyRef),])


Unnamed: 0_level_0,pregnancyOutcomeType,studyRef,Category
Unnamed: 0_level_1,<chr>,<int>,<chr>
125,Behavioural change,1182475095,Developmental and Long-term Child Outcomes
287,Biomarkers of immunity - maternal,1182475095,Placental and Immunological Factors
1841,Other - maternal,1182475123,
1967,Placental inflammation,1182475123,Placental and Immunological Factors
14,Anaemia - maternal,1182475134,
12,Anaemia - maternal,1182475144,




# Full join pregnancy Outcome with Meta, Nutrition, and Infection

This creates one big table with all the input and output variables for a specific study. 



In [15]:
#List of all the dfs

dfs_list <- list(datMeta, datNutType_long, datInfType_long, datPregType_long)

# Full join all data frames by "studyRef"

datMetaNutInfPreg <- Reduce(function(x,y) full_join(x,y, by = "studyRef"), dfs_list)
datMetaNutInfPreg <- rename(datMetaNutInfPreg, "NutCat", "Category.y" = "PregCat")

                            
head(datMetaNutInfPreg)
dim(datMetaNutInfPreg)

ERROR: [1m[33mError[39m in `rename()`:[22m
[33m![39m Can't rename columns that don't exist.
[31m✖[39m Column `NutCat` doesn't exist.
