In [44]:
# Convert one study record's reported effect measure to an odds ratio (OR).
#
# Args:
#   row: A single record indexable by name with `[[` (a named vector such as
#        the rows handed over by apply(), a list, or a one-row data frame).
#        Fields read: Measure_Name, Measure_Value, N1_Cancer, N2_No_Cancer.
#        Values may be character; they are parsed with as.numeric().
#
# Returns:
#   A numeric scalar. NA_real_ (with a warning) when Measure_Name is missing
#   or is not one of the handled measures, instead of failing with
#   "object 'OR' not found".
#
# NOTE(review): the conversion formulas below are kept exactly as originally
# written. Several can yield negative or infinite values (e.g. SIR = 0 gives
# -Inf), which are not valid odds ratios -- confirm the formulas with a
# statistician before relying on the converted values.
convert_to_or <- function(row) {
  # trimws() tolerates stray padding around the measure label
  measure_name <- trimws(as.character(row[["Measure_Name"]]))
  measure <- as.numeric(row[["Measure_Value"]])
  n1 <- as.numeric(row[["N1_Cancer"]])
  n2 <- as.numeric(row[["N2_No_Cancer"]])

  if (length(measure_name) != 1L || is.na(measure_name)) {
    warning("Measure_Name is missing; returning NA", call. = FALSE)
    return(NA_real_)
  }

  switch(
    measure_name,
    # Measures passed through unchanged (empty arms fall through to "HR")
    "OR" = ,
    "Crude OR" = ,
    "RR" = ,
    "HR" = measure,
    "RD" = exp(measure),
    "SIR" = (measure - 1) / measure,
    "Crude IRR" = measure / (1 - measure),
    "IR" = {
      # Odds computed from the ratio of the two group counts
      ratio <- n1 / n2
      ratio / (1 - ratio)
    },
    {
      # Default arm: unknown measure type
      warning(
        "Unsupported Measure_Name '", measure_name, "'; returning NA",
        call. = FALSE
      )
      NA_real_
    }
  )
}

In [45]:
# Load the extracted study records from disk
data <- read.csv("test-data.csv")

# Total_Cohort: strip the comma separators, then parse as numeric
data$Total_Cohort <- as.numeric(gsub(",", "", data$Total_Cohort))

# N1_Cancer and N2_No_Cancer: parse as numeric; anything that does not parse
# (or is missing) becomes 0
data[c("N1_Cancer", "N2_No_Cancer")] <- lapply(
  data[c("N1_Cancer", "N2_No_Cancer")],
  function(counts) {
    counts <- as.numeric(counts)
    replace(counts, is.na(counts), 0)
  }
)

# Preview the first few rows
head(data)


Unnamed: 0_level_0,Article_ID,Total_Cohort,Measure_Name,Measure_Value,X95_CI_lower,X95_CI_upper,N1_Cancer,N2_No_Cancer
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,"Di Luccia, 2010",1216,OR,3.04,1.22,7.57,0,0
2,"Brewster, 2010",77518,SIR,1.4,0.17,5.04,0,0
3,"Brewster, 2010",77518,SIR,0.0,0.0,3.11,0,0
4,"Cnattingius, 1995",89,OR,7.5,1.8,31.9,0,0
5,"Bugaiaki-Shaked, 2022",342172,HR,1.33,0.95,1.84,0,0
6,"Bugaiaki-Shaked, 2022",342172,HR,1.09,0.92,1.3,0,0


In [46]:
# convert to OR
# Rows are visited by index instead of with apply(data, 1, ...): apply()
# first coerces the whole data frame to a character matrix, so numeric
# columns are passed through format() and re-parsed from text. Slicing the
# data frame keeps every column's original type, and vapply() guarantees
# one numeric value per row.
data$OR <- vapply(
  seq_len(nrow(data)),
  function(i) convert_to_or(data[i, , drop = FALSE]),
  numeric(1)
)

# show the first few rows
head(data)

[1] 3.04
[1] 0.2857143
[1] -Inf
[1] 7.5
[1] 1.33
[1] 1.09
[1] 1.02
[1] 1.51
[1] 1.53
[1] 1.35
[1] 0.05934343
[1] 0.06586826
[1] 0.07
[1] 0.05810398
[1] 0.04761905
[1] 0.04926108
[1] 0
[1] 0.4117647
[1] 0
[1] 0.6666667
[1] -Inf
[1] -0.4285714
[1] -0.1111111
[1] -0.25
[1] -0.6666667
[1] 0.3333333
[1] 0.2857143
[1] -0.4285714
[1] 0.1666667
[1] -4
[1] 1.12
[1] 0.05894309
[1] 0.07079646
[1] 0.04724409
[1] 0.1075269
[1] 0.04142012
[1] 0.04040404
[1] 0.1081081
[1] 0.02531646
[1] 0.01470588
[1] 0.065
[1] 0.1015228
[1] 0.1597633
[1] 0.1167883
[1] 0.34375
[1] 0.3181818
[1] 0.4
[1] 0.04285714
[1] 0.08
[1] 0.2222222
[1] 0.2222222
[1] 0.5
[1] 0.07142857


Unnamed: 0_level_0,Article_ID,Total_Cohort,Measure_Name,Measure_Value,X95_CI_lower,X95_CI_upper,N1_Cancer,N2_No_Cancer,OR
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,"Di Luccia, 2010",1216,OR,3.04,1.22,7.57,0,0,3.04
2,"Brewster, 2010",77518,SIR,1.4,0.17,5.04,0,0,0.2857143
3,"Brewster, 2010",77518,SIR,0.0,0.0,3.11,0,0,-inf
4,"Cnattingius, 1995",89,OR,7.5,1.8,31.9,0,0,7.5
5,"Bugaiaki-Shaked, 2022",342172,HR,1.33,0.95,1.84,0,0,1.33
6,"Bugaiaki-Shaked, 2022",342172,HR,1.09,0.92,1.3,0,0,1.09


In [None]:
library(metafor)

# Natural log of positive values; NA for anything missing or non-positive.
# Avoids the NaN/-Inf (and warnings) that log() gives for invalid ratios.
log_if_positive <- function(x) {
  out <- rep(NA_real_, length(x))
  ok <- !is.na(x) & x > 0
  out[ok] <- log(x[ok])
  out
}

# OR of each study and type of cancer (one study can have more than one entry
# if it has more than one type of cancer observed)
or <- data$OR
# Standard errors of the log odds ratios, recovered from the reported 95% CI:
#   SE = (log(upper) - log(lower)) / (2 * qnorm(0.975))
# This replaces a hard-coded vector of 15 placeholder values.
# NOTE(review): the CI limits describe the *reported* measure. They match the
# OR column only where the conversion leaves the value unchanged (OR, Crude OR,
# RR, HR) -- confirm how the other measure types should be weighted.
se <- (log_if_positive(data$X95_CI_upper) -
  log_if_positive(data$X95_CI_lower)) / (2 * qnorm(0.975))
# Sample size for each study
n <- data$Total_Cohort

# One row per effect size; labels are made unique because a study can
# contribute several entries
meta_input <- data.frame(
  study = make.unique(as.character(data$Article_ID)),
  yi = log_if_positive(or),
  sei = se,
  n = n
)

# Keep only rows with a usable log OR and a positive, finite standard error
usable <- is.finite(meta_input$yi) & is.finite(meta_input$sei) &
  meta_input$sei > 0
if (!all(usable)) {
  message(
    sum(!usable), " of ", length(usable),
    " rows dropped: non-positive OR or unusable confidence interval"
  )
}
meta_input <- meta_input[usable, , drop = FALSE]

# Fit random-effects model on the log odds ratio scale.
# rma.peto() only accepts 2x2 cell counts and does not take yi/sei, so the
# general rma() interface is used instead.
fit <- rma(
  yi = yi, sei = sei, slab = study, data = meta_input, method = "REML"
)

# Get overall odds ratio (with confidence interval), back-transformed
predict(fit, transf = exp)

# Plot forest plot with the axis labelled on the odds ratio scale
forest(fit, atransf = exp)