# Population counts

Notebook used to produce some population counts for the first paper: "International electronic health record driven COVID-19 clinical course profile"

In [None]:
library("dplyr")

In [None]:
# Directory prefix (trailing slash included) that is pasted in front of every
# combined CSV file name read in the cells below.
path_combined <- "Combined_200410/"

In [None]:
# Site lookup table; text columns are kept as character vectors, not factors.
mapping_siteid <- read.csv(file = "mapping_siteid.csv", stringsAsFactors = FALSE)

In [None]:
# Lab tables at three aggregation levels: overall, by country and by site.
labs_combined <- read.csv(
    paste0(path_combined, "Labs-Combined200410.csv"),
    stringsAsFactors = FALSE
)
labs_bycountry <- read.csv(
    paste0(path_combined, "Labs-CombinedByCountry200410.csv"),
    stringsAsFactors = FALSE
)
labs_bysite <- read.csv(
    paste0(path_combined, "Labs-CombinedBySite200410.csv"),
    stringsAsFactors = FALSE
)

In [None]:
# Diagnosis tables at three aggregation levels: overall, by country and by site.
diagnoses_combined <- read.csv(
    paste0(path_combined, "Diagnoses-Combined200410.csv"),
    stringsAsFactors = FALSE
)
diagnoses_bycountry <- read.csv(
    paste0(path_combined, "Diagnoses-CombinedByCountry200410.csv"),
    stringsAsFactors = FALSE
)
diagnoses_bysite <- read.csv(
    paste0(path_combined, "Diagnoses-CombinedBySite200410.csv"),
    stringsAsFactors = FALSE
)

In [None]:
# Daily count tables at three aggregation levels: overall, by country and by site.
dailycounts_combined <- read.csv(
    paste0(path_combined, "DailyCounts-Combined200410.csv"),
    stringsAsFactors = FALSE
)
dailycounts_bycountry <- read.csv(
    paste0(path_combined, "DailyCounts-CombinedByCountry200410.csv"),
    stringsAsFactors = FALSE
)
dailycounts_bysite <- read.csv(
    paste0(path_combined, "DailyCounts-CombinedBySite200410.csv"),
    stringsAsFactors = FALSE
)

In [None]:
# Demographics tables at three aggregation levels: overall, by country and by site.
Demographics_combined <- read.csv(
    paste0(path_combined, "Demographics-Combined200410.csv"),
    stringsAsFactors = FALSE
)
Demographics_bycountry <- read.csv(
    paste0(path_combined, "Demographics-CombinedByCountry200410.csv"),
    stringsAsFactors = FALSE
)
Demographics_bysite <- read.csv(
    paste0(path_combined, "Demographics-CombinedBySite200410.csv"),
    stringsAsFactors = FALSE
)

In [None]:
# Number of distinct site ids found in the per-site demographics table.
length(unique(Demographics_bysite$siteid))

# Global counts

## New cases

In [None]:
# Summarise one count column of a daily-counts table together with the
# companion column that carries the upper bound of masked counts.
#
# Arguments:
#   dailycounts   Data frame containing both columns named below.
#   cases         Name of the column holding the unmasked counts.
#   upper_bounds  Name of the column holding the upper bound of each masked
#                 count (presumably 0 on rows where nothing was masked --
#                 TODO confirm against the data dictionary).
#
# Returns a list with three elements:
#   cases_total_unmasked_patients      sum of the `cases` column
#   cases_lower_bound_masked_patients  number of rows whose upper bound is
#                                      non-zero (each such row counts once)
#   cases_upper_bound_masked_patients  sum of the `upper_bounds` column
count_cases <- function(dailycounts,
                        cases = "new_positive_cases",
                        upper_bounds = "masked_upper_bound_new_positive_cases") {
    masked_bounds <- dailycounts[[upper_bounds]]
    list(
        cases_total_unmasked_patients = sum(dailycounts[[cases]]),
        # Missing upper bounds are ignored for BOTH bounds. Without na.rm here
        # a single NA turned the lower bound into NA while the upper bound
        # (which already used na.rm) stayed finite.
        cases_lower_bound_masked_patients = sum(masked_bounds != 0, na.rm = TRUE),
        cases_upper_bound_masked_patients = sum(masked_bounds, na.rm = TRUE)
    )
}
count_cases

In [None]:
# Bounds on the total number of new positive cases over all sites: the minimum
# adds one per row with a non-zero masked upper bound, the maximum adds the
# summed upper bounds.
new_cases <- count_cases(
    dailycounts_bysite,
    cases = "new_positive_cases",
    upper_bounds = "masked_upper_bound_new_positive_cases"
)
print(paste(
    "Minimum total patients",
    new_cases$cases_total_unmasked_patients + new_cases$cases_lower_bound_masked_patients
))
print(paste(
    "Maximum total patients",
    new_cases$cases_total_unmasked_patients + new_cases$cases_upper_bound_masked_patients
))

## Total new patients in ICU

In [None]:
# Same minimum / maximum computation, applied to the patients_in_icu column
# and its masked upper-bound column.
new_icu <- count_cases(
    dailycounts_bysite,
    cases = "patients_in_icu",
    upper_bounds = "masked_upper_bound_patients_in_icu"
)
print(paste(
    "Minimum total patients",
    new_icu$cases_total_unmasked_patients + new_icu$cases_lower_bound_masked_patients
))
print(paste(
    "Maximum total patients",
    new_icu$cases_total_unmasked_patients + new_icu$cases_upper_bound_masked_patients
))

## Total deaths

In [None]:
# Same minimum / maximum computation, applied to the new_deaths column and its
# masked upper-bound column.
new_deaths <- count_cases(
    dailycounts_bysite,
    cases = "new_deaths",
    upper_bounds = "masked_upper_bound_new_deaths"
)
print(paste(
    "Minimum total patients",
    new_deaths$cases_total_unmasked_patients + new_deaths$cases_lower_bound_masked_patients
))
print(paste(
    "Maximum total patients",
    new_deaths$cases_total_unmasked_patients + new_deaths$cases_upper_bound_masked_patients
))

## ICU and death rates

In [None]:
# Ratio of the minimum ICU total to the minimum new-case total.
global_icu_rate <-
    (new_icu$cases_total_unmasked_patients + new_icu$cases_lower_bound_masked_patients) /
    (new_cases$cases_total_unmasked_patients + new_cases$cases_lower_bound_masked_patients)

In [None]:
# Display the global ICU rate.
global_icu_rate

In [None]:
# Ratio of the minimum death total to the minimum new-case total.
global_deaths_rate <-
    (new_deaths$cases_total_unmasked_patients + new_deaths$cases_lower_bound_masked_patients) /
    (new_cases$cases_total_unmasked_patients + new_cases$cases_lower_bound_masked_patients)

In [None]:
# Display the global death rate.
global_deaths_rate

# Per sites

In [None]:
# Apply count_cases to the rows of each site separately, once per outcome
# (new positive cases, ICU patients, deaths). Each result holds one
# count_cases list per siteid level.
cases_per_sites <- by(
    data = dailycounts_bysite,
    INDICES = as.factor(dailycounts_bysite$siteid),
    FUN = count_cases,
    cases = "new_positive_cases",
    upper_bounds = "masked_upper_bound_new_positive_cases"
)
icu_per_sites <- by(
    data = dailycounts_bysite,
    INDICES = as.factor(dailycounts_bysite$siteid),
    FUN = count_cases,
    cases = "patients_in_icu",
    upper_bounds = "masked_upper_bound_patients_in_icu"
)
death_per_sites <- by(
    data = dailycounts_bysite,
    INDICES = as.factor(dailycounts_bysite$siteid),
    FUN = count_cases,
    cases = "new_deaths",
    upper_bounds = "masked_upper_bound_new_deaths"
)

In [None]:
# One row per site; minimum / maximum case counts add the lower / upper bound
# of the masked counts to the unmasked total.
df_cases_per_sites <- dplyr::bind_rows(cases_per_sites)
minimum_nb_cases <- df_cases_per_sites[["cases_total_unmasked_patients"]] +
    df_cases_per_sites[["cases_lower_bound_masked_patients"]]
maximum_nb_cases <- df_cases_per_sites[["cases_total_unmasked_patients"]] +
    df_cases_per_sites[["cases_upper_bound_masked_patients"]]

In [None]:
# One row per site; minimum / maximum ICU counts add the lower / upper bound of
# the masked ICU counts to the unmasked ICU total.
# The masked bounds must come from the ICU table itself: the previous version
# added the bounds of the new-positive-cases table (df_cases_per_sites), which
# mixed two different outcomes.
df_icu_per_sites <- dplyr::bind_rows(icu_per_sites)
minimum_nb_icu <- df_icu_per_sites$cases_total_unmasked_patients +
    df_icu_per_sites$cases_lower_bound_masked_patients
maximum_nb_icu <- df_icu_per_sites$cases_total_unmasked_patients +
    df_icu_per_sites$cases_upper_bound_masked_patients

In [None]:
# One row per site; minimum / maximum death counts add the lower / upper bound
# of the masked death counts to the unmasked death total.
# The masked bounds must come from the death table itself: the previous version
# added the bounds of the new-positive-cases table (df_cases_per_sites), which
# mixed two different outcomes.
df_death_per_sites <- dplyr::bind_rows(death_per_sites)
minimum_nb_death <- df_death_per_sites$cases_total_unmasked_patients +
    df_death_per_sites$cases_lower_bound_masked_patients
maximum_nb_death <- df_death_per_sites$cases_total_unmasked_patients +
    df_death_per_sites$cases_upper_bound_masked_patients

In [None]:
# Per-site summary table. The "median" columns are the midpoint between the
# corresponding minimum and maximum, not a statistical median.
df_stats_overall <- data.frame(
    minimum_nb_cases = minimum_nb_cases,
    maximum_nb_cases = maximum_nb_cases,
    median_nb_cases = (maximum_nb_cases - minimum_nb_cases) / 2 + minimum_nb_cases,
    minimum_nb_icu = minimum_nb_icu,
    maximum_nb_icu = maximum_nb_icu,
    median_icu = (maximum_nb_icu - minimum_nb_icu) / 2 + minimum_nb_icu,
    minimum_nb_death = minimum_nb_death,
    median_nb_death = (maximum_nb_death - minimum_nb_death) / 2 + minimum_nb_death,
    maximum_nb_death = maximum_nb_death
)
# Label the rows with the siteid factor levels, i.e. the same grouping that was
# passed to by() when the per-site counts were computed.
df_stats_overall$siteid <- levels(as.factor(dailycounts_bysite$siteid))

In [None]:
# Attach each site's country from the lookup table, then store it as a factor.
df_count_per_sites <- dplyr::left_join(
    df_stats_overall,
    mapping_siteid[c("siteid", "country")],
    by = "siteid"
)
df_count_per_sites$country <- as.factor(df_count_per_sites$country)

In [None]:
# Sum every count column per country (siteid is dropped first so that only
# numeric columns remain besides the grouping key).
df_total <- df_count_per_sites %>%
    select(-siteid) %>%
    group_by(country) %>%
    summarize_all(sum)


In [None]:
# Display the per-country totals.
df_total

In [None]:
# Grand total of every count column (column 1 is the country key and is skipped).
apply(df_total[, 2:ncol(df_total)], MARGIN = 2, FUN = sum)

In [None]:
# Per-country ratio of the minimum ICU count to the minimum case count.
with(df_total, minimum_nb_icu / minimum_nb_cases)

## Labs

In [None]:
# Total num_patients per (site, LOINC code) pair, keeping only pairs whose
# total is non-zero.
sum_per_labs <- labs_bysite %>%
    select(siteid, loinc, num_patients) %>%
    group_by(siteid, loinc) %>%
    summarize_all(sum)
sum_per_labs <- sum_per_labs[sum_per_labs$num_patients != 0, ]

In [None]:
# Number of distinct LOINC codes reported by each site.
number_reported_labs <- tapply(
    sum_per_labs$loinc,
    as.factor(sum_per_labs$siteid),
    function(codes) length(unique(codes))
)
# Frequency of each "number of labs reported" value, divided by 23.
# NOTE(review): 23 is presumably the total number of sites -- confirm, and
# consider deriving it from the data instead of hard-coding it.
prop_number_reported_labs <- table(number_reported_labs) / 23
prop_number_reported_labs

In [None]:
# Sum of num_patients over all rows of the per-site lab table, and the number
# of rows in that table.
total_nb_bio <- sum(labs_bysite[["num_patients"]])
total_nb_means <- nrow(labs_bysite)

In [None]:
# Display the summed num_patients value.
total_nb_bio