# Merge the data
Create the `data/us_excess_deaths_per_age_group.rds` data frame.

In [1]:
library(tidyverse)

# Stack the observed and the predicted weekly death counts into one long
# table, labelling the origin of every row in `Type`, then derive a `Date`
# column from the year and the week number.
df <-
  read_csv("data/tidy_deaths_age_state_CDC.csv", show_col_types = FALSE) %>%
  mutate(Type = "Observed") %>%
  rbind(
    read_csv(
      "data/tidy_predicted_deaths_age_state_CDC.csv",
      show_col_types = FALSE
    ) %>%
      mutate(Type = "Predicted")
  ) %>%
  mutate(
    # Build an ISO week string such as "2015-W01-7"; the trailing 7 selects
    # the last weekday (Sunday) of that ISO week.
    Date = paste0(Year, "-W", str_pad(Week, width = 2, pad = "0"), "-7") %>%
      ISOweek::ISOweek2date()
  )

# Preview the first rows of the combined table.
head(df)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.6     [32m✔[39m [34mdplyr  [39m 1.0.8
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



Jurisdiction,Year,Age Group,Week,Number of Deaths,Type,Date
<chr>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<date>
Alabama,2015,<65,1,345,Observed,2015-01-04
Alabama,2015,<65,2,333,Observed,2015-01-11
Alabama,2015,<65,3,303,Observed,2015-01-18
Alabama,2015,<65,4,294,Observed,2015-01-25
Alabama,2015,<65,5,316,Observed,2015-02-01
Alabama,2015,<65,6,291,Observed,2015-02-08


# Read population data
For the age normalization we need the following fields:
* __Population__: The population of the age group for the state
* __Population_all_ages__: The population of the entire state
* __Population_age_group_nation__: The population of the age group for the entire nation

In [3]:
# Population per jurisdiction and age group. The columns used below are
# `Jurisdiction`, `Age Group` and `Population`.
df_pop1 <- read_csv("data/tidy_pop_age_state.csv", show_col_types = FALSE)

# Total population of each jurisdiction, summed over its age groups.
df_pop2 <-
  df_pop1 %>%
  group_by(Jurisdiction) %>%
  summarise(Population_all_ages = sum(Population))

# National population of each age group, taken from the "United States" rows.
df_pop3 <-
  df_pop1 %>%
  filter(Jurisdiction == "United States") %>%
  rename(Population_age_group_nation = Population) %>%
  select(-Jurisdiction)

# The merges below are inner joins: without any national rows they would
# silently yield an empty table that then gets written to disk.
if (nrow(df_pop3) == 0) {
  stop(
    "No \"United States\" rows found in data/tidy_pop_age_state.csv",
    call. = FALSE
  )
}

# Join keys are spelled out so that an unexpected shared column name cannot
# silently change what the tables are matched on.
df_pop <-
  df_pop1 %>%
  merge(df_pop2, by = "Jurisdiction") %>%
  merge(df_pop3, by = "Age Group")

# Attach the three population columns to every deaths row, then persist the
# merged table and preview it.
df2 <- merge(df, df_pop, by = c("Jurisdiction", "Age Group"))
write_rds(df2, "data/us_excess_deaths_per_age_group.rds")
head(df2)

Unnamed: 0_level_0,Jurisdiction,Age Group,Year,Week,Number of Deaths,Type,Date,Population,Population_all_ages,Population_age_group_nation
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<date>,<dbl>,<dbl>,<dbl>
1,Alabama,<65,2018,37,285.3385,Predicted,2018-09-16,4033195,4907439,272637837
2,Alabama,<65,2018,38,288.3385,Predicted,2018-09-23,4033195,4907439,272637837
3,Alabama,<65,2018,35,288.1385,Predicted,2018-09-02,4033195,4907439,272637837
4,Alabama,<65,2018,36,288.1385,Predicted,2018-09-09,4033195,4907439,272637837
5,Alabama,<65,2018,50,311.9385,Predicted,2018-12-16,4033195,4907439,272637837
6,Alabama,<65,2018,51,320.3385,Predicted,2018-12-23,4033195,4907439,272637837
