-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_tidying.R
119 lines (93 loc) · 3.92 KB
/
data_tidying.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Data Assembly Script
# Heili Lowman
# March 24, 2021
# The following script will assemble the disparate datasets into a single main dataset to be used in the accompanying analyses and figures.
# Load packages.
library(tidyverse)
library(lubridate)
library(openxlsx)
library(readxl)
library(naniar)
#### Raw Data ####
# C:N data
# Note - this project also includes the raw Excel file, but there was one date (9/12/19) entered incorrectly, so this has been fixed manually and the file saved as a csv for import below.
cn <- read_csv("data_raw/CHN_Field_all_years.csv")
# SST datasets (3 need to be knitted together)
sst1 <- read_csv("data_raw/LTER_sites_20000101_20190501.csv")
sst2 <- read_csv("data_raw/LTER_sites_20190502_20191001.csv")
sst3 <- read_csv("data_raw/LTER_sites_20191001_20210301.csv")
# Oceanographic indices (7 unique)
bakun <- read_csv("data_raw/Bakun_monthly_upwelling.csv")
beuti <- read_csv("data_raw/BEUTI_Upwelling.csv")
cuti <- read_csv("data_raw/CUTI_Upwelling.csv")
enso <- read_csv("data_raw/MEIV2_ENSO.csv")
mjo <- read_csv("data_raw/MJO_Index6_120W_Daily.csv")
npgo <- read_csv("data_raw/npgo.csv")
pdo <- read_csv("data_raw/PDO.csv")
#### Tidy Data ####
period <- c(".")
cn_full <- cn[-c(1:2),] %>% # remove first two rows of data
replace_with_na_all(condition = ~.x %in% period) %>% # remove all periods
mutate(WET_WT = as.numeric(WET_WT),
DRY_WET = as.numeric(DRY_WET)) %>% # need to change char to num
group_by(YEAR, MONTH, DATE, SITE, `_N`) %>% # group and...
summarize(wet_wt = mean(WET_WT, na.rm=TRUE),
dry_wt = mean(DRY_WT, na.rm=TRUE),
dry_wet = mean(DRY_WET, na.rm=TRUE),
anal_wt = mean(ANAL_WT, na.rm=TRUE),
c = mean(C, na.rm=TRUE),
h = mean(H, na.rm=TRUE),
n = mean(N, na.rm=TRUE),
cn = mean(CN_RATIO, na.rm=TRUE)) %>% # summarize across both replicates so that future analyses don't include pseudoreplication
ungroup() %>%
replace_with_na_all(condition = ~.x == -99999) # remove all -99999s
# Knit together temperature file.
sst2_2 <- sst2 %>%
mutate(DATE = lubridate::ymd(as.character(date))) %>% # convert to date
mutate(day = day(DATE)) %>% # pull out day
select(year, month, day, temp_C, site) # match sst1 formatting
sst3_2 <- sst3 %>%
mutate(DATE = lubridate::ymd(as.character(date))) %>% # convert to date
mutate(month = month(DATE), day = day(DATE)) %>% # pull out day & month
select(year, month, day, temp_C, site) # match sst1 formatting
sst_full <- sst1 %>%
rbind(sst2_2) %>%
rbind(sst3_2)
# Export to working folder for future use
# write_csv(sst_full, "data_analyses/SBC_sea_surface_temp.csv")
# Create compiled oceanographic indices file
beuti_ed <- beuti %>%
rename(beuti = `34N`)
# some raw files missing the month column
m0nth <- beuti_ed$month
bakun_ed <- bakun %>%
mutate(month = m0nth) %>% # add month to bakun dataset
select(year, month, bakun) # and reorder
enso_ed <- enso %>%
mutate(month = m0nth) %>% # add month to enso dataset
select(year, month, enso) # and reorder
# need to average MJO
mjo_ed <- mjo %>%
group_by(year, month) %>%
summarize(mjo = mean(MJO_120W)) %>%
ungroup()
# need to tidy NPGO file a bit
# as of 3/24/2021, the most recent data available for npgo is 7/2020
# see http://www.o3d.org/npgo/
year_add <- c(2020, 2020, 2020, 2020, 2020, 2021, 2021)
month_add <- c(8, 9, 10, 11, 12, 1, 2)
npgo_add <- c(NA, NA, NA, NA, NA, NA, NA)
addition <- data.frame(year_add, month_add, npgo_add) %>%
rename(year = year_add, month = month_add, npgo = npgo_add)
npgo_ed <- npgo[-c(1:4),] %>% # removes jan-april 2002
rbind(addition)
# Joining all the indices into a single dataset (wide format)
oi_full <- full_join(bakun_ed, beuti_ed) %>%
full_join(cuti) %>%
full_join(enso_ed) %>%
full_join(mjo_ed) %>%
full_join(npgo_ed) %>%
full_join(pdo)
#### Export Data ####
save(cn_full, sst_full, oi_full, file = "data_tidy/kelp_cn_data_clean.rda")
# End of script.