Skip to content

Commit b66f4b3

Browse files
committed
Updates data file format
1 parent f9b3131 commit b66f4b3

File tree

3 files changed

+1772
-14
lines changed

3 files changed

+1772
-14
lines changed

analyses/analyze_ct-age-clinical.Rmd

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,28 @@ library(ggpubr)
1616
```
1717
#### Load data:
1818
```{r}
19-
df <- read_tsv("../data/WA_df.tsv", col_types = list("c", "f", "f", "f", "D", "f", "d", "f", "f", "f", "f", "f", "f", "f", "D", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "d"))
19+
df <- read_tsv("../data/WA_df.tsv", col_types = list("c", "f", "f", "f", "D", "f", "d", "f", "f", "f", "f", "f", "f", "f","i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i"))
2020
```
2121
```{r}
2222
summary(df)
2323
```
2424

2525
#### Data prep for analysis:
2626
```{r}
27+
df$bin_age[df$age_bin == 'under 10'] <- 9 #better to be conservative
28+
df$bin_age[df$age_bin == '10-19'] <- 14.5
29+
df$bin_age[df$age_bin == '20-29'] <- 24.5
30+
df$bin_age[df$age_bin == '30-39'] <- 34.5
31+
df$bin_age[df$age_bin == '40-49'] <- 44.5
32+
df$bin_age[df$age_bin == '50-59'] <- 54.5
33+
df$bin_age[df$age_bin == '60-69'] <- 64.5
34+
df$bin_age[df$age_bin == '70-79'] <- 74.5
35+
df$bin_age[df$age_bin == '80-89'] <- 84.5
36+
df$bin_age[df$age_bin == '90+'] <- 91 #Better to be conservative
37+
2738
df <- df %>%
2839
mutate(severe = ifelse(critical_care == "yes" | deceased == "yes", "yes", "no")) %>%
2940
mutate(severe = factor(severe)) %>%
30-
mutate(post_symp = date - symptom_onset) %>%
3141
mutate(decade_age = bin_age/10) %>%
3242
mutate(sequencing = ifelse(source != "UW", "nwgc", "uw")) %>%
3343
mutate(syn_muts = nt_muts - aa_muts)

analyses/df_prep.Rmd

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -308,24 +308,18 @@ df <- df %>%
308308
```{r}
309309
df <- df %>%
310310
mutate(symptom_onset = replace(symptom_onset, symptom_onset < as.Date("2020-01-31"), NA)) %>%
311+
mutate(post_symp = date - symptom_onset) %>%
311312
mutate(primers = replace(primers, source == "SFS", "scan")) %>%
312313
mutate(primers = replace(primers, source == "wadoh", "wadoh")) %>%
313314
mutate(county = replace(county, county == "Unknown", NA))
314315
316+
# Remove symptom onset date
317+
df <- df %>%
318+
select(-symptom_onset)
319+
315320
# Dropping any samples from May on:
316321
df <- df %>%
317322
filter(date < as.Date("2020-05-01"))
318-
319-
df$bin_age[df$age_bin == 'under 10'] <- 9 #better to be conservative
320-
df$bin_age[df$age_bin == '10-19'] <- 14.5
321-
df$bin_age[df$age_bin == '20-29'] <- 24.5
322-
df$bin_age[df$age_bin == '30-39'] <- 34.5
323-
df$bin_age[df$age_bin == '40-49'] <- 44.5
324-
df$bin_age[df$age_bin == '50-59'] <- 54.5
325-
df$bin_age[df$age_bin == '60-69'] <- 64.5
326-
df$bin_age[df$age_bin == '70-79'] <- 74.5
327-
df$bin_age[df$age_bin == '80-89'] <- 84.5
328-
df$bin_age[df$age_bin == '90+'] <- 91 #Better to be conservative
329323
```
330324

331325

@@ -360,12 +354,22 @@ df <- df %>%
360354
filter(!(is.na(df$nt_muts) & df$source == "wadoh"))
361355
```
362356

357+
### Make cleaned dataset
358+
```{r}
359+
clean <- df %>%
360+
select(-post_symp)
361+
```
362+
363363
#### Data overview
364364
```{r}
365365
summary(df)
366+
summary(clean)
366367
```
367368

368369
Saving dataset for analysis:
369370
```{r}
370371
write_tsv(df, "../data/WA_df.tsv")
371-
```
372+
```
373+
```{r}
374+
write_tsv(clean, "../data/WA_df_cleaned.tsv")
375+
```

0 commit comments

Comments
 (0)