Skip to content

Commit

Permalink
Updates data file format
Browse files Browse the repository at this point in the history
  • Loading branch information
cassiawag committed Mar 30, 2021
1 parent f9b3131 commit b66f4b3
Show file tree
Hide file tree
Showing 3 changed files with 1,772 additions and 14 deletions.
14 changes: 12 additions & 2 deletions analyses/analyze_ct-age-clinical.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,28 @@ library(ggpubr)
```
#### Load data:
```{r}
df <- read_tsv("../data/WA_df.tsv", col_types = list("c", "f", "f", "f", "D", "f", "d", "f", "f", "f", "f", "f", "f", "f", "D", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "d"))
df <- read_tsv("../data/WA_df.tsv", col_types = list("c", "f", "f", "f", "D", "f", "d", "f", "f", "f", "f", "f", "f", "f","i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i", "i"))
```
```{r}
# Print a per-column summary of the freshly loaded data frame.
summary(df)
```

#### Data prep for analysis:
```{r}
# Map each age_bin label to one representative numeric age (bin_age).
# Closed ten-year bins use the value halfway between their bounds; the two
# open-ended bins keep their hand-picked values.
age_bin_values <- c(
  "under 10" = 9, # better to be conservative
  "10-19" = 14.5,
  "20-29" = 24.5,
  "30-39" = 34.5,
  "40-49" = 44.5,
  "50-59" = 54.5,
  "60-69" = 64.5,
  "70-79" = 74.5,
  "80-89" = 84.5,
  "90+" = 91 # better to be conservative
)

# Create bin_age explicitly when it is missing, so the partial assignment
# below never touches an uninitialised column (tibbles warn about that when
# the column is reached through `$`).
if (!"bin_age" %in% names(df)) {
  df$bin_age <- rep(NA_real_, nrow(df))
}

# Look every row up once. Rows whose age_bin is NA or is not listed above
# keep whatever bin_age they already had.
bin_lookup <- match(as.character(df$age_bin), names(age_bin_values))
has_known_bin <- !is.na(bin_lookup)
df$bin_age[has_known_bin] <- unname(age_bin_values[bin_lookup[has_known_bin]])
# Derive the analysis covariates in a single mutate():
#   severe     - factor; "yes" when critical_care or deceased is "yes",
#                otherwise "no"
#   post_symp  - date minus symptom_onset
#   decade_age - bin_age divided by 10
#   sequencing - "uw" when source is "UW", "nwgc" for any other source
#   syn_muts   - nt_muts minus aa_muts
df <- df %>%
  mutate(
    severe = factor(
      ifelse(critical_care == "yes" | deceased == "yes", "yes", "no")
    ),
    post_symp = date - symptom_onset,
    decade_age = bin_age / 10,
    sequencing = ifelse(source != "UW", "nwgc", "uw"),
    syn_muts = nt_muts - aa_muts
  )
Expand Down
28 changes: 16 additions & 12 deletions analyses/df_prep.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -308,24 +308,18 @@ df <- df %>%
```{r}
# Tidy onset dates, primer labels and county, then trim columns and rows.
df <- df %>%
  mutate(
    # Onset dates earlier than 2020-01-31 are blanked out as missing.
    symptom_onset = replace(
      symptom_onset,
      symptom_onset < as.Date("2020-01-31"),
      NA
    ),
    # Computed from the already-cleaned onset date.
    post_symp = date - symptom_onset,
    # SFS samples are labelled "scan" first, then wadoh samples "wadoh".
    primers = replace(
      replace(primers, source == "SFS", "scan"),
      source == "wadoh",
      "wadoh"
    ),
    county = replace(county, county == "Unknown", NA)
  ) %>%
  # Remove symptom onset date
  select(-symptom_onset) %>%
  # Dropping any samples from May on:
  filter(date < as.Date("2020-05-01"))
# Map each age_bin label to one representative numeric age (bin_age).
# Closed ten-year bins use the value halfway between their bounds; the two
# open-ended bins keep their hand-picked values.
age_bin_values <- c(
  "under 10" = 9, # better to be conservative
  "10-19" = 14.5,
  "20-29" = 24.5,
  "30-39" = 34.5,
  "40-49" = 44.5,
  "50-59" = 54.5,
  "60-69" = 64.5,
  "70-79" = 74.5,
  "80-89" = 84.5,
  "90+" = 91 # better to be conservative
)

# Create bin_age explicitly when it is missing, so the partial assignment
# below never touches an uninitialised column (tibbles warn about that when
# the column is reached through `$`).
if (!"bin_age" %in% names(df)) {
  df$bin_age <- rep(NA_real_, nrow(df))
}

# Look every row up once. Rows whose age_bin is NA or is not listed above
# keep whatever bin_age they already had.
bin_lookup <- match(as.character(df$age_bin), names(age_bin_values))
has_known_bin <- !is.na(bin_lookup)
df$bin_age[has_known_bin] <- unname(age_bin_values[bin_lookup[has_known_bin]])
```


Expand Down Expand Up @@ -360,12 +354,22 @@ df <- df %>%
filter(!(is.na(df$nt_muts) & df$source == "wadoh"))
```

### Make cleaned dataset
```{r}
# Cleaned copy of df: every column except post_symp.
clean <- select(df, -post_symp)
```

#### Data overview
```{r}
# Print per-column summaries of the full data frame and of the cleaned copy.
summary(df)
summary(clean)
```

Saving dataset for analysis:
```{r}
# Write df to ../data/WA_df.tsv as a tab-separated file.
write_tsv(df, "../data/WA_df.tsv")
```
```
```{r}
# Write the cleaned copy to ../data/WA_df_cleaned.tsv as a tab-separated file.
write_tsv(clean, "../data/WA_df_cleaned.tsv")
```
Loading

0 comments on commit b66f4b3

Please sign in to comment.