-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_cleaning.Rmd
131 lines (97 loc) · 3.33 KB
/
data_cleaning.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
---
title: "data_cleaning"
author: "Ethan"
date: "4/20/2021"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE
)
```
```{r libraries_raw, message=FALSE, warning=FALSE}
library(readxl)
library(sf)
library(janitor)
library(readr)
library(tidyverse)
df_raw <- read_excel("data/2021_01_06_Climate Vulnerability Composite Score_MASTER FILE.xlsx",
sheet = "Composite (exp+vul) score",
col_types = c("text", "numeric", "numeric",
"numeric", "skip", "numeric", "numeric",
"skip", "skip", "skip", "skip", "skip",
"skip"))
exposure_raw <- read_excel("data/2021_01_06_Climate Vulnerability Composite Score_MASTER FILE.xlsx",
sheet = "Composite (exposure only)",
col_types = c("text", "skip", "numeric",
"numeric", "skip", "skip", "numeric",
"skip", "skip", "numeric", "skip",
"skip", "skip"))
vul_raw <- read_excel("data/2021_01_06_Climate Vulnerability Composite Score_MASTER FILE.xlsx",
sheet = "Social Vulnerability", col_types = c("text",
"skip", "numeric", "skip", "skip",
"skip"))
bg_raw <- st_read("data/shape_files/tl_2019_48_bg.shp", stringsAsFactors = FALSE)
bg_raw <-st_transform(bg_raw,"+proj=longlat +ellps=WGS84 +datum=WGS84")
ej_raw <- read_csv("data/EJSCREEN_2020_USPR.csv")
ej_columns_raw <- read_excel("data/2020_EJSCREEEN_columns-explained.xlsx",
col_types = c("skip", "text", "text"))
```
```{r}
#Minor Cleaning
df <- df_raw[1:640, ]
df$`SVI+Flood`[is.na(df$`SVI+Flood`)] <- 0
ej_columns <- ej_columns_raw[-2, ]
new_columns <- ej_columns$Description
ej <- ej_raw
colnames(ej) <- new_columns
flood <- flood_raw %>% select(GEOID_, `Weighted Flood Risk Score (Normalized)`)
wildfire <- wildfire_raw %>% select(`ID_Blk_Grp...1`,`WildFire Risk (0-1)`) %>% rename("GEOID_" = `ID_Blk_Grp...1`)
#merging
df <-
df %>%
left_join(bg_raw, by = c("GEOID_" = "GEOID")) %>%
left_join(ej, by = c("GEOID_" = "Census FIPS code for block group")) %>%
left_join(exposure_raw, by = "GEOID_") %>%
left_join(vul_raw, by = c("GEOID_" = "ID_Blk_Grp")) %>%
rename(
"Composite Climate Hazard Exposure" = "Norm_Composite (Norm)",
"Social Vulnerability Score" = "SVI Score (0-1)",
"Climate Exposure and Climate Vulnerability" = "Norm_COMPOSITE (v1)"
) %>%
select(
GEOID_,
"Flood Exposure (norm)",
"Wildfire Exposure (Norm)",
"Heat Exposure (Norm",
"Composite Climate Hazard Exposure",
"Social Vulnerability Score",
"Climate Exposure and Climate Vulnerability",
"Percentile for Ozone level in air" ,
"Percentile for PM2.5 level in air",
"Total population",
"% people of color",
"% low-income",
geometry
)
geometry <- select(df,
GEOID_,
geometry,
"Total population",
"% people of color",
"% low-income",
)
df <-
df %>%
mutate(across(everything(), as.character)) %>%
pivot_longer(cols = 2:9,
names_to = "var",
values_to = "value") %>%
select(GEOID_, var, value) %>%
left_join(geometry)
df$value <- as.numeric(df$value)
df$value[is.na(df$value)] <- 0
saveRDS(df, "data/austin_composite.rds")
```