-
Notifications
You must be signed in to change notification settings - Fork 0
/
ohio_explore.R
140 lines (113 loc) · 4.1 KB
/
ohio_explore.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
library(tidyverse)
library(ggplot2)
theme_set(theme_bw())
library(sf)
# Load inputs ----
# Cleaned layoff records; the steps below use its `city` and `num_affected`
# columns.
df <- read_csv('a3/ohio_clean.csv')

# City lookup table. Names are lower-cased (presumably to match the `city`
# values in the layoff data -- confirm) and the columns are then renamed BY
# POSITION, so the file must contain exactly three columns in this order.
city_lookup <- read_csv('a3/city_populations.csv')
city_lookup$asciiname <- tolower(city_lookup$asciiname)
colnames(city_lookup) <- c('city', 'pop2019', 'location')

df <- df %>% arrange(desc(num_affected))

# Total number of affected workers per city, largest first ----
df_city <- df %>%
  group_by(city) %>%
  summarise(num_affected = sum(num_affected, na.rm = TRUE)) %>%
  arrange(desc(num_affected))

# NOT RUN
# Total number of affected workers per county
#df_county <- df %>%
#  group_by(county) %>%
#  summarise(num_affected = sum(num_affected, na.rm = TRUE))

# Merge city layoff totals into the city lookup ----
# `city` is the only column the two tables share. all.x = TRUE keeps every
# city in the lookup; cities without layoff records get NA in num_affected.
# NOTE: from here on `df` is the city-level table, not the raw layoff records.
df <- merge(city_lookup, df_city, by = 'city', all.x = TRUE)

# Share of each city's population affected by layoffs
df$prop_affected <- df$num_affected / df$pop2019
df <- df %>% arrange(desc(prop_affected))
head(df %>% select(-location))
# Extract latitude / longitude from the `location` URL ----
# Each URL is split on "=" and ","; within each city the 2nd and 3rd pieces
# are kept and labelled lat and long, then spread back into one row per city.
# NOTE(review): this relies on city names being unique in the lookup and on
# the first "=" in the URL being directly followed by "lat,long" -- confirm
# against city_populations.csv.
df <- df %>%
  separate_rows(location, sep = '=|,') %>%
  group_by(city) %>%
  slice(2:3) %>%
  mutate(dim = c('lat', 'long')) %>%
  pivot_wider(names_from = 'dim', values_from = location) %>%
  # Drop the per-city grouping so it does not leak into later steps.
  ungroup() %>%
  mutate(
    long = as.double(long),
    lat = as.double(lat),
    # 1 when a layoff proportion is available for the city, 0 when it is NA
    layoff = ifelse(is.na(prop_affected), 0, 1)
  )
# ONE-TIME SETUP (kept commented out)
# Downloads the US county shapefile from census.gov and unpacks it into the
# working directory so that st_read() below can find it.
#f <- tempfile()
#download.file("http://www2.census.gov/geo/tiger/GENZ2010/gz_2010_us_050_00_20m.zip", destfile = f)
#unzip(f, exdir = ".")

# Read the nationwide county layer, then keep only the Ohio features
# (rows whose STATE field equals '39').
US <- st_read("gz_2010_us_050_00_20m.shp")
in_ohio <- US$STATE == '39'
ohio <- US[in_ohio, ]
#################
# Create sf dataset of all Ohio cities
#################
# Point geometry per city built from the long/lat columns (EPSG:4326)
sf_full <- st_as_sf(df, coords = c('long', 'lat'),
                    crs = 4326, agr = "constant")

# Save the city points as a shapefile for other analyses.
# The directory is spelled 'a3' like every other path in this script; the
# previous 'A3' only resolved on case-insensitive file systems.
# NOTE: the shapefile format limits field names to 10 characters, so longer
# column names (e.g. prop_affected) will be abbreviated in the output.
write_sf(sf_full, 'a3/cities_geo_layoffs.shp')

#################
# Create data for the two kinds of dots in the plots below
#################
# Split cities by the 0/1 `layoff` flag
ohio_yes <- df[df$layoff == 1, ]
ohio_no <- df[df$layoff == 0, ]

# Convert each subset to sf points (same coordinate columns and CRS as above)
ohio_yes_sf <- st_as_sf(ohio_yes, coords = c('long', 'lat'),
                        crs = 4326, agr = "constant")
ohio_no_sf <- st_as_sf(ohio_no, coords = c('long', 'lat'),
                       crs = 4326, agr = "constant")
####################
# First plot: Ohio counties with city points ----
# Faint black dots: cities flagged layoff == 0.
# Maroon dots: cities flagged layoff == 1, sized by num_affected.
plot1 <- ggplot(data = ohio) +
  geom_sf() +
  geom_sf(data = ohio_no_sf, shape = 20, color = 'black', alpha = 0.25) +
  geom_sf(data = ohio_yes_sf, shape = 20, color = 'maroon',
          aes(size = num_affected), alpha = 0.75, show.legend = 'point')

# The plot is built before the device is opened and then printed explicitly:
# ggplot objects are only auto-printed at top level, so without print() the
# TIFF comes out blank when this script is run through source().
tiff('a3/plot1.tiff', units = 'in', width = 5, height = 5, res = 1080)
print(plot1)
dev.off()
####
## Combine w/ population data
####
# NHGIS column codes used below:
#AJWBE001 = Total population
#AJWBE002 = Male
#AJWBE003-025 = Male age breakdown
#AJWBE026 = Female
#AJWBE027-049 = Female age breakdown

# Read county-level population data and keep the Ohio rows
df_pop <- read_csv("a3/nhgis_county.csv")
df_pop <- df_pop %>% filter(STATE == 'Ohio')

# Young population = first six male + first six female age brackets.
# NOTE(review): the original comment called this "sub-21" while the variable
# names and the plot legend say "under 20" -- confirm the upper age bound of
# AJWBE008 / AJWBE032 against the NHGIS codebook.
young_cols <- c('AJWBE003', 'AJWBE004', 'AJWBE005',
                'AJWBE006', 'AJWBE007', 'AJWBE008',
                'AJWBE027', 'AJWBE028', 'AJWBE029',
                'AJWBE030', 'AJWBE031', 'AJWBE032')
df_pop$sub20 <- df_pop %>%
  select(all_of(young_cols)) %>%
  rowSums()
df_pop$sub20_prop <- df_pop$sub20 / df_pop$AJWBE001

# Two-column lookup keyed like the shapefile's COUNTY field
sub20_prop <- df_pop %>%
  transmute(COUNTY = as.factor(COUNTYA), sub20prop = sub20_prop)

# Merge into the sf object. COUNTY is the only shared column; all = TRUE keeps
# unmatched rows from either side.
ohio <- merge(ohio, sub20_prop, by = 'COUNTY', all = TRUE)

# Bin the proportion into three equal-width intervals (low / medium / high).
# Labels are passed straight to cut() so all three levels always exist, even
# if one bin happens to be empty.
# NOTE(review): the label text is hard-coded and is not derived from the
# actual break points computed by cut() -- verify it matches the data.
ohio$age_split <- cut(ohio$sub20prop, 3,
                      labels = c('< 26%', '26-30%', '> 31%'))
# Second plot: counties shaded by age_split, with the same city points ----
# NOTE(review): the size legend reads '% directly affected' but size is mapped
# to num_affected, not prop_affected -- confirm which one is intended.
plot2 <- ggplot(data = ohio) +
  geom_sf(aes(fill = age_split)) +
  scale_fill_brewer() +
  geom_sf(data = ohio_no_sf, shape = 20, color = 'black', alpha = 0.25) +
  geom_sf(data = ohio_yes_sf, shape = 20, color = 'maroon',
          aes(size = num_affected), alpha = 0.75, show.legend = 'point') +
  labs(title = 'Mass Layoffs in Ohio, 2015 - 2019',
       size = '% directly affected',
       fill = '% under 20')

# The plot is built before the device is opened and then printed explicitly:
# ggplot objects are only auto-printed at top level, so without print() the
# TIFF comes out blank when this script is run through source().
tiff('a3/plot2.tiff', units = 'in', width = 5, height = 5, res = 1080)
print(plot2)
dev.off()