In [1]:
# Packages used by this notebook: readr for CSV import, ggplot2 for the figures
library(readr)
library(ggplot2)

# Location of the input CSV, relative to the notebook's working directory
file_path <- "../data/us_occ_by_month.csv"

# Import the CSV into a tibble
data <- read_csv(file = file_path)

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1mRows: [22m[34m6048[39m [1mColumns: [22m[34m9[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (3): Month, SOC 2018 3-Digit Minor Group, SOC 2018 3-Digit Minor Group (...
[32mdbl[39m (6): ...1, Year, Year-Month, Percent, Percent_3ma, N

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [2]:
library(dplyr)

# Collapse the input rows to one row per year and occupation (SOC minor group).
#   WFH_jobs         : N * Percent / 100 for each input row
#                      (presumably the number of remote/hybrid ads -- confirm
#                      the meaning of N and Percent against the data source)
#   Total_WFH_jobs   : sum of WFH_jobs within the year/occupation
#   Total_jobs       : sum of N within the year/occupation
#   Percent_WFH_jobs : N-weighted share, Total_WFH_jobs / Total_jobs * 100
# `.groups = "drop"` returns an ungrouped tibble so that later verbs
# (arrange, mutate, filter) operate on the whole table rather than per group.
data_by_year_occ <- data %>%
  mutate(WFH_jobs = N * Percent / 100) %>%
  group_by(
    `Year`,
    `SOC 2018 3-Digit Minor Group`,
    `SOC 2018 3-Digit Minor Group (Name)`
  ) %>%
  summarise(
    Total_WFH_jobs = sum(WFH_jobs),
    Total_jobs = sum(N),
    Percent_WFH_jobs = Total_WFH_jobs / Total_jobs * 100,
    .groups = "drop"
  )


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


[1m[22m`summarise()` has grouped output by 'Year', 'SOC 2018 3-Digit Minor Group'. You
can override using the `.groups` argument.


In [3]:
# Rank occupations by their 2019 Percent_WFH_jobs (rank 1 = highest share).
# ungroup() comes first so that row_number() counts across the whole table
# even if data_by_year_occ still carries grouping from summarise().
data_by_year_SOC_2019 <- data_by_year_occ %>%
  ungroup() %>%
  filter(Year == 2019) %>%
  arrange(desc(Percent_WFH_jobs)) %>%
  mutate(rank = row_number()) %>%
  # keep only the SOC code and rank columns
  select(`SOC 2018 3-Digit Minor Group`, rank)

# Attach the 2019 rank to every year of the aggregated data. This is an inner
# join (same as merge()'s default): occupations with no 2019 row are dropped.
data_by_year_occ <- data_by_year_occ %>%
  ungroup() %>%
  inner_join(data_by_year_SOC_2019, by = "SOC 2018 3-Digit Minor Group")

In [4]:
# Restrict the table to the three years compared in the figures
data_by_year_occ <- filter(data_by_year_occ, Year %in% c(2019, 2023, 2024))

In [5]:
# Figure 4: horizontal dodged bars of Percent_WFH_jobs, one bar per year, for
# every occupation. reorder(..., -1 * rank) orders the occupation factor by
# descending rank, so the occupation ranked 1 in 2019 is drawn at the top.
p <- ggplot(data_by_year_occ, aes(
  x = Percent_WFH_jobs,
  y = reorder(`SOC 2018 3-Digit Minor Group (Name)`, -1 * rank),
  fill = factor(Year)
)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity")
  geom_col(position = position_dodge(width = 0.8)) +
  labs(
    title = "Percent of job ads offering remote/hybrid vary widely by Occupation - Sorted by 2019 Shares",
    x = "Percent",
    y = NULL,
    fill = "Year"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("2024" = "#F8766D", "2023" = "#00BFC4", "2019" = "#7CAE00")) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text.y = element_text(size = 10),
    legend.position = "bottom"
  )
ggsave("../output_blogpost_1/figure4.pdf", plot = p, width = 16, height = 32, dpi = 300)

In [6]:
# Two companion figures: the five occupations with the best (lowest) rank and
# the five with the worst (highest) rank, where rank is the `rank` column
# already present in data_by_year_occ.

# Build the dodged horizontal bar chart shared by both sub-figures.
#   plot_data  : rows of data_by_year_occ to draw
#   plot_title : title string for the figure
# Returns the ggplot object.
plot_remote_share <- function(plot_data, plot_title) {
  ggplot(plot_data, aes(
    x = Percent_WFH_jobs,
    # negative rank so that the best-ranked occupation is drawn at the top
    y = reorder(`SOC 2018 3-Digit Minor Group (Name)`, -1 * rank),
    fill = factor(Year)
  )) +
    geom_col(position = position_dodge(width = 0.8)) +
    labs(
      title = plot_title,
      x = "Percent",
      y = NULL,
      fill = "Year"
    ) +
    theme_minimal() +
    scale_fill_manual(values = c("2024" = "#F8766D", "2023" = "#00BFC4", "2019" = "#7CAE00")) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold"),
      axis.text.y = element_text(size = 10),
      legend.position = "bottom"
    )
}

# Number of occupations shown at each end of the ranking
n_extreme <- 5
# Worst rank actually present; computed outside filter() so that any grouping
# on the data frame cannot turn it into a per-group maximum.
worst_rank <- max(data_by_year_occ$rank)

# first include only the top 5 occupations
p <- plot_remote_share(
  filter(data_by_year_occ, rank <= n_extreme),
  "Percent of job ads offering remote/hybrid vary widely by Occupation - Top 5 - Sorted by 2019 Shares"
)
ggsave("../output_blogpost_1/figure4_top5.pdf", plot = p, width = 16, height = 10, dpi = 300)

# then the bottom 5 occupations (derived from the data instead of a fixed 92)
p <- plot_remote_share(
  filter(data_by_year_occ, rank > worst_rank - n_extreme),
  "Percent of job ads offering remote/hybrid vary widely by Occupation - Bottom 5 - Sorted by 2019 Shares"
)
ggsave("../output_blogpost_1/figure4_bottom5.pdf", plot = p, width = 16, height = 10, dpi = 300)