In [1]:
%load_ext rpy2.ipython
%load_ext autoreload
%autoreload 2

%matplotlib inline  
from matplotlib import rcParams
rcParams['figure.figsize'] = (16, 100)

import warnings
from rpy2.rinterface import RRuntimeWarning
warnings.filterwarnings("ignore") # Ignore all warnings
# warnings.filterwarnings("ignore", category=RRuntimeWarning) # Show some warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [3]:
%%javascript
// Disable auto-scrolling
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [4]:
%%R

# My commonly used R imports

require('tidyverse')

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


Loading required package: tidyverse


In [5]:
import dotenv

# Load the environment variables
# (loads CENSUS_API_KEY from .env)
dotenv.load_dotenv()

True

In [12]:
%%R

library(tidycensus)
library(tidyverse)

# Skip 2020 since regular 1-year ACS data is not available for that year
years <- c(2015:2019, 2021:2023)                  
vars  <- c(
  "working_mom_under6yos" = "B23003_003",
  "non_working_mom_under6yos" = "B23003_009",
  "working_mom_6yosto17yos" = "B23003_017",
  "non_working_mom_6yosto17yos" = "B23003_023",
  "working_women" = "B23003_025",
  "non_working_women" = "B23003_029"
)  

# Get variable from ACS
multi_acs <- map_dfr(                    # one row-bind per year
  years,
  ~get_acs(
    geography = "us",
    variables = vars,
    year = .x,
    survey = "acs1",
    geometry = FALSE
  ),
  .id = "year"
)

# Display the resulting data
head(multi_acs)

# A tibble: 6 × 6
  year  GEOID NAME          variable                    estimate   moe
  <chr> <chr> <chr>         <chr>                          <dbl> <dbl>
1 1     1     United States working_mom_under6yos        7630279 42339
2 1     1     United States non_working_mom_under6yos    2240222 26373
3 1     1     United States working_mom_6yosto17yos     19150433 51164
4 1     1     United States non_working_mom_6yosto17yos  4461443 39847
5 1     1     United States working_women               44442667 82063
6 1     1     United States non_working_women            2451225 26495


Getting data from the 2015 1-year ACS
The 1-year ACS provides data for geographies with populations of 65,000 and greater.
Getting data from the 2016 1-year ACS
The 1-year ACS provides data for geographies with populations of 65,000 and greater.
Getting data from the 2017 1-year ACS
The 1-year ACS provides data for geographies with populations of 65,000 and greater.
Getting data from the 2018 1-year ACS
The 1-year ACS provides data for geographies with populations of 65,000 and greater.
Getting data from the 2019 1-year ACS
The 1-year ACS provides data for geographies with populations of 65,000 and greater.
Getting data from the 2021 1-year ACS
The 1-year ACS provides data for geographies with populations of 65,000 and greater.
Getting data from the 2022 1-year ACS
The 1-year ACS provides data for geographies with populations of 65,000 and greater.
Getting data from the 2023 1-year ACS
The 1-year ACS provides data for geographies with populations of 65,000 and greater.


In [13]:
%%R 

multi_acs <- multi_acs %>% 
  # pivot from wide to long
  pivot_wider(
    names_from=variable, 
    values_from = c(estimate, moe),
    names_glue = "{variable}_{.value}"
  )

multi_acs

# A tibble: 8 × 15
  year  GEOID NAME          working_mom_under6yos_estim…¹ non_working_mom_unde…²
  <chr> <chr> <chr>                                 <dbl>                  <dbl>
1 1     1     United States                       7630279                2240222
2 2     1     United States                       7570447                2179535
3 3     1     United States                       7510145                2146461
4 4     1     United States                       7471116                2090499
5 5     1     United States                       7307933                1980176
6 6     1     United States                       7173770                1960309
7 7     1     United States                       7129347                1893955
8 8     1     United States                       7164382                1815925
# ℹ abbreviated names: ¹​working_mom_under6yos_estimate,
#   ²​non_working_mom_under6yos_estimate
# ℹ 10 more variables: working_mom_6yosto17yos_estimate <dbl>,
#   non_wo

In [14]:
%%R 

colnames(multi_acs)

 [1] "year"                                
 [2] "GEOID"                               
 [3] "NAME"                                
 [4] "working_mom_under6yos_estimate"      
 [5] "non_working_mom_under6yos_estimate"  
 [6] "working_mom_6yosto17yos_estimate"    
 [7] "non_working_mom_6yosto17yos_estimate"
 [8] "working_women_estimate"              
 [9] "non_working_women_estimate"          
[10] "working_mom_under6yos_moe"           
[11] "non_working_mom_under6yos_moe"       
[12] "working_mom_6yosto17yos_moe"         
[13] "non_working_mom_6yosto17yos_moe"     
[14] "working_women_moe"                   
[15] "non_working_women_moe"               


In [16]:
%%R
multi_acs %>%
   mutate(
        mom_estimate = working_mom_under6yos_estimate + working_mom_6yosto17yos_estimate - non_working_mom_under6yos_estimate - non_working_mom_6yosto17yos_estimate,
        working_women_total_estimate = working_women_estimate - non_working_women_estimate
      ) %>% 
    head()

# A tibble: 6 × 17
  year  GEOID NAME          working_mom_under6yos_estim…¹ non_working_mom_unde…²
  <chr> <chr> <chr>                                 <dbl>                  <dbl>
1 1     1     United States                       7630279                2240222
2 2     1     United States                       7570447                2179535
3 3     1     United States                       7510145                2146461
4 4     1     United States                       7471116                2090499
5 5     1     United States                       7307933                1980176
6 6     1     United States                       7173770                1960309
# ℹ abbreviated names: ¹​working_mom_under6yos_estimate,
#   ²​non_working_mom_under6yos_estimate
# ℹ 12 more variables: working_mom_6yosto17yos_estimate <dbl>,
#   non_working_mom_6yosto17yos_estimate <dbl>, working_women_estimate <dbl>,
#   non_working_women_estimate <dbl>, working_mom_under6yos_moe <dbl>,
#   non_working_mom_und

In [17]:
%%R
write.csv(multi_acs, "census_data.csv", row.names = FALSE)