# American Community Survey (ACS) 1-Year Data

See https://www.census.gov/data/developers/data-sets/acs-1year.html.

In [1]:
# So we can run from within the censusdis project and find the packages we need.
import os
import sys

# Make the directory two levels above the current working directory importable,
# so the notebook can run from within the censusdis project and find its packages.
# The membership check keeps re-runs of this cell from appending duplicate entries.
_search_dir = os.path.abspath(os.path.join(os.path.curdir, os.path.pardir, os.path.pardir))
if _search_dir not in sys.path:
    sys.path.append(_search_dir)

In [2]:
import censusdis.data as ced

## ACS 1-Year Detailed Tables

In [3]:
# ACS 1-Year detailed tables, 2019. DATASET and YEAR are passed to every
# ced call in the cells of this section.
DATASET = 'acs/acs1'
YEAR = 2019

### What groups are in the dataset?

In [4]:
# Fetch the table of variable groups for this dataset and year, then
# preview its first rows.
groups = ced.variables.all_groups(DATASET, YEAR)
groups.head()

Unnamed: 0,DATASET,YEAR,GROUP,DESCRIPTION
0,acs/acs1,2019,B01001,SEX BY AGE
1,acs/acs1,2019,B01001A,SEX BY AGE (WHITE ALONE)
2,acs/acs1,2019,B01001B,SEX BY AGE (BLACK OR AFRICAN AMERICAN ALONE)
3,acs/acs1,2019,B01001C,SEX BY AGE (AMERICAN INDIAN AND ALASKA NATIVE ...
4,acs/acs1,2019,B01001D,SEX BY AGE (ASIAN ALONE)


### What variables are in the first group (as a tree)?

In [5]:
# Pick the code of the first listed group (column first, then position).
group = groups['GROUP'].iloc[0]

# Render that group's variables as a tree.
ced.variables.group_tree(DATASET, YEAR, group)

+ Estimate
    + Total: (B01001_001E)
        + Male: (B01001_002E)
            + Under 5 years (B01001_003E)
            + 5 to 9 years (B01001_004E)
            + 10 to 14 years (B01001_005E)
            + 15 to 17 years (B01001_006E)
            + 18 and 19 years (B01001_007E)
            + 20 years (B01001_008E)
            + 21 years (B01001_009E)
            + 22 to 24 years (B01001_010E)
            + 25 to 29 years (B01001_011E)
            + 30 to 34 years (B01001_012E)
            + 35 to 39 years (B01001_013E)
            + 40 to 44 years (B01001_014E)
            + 45 to 49 years (B01001_015E)
            + 50 to 54 years (B01001_016E)
            + 55 to 59 years (B01001_017E)
            + 60 and 61 years (B01001_018E)
            + 62 to 64 years (B01001_019E)
            + 65 and 66 years (B01001_020E)
            + 67 to 69 years (B01001_021E)
            + 70 to 74 years (B01001_022E)
            + 75 to 79 years (B01001_023E)
            + 80 to 84 years (B01001_024E

### What variables in the first group are leaves of the tree?

In [6]:
# Names of the variables that are leaves of the group's tree; `leaves` is
# reused by the download cell further down.
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

# str() so the cell displays the whole collection as a single string.
str(leaves)

"['B01001_003E', 'B01001_004E', 'B01001_005E', 'B01001_006E', 'B01001_007E', 'B01001_008E', 'B01001_009E', 'B01001_010E', 'B01001_011E', 'B01001_012E', 'B01001_013E', 'B01001_014E', 'B01001_015E', 'B01001_016E', 'B01001_017E', 'B01001_018E', 'B01001_019E', 'B01001_020E', 'B01001_021E', 'B01001_022E', 'B01001_023E', 'B01001_024E', 'B01001_025E', 'B01001_027E', 'B01001_028E', 'B01001_029E', 'B01001_030E', 'B01001_031E', 'B01001_032E', 'B01001_033E', 'B01001_034E', 'B01001_035E', 'B01001_036E', 'B01001_037E', 'B01001_038E', 'B01001_039E', 'B01001_040E', 'B01001_041E', 'B01001_042E', 'B01001_043E', 'B01001_044E', 'B01001_045E', 'B01001_046E', 'B01001_047E', 'B01001_048E', 'B01001_049E']"

### All variables in the first group as a list

In [7]:
# Every variable name in the group, including non-leaf ones (e.g. the
# totals) that the leaves list omits.
variables = ced.variables.group_variables(DATASET, YEAR, group)

# str() so the cell displays the whole collection as a single string.
str(variables)

"['B01001_001E', 'B01001_002E', 'B01001_003E', 'B01001_004E', 'B01001_005E', 'B01001_006E', 'B01001_007E', 'B01001_008E', 'B01001_009E', 'B01001_010E', 'B01001_011E', 'B01001_012E', 'B01001_013E', 'B01001_014E', 'B01001_015E', 'B01001_016E', 'B01001_017E', 'B01001_018E', 'B01001_019E', 'B01001_020E', 'B01001_021E', 'B01001_022E', 'B01001_023E', 'B01001_024E', 'B01001_025E', 'B01001_026E', 'B01001_027E', 'B01001_028E', 'B01001_029E', 'B01001_030E', 'B01001_031E', 'B01001_032E', 'B01001_033E', 'B01001_034E', 'B01001_035E', 'B01001_036E', 'B01001_037E', 'B01001_038E', 'B01001_039E', 'B01001_040E', 'B01001_041E', 'B01001_042E', 'B01001_043E', 'B01001_044E', 'B01001_045E', 'B01001_046E', 'B01001_047E', 'B01001_048E', 'B01001_049E']"

### Download data for the leaf variables

In [8]:
# Download the leaf variables at the state level. state="*" presumably acts
# as a wildcard for all states (the preview shows several state codes).
df_data = ced.download(DATASET, YEAR, leaves, state="*")

In [9]:
# Preview the first rows of the downloaded data.
df_data.head()

Unnamed: 0,STATE,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,...,B01001_040E,B01001_041E,B01001_042E,B01001_043E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E
0,28,90762,94244,107137,59725,51201,21585,22531,59106,92409,...,92180,101101,37698,62880,38070,49638,68666,50082,31602,37401
1,29,186251,197748,199258,119063,84563,40997,46564,112148,210654,...,186708,214845,86487,131560,75825,102759,147887,113042,75195,76437
2,30,31120,33055,32944,19808,17109,6163,8438,21087,34834,...,29694,38366,17196,22390,16299,19376,28502,19914,11954,12680
3,31,67748,67190,70679,39221,29070,14803,13956,39678,65050,...,53348,61933,24625,36714,21118,30804,42084,27856,23085,26925
4,32,94610,92778,107967,57452,36663,18793,21244,53656,114131,...,95761,99936,37956,55820,38396,47736,74506,49882,28638,24837


## ACS 1-Year Subject Tables

In [10]:
# Rebind DATASET/YEAR to the ACS 1-Year subject tables for 2019; the cells
# in this section reuse the same variable names as the previous section.
DATASET = 'acs/acs1/subject'
YEAR = 2019

### What groups are in the dataset?

In [11]:
# Fetch the table of variable groups for the subject-table dataset and
# preview its first rows.
groups = ced.variables.all_groups(DATASET, YEAR)
groups.head()

Unnamed: 0,DATASET,YEAR,GROUP,DESCRIPTION
0,acs/acs1/subject,2019,S0101,AGE AND SEX
1,acs/acs1/subject,2019,S0102,POPULATION 60 YEARS AND OVER IN THE UNITED STATES
2,acs/acs1/subject,2019,S0102PR,POPULATION 60 YEARS AND OVER IN PUERTO RICO
3,acs/acs1/subject,2019,S0103,POPULATION 65 YEARS AND OVER IN THE UNITED STATES
4,acs/acs1/subject,2019,S0103PR,POPULATION 65 YEARS AND OVER IN PUERTO RICO


### What variables are in the first group (as a tree)?

In [12]:
# Pick the code of the first listed group (column first, then position).
group = groups['GROUP'].iloc[0]

# Render that group's variables as a tree.
ced.variables.group_tree(DATASET, YEAR, group)

+ Estimate
    + Total
        + Total population (S0101_C01_001E)
            + AGE
                + Under 5 years (S0101_C01_002E)
                + 5 to 9 years (S0101_C01_003E)
                + 10 to 14 years (S0101_C01_004E)
                + 15 to 19 years (S0101_C01_005E)
                + 20 to 24 years (S0101_C01_006E)
                + 25 to 29 years (S0101_C01_007E)
                + 30 to 34 years (S0101_C01_008E)
                + 35 to 39 years (S0101_C01_009E)
                + 40 to 44 years (S0101_C01_010E)
                + 45 to 49 years (S0101_C01_011E)
                + 50 to 54 years (S0101_C01_012E)
                + 55 to 59 years (S0101_C01_013E)
                + 60 to 64 years (S0101_C01_014E)
                + 65 to 69 years (S0101_C01_015E)
                + 70 to 74 years (S0101_C01_016E)
                + 75 to 79 years (S0101_C01_017E)
                + 80 to 84 years (S0101_C01_018E)
                + 85 years and over (S0101_C01_019E)
            + S

### What variables in the first group are leaves of the tree?

In [13]:
# Names of the variables that are leaves of the group's tree; `leaves` is
# reused by the download cell further down.
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

# str() so the cell displays the whole collection as a single string.
str(leaves)

"['S0101_C01_002E', 'S0101_C01_003E', 'S0101_C01_004E', 'S0101_C01_005E', 'S0101_C01_006E', 'S0101_C01_007E', 'S0101_C01_008E', 'S0101_C01_009E', 'S0101_C01_010E', 'S0101_C01_011E', 'S0101_C01_012E', 'S0101_C01_013E', 'S0101_C01_014E', 'S0101_C01_015E', 'S0101_C01_016E', 'S0101_C01_017E', 'S0101_C01_018E', 'S0101_C01_019E', 'S0101_C01_020E', 'S0101_C01_021E', 'S0101_C01_022E', 'S0101_C01_023E', 'S0101_C01_024E', 'S0101_C01_025E', 'S0101_C01_026E', 'S0101_C01_027E', 'S0101_C01_028E', 'S0101_C01_029E', 'S0101_C01_030E', 'S0101_C01_031E', 'S0101_C01_032E', 'S0101_C01_033E', 'S0101_C01_034E', 'S0101_C01_035E', 'S0101_C01_036E', 'S0101_C01_037E', 'S0101_C01_038E', 'S0101_C02_002E', 'S0101_C02_003E', 'S0101_C02_004E', 'S0101_C02_005E', 'S0101_C02_006E', 'S0101_C02_007E', 'S0101_C02_008E', 'S0101_C02_009E', 'S0101_C02_010E', 'S0101_C02_011E', 'S0101_C02_012E', 'S0101_C02_013E', 'S0101_C02_014E', 'S0101_C02_015E', 'S0101_C02_016E', 'S0101_C02_017E', 'S0101_C02_018E', 'S0101_C02_019E', 'S0101_C

### All variables in the first group as a list

In [14]:
# Every variable name in the group, including non-leaf ones that the
# leaves list omits.
variables = ced.variables.group_variables(DATASET, YEAR, group)

# str() so the cell displays the whole collection as a single string.
str(variables)

"['S0101_C01_001E', 'S0101_C01_002E', 'S0101_C01_003E', 'S0101_C01_004E', 'S0101_C01_005E', 'S0101_C01_006E', 'S0101_C01_007E', 'S0101_C01_008E', 'S0101_C01_009E', 'S0101_C01_010E', 'S0101_C01_011E', 'S0101_C01_012E', 'S0101_C01_013E', 'S0101_C01_014E', 'S0101_C01_015E', 'S0101_C01_016E', 'S0101_C01_017E', 'S0101_C01_018E', 'S0101_C01_019E', 'S0101_C01_020E', 'S0101_C01_021E', 'S0101_C01_022E', 'S0101_C01_023E', 'S0101_C01_024E', 'S0101_C01_025E', 'S0101_C01_026E', 'S0101_C01_027E', 'S0101_C01_028E', 'S0101_C01_029E', 'S0101_C01_030E', 'S0101_C01_031E', 'S0101_C01_032E', 'S0101_C01_033E', 'S0101_C01_034E', 'S0101_C01_035E', 'S0101_C01_036E', 'S0101_C01_037E', 'S0101_C01_038E', 'S0101_C02_001E', 'S0101_C02_002E', 'S0101_C02_003E', 'S0101_C02_004E', 'S0101_C02_005E', 'S0101_C02_006E', 'S0101_C02_007E', 'S0101_C02_008E', 'S0101_C02_009E', 'S0101_C02_010E', 'S0101_C02_011E', 'S0101_C02_012E', 'S0101_C02_013E', 'S0101_C02_014E', 'S0101_C02_015E', 'S0101_C02_016E', 'S0101_C02_017E', 'S0101_C

### Download data for the leaf variables

In [15]:
# Download the leaf variables at the state level. state="*" presumably acts
# as a wildcard for all states (the preview shows several state codes).
df_data = ced.download(DATASET, YEAR, leaves, state="*")

In [16]:
# Preview the first rows of the downloaded data.
# NOTE(review): the -888888888 values in some columns look like sentinel
# codes rather than real estimates — confirm before using them in analysis.
df_data.head()

Unnamed: 0,STATE,S0101_C01_002E,S0101_C01_003E,S0101_C01_004E,S0101_C01_005E,S0101_C01_006E,S0101_C01_007E,S0101_C01_008E,S0101_C01_009E,S0101_C01_010E,...,S0101_C06_029E,S0101_C06_030E,S0101_C06_031E,S0101_C06_032E,S0101_C06_033E,S0101_C06_034E,S0101_C06_035E,S0101_C06_036E,S0101_C06_037E,S0101_C06_038E
0,28,179497,190277,211264,214658,200789,190095,178132,190629,188981,...,22.0,17.9,7.7,-888888888,-888888888,-888888888,-888888888,-888888888,-888888888,-888888888
1,29,365451,380818,391770,398235,394992,421412,402310,402944,353765,...,23.1,18.9,8.5,-888888888,-888888888,-888888888,-888888888,-888888888,-888888888,-888888888
2,30,58850,61823,67783,70013,68359,68563,64556,68267,64205,...,24.7,20.5,8.4,-888888888,-888888888,-888888888,-888888888,-888888888,-888888888,-888888888
3,31,129551,131863,136832,131674,132718,130734,127197,131476,111576,...,21.6,17.8,8.0,-888888888,-888888888,-888888888,-888888888,-888888888,-888888888,-888888888
4,32,184168,185324,206681,184666,182851,224382,224673,217006,193315,...,20.8,17.2,6.7,-888888888,-888888888,-888888888,-888888888,-888888888,-888888888,-888888888


## ACS 1-Year Data Profiles

In [17]:
# Rebind DATASET/YEAR to the ACS 1-Year data profiles for 2019; the cells
# in this section reuse the same variable names as the previous sections.
DATASET = 'acs/acs1/profile'
YEAR = 2019

### What groups are in the dataset?

In [18]:
# Fetch the table of variable groups for the data-profile dataset and
# preview its first rows.
groups = ced.variables.all_groups(DATASET, YEAR)
groups.head()

Unnamed: 0,DATASET,YEAR,GROUP,DESCRIPTION
0,acs/acs1/profile,2019,DP02,SELECTED SOCIAL CHARACTERISTICS IN THE UNITED ...
1,acs/acs1/profile,2019,DP02PR,SELECTED SOCIAL CHARACTERISTICS IN PUERTO RICO
2,acs/acs1/profile,2019,DP03,SELECTED ECONOMIC CHARACTERISTICS
3,acs/acs1/profile,2019,DP04,SELECTED HOUSING CHARACTERISTICS
4,acs/acs1/profile,2019,DP05,ACS DEMOGRAPHIC AND HOUSING ESTIMATES


### What variables are in the first group (as a tree)?

In [19]:
# Pick the code of the first listed group (column first, then position).
group = groups['GROUP'].iloc[0]

# Render that group's variables as a tree.
ced.variables.group_tree(DATASET, YEAR, group)

+ Estimate
    + HOUSEHOLDS BY TYPE
        + Total households (DP02_0001E)
            + Married-couple family (DP02_0002E)
                + With own children of the householder under 18 years (DP02_0003E)
            + Cohabiting couple household (DP02_0004E)
                + With own children of the householder under 18 years (DP02_0005E)
            + Male householder, no spouse/partner present (DP02_0006E)
                + With own children of the householder under 18 years (DP02_0007E)
                + Householder living alone (DP02_0008E)
                    + 65 years and over (DP02_0009E)
            + Female householder, no spouse/partner present (DP02_0010E)
                + With own children of the householder under 18 years (DP02_0011E)
                + Householder living alone (DP02_0012E)
                    + 65 years and over (DP02_0013E)
            + Households with one or more people under 18 years (DP02_0014E)
            + Households with one or more people 

### What variables in the first group are leaves of the tree?

In [20]:
# Names of the variables that are leaves of the group's tree; `leaves` is
# reused by the download cell further down.
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

# str() so the cell displays the whole collection as a single string.
str(leaves)

"['DP02_0003E', 'DP02_0003PE', 'DP02_0003PM', 'DP02_0005E', 'DP02_0005PE', 'DP02_0005PM', 'DP02_0007E', 'DP02_0007PE', 'DP02_0007PM', 'DP02_0009E', 'DP02_0009PE', 'DP02_0009PM', 'DP02_0011E', 'DP02_0011PE', 'DP02_0011PM', 'DP02_0013E', 'DP02_0013PE', 'DP02_0013PM', 'DP02_0014E', 'DP02_0014PE', 'DP02_0014PM', 'DP02_0015E', 'DP02_0015PE', 'DP02_0015PM', 'DP02_0016E', 'DP02_0016PE', 'DP02_0016PM', 'DP02_0017E', 'DP02_0017PE', 'DP02_0017PM', 'DP02_0019E', 'DP02_0019PE', 'DP02_0019PM', 'DP02_0020E', 'DP02_0020PE', 'DP02_0020PM', 'DP02_0021E', 'DP02_0021PE', 'DP02_0021PM', 'DP02_0022E', 'DP02_0022PE', 'DP02_0022PM', 'DP02_0023E', 'DP02_0023PE', 'DP02_0023PM', 'DP02_0024E', 'DP02_0024PE', 'DP02_0024PM', 'DP02_0026E', 'DP02_0026PE', 'DP02_0026PM', 'DP02_0027E', 'DP02_0027PE', 'DP02_0027PM', 'DP02_0028E', 'DP02_0028PE', 'DP02_0028PM', 'DP02_0029E', 'DP02_0029PE', 'DP02_0029PM', 'DP02_0030E', 'DP02_0030PE', 'DP02_0030PM', 'DP02_0032E', 'DP02_0032PE', 'DP02_0032PM', 'DP02_0033E', 'DP02_0033PE', '

### All variables in the first group as a list

In [21]:
# Every variable name in the group, including non-leaf ones that the
# leaves list omits.
variables = ced.variables.group_variables(DATASET, YEAR, group)

# str() so the cell displays the whole collection as a single string.
str(variables)

"['DP02_0001E', 'DP02_0001PE', 'DP02_0001PM', 'DP02_0002E', 'DP02_0002PE', 'DP02_0002PM', 'DP02_0003E', 'DP02_0003PE', 'DP02_0003PM', 'DP02_0004E', 'DP02_0004PE', 'DP02_0004PM', 'DP02_0005E', 'DP02_0005PE', 'DP02_0005PM', 'DP02_0006E', 'DP02_0006PE', 'DP02_0006PM', 'DP02_0007E', 'DP02_0007PE', 'DP02_0007PM', 'DP02_0008E', 'DP02_0008PE', 'DP02_0008PM', 'DP02_0009E', 'DP02_0009PE', 'DP02_0009PM', 'DP02_0010E', 'DP02_0010PE', 'DP02_0010PM', 'DP02_0011E', 'DP02_0011PE', 'DP02_0011PM', 'DP02_0012E', 'DP02_0012PE', 'DP02_0012PM', 'DP02_0013E', 'DP02_0013PE', 'DP02_0013PM', 'DP02_0014E', 'DP02_0014PE', 'DP02_0014PM', 'DP02_0015E', 'DP02_0015PE', 'DP02_0015PM', 'DP02_0016E', 'DP02_0016PE', 'DP02_0016PM', 'DP02_0017E', 'DP02_0017PE', 'DP02_0017PM', 'DP02_0018E', 'DP02_0018PE', 'DP02_0018PM', 'DP02_0019E', 'DP02_0019PE', 'DP02_0019PM', 'DP02_0020E', 'DP02_0020PE', 'DP02_0020PM', 'DP02_0021E', 'DP02_0021PE', 'DP02_0021PM', 'DP02_0022E', 'DP02_0022PE', 'DP02_0022PM', 'DP02_0023E', 'DP02_0023PE', '

### Download data for the leaf variables

In [22]:
# Download the leaf variables at the state level. state="*" presumably acts
# as a wildcard for all states (the preview shows several state codes).
df_data = ced.download(DATASET, YEAR, leaves, state="*")

In [23]:
# Preview the first rows of the downloaded data.
df_data.head()

Unnamed: 0,STATE,DP02_0003E,DP02_0003PE,DP02_0003PM,DP02_0005E,DP02_0005PE,DP02_0005PM,DP02_0007E,DP02_0007PE,DP02_0007PM,...,DP02_0149PM,DP02_0150E,DP02_0150PE,DP02_0150PM,DP02_0152E,DP02_0152PE,DP02_0152PM,DP02_0153E,DP02_0153PE,DP02_0153PM
0,28,166838.0,15.2,0.6,17748.0,1.6,0.2,11523.0,1.0,0.2,...,0.1,2994.0,0.1,0.1,964097.0,87.6,0.6,845398.0,76.8,0.7
1,29,420287.0,17.1,0.3,55355.0,2.3,0.2,36356.0,1.5,0.1,...,0.1,13725.0,0.2,0.1,2255796.0,91.8,0.3,2084327.0,84.8,0.4
2,30,67092.0,15.3,0.7,9708.0,2.2,0.4,6392.0,1.5,0.3,...,0.2,1334.0,0.1,0.1,404364.0,92.4,0.6,372148.0,85.0,0.8
3,31,161340.0,20.9,0.5,16504.0,2.1,0.3,9110.0,1.2,0.2,...,0.1,1399.0,0.1,0.1,715015.0,92.7,0.3,670887.0,87.0,0.5
4,32,188766.0,16.5,0.6,28800.0,2.5,0.3,18566.0,1.6,0.3,...,0.1,9993.0,0.3,0.1,1077305.0,94.2,0.4,979228.0,85.6,0.6


## ACS 1-Year Comparison Profiles

In [24]:
# Rebind DATASET/YEAR to the ACS 1-Year comparison profiles for 2019; the
# cells in this section reuse the same variable names as the previous sections.
DATASET = 'acs/acs1/cprofile'
YEAR = 2019

### What groups are in the dataset?

In [25]:
# Fetch the table of variable groups for the comparison-profile dataset and
# preview its first rows.
groups = ced.variables.all_groups(DATASET, YEAR)
groups.head()

Unnamed: 0,DATASET,YEAR,GROUP,DESCRIPTION
0,acs/acs1/cprofile,2019,CP02,COMPARATIVE SOCIAL CHARACTERISTICS IN THE UNIT...
1,acs/acs1/cprofile,2019,CP02PR,COMPARATIVE SOCIAL CHARACTERISTICS IN PUERTO RICO
2,acs/acs1/cprofile,2019,CP03,COMPARATIVE ECONOMIC CHARACTERISTICS
3,acs/acs1/cprofile,2019,CP04,COMPARATIVE HOUSING CHARACTERISTICS
4,acs/acs1/cprofile,2019,CP05,COMPARATIVE DEMOGRAPHIC ESTIMATES


### What variables are in the first group (as a tree)?

In [26]:
# Pick the code of the first listed group (column first, then position).
group = groups['GROUP'].iloc[0]

# Render that group's variables as a tree.
ced.variables.group_tree(DATASET, YEAR, group)

+ 2015 Estimate
    + HOUSEHOLDS BY TYPE
        + Total households (CP02_2015_001E)
            + Married-couple family (CP02_2015_002E)
                + With own children of the householder under 18 years (CP02_2015_003E)
            + Cohabiting couple household (CP02_2015_004E)
                + With own children of the householder under 18 years (CP02_2015_005E)
            + Male householder, no spouse/partner present (CP02_2015_006E)
                + With own children of the householder under 18 years (CP02_2015_007E)
                + Householder living alone (CP02_2015_008E)
                    + 65 years and over (CP02_2015_009E)
            + Female householder, no spouse/partner present (CP02_2015_010E)
                + With own children of the householder under 18 years (CP02_2015_011E)
                + Householder living alone (CP02_2015_012E)
                    + 65 years and over (CP02_2015_013E)
            + Households with one or more people under 18 years (CP02

### What variables in the first group are leaves of the tree?

In [27]:
# Names of the variables that are leaves of the group's tree; `leaves` is
# reused by the download cell further down.
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

# str() so the cell displays the whole collection as a single string.
str(leaves)

"['CP02_2015_003E', 'CP02_2015_005E', 'CP02_2015_007E', 'CP02_2015_009E', 'CP02_2015_011E', 'CP02_2015_013E', 'CP02_2015_014E', 'CP02_2015_015E', 'CP02_2015_016E', 'CP02_2015_017E', 'CP02_2015_019E', 'CP02_2015_020E', 'CP02_2015_021E', 'CP02_2015_022E', 'CP02_2015_023E', 'CP02_2015_024E', 'CP02_2015_026E', 'CP02_2015_027E', 'CP02_2015_028E', 'CP02_2015_029E', 'CP02_2015_030E', 'CP02_2015_032E', 'CP02_2015_033E', 'CP02_2015_034E', 'CP02_2015_035E', 'CP02_2015_036E', 'CP02_2015_039E', 'CP02_2015_040E', 'CP02_2015_041E', 'CP02_2015_042E', 'CP02_2015_043E', 'CP02_2015_045E', 'CP02_2015_046E', 'CP02_2015_047E', 'CP02_2015_048E', 'CP02_2015_049E', 'CP02_2015_051E', 'CP02_2015_052E', 'CP02_2015_054E', 'CP02_2015_055E', 'CP02_2015_056E', 'CP02_2015_057E', 'CP02_2015_058E', 'CP02_2015_060E', 'CP02_2015_061E', 'CP02_2015_062E', 'CP02_2015_063E', 'CP02_2015_064E', 'CP02_2015_065E', 'CP02_2015_066E', 'CP02_2015_067E', 'CP02_2015_068E', 'CP02_2015_070E', 'CP02_2015_072E', 'CP02_2015_074E', 'CP02_20

### All variables in the first group as a list

In [28]:
# Every variable name in the group, including non-leaf ones that the
# leaves list omits.
variables = ced.variables.group_variables(DATASET, YEAR, group)

# str() so the cell displays the whole collection as a single string.
str(variables)

"['CP02_2015_001E', 'CP02_2015_002E', 'CP02_2015_003E', 'CP02_2015_004E', 'CP02_2015_005E', 'CP02_2015_006E', 'CP02_2015_007E', 'CP02_2015_008E', 'CP02_2015_009E', 'CP02_2015_010E', 'CP02_2015_011E', 'CP02_2015_012E', 'CP02_2015_013E', 'CP02_2015_014E', 'CP02_2015_015E', 'CP02_2015_016E', 'CP02_2015_017E', 'CP02_2015_018E', 'CP02_2015_019E', 'CP02_2015_020E', 'CP02_2015_021E', 'CP02_2015_022E', 'CP02_2015_023E', 'CP02_2015_024E', 'CP02_2015_025E', 'CP02_2015_026E', 'CP02_2015_027E', 'CP02_2015_028E', 'CP02_2015_029E', 'CP02_2015_030E', 'CP02_2015_031E', 'CP02_2015_032E', 'CP02_2015_033E', 'CP02_2015_034E', 'CP02_2015_035E', 'CP02_2015_036E', 'CP02_2015_037E', 'CP02_2015_038E', 'CP02_2015_039E', 'CP02_2015_040E', 'CP02_2015_041E', 'CP02_2015_042E', 'CP02_2015_043E', 'CP02_2015_044E', 'CP02_2015_045E', 'CP02_2015_046E', 'CP02_2015_047E', 'CP02_2015_048E', 'CP02_2015_049E', 'CP02_2015_050E', 'CP02_2015_051E', 'CP02_2015_052E', 'CP02_2015_053E', 'CP02_2015_054E', 'CP02_2015_055E', 'CP02_20

### Download data for the leaf variables

In [29]:
# Download the leaf variables at the state level. state="*" presumably acts
# as a wildcard for all states (the preview shows several state codes).
df_data = ced.download(DATASET, YEAR, leaves, state="*")

In [30]:
# Preview the first rows of the downloaded data.
df_data.head()

Unnamed: 0,STATE,CP02_2015_003E,CP02_2015_005E,CP02_2015_007E,CP02_2015_009E,CP02_2015_011E,CP02_2015_013E,CP02_2015_014E,CP02_2015_015E,CP02_2015_016E,...,CP02_2019TO2018_143SS,CP02_2019TO2018_144SS,CP02_2019TO2018_145SS,CP02_2019TO2018_146SS,CP02_2019TO2018_147SS,CP02_2019TO2018_148SS,CP02_2019TO2018_149SS,CP02_2019TO2018_150SS,CP02_2019TO2018_152SS,CP02_2019TO2018_153SS
0,28,15.9,2.5,1.5,3.4,8.0,7.7,33.4,28.7,2.62,...,,,,,,,,,*,
1,29,17.8,2.1,1.4,3.4,5.4,8.0,29.8,28.1,2.49,...,,*,,*,,,,*,*,*
2,30,17.1,1.9,1.5,4.6,3.7,7.1,26.3,29.8,2.42,...,,,*,,,,,,*,*
3,31,20.7,2.1,1.2,3.4,4.8,7.7,31.3,26.2,2.48,...,,,*,*,,,,,*,*
4,32,16.9,2.8,1.6,4.0,5.8,5.8,30.5,28.1,2.74,...,,,,,,,,,*,


## ACS 1-Year Selected Population Profiles

In [31]:
# Rebind DATASET/YEAR to the ACS 1-Year selected population profiles for
# 2019; the cells in this section reuse the same variable names as before.
DATASET = 'acs/acs1/spp'
YEAR = 2019

### What groups are in the dataset?

In [32]:
# Fetch the table of variable groups for the selected-population-profile
# dataset and preview its first rows.
groups = ced.variables.all_groups(DATASET, YEAR)
groups.head()

Unnamed: 0,DATASET,YEAR,GROUP,DESCRIPTION
0,acs/acs1/spp,2019,S0201,SELECTED POPULATION PROFILE IN THE UNITED STATES
1,acs/acs1/spp,2019,S0201PR,SELECTED POPULATION PROFILE IN PUERTO RICO


### What variables are in the first group (as a tree)?

In [33]:
# Pick the code of the first listed group (column first, then position).
group = groups['GROUP'].iloc[0]

# Render that group's variables as a tree.
ced.variables.group_tree(DATASET, YEAR, group)

+ Estimate
    + TOTAL NUMBER OF RACES REPORTED
        + Total population (S0201_001E)
            + One race (S0201_002E)
            + Two races (S0201_003E)
            + Three races (S0201_004E)
            + Four or more races (S0201_005E)
    + SEX AND AGE
        + Total population (S0201_006E)
            + Male (S0201_007E)
            + Female (S0201_008E)
            + Under 5 years (S0201_009E)
            + 5 to 17 years (S0201_010E)
            + 18 to 24 years (S0201_011E)
            + 25 to 34 years (S0201_012E)
            + 35 to 44 years (S0201_013E)
            + 45 to 54 years (S0201_014E)
            + 55 to 64 years (S0201_015E)
            + 65 to 74 years (S0201_016E)
            + 75 years and over (S0201_017E)
            + Median age (years) (S0201_018E)
            + 21 years and over (S0201_020E)
            + 62 years and over (S0201_021E)
            + Under 18 years (S0201_023E)
                + Male (S0201_024E)
                + Female (S0201_025E)

### What variables in the first group are leaves of the tree?

In [34]:
# Names of the variables that are leaves of the group's tree; `leaves` is
# reused by the download cell further down.
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

# str() so the cell displays the whole collection as a single string.
str(leaves)

"['S0201_002E', 'S0201_003E', 'S0201_004E', 'S0201_005E', 'S0201_007E', 'S0201_008E', 'S0201_009E', 'S0201_010E', 'S0201_011E', 'S0201_012E', 'S0201_013E', 'S0201_014E', 'S0201_015E', 'S0201_016E', 'S0201_017E', 'S0201_018E', 'S0201_020E', 'S0201_021E', 'S0201_024E', 'S0201_025E', 'S0201_027E', 'S0201_028E', 'S0201_030E', 'S0201_031E', 'S0201_033E', 'S0201_034E', 'S0201_036E', 'S0201_037E', 'S0201_039E', 'S0201_040E', 'S0201_041E', 'S0201_042E', 'S0201_043E', 'S0201_046E', 'S0201_048E', 'S0201_050E', 'S0201_053E', 'S0201_054E', 'S0201_056E', 'S0201_057E', 'S0201_058E', 'S0201_059E', 'S0201_061E', 'S0201_062E', 'S0201_063E', 'S0201_064E', 'S0201_065E', 'S0201_067E', 'S0201_068E', 'S0201_069E', 'S0201_070E', 'S0201_071E', 'S0201_073E', 'S0201_074E', 'S0201_075E', 'S0201_076E', 'S0201_077E', 'S0201_079E', 'S0201_080E', 'S0201_081E', 'S0201_082E', 'S0201_083E', 'S0201_085E', 'S0201_086E', 'S0201_088E', 'S0201_089E', 'S0201_091E', 'S0201_092E', 'S0201_093E', 'S0201_094E', 'S0201_095E', 'S02

### All variables in the first group as a list

In [35]:
# Every variable name in the group, including non-leaf ones that the
# leaves list omits.
variables = ced.variables.group_variables(DATASET, YEAR, group)

# str() so the cell displays the whole collection as a single string.
str(variables)

"['S0201_001E', 'S0201_002E', 'S0201_003E', 'S0201_004E', 'S0201_005E', 'S0201_006E', 'S0201_007E', 'S0201_008E', 'S0201_009E', 'S0201_010E', 'S0201_011E', 'S0201_012E', 'S0201_013E', 'S0201_014E', 'S0201_015E', 'S0201_016E', 'S0201_017E', 'S0201_018E', 'S0201_019E', 'S0201_020E', 'S0201_021E', 'S0201_022E', 'S0201_023E', 'S0201_024E', 'S0201_025E', 'S0201_026E', 'S0201_027E', 'S0201_028E', 'S0201_029E', 'S0201_030E', 'S0201_031E', 'S0201_032E', 'S0201_033E', 'S0201_034E', 'S0201_035E', 'S0201_036E', 'S0201_037E', 'S0201_038E', 'S0201_039E', 'S0201_040E', 'S0201_041E', 'S0201_042E', 'S0201_043E', 'S0201_044E', 'S0201_045E', 'S0201_046E', 'S0201_047E', 'S0201_048E', 'S0201_049E', 'S0201_050E', 'S0201_051E', 'S0201_052E', 'S0201_053E', 'S0201_054E', 'S0201_055E', 'S0201_056E', 'S0201_057E', 'S0201_058E', 'S0201_059E', 'S0201_060E', 'S0201_061E', 'S0201_062E', 'S0201_063E', 'S0201_064E', 'S0201_065E', 'S0201_066E', 'S0201_067E', 'S0201_068E', 'S0201_069E', 'S0201_070E', 'S0201_071E', 'S02

### Download data for the leaf variables

In [36]:
# Download the leaf variables at the state level. state="*" presumably acts
# as a wildcard for all states (the preview shows several state codes).
df_data = ced.download(DATASET, YEAR, leaves, state="*")

In [37]:
# Preview the first rows of the downloaded data.
df_data.head()

Unnamed: 0,STATE,S0201_002E,S0201_003E,S0201_004E,S0201_005E,S0201_007E,S0201_008E,S0201_009E,S0201_010E,S0201_011E,...,S0201_295E,S0201_296E,S0201_298E,S0201_299E,S0201_300E,S0201_302E,S0201_303E,S0201_305E,S0201_307E,S0201_308E
0,28,98.5,1.4,0.1,0.0,48.2,51.8,6.0,17.4,10.0,...,74.2,25.8,128200,1149,352,51.8,48.2,777,87.6,76.8
1,29,97.2,2.6,0.2,0.0,49.0,51.0,6.0,16.4,9.1,...,79.6,20.4,168000,1271,446,55.7,44.3,834,91.8,84.8
2,30,96.6,3.0,0.3,0.1,50.3,49.7,5.5,15.7,9.4,...,71.0,29.0,253600,1466,450,55.2,44.8,831,92.4,85.0
3,31,97.2,2.5,0.2,0.0,50.0,50.0,6.7,17.9,9.7,...,80.4,19.6,172700,1427,535,58.8,41.2,859,92.7,87.0
4,32,95.3,4.3,0.4,0.0,50.2,49.8,6.0,16.4,8.2,...,71.5,28.5,317800,1589,423,48.9,51.1,1168,94.2,85.6
