# American Community Survey (ACS) 3-Year Data

See https://www.census.gov/data/developers/data-sets/acs-3year.html.

In [1]:
# So we can run from within the censusdis project and find the packages we need.
import os
import sys

sys.path.append(
    os.path.join(
        os.path.abspath(os.path.join(os.path.curdir, os.path.pardir, os.path.pardir))
    )
)

In [2]:
import censusdis.data as ced

## ACS 3-Year Detailed Tables

In [3]:
DATASET = "acs/acs3"
YEAR = 2013

### What groups are in the dataset?

In [4]:
groups = ced.variables.all_groups(DATASET, YEAR)
groups.head()

Unnamed: 0,index,dataset,year,group,description
0,1061,acs/acs3,2013,B00001,UNWEIGHTED SAMPLE COUNT OF THE POPULATION
1,1057,acs/acs3,2013,B00002,UNWEIGHTED SAMPLE HOUSING UNITS
2,572,acs/acs3,2013,B01001,SEX BY AGE
3,147,acs/acs3,2013,B01001A,SEX BY AGE (WHITE ALONE)
4,128,acs/acs3,2013,B01001B,SEX BY AGE (BLACK OR AFRICAN AMERICAN ALONE)


### What variables are in the first group (as a tree)?

In [5]:
group = groups.iloc[0]["group"]

ced.variables.group_tree(DATASET, YEAR, group)

+ Estimate
    + Total (B00001_001E)

### What variables in the first group are leaves of the tree?

In [6]:
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

str(leaves)

"['B00001_001E']"

### All variables in the first group as a list

In [7]:
variables = ced.variables.group_variables(DATASET, YEAR, group)

str(variables)

"['B00001_001E']"

### Dowload data for the leaf variables

In [8]:
df_data = ced.download(DATASET, YEAR, leaves, state="*")

In [9]:
df_data.head()

Unnamed: 0,STATE,B00001_001E
0,1,246544
1,2,61127
2,4,276065
3,5,148981
4,6,1681054


## ACS 3-Year Subject Tables

In [37]:
DATASET = "acs/acs3/subject"
YEAR = 2013

### What groups are in the dataset?

In [38]:
groups = ced.variables.all_groups(DATASET, YEAR)
groups.head()

Unnamed: 0,index,dataset,year,group,description
0,29,acs/acs3/subject,2013,S0101,AGE AND SEX
1,32,acs/acs3/subject,2013,S0102,POPULATION 60 YEARS AND OVER IN THE UNITED STA...
2,37,acs/acs3/subject,2013,S0102PR,POPULATION 60 YEARS AND OVER IN PUERTO RICO
3,34,acs/acs3/subject,2013,S0103,POPULATION 65 YEARS AND OVER IN THE UNITED STA...
4,0,acs/acs3/subject,2013,S0103PR,POPULATION 65 YEARS AND OVER IN PUERTO RICO


### What variables are in the first group (as a tree)?

In [39]:
group = groups.iloc[0]["group"]

ced.variables.group_tree(DATASET, YEAR, group)

+ Total
    + Estimate
        + Total population (S0101_C01_001E)
        + AGE
            + Under 5 years (S0101_C01_002E)
            + 5 to 9 years (S0101_C01_003E)
            + 10 to 14 years (S0101_C01_004E)
            + 15 to 19 years (S0101_C01_005E)
            + 20 to 24 years (S0101_C01_006E)
            + 25 to 29 years (S0101_C01_007E)
            + 30 to 34 years (S0101_C01_008E)
            + 35 to 39 years (S0101_C01_009E)
            + 40 to 44 years (S0101_C01_010E)
            + 45 to 49 years (S0101_C01_011E)
            + 50 to 54 years (S0101_C01_012E)
            + 55 to 59 years (S0101_C01_013E)
            + 60 to 64 years (S0101_C01_014E)
            + 65 to 69 years (S0101_C01_015E)
            + 70 to 74 years (S0101_C01_016E)
            + 75 to 79 years (S0101_C01_017E)
            + 80 to 84 years (S0101_C01_018E)
            + 85 years and over (S0101_C01_019E)
        + SELECTED AGE CATEGORIES
            + 5 to 14 years (S0101_C01_020E)
            

### What variables in the first group are leaves of the tree?

In [40]:
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

str(leaves)

"['S0101_C01_001E', 'S0101_C01_001M', 'S0101_C01_002E', 'S0101_C01_002M', 'S0101_C01_003E', 'S0101_C01_003M', 'S0101_C01_004E', 'S0101_C01_004M', 'S0101_C01_005E', 'S0101_C01_005M', 'S0101_C01_006E', 'S0101_C01_006M', 'S0101_C01_007E', 'S0101_C01_007M', 'S0101_C01_008E', 'S0101_C01_008M', 'S0101_C01_009E', 'S0101_C01_009M', 'S0101_C01_010E', 'S0101_C01_010M', 'S0101_C01_011E', 'S0101_C01_011M', 'S0101_C01_012E', 'S0101_C01_012M', 'S0101_C01_013E', 'S0101_C01_013M', 'S0101_C01_014E', 'S0101_C01_014M', 'S0101_C01_015E', 'S0101_C01_015M', 'S0101_C01_016E', 'S0101_C01_016M', 'S0101_C01_017E', 'S0101_C01_017M', 'S0101_C01_018E', 'S0101_C01_018M', 'S0101_C01_019E', 'S0101_C01_019M', 'S0101_C01_020E', 'S0101_C01_020M', 'S0101_C01_021E', 'S0101_C01_021M', 'S0101_C01_022E', 'S0101_C01_022M', 'S0101_C01_023E', 'S0101_C01_023M', 'S0101_C01_024E', 'S0101_C01_024M', 'S0101_C01_025E', 'S0101_C01_025M', 'S0101_C01_026E', 'S0101_C01_026M', 'S0101_C01_027E', 'S0101_C01_027M', 'S0101_C01_028E', 'S0101_C

### All variables in the first group as a list

In [41]:
variables = ced.variables.group_variables(DATASET, YEAR, group)

str(variables)

"['S0101_C01_001E', 'S0101_C01_001M', 'S0101_C01_002E', 'S0101_C01_002M', 'S0101_C01_003E', 'S0101_C01_003M', 'S0101_C01_004E', 'S0101_C01_004M', 'S0101_C01_005E', 'S0101_C01_005M', 'S0101_C01_006E', 'S0101_C01_006M', 'S0101_C01_007E', 'S0101_C01_007M', 'S0101_C01_008E', 'S0101_C01_008M', 'S0101_C01_009E', 'S0101_C01_009M', 'S0101_C01_010E', 'S0101_C01_010M', 'S0101_C01_011E', 'S0101_C01_011M', 'S0101_C01_012E', 'S0101_C01_012M', 'S0101_C01_013E', 'S0101_C01_013M', 'S0101_C01_014E', 'S0101_C01_014M', 'S0101_C01_015E', 'S0101_C01_015M', 'S0101_C01_016E', 'S0101_C01_016M', 'S0101_C01_017E', 'S0101_C01_017M', 'S0101_C01_018E', 'S0101_C01_018M', 'S0101_C01_019E', 'S0101_C01_019M', 'S0101_C01_020E', 'S0101_C01_020M', 'S0101_C01_021E', 'S0101_C01_021M', 'S0101_C01_022E', 'S0101_C01_022M', 'S0101_C01_023E', 'S0101_C01_023M', 'S0101_C01_024E', 'S0101_C01_024M', 'S0101_C01_025E', 'S0101_C01_025M', 'S0101_C01_026E', 'S0101_C01_026M', 'S0101_C01_027E', 'S0101_C01_027M', 'S0101_C01_028E', 'S0101_C

### Dowload data for the leaf variables

In [43]:
df_data = ced.download(DATASET, YEAR, leaves, state="*")

CensusApiException: Census API request to https://api.census.gov/data/2013/acs/acs3/subject?get=S0101_C01_001E%2CS0101_C01_001M%2CS0101_C01_002E%2CS0101_C01_002M%2CS0101_C01_003E%2CS0101_C01_003M%2CS0101_C01_004E%2CS0101_C01_004M%2CS0101_C01_005E%2CS0101_C01_005M%2CS0101_C01_006E%2CS0101_C01_006M%2CS0101_C01_007E%2CS0101_C01_007M%2CS0101_C01_008E%2CS0101_C01_008M%2CS0101_C01_009E%2CS0101_C01_009M%2CS0101_C01_010E%2CS0101_C01_010M%2CS0101_C01_011E%2CS0101_C01_011M%2CS0101_C01_012E%2CS0101_C01_012M%2CS0101_C01_013E%2CS0101_C01_013M%2CS0101_C01_014E%2CS0101_C01_014M%2CS0101_C01_015E%2CS0101_C01_015M%2CS0101_C01_016E%2CS0101_C01_016M%2CS0101_C01_017E%2CS0101_C01_017M%2CS0101_C01_018E%2CS0101_C01_018M%2CS0101_C01_019E%2CS0101_C01_019M%2CS0101_C01_020E%2CS0101_C01_020M%2CS0101_C01_021E%2CS0101_C01_021M%2CS0101_C01_022E%2CS0101_C01_022M%2CS0101_C01_023E%2CS0101_C01_023M%2CS0101_C01_024E%2CS0101_C01_024M%2CS0101_C01_025E%2CS0101_C01_025M&for=state failed with status 500. There was an error while running your query.  We've logged the error and we'll correct it ASAP.  Sorry for the inconvenience.

In [None]:
df_data.head()

## ACS 3-Year Data Profiles

In [16]:
DATASET = "acs/acs3/profile"
YEAR = 2013

### What groups are in the dataset?

In [17]:
groups = ced.variables.all_groups(DATASET, YEAR)
groups.head()

Unnamed: 0,index,dataset,year,group,description
0,3,acs/acs3/profile,2013,DP02,SELECTED SOCIAL CHARACTERISTICS IN THE UNITED ...
1,2,acs/acs3/profile,2013,DP02PR,SELECTED SOCIAL CHARACTERISTICS IN PUERTO RICO
2,4,acs/acs3/profile,2013,DP03,SELECTED ECONOMIC CHARACTERISTICS
3,0,acs/acs3/profile,2013,DP04,SELECTED HOUSING CHARACTERISTICS
4,1,acs/acs3/profile,2013,DP05,ACS DEMOGRAPHIC AND HOUSING ESTIMATES


### What variables are in the first group (as a tree)?

In [18]:
group = groups.iloc[0]["group"]

ced.variables.group_tree(DATASET, YEAR, group)

+ Estimate
    + HOUSEHOLDS BY TYPE
        + Total households (DP02_0001E)
            + Family households (families) (DP02_0002E)
                + With own children under 18 years (DP02_0003E)
                + Married-couple family (DP02_0004E)
                    + With own children under 18 years (DP02_0005E)
                + Male householder, no wife present, family (DP02_0006E)
                    + With own children under 18 years (DP02_0007E)
                + Female householder, no husband present, family (DP02_0008E)
                    + With own children under 18 years (DP02_0009E)
            + Nonfamily households (DP02_0010E)
                + Householder living alone (DP02_0011E)
                    + 65 years and over (DP02_0012E)
        + Households with one or more people under 18 years (DP02_0013E)
        + Households with one or more people 65 years and over (DP02_0014E)
        + Average household size (DP02_0015E)
        + Average family size (DP02_0016E)
 

### What variables in the first group are leaves of the tree?

In [19]:
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

str(leaves)

"['DP02_0003E', 'DP02_0003PE', 'DP02_0003PM', 'DP02_0005E', 'DP02_0005PE', 'DP02_0005PM', 'DP02_0007E', 'DP02_0007PE', 'DP02_0007PM', 'DP02_0009E', 'DP02_0009PE', 'DP02_0009PM', 'DP02_0012E', 'DP02_0012PE', 'DP02_0012PM', 'DP02_0013E', 'DP02_0013PE', 'DP02_0013PM', 'DP02_0014E', 'DP02_0014PE', 'DP02_0014PM', 'DP02_0015E', 'DP02_0015PE', 'DP02_0015PM', 'DP02_0016E', 'DP02_0016PE', 'DP02_0016PM', 'DP02_0018E', 'DP02_0018PE', 'DP02_0018PM', 'DP02_0019E', 'DP02_0019PE', 'DP02_0019PM', 'DP02_0020E', 'DP02_0020PE', 'DP02_0020PM', 'DP02_0021E', 'DP02_0021PE', 'DP02_0021PM', 'DP02_0023E', 'DP02_0023PE', 'DP02_0023PM', 'DP02_0025E', 'DP02_0025PE', 'DP02_0025PM', 'DP02_0026E', 'DP02_0026PE', 'DP02_0026PM', 'DP02_0027E', 'DP02_0027PE', 'DP02_0027PM', 'DP02_0028E', 'DP02_0028PE', 'DP02_0028PM', 'DP02_0029E', 'DP02_0029PE', 'DP02_0029PM', 'DP02_0031E', 'DP02_0031PE', 'DP02_0031PM', 'DP02_0032E', 'DP02_0032PE', 'DP02_0032PM', 'DP02_0033E', 'DP02_0033PE', 'DP02_0033PM', 'DP02_0034E', 'DP02_0034PE', '

### All variables in the first group as a list

In [20]:
variables = ced.variables.group_variables(DATASET, YEAR, group)

str(variables)

"['DP02_0001E', 'DP02_0001PE', 'DP02_0001PM', 'DP02_0002E', 'DP02_0002PE', 'DP02_0002PM', 'DP02_0003E', 'DP02_0003PE', 'DP02_0003PM', 'DP02_0004E', 'DP02_0004PE', 'DP02_0004PM', 'DP02_0005E', 'DP02_0005PE', 'DP02_0005PM', 'DP02_0006E', 'DP02_0006PE', 'DP02_0006PM', 'DP02_0007E', 'DP02_0007PE', 'DP02_0007PM', 'DP02_0008E', 'DP02_0008PE', 'DP02_0008PM', 'DP02_0009E', 'DP02_0009PE', 'DP02_0009PM', 'DP02_0010E', 'DP02_0010PE', 'DP02_0010PM', 'DP02_0011E', 'DP02_0011PE', 'DP02_0011PM', 'DP02_0012E', 'DP02_0012PE', 'DP02_0012PM', 'DP02_0013E', 'DP02_0013PE', 'DP02_0013PM', 'DP02_0014E', 'DP02_0014PE', 'DP02_0014PM', 'DP02_0015E', 'DP02_0015PE', 'DP02_0015PM', 'DP02_0016E', 'DP02_0016PE', 'DP02_0016PM', 'DP02_0017E', 'DP02_0017PE', 'DP02_0017PM', 'DP02_0018E', 'DP02_0018PE', 'DP02_0018PM', 'DP02_0019E', 'DP02_0019PE', 'DP02_0019PM', 'DP02_0020E', 'DP02_0020PE', 'DP02_0020PM', 'DP02_0021E', 'DP02_0021PE', 'DP02_0021PM', 'DP02_0022E', 'DP02_0022PE', 'DP02_0022PM', 'DP02_0023E', 'DP02_0023PE', '

### Dowload data for the leaf variables

In [21]:
df_data = ced.download(DATASET, YEAR, leaves, state="*")

In [22]:
df_data.head()

Unnamed: 0,STATE,DP02_0003E,DP02_0003PE,DP02_0003PM,DP02_0005E,DP02_0005PE,DP02_0005PM,DP02_0007E,DP02_0007PE,DP02_0007PM,...,DP02_0148PM,DP02_0149E,DP02_0149PE,DP02_0149PM,DP02_0151E,DP02_0151PE,DP02_0151PM,DP02_0152E,DP02_0152PE,DP02_0152PM
0,1,506026.0,27.5,0.3,321333.0,17.5,0.2,33740.0,1.8,0.1,...,0.1,7800.0,0.2,0.1,-888888888.0,-888888888.0,-888888888.0,-888888888.0,-888888888.0,-888888888.0
1,2,81428.0,32.5,0.8,54988.0,21.9,0.6,8487.0,3.4,0.3,...,0.1,1225.0,0.2,0.1,-888888888.0,-888888888.0,-888888888.0,-888888888.0,-888888888.0,-888888888.0
2,4,675647.0,28.4,0.2,431351.0,18.1,0.2,67964.0,2.9,0.1,...,0.1,11568.0,0.2,0.1,-888888888.0,-888888888.0,-888888888.0,-888888888.0,-888888888.0,-888888888.0
3,5,316030.0,28.0,0.3,205212.0,18.2,0.3,25603.0,2.3,0.1,...,0.1,3064.0,0.1,0.1,-888888888.0,-888888888.0,-888888888.0,-888888888.0,-888888888.0,-888888888.0
4,6,4043554.0,32.1,0.1,2788662.0,22.2,0.1,347452.0,2.8,0.1,...,0.1,79587.0,0.2,0.1,-888888888.0,-888888888.0,-888888888.0,-888888888.0,-888888888.0,-888888888.0


## ACS 3-Year Comparison Profiles

In [23]:
DATASET = "acs/acs3/cprofile"
YEAR = 2013

### What groups are in the dataset?

In [24]:
groups = ced.variables.all_groups(DATASET, YEAR)
groups.head()

Unnamed: 0,index,dataset,year,group,description
0,0,acs/acs3/cprofile,2013,CP02,COMPARATIVE SOCIAL CHARACTERISTICS IN THE UNIT...
1,1,acs/acs3/cprofile,2013,CP02PR,COMPARATIVE SOCIAL CHARACTERISTICS IN PUERTO RICO
2,4,acs/acs3/cprofile,2013,CP03,COMPARATIVE ECONOMIC CHARACTERISTICS
3,3,acs/acs3/cprofile,2013,CP04,COMPARATIVE HOUSING CHARACTERISTICS
4,2,acs/acs3/cprofile,2013,CP05,COMPARATIVE DEMOGRAPHIC ESTIMATES


### What variables are in the first group (as a tree)?

In [25]:
group = groups.iloc[0]["group"]

ced.variables.group_tree(DATASET, YEAR, group)

+ 2008-2010 Estimate
    + HOUSEHOLDS BY TYPE
        + Total households (CP02_2008_2010_001E)
            + Family households (families) (CP02_2008_2010_002E)
                + With own children under 18 years (CP02_2008_2010_003E)
                + Married-couple family (CP02_2008_2010_004E)
                    + With own children under 18 years (CP02_2008_2010_005E)
                + Male householder, no wife present, family (CP02_2008_2010_006E)
                    + With own children under 18 years (CP02_2008_2010_007E)
                + Female householder, no husband present, family (CP02_2008_2010_008E)
                    + With own children under 18 years (CP02_2008_2010_009E)
            + Nonfamily households (CP02_2008_2010_010E)
                + Householder living alone (CP02_2008_2010_011E)
                    + 65 years and over (CP02_2008_2010_012E)
        + Households with one or more people under 18 years (CP02_2008_2010_013E)
        + Households with one or more p

### What variables in the first group are leaves of the tree?

In [26]:
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

str(leaves)

"['CP02_2008_2010_003E', 'CP02_2008_2010_005E', 'CP02_2008_2010_007E', 'CP02_2008_2010_009E', 'CP02_2008_2010_012E', 'CP02_2008_2010_013E', 'CP02_2008_2010_014E', 'CP02_2008_2010_015E', 'CP02_2008_2010_016E', 'CP02_2008_2010_018E', 'CP02_2008_2010_019E', 'CP02_2008_2010_020E', 'CP02_2008_2010_021E', 'CP02_2008_2010_023E', 'CP02_2008_2010_025E', 'CP02_2008_2010_026E', 'CP02_2008_2010_027E', 'CP02_2008_2010_028E', 'CP02_2008_2010_029E', 'CP02_2008_2010_031E', 'CP02_2008_2010_032E', 'CP02_2008_2010_033E', 'CP02_2008_2010_034E', 'CP02_2008_2010_035E', 'CP02_2008_2010_038E', 'CP02_2008_2010_039E', 'CP02_2008_2010_040E', 'CP02_2008_2010_041E', 'CP02_2008_2010_042E', 'CP02_2008_2010_044E', 'CP02_2008_2010_045E', 'CP02_2008_2010_046E', 'CP02_2008_2010_047E', 'CP02_2008_2010_048E', 'CP02_2008_2010_050E', 'CP02_2008_2010_051E', 'CP02_2008_2010_053E', 'CP02_2008_2010_054E', 'CP02_2008_2010_055E', 'CP02_2008_2010_056E', 'CP02_2008_2010_057E', 'CP02_2008_2010_059E', 'CP02_2008_2010_060E', 'CP02_200

### All variables in the first group as a list

In [27]:
variables = ced.variables.group_variables(DATASET, YEAR, group)

str(variables)

"['CP02_001SS', 'CP02_002SS', 'CP02_003SS', 'CP02_004SS', 'CP02_005SS', 'CP02_006SS', 'CP02_007SS', 'CP02_008SS', 'CP02_009SS', 'CP02_010SS', 'CP02_011SS', 'CP02_012SS', 'CP02_013SS', 'CP02_014SS', 'CP02_015SS', 'CP02_016SS', 'CP02_017SS', 'CP02_018SS', 'CP02_019SS', 'CP02_020SS', 'CP02_021SS', 'CP02_022SS', 'CP02_023SS', 'CP02_024SS', 'CP02_025SS', 'CP02_026SS', 'CP02_027SS', 'CP02_028SS', 'CP02_029SS', 'CP02_030SS', 'CP02_031SS', 'CP02_032SS', 'CP02_033SS', 'CP02_034SS', 'CP02_035SS', 'CP02_036SS', 'CP02_037SS', 'CP02_038SS', 'CP02_039SS', 'CP02_040SS', 'CP02_041SS', 'CP02_042SS', 'CP02_043SS', 'CP02_044SS', 'CP02_045SS', 'CP02_046SS', 'CP02_047SS', 'CP02_048SS', 'CP02_049SS', 'CP02_050SS', 'CP02_051SS', 'CP02_052SS', 'CP02_053SS', 'CP02_054SS', 'CP02_055SS', 'CP02_056SS', 'CP02_057SS', 'CP02_058SS', 'CP02_059SS', 'CP02_060SS', 'CP02_061SS', 'CP02_062SS', 'CP02_063SS', 'CP02_064SS', 'CP02_065SS', 'CP02_066SS', 'CP02_067SS', 'CP02_068SS', 'CP02_069SS', 'CP02_070SS', 'CP02_071SS', 'CP0

### Dowload data for the leaf variables

In [28]:
df_data = ced.download(DATASET, YEAR, leaves, state="*")

In [29]:
df_data.head()

Unnamed: 0,STATE,CP02_2008_2010_003E,CP02_2008_2010_005E,CP02_2008_2010_007E,CP02_2008_2010_009E,CP02_2008_2010_012E,CP02_2008_2010_013E,CP02_2008_2010_014E,CP02_2008_2010_015E,CP02_2008_2010_016E,...,CP02_2011_2013_142E,CP02_2011_2013_143E,CP02_2011_2013_144E,CP02_2011_2013_145E,CP02_2011_2013_146E,CP02_2011_2013_147E,CP02_2011_2013_148E,CP02_2011_2013_149E,CP02_2011_2013_151E,CP02_2011_2013_152E
0,1,28.7,18.6,1.9,8.3,10.0,32.8,25.4,2.54,3.13,...,1.7,0.0,0.8,0.4,0.1,0.0,0.4,0.2,-888888888.0,-888888888.0
1,2,34.2,23.4,3.5,7.3,5.2,37.6,15.3,2.68,3.21,...,2.7,0.1,0.4,2.4,0.5,0.4,0.9,0.2,-888888888.0,-888888888.0
2,4,29.6,19.5,2.7,7.3,8.9,33.2,25.6,2.66,3.26,...,1.9,0.1,0.5,1.6,0.3,0.2,0.7,0.2,-888888888.0,-888888888.0
3,5,28.9,18.6,2.1,8.2,10.1,32.9,25.9,2.52,3.06,...,1.7,0.0,0.3,0.5,0.2,0.1,0.4,0.1,-888888888.0,-888888888.0
4,6,33.4,23.4,2.7,7.2,8.2,37.3,23.7,2.92,3.51,...,1.3,0.1,0.7,1.1,0.3,0.3,0.4,0.2,-888888888.0,-888888888.0


## ACS 3-Year Selected Population Profiles

In [30]:
DATASET = "acs/acs3/spp"
YEAR = 2013

### What groups are in the dataset?

In [31]:
groups = ced.variables.all_groups(DATASET, YEAR)
groups.head()

Unnamed: 0,index,dataset,year,group,description
0,1,acs/acs3/spp,2013,S0201,SELECTED POPULATION PROFILE IN THE UNITED STATES
1,0,acs/acs3/spp,2013,S0201PR,SELECTED POPULATION PROFILE IN PUERTO RICO


### What variables are in the first group (as a tree)?

In [32]:
group = groups.iloc[0]["group"]

ced.variables.group_tree(DATASET, YEAR, group)

+ Estimate
    + TOTAL NUMBER OF RACES REPORTED
        + Total population (S0201_001E)
            + One race (S0201_002E)
            + Two races (S0201_003E)
            + Three races (S0201_004E)
            + Four or more races (S0201_005E)
    + SEX AND AGE
        + Total population (S0201_006E)
            + Male (S0201_007E)
            + Female (S0201_008E)
        + Under 5 years (S0201_009E)
        + 5 to 17 years (S0201_010E)
        + 18 to 24 years (S0201_011E)
        + 25 to 34 years (S0201_012E)
        + 35 to 44 years (S0201_013E)
        + 45 to 54 years (S0201_014E)
        + 55 to 64 years (S0201_015E)
        + 65 to 74 years (S0201_016E)
        + 75 years and over (S0201_017E)
        + Median age (years) (S0201_018E)
        + 21 years and over (S0201_020E)
        + 62 years and over (S0201_021E)
        + Under 18 years (S0201_023E)
            + Male (S0201_024E)
            + Female (S0201_025E)
        + 18 years and over (S0201_026E)
            + Male

### What variables in the first group are leaves of the tree?

In [33]:
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

str(leaves)

"['S0201_002E', 'S0201_003E', 'S0201_004E', 'S0201_005E', 'S0201_007E', 'S0201_008E', 'S0201_009E', 'S0201_010E', 'S0201_011E', 'S0201_012E', 'S0201_013E', 'S0201_014E', 'S0201_015E', 'S0201_016E', 'S0201_017E', 'S0201_018E', 'S0201_020E', 'S0201_021E', 'S0201_024E', 'S0201_025E', 'S0201_027E', 'S0201_028E', 'S0201_030E', 'S0201_031E', 'S0201_033E', 'S0201_034E', 'S0201_036E', 'S0201_037E', 'S0201_039E', 'S0201_040E', 'S0201_041E', 'S0201_043E', 'S0201_046E', 'S0201_048E', 'S0201_050E', 'S0201_053E', 'S0201_054E', 'S0201_056E', 'S0201_057E', 'S0201_058E', 'S0201_059E', 'S0201_061E', 'S0201_062E', 'S0201_063E', 'S0201_064E', 'S0201_065E', 'S0201_067E', 'S0201_068E', 'S0201_069E', 'S0201_070E', 'S0201_071E', 'S0201_073E', 'S0201_074E', 'S0201_075E', 'S0201_076E', 'S0201_077E', 'S0201_079E', 'S0201_080E', 'S0201_081E', 'S0201_082E', 'S0201_083E', 'S0201_085E', 'S0201_086E', 'S0201_088E', 'S0201_089E', 'S0201_091E', 'S0201_092E', 'S0201_093E', 'S0201_094E', 'S0201_095E', 'S0201_097E', 'S02

### All variables in the first group as a list

In [34]:
variables = ced.variables.group_variables(DATASET, YEAR, group)

str(variables)

"['S0201_001E', 'S0201_002E', 'S0201_003E', 'S0201_004E', 'S0201_005E', 'S0201_006E', 'S0201_007E', 'S0201_008E', 'S0201_009E', 'S0201_010E', 'S0201_011E', 'S0201_012E', 'S0201_013E', 'S0201_014E', 'S0201_015E', 'S0201_016E', 'S0201_017E', 'S0201_018E', 'S0201_019E', 'S0201_020E', 'S0201_021E', 'S0201_022E', 'S0201_023E', 'S0201_024E', 'S0201_025E', 'S0201_026E', 'S0201_027E', 'S0201_028E', 'S0201_029E', 'S0201_030E', 'S0201_031E', 'S0201_032E', 'S0201_033E', 'S0201_034E', 'S0201_035E', 'S0201_036E', 'S0201_037E', 'S0201_038E', 'S0201_039E', 'S0201_040E', 'S0201_041E', 'S0201_042E', 'S0201_043E', 'S0201_044E', 'S0201_045E', 'S0201_046E', 'S0201_047E', 'S0201_048E', 'S0201_049E', 'S0201_050E', 'S0201_051E', 'S0201_052E', 'S0201_053E', 'S0201_054E', 'S0201_055E', 'S0201_056E', 'S0201_057E', 'S0201_058E', 'S0201_059E', 'S0201_060E', 'S0201_061E', 'S0201_062E', 'S0201_063E', 'S0201_064E', 'S0201_065E', 'S0201_066E', 'S0201_067E', 'S0201_068E', 'S0201_069E', 'S0201_070E', 'S0201_071E', 'S02

### Dowload data for the leaf variables

In [35]:
df_data = ced.download(DATASET, YEAR, leaves, state="*")

In [36]:
df_data.head()

Unnamed: 0,STATE,S0201_002E,S0201_003E,S0201_004E,S0201_005E,S0201_007E,S0201_008E,S0201_009E,S0201_010E,S0201_011E,...,S0201_294E,S0201_295E,S0201_297E,S0201_298E,S0201_299E,S0201_301E,S0201_302E,S0201_304E,S0201_306E,S0201_307E
0,1,98.4,1.5,0.1,0.0,48.4,51.6,6.2,17.0,10.1,...,71.2,28.8,122800.0,1129.0,340.0,48.9,51.1,701.0,-888888888.0,-888888888.0
1,2,92.0,7.5,0.5,0.1,52.2,47.8,7.5,18.3,11.0,...,70.0,30.0,245300.0,1817.0,550.0,55.2,44.8,1113.0,-888888888.0,-888888888.0
2,4,97.0,2.8,0.2,0.0,49.7,50.3,6.7,18.0,10.0,...,65.0,35.0,157100.0,1348.0,375.0,49.1,50.9,889.0,-888888888.0,-888888888.0
3,5,98.0,1.9,0.1,0.0,49.1,50.9,6.6,17.5,9.8,...,74.0,26.0,108000.0,998.0,316.0,50.1,49.9,658.0,-888888888.0,-888888888.0
4,6,95.6,4.0,0.4,0.0,49.7,50.3,6.6,17.6,10.5,...,54.4,45.6,359400.0,2157.0,484.0,42.5,57.5,1216.0,-888888888.0,-888888888.0
