# Summary File 1 (SF1) Data

See https://api.census.gov/data/2010/dec/sf1.html.

In [1]:
# So we can run from within the censusdis project and find the packages we need.
import os
import sys

# Resolve the directory two levels above the current working directory and
# make it importable. The membership check keeps this cell idempotent:
# re-running it no longer piles up duplicate entries on sys.path.
_project_root = os.path.abspath(
    os.path.join(os.path.curdir, os.path.pardir, os.path.pardir)
)
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [2]:
import censusdis.data as ced

## Summary File 1

In [3]:
# Coordinates of the data set used by every query in this notebook. They
# mirror the "2010/dec/sf1" portion of the documentation URL given at the top.
YEAR = 2010  # data vintage
DATASET = "dec/sf1"  # data set path

### What groups are in the dataset?

In [4]:
# Ask censusdis for the groups available in DATASET for YEAR, then show
# only the first few rows of the result.
df_groups = ced.variables.all_groups(DATASET, YEAR)
df_groups.head()

Unnamed: 0,DATASET,YEAR,GROUP,DESCRIPTION
0,dec/sf1,2010,H1,HOUSING UNITS
1,dec/sf1,2010,H10,TOTAL POPULATION IN OCCUPIED HOUSING UNITS
2,dec/sf1,2010,H11,TOTAL POPULATION IN OCCUPIED HOUSING UNITS BY ...
3,dec/sf1,2010,H11A,TOTAL POPULATION IN OCCUPIED HOUSING UNITS BY ...
4,dec/sf1,2010,H11B,TOTAL POPULATION IN OCCUPIED HOUSING UNITS BY ...


In [5]:
# Keep only the groups whose code begins with "PCT".
is_pct_group = df_groups["GROUP"].str.startswith("PCT")
df_groups[is_pct_group]

Unnamed: 0,DATASET,YEAR,GROUP,DESCRIPTION
249,dec/sf1,2010,PCT1,AMERICAN INDIAN AND ALASKA NATIVE ALONE WITH O...
250,dec/sf1,2010,PCT10,NATIVE HAWAIIAN AND OTHER PACIFIC ISLANDER ALO...
251,dec/sf1,2010,PCT11,HISPANIC OR LATINO BY SPECIFIC ORIGIN
252,dec/sf1,2010,PCT12,SEX BY AGE
253,dec/sf1,2010,PCT12A,SEX BY AGE (WHITE ALONE)
...,...,...,...,...
328,dec/sf1,2010,PCT5,ASIAN ALONE WITH ONE ASIAN CATEGORY FOR SELECT...
329,dec/sf1,2010,PCT6,ASIAN ALONE WITH ONE OR MORE ASIAN CATEGORIES ...
330,dec/sf1,2010,PCT7,ASIAN ALONE OR IN COMBINATION WITH ONE OR MORE...
331,dec/sf1,2010,PCT8,NATIVE HAWAIIAN AND OTHER PACIFIC ISLANDER ALO...


### What variables are in the PCT12 group (as a tree)?

In [6]:
# Group examined by this cell and by the later cells that read `group`; the
# group listing above describes PCT12 as "SEX BY AGE".
group = "PCT12"
# Show the variables of the group arranged as a tree.
ced.variables.group_tree(DATASET, YEAR, group)

+ Errata of Total (PCT012001ERR)
+ Total (PCT012001)
    + Male (PCT012002)
        + Under 1 year (PCT012003)
        + 1 year (PCT012004)
        + 2 years (PCT012005)
        + 3 years (PCT012006)
        + 4 years (PCT012007)
        + 5 years (PCT012008)
        + 6 years (PCT012009)
        + 7 years (PCT012010)
        + 8 years (PCT012011)
        + 9 years (PCT012012)
        + 10 years (PCT012013)
        + 11 years (PCT012014)
        + 12 years (PCT012015)
        + 13 years (PCT012016)
        + 14 years (PCT012017)
        + 15 years (PCT012018)
        + 16 years (PCT012019)
        + 17 years (PCT012020)
        + 18 years (PCT012021)
        + 19 years (PCT012022)
        + 20 years (PCT012023)
        + 21 years (PCT012024)
        + 22 years (PCT012025)
        + 23 years (PCT012026)
        + 24 years (PCT012027)
        + 25 years (PCT012028)
        + 26 years (PCT012029)
        + 27 years (PCT012030)
        + 28 years (PCT012031)
        + 29 years (PCT012032)


### What variables in the group are leaves of the tree?

In [7]:
# Names of the variables that sit at the leaves of the group's tree.
leaves = ced.variables.group_leaves(DATASET, YEAR, group)

# Display the result as a single string; presumably this keeps the long
# list on one wrapped line in the output — TODO confirm intent.
str(leaves)

"['PCT012001ERR', 'PCT012003', 'PCT012004', 'PCT012005', 'PCT012006', 'PCT012007', 'PCT012008', 'PCT012009', 'PCT012010', 'PCT012011', 'PCT012012', 'PCT012013', 'PCT012014', 'PCT012015', 'PCT012016', 'PCT012017', 'PCT012018', 'PCT012019', 'PCT012020', 'PCT012021', 'PCT012022', 'PCT012023', 'PCT012024', 'PCT012025', 'PCT012026', 'PCT012027', 'PCT012028', 'PCT012029', 'PCT012030', 'PCT012031', 'PCT012032', 'PCT012033', 'PCT012034', 'PCT012035', 'PCT012036', 'PCT012037', 'PCT012038', 'PCT012039', 'PCT012040', 'PCT012041', 'PCT012042', 'PCT012043', 'PCT012044', 'PCT012045', 'PCT012046', 'PCT012047', 'PCT012048', 'PCT012049', 'PCT012050', 'PCT012051', 'PCT012052', 'PCT012053', 'PCT012054', 'PCT012055', 'PCT012056', 'PCT012057', 'PCT012058', 'PCT012059', 'PCT012060', 'PCT012061', 'PCT012062', 'PCT012063', 'PCT012064', 'PCT012065', 'PCT012066', 'PCT012067', 'PCT012068', 'PCT012069', 'PCT012070', 'PCT012071', 'PCT012072', 'PCT012073', 'PCT012074', 'PCT012075', 'PCT012076', 'PCT012077', 'PCT012

### All variables in the group as a list

In [8]:
# Names of all variables in the group. Unlike `leaves`, the output below
# also contains non-leaf entries such as PCT012001 and PCT012002.
variables = ced.variables.group_variables(DATASET, YEAR, group)

# Display the result as a single string; presumably this keeps the long
# list on one wrapped line in the output — TODO confirm intent.
str(variables)

"['PCT012001', 'PCT012001ERR', 'PCT012002', 'PCT012003', 'PCT012004', 'PCT012005', 'PCT012006', 'PCT012007', 'PCT012008', 'PCT012009', 'PCT012010', 'PCT012011', 'PCT012012', 'PCT012013', 'PCT012014', 'PCT012015', 'PCT012016', 'PCT012017', 'PCT012018', 'PCT012019', 'PCT012020', 'PCT012021', 'PCT012022', 'PCT012023', 'PCT012024', 'PCT012025', 'PCT012026', 'PCT012027', 'PCT012028', 'PCT012029', 'PCT012030', 'PCT012031', 'PCT012032', 'PCT012033', 'PCT012034', 'PCT012035', 'PCT012036', 'PCT012037', 'PCT012038', 'PCT012039', 'PCT012040', 'PCT012041', 'PCT012042', 'PCT012043', 'PCT012044', 'PCT012045', 'PCT012046', 'PCT012047', 'PCT012048', 'PCT012049', 'PCT012050', 'PCT012051', 'PCT012052', 'PCT012053', 'PCT012054', 'PCT012055', 'PCT012056', 'PCT012057', 'PCT012058', 'PCT012059', 'PCT012060', 'PCT012061', 'PCT012062', 'PCT012063', 'PCT012064', 'PCT012065', 'PCT012066', 'PCT012067', 'PCT012068', 'PCT012069', 'PCT012070', 'PCT012071', 'PCT012072', 'PCT012073', 'PCT012074', 'PCT012075', 'PCT012

### Download data for the leaf variables

In [9]:
# Download the leaf variables of the group through censusdis.
# NOTE(review): state="*" looks like a wildcard selecting every state (the
# preview below has one row per STATE value) — confirm against the censusdis
# documentation.
df_data = ced.download(DATASET, YEAR, leaves, state="*")

In [10]:
# Preview the first rows of the downloaded data.
df_data.head()

Unnamed: 0,STATE,PCT012001ERR,PCT012003,PCT012004,PCT012005,PCT012006,PCT012007,PCT012008,PCT012009,PCT012010,...,PCT012200,PCT012201,PCT012202,PCT012203,PCT012204,PCT012205,PCT012206,PCT012207,PCT012208,PCT012209
0,1,T1_POP$0400000US01,30465,30587,31607,31725,30881,30898,31001,30828,...,2061,1744,1344,997,715,464,384,583,53,3
1,2,T1_POP$0400000US02,5590,5687,5665,5486,5468,5158,5248,5178,...,110,97,77,53,41,28,14,28,0,0
2,4,,44652,45904,47366,47916,46724,46355,46422,46063,...,2688,2089,1500,1209,845,545,379,611,35,3
3,5,T1_POP$0400000US05,19735,19650,20467,20975,20123,20246,19971,20132,...,1481,1202,934,774,573,323,264,445,39,3
4,6,,252121,254358,264336,264157,259084,258171,255433,253309,...,16200,13016,10189,7823,5747,4009,2748,4309,378,23
