# Getting Started Examples

Example code that appears in the Getting Started documentation.

In [1]:
# Make the parent of the current working directory importable, so that the
# notebook can be run from within the censusdis project and still find the
# packages it needs.
import os
import sys

_parent_dir = os.path.abspath(os.path.join(os.path.curdir, os.path.pardir))

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

## States

In [2]:
import censusdis.data as ced

# Query settings shared by the download call below.
DATASET, YEAR = "dec/pl", 2020
VARIABLES = ["NAME", "P2_001N"]

# state="*" is a wildcard: the result has one row per state
# (see the printed output in the next cell).
df_states = ced.download_detail(DATASET, YEAR, VARIABLES, state="*")

In [3]:
# Plain-text dump of the per-state result: STATE code, NAME and P2_001N columns.
print(df_states)

   STATE                  NAME   P2_001N
0     42          Pennsylvania  13002700
1     06            California  39538223
2     54         West Virginia   1793716
3     49                  Utah   3271616
4     36              New York  20201249
5     11  District of Columbia    689545
6     02                Alaska    733391
7     12               Florida  21538187
8     45        South Carolina   5118425
9     38          North Dakota    779094
10    23                 Maine   1362359
11    13               Georgia  10711908
12    01               Alabama   5024279
13    33         New Hampshire   1377529
14    41                Oregon   4237256
15    56               Wyoming    576851
16    04               Arizona   7151502
17    22             Louisiana   4657757
18    18               Indiana   6785528
19    16                 Idaho   1839106
20    09           Connecticut   3605944
21    15                Hawaii   1455271
22    17              Illinois  12812508
23    25        

## Counties

In [4]:
import censusdis.data as ced
from censusdis.states import STATE_NJ

# Same dataset, year and variables as the state-level query.
DATASET, YEAR = "dec/pl", 2020
VARIABLES = ["NAME", "P2_001N"]

# Fix the state to New Jersey and wildcard the county: the result has one row
# per county in that state.
df_counties = ced.download_detail(DATASET, YEAR, VARIABLES, state=STATE_NJ, county="*")

In [5]:
# Plain-text dump of the per-county result for New Jersey.
print(df_counties)

   STATE COUNTY                           NAME  P2_001N
0     34    003      Bergen County, New Jersey   955732
1     34    009    Cape May County, New Jersey    95263
2     34    015  Gloucester County, New Jersey   302294
3     34    021      Mercer County, New Jersey   387340
4     34    027      Morris County, New Jersey   509285
5     34    033       Salem County, New Jersey    64837
6     34    039       Union County, New Jersey   575345
7     34    001    Atlantic County, New Jersey   274534
8     34    005  Burlington County, New Jersey   461860
9     34    007      Camden County, New Jersey   523485
10    34    011  Cumberland County, New Jersey   154152
11    34    013       Essex County, New Jersey   863728
12    34    017      Hudson County, New Jersey   724854
13    34    019   Hunterdon County, New Jersey   128947
14    34    023   Middlesex County, New Jersey   863162
15    34    025    Monmouth County, New Jersey   643615
16    34    029       Ocean County, New Jersey  

In [6]:
# Add up P2_001N over all New Jersey counties to get the statewide total.
df_counties["P2_001N"].sum()

9288994

## Regions

In [7]:
import censusdis.data as ced

# Same dataset, year and variables as the earlier queries.
DATASET, YEAR = "dec/pl", 2020
VARIABLES = ["NAME", "P2_001N"]

# region="*" is a wildcard: the result has one row per region
# (four rows in the output printed by the next cell).
df_region = ced.download_detail(DATASET, YEAR, VARIABLES, region="*")

In [8]:
# Plain-text dump of the per-region result: REGION code, NAME and P2_001N columns.
print(df_region)

  REGION              NAME    P2_001N
0      2    Midwest Region   68985454
1      3      South Region  126266107
2      4       West Region   78588572
3      1  Northeast Region   57609148


## Block Groups

In [9]:
import censusdis.data as ced
from censusdis.states import STATE_NJ

# County code for Essex County, NJ, as listed in the county query output above.
COUNTY_ESSEX_NJ = "013"

DATASET, YEAR = "dec/pl", 2020
VARIABLES = ["NAME", "P2_001N"]

# State and county are fixed; block_group="*" is a wildcard. No tract is
# passed, and the printed result below carries a TRACT column alongside
# BLOCK_GROUP.
df_bg = ced.download_detail(
    DATASET, YEAR, VARIABLES, state=STATE_NJ, county=COUNTY_ESSEX_NJ, block_group="*"
)

In [10]:
# Show only the first and last ten rows, separated by an ellipsis line, rather
# than the whole frame. With line_width=200 each row in the output below prints
# on a single line.
print(df_bg.head(10).to_string(line_width=200))
print("\n...\n")
print(df_bg.tail(10).to_string(line_width=200))

  STATE COUNTY   TRACT BLOCK_GROUP                                                      NAME  P2_001N
0    34    013  000100           2   Block Group 2, Census Tract 1, Essex County, New Jersey     2104
1    34    013  000200           2   Block Group 2, Census Tract 2, Essex County, New Jersey     2096
2    34    013  000400           1   Block Group 1, Census Tract 4, Essex County, New Jersey     2514
3    34    013  000600           1   Block Group 1, Census Tract 6, Essex County, New Jersey     1816
4    34    013  000700           2   Block Group 2, Census Tract 7, Essex County, New Jersey     2469
5    34    013  000800           1   Block Group 1, Census Tract 8, Essex County, New Jersey     2388
6    34    013  000900           1   Block Group 1, Census Tract 9, Essex County, New Jersey     1960
7    34    013  001000           1  Block Group 1, Census Tract 10, Essex County, New Jersey     1100
8    34    013  001100           2  Block Group 2, Census Tract 11, Essex County, 

## Query Geographies

In [11]:
import censusdis.geography as cgeo

# Ask which geography specifications exist for this dataset and year.
# DATASET and YEAR are the constants defined in the earlier cells.
# The result (printed in the next cell) maps a three-digit code to a list of
# snake_case geography names.
specs = cgeo.geo_path_snake_specs(DATASET, YEAR)

In [12]:
import pprint

# Pretty-print the mapping so that each geography code starts on its own line.
pprint.pprint(specs)

{'010': ['us'],
 '020': ['region'],
 '030': ['division'],
 '040': ['state'],
 '050': ['state', 'county'],
 '060': ['state', 'county', 'county_subdivision'],
 '067': ['state', 'county', 'county_subdivision', 'subminor_civil_division'],
 '100': ['state', 'county', 'tract', 'block'],
 '140': ['state', 'county', 'tract'],
 '150': ['state', 'county', 'tract', 'block_group'],
 '155': ['state', 'place', 'county_or_part'],
 '160': ['state', 'place'],
 '170': ['state', 'consolidated_city'],
 '172': ['state', 'consolidated_city', 'place_or_part'],
 '230': ['state', 'alaska_native_regional_corporation'],
 '250': ['american_indian_area_alaska_native_area_hawaiian_home_land'],
 '252': ['american_indian_area_alaska_native_area_reservation_or_statistical_entity_only'],
 '254': ['american_indian_area_off_reservation_trust_land_only_hawaiian_home_land'],
 '280': ['state',
         'american_indian_area_alaska_native_area_hawaiian_home_land_or_part'],
 '281': ['state',
         'american_indian_area_ala

## CSAs

In [13]:
import censusdis.data as ced

# Same dataset, year and variables as the earlier queries.
DATASET, YEAR = "dec/pl", 2020
VARIABLES = ["NAME", "P2_001N"]

# Wildcard over combined statistical areas: the result has one row per CSA.
df_csa = ced.download_detail(
    DATASET,
    YEAR,
    VARIABLES,
    combined_statistical_area="*",
)

In [14]:
# Show only the first and last ten rows, separated by an ellipsis line, rather
# than the whole frame.
print(df_csa.head(10).to_string(line_width=200))
print("\n...\n")
print(df_csa.tail(10).to_string(line_width=200))

  COMBINED_STATISTICAL_AREA                                                     NAME  P2_001N
0                       104                               Albany-Schenectady, NY CSA  1190727
1                       106                   Albuquerque-Santa Fe-Las Vegas, NM CSA  1162523
2                       107                               Altoona-Huntingdon, PA CSA   166914
3                       108                            Amarillo-Pampa-Borger, TX CSA   311362
4                       118                          Appleton-Oshkosh-Neenah, WI CSA   414877
5                       120                         Asheville-Marion-Brevard, NC CSA   546579
6                       122  Atlanta--Athens-Clarke County--Sandy Springs, GA-AL CSA  6930423
7                       140                                  Bend-Prineville, OR CSA   222991
8                       142                      Birmingham-Hoover-Talladega, AL CSA  1350646
9                       144                              Blo