# Tech Access in the USA 

## Set up environment

In [79]:
# one-time setup: install dependencies (CensusData: https://pypi.org/project/CensusData/)
# %pip install CensusData
# %pip install pandas_gbq

In [81]:
# import modules 
import pandas as pd # data manipulation
import censusdata # connect to Census API
from utils import upload_csv_from_df, load_data_gbq, enforce_bq_schema
import datetime

# display tweaks for nicer printing: do not wrap wide frames, show 2 decimals
pd.options.display.expand_frame_repr = False
pd.options.display.precision = 2

## Select a state for case study: WA 

In [82]:
# print FIPS codes: list every state name with its censusgeo identifier
# NOTE(review): geographies are looked up for 2015 while the data cells below
# download 2018 data — confirm the year mismatch is intended
censusdata.geographies(censusdata.censusgeo([('state', '*')]), 'acs5', 2015)

{'Alabama': censusgeo((('state', '01'),)),
 'Alaska': censusgeo((('state', '02'),)),
 'Arizona': censusgeo((('state', '04'),)),
 'Arkansas': censusgeo((('state', '05'),)),
 'California': censusgeo((('state', '06'),)),
 'Colorado': censusgeo((('state', '08'),)),
 'Connecticut': censusgeo((('state', '09'),)),
 'Delaware': censusgeo((('state', '10'),)),
 'District of Columbia': censusgeo((('state', '11'),)),
 'Florida': censusgeo((('state', '12'),)),
 'Georgia': censusgeo((('state', '13'),)),
 'Hawaii': censusgeo((('state', '15'),)),
 'Idaho': censusgeo((('state', '16'),)),
 'Illinois': censusgeo((('state', '17'),)),
 'Indiana': censusgeo((('state', '18'),)),
 'Iowa': censusgeo((('state', '19'),)),
 'Kansas': censusgeo((('state', '20'),)),
 'Kentucky': censusgeo((('state', '21'),)),
 'Louisiana': censusgeo((('state', '22'),)),
 'Maine': censusgeo((('state', '23'),)),
 'Maryland': censusgeo((('state', '24'),)),
 'Massachusetts': censusgeo((('state', '25'),)),
 'Michigan': censusgeo((('stat

In [83]:
# case study: Washington State (state code '53')
# show counties in WA with their county codes
# NOTE(review): uses 2015 geographies while the data cells below use 2018 — confirm
censusdata.geographies(censusdata.censusgeo([('state', '53'), ('county', '*')]), 'acs5', 2015)

{'Adams County, Washington': censusgeo((('state', '53'), ('county', '001'))),
 'Asotin County, Washington': censusgeo((('state', '53'), ('county', '003'))),
 'Benton County, Washington': censusgeo((('state', '53'), ('county', '005'))),
 'Chelan County, Washington': censusgeo((('state', '53'), ('county', '007'))),
 'Clallam County, Washington': censusgeo((('state', '53'), ('county', '009'))),
 'Clark County, Washington': censusgeo((('state', '53'), ('county', '011'))),
 'Columbia County, Washington': censusgeo((('state', '53'), ('county', '013'))),
 'Cowlitz County, Washington': censusgeo((('state', '53'), ('county', '015'))),
 'Douglas County, Washington': censusgeo((('state', '53'), ('county', '017'))),
 'Ferry County, Washington': censusgeo((('state', '53'), ('county', '019'))),
 'Franklin County, Washington': censusgeo((('state', '53'), ('county', '021'))),
 'Garfield County, Washington': censusgeo((('state', '53'), ('county', '023'))),
 'Grant County, Washington': censusgeo((('stat

## Find relevant variables 

In [84]:
# search the 2018 ACS 5-year variable labels for "computer"
# the [133:-16] window narrows the full hit list (bounds presumably picked by
# hand — verify if the variable list changes); only the first 10 hits are
# displayed for easier printing
computer_hits = censusdata.search('acs5', 2018, 'label', 'computer')[133:-16]
computer_hits[:10]

[('B28001_007E',
  'TYPES OF COMPUTERS IN HOUSEHOLD',
  'Estimate!!Total!!Has one or more types of computing devices!!Tablet or other portable wireless computer'),
 ('B28001_008E',
  'TYPES OF COMPUTERS IN HOUSEHOLD',
  'Estimate!!Total!!Has one or more types of computing devices!!Tablet or other portable wireless computer!!Tablet or other portable wireless computer with no other type of computing device'),
 ('B28001_009E',
  'TYPES OF COMPUTERS IN HOUSEHOLD',
  'Estimate!!Total!!Has one or more types of computing devices!!Other computer'),
 ('B28001_010E',
  'TYPES OF COMPUTERS IN HOUSEHOLD',
  'Estimate!!Total!!Has one or more types of computing devices!!Other computer!!Other computer with no other type of computing device'),
 ('B28001_011E',
  'TYPES OF COMPUTERS IN HOUSEHOLD',
  'Estimate!!Total!!No Computer'),
 ('B28003_002E',
  'PRESENCE OF A COMPUTER AND TYPE OF INTERNET SUBSCRIPTION IN HOUSEHOLD',
  'Estimate!!Total!!Has a computer'),
 ('B28003_003E',
  'PRESENCE OF A COMPUTER 

## Extract some variables 

In [90]:
# download county-level data for Washington State only (state code '53') --
# variables related to computers in household, ACS table B28001
# (not split by any characteristics)
# The state was previously the wildcard '*', which fetched every county in the
# country into a frame named and analysed as the WA case study; the nationwide
# download lives in all_states_df.
wa_computer_household = censusdata.download('acs5', 2018,
                             censusdata.censusgeo([('state', '53'),('county', '*')]),
                             ['B28001_001E', 'B28001_002E', 'B28001_003E', 'B28001_004E',
                              'B28001_005E', 'B28001_006E', 'B28001_007E', 'B28001_008E', 
                              'B28001_009E', 'B28001_010E', 'B28001_011E'
                             ])

In [5]:
# get all states: one row per county, household computer counts (ACS table B28001)
all_states_df = censusdata.download('acs5', 2018,
                             censusdata.censusgeo([('state', '*'),('county', '*')]),
                             ['B28001_001E', 'B28001_002E', 'B28001_003E', 'B28001_004E',
                              'B28001_005E', 'B28001_006E', 'B28001_007E', 'B28001_008E', 
                              'B28001_009E', 'B28001_010E', 'B28001_011E'
                             ])

# rename columns for easier understanding
# (reassign instead of inplace=True so the frame is not mutated behind the reader's back)
all_states_df = all_states_df.rename(columns = 
         {'B28001_001E':'computer_total', 
          'B28001_002E':'computer_oneplus', 
          'B28001_003E':'computer_oneplus_desktoplaptop', 
          'B28001_004E':'computer_oneplus_desktoplaptop_nooth',                      
          'B28001_005E':'computer_oneplus_smartphone', 
          'B28001_006E':'computer_oneplus_smartphone_nooth', 
          'B28001_007E':'computer_oneplus_tablet', 
          'B28001_008E':'computer_oneplus_tablet_nooth', 
          'B28001_009E':'computer_oneplus_othercomp', 
          'B28001_010E':'computer_oneplus_othercomp_nooth', 
          'B28001_011E':'computer_none'})

# Parse index for state and county.
# Each index entry prints as
#   "Washington County, Mississippi: Summary level: 050, state:28> county:151"
# so splitting on the first "," and the first ":" yields [county, state, rest].
geo_labels = all_states_df.index.to_series().astype(str)
name_parts = geo_labels.str.split(pat=",|:", n = 2, expand = True)

# strip() removes the space that follows the comma; without it every state
# value is stored (and uploaded) as e.g. " Mississippi"
all_states_df['county'] = name_parts[0].str.strip()
all_states_df['state'] = name_parts[1].str.strip()

In [77]:
# preview the first rows, including the parsed county and state columns
all_states_df.head()

Unnamed: 0,computer_total,computer_oneplus,computer_oneplus_desktoplaptop,computer_oneplus_desktoplaptop_nooth,computer_oneplus_smartphone,computer_oneplus_smartphone_nooth,computer_oneplus_tablet,computer_oneplus_tablet_nooth,computer_oneplus_othercomp,computer_oneplus_othercomp_nooth,computer_none,county,state
"Washington County, Mississippi: Summary level: 050, state:28> county:151",18299,13858,10242,1263,11819,1752,8162,193,785,48,4441,Washington County,Mississippi
"Perry County, Mississippi: Summary level: 050, state:28> county:111",4563,3552,2445,295,3081,631,1903,43,237,13,1011,Perry County,Mississippi
"Choctaw County, Mississippi: Summary level: 050, state:28> county:019",3164,2345,1665,323,1894,392,1212,25,61,0,819,Choctaw County,Mississippi
"Itawamba County, Mississippi: Summary level: 050, state:28> county:057",8706,7103,5298,717,6228,1148,3874,74,250,8,1603,Itawamba County,Mississippi
"Carroll County, Mississippi: Summary level: 050, state:28> county:015",3658,2813,2371,325,2413,234,1967,52,79,0,845,Carroll County,Mississippi


In [82]:
# Stage the DataFrame as a timestamped CSV in Google Cloud Storage,
# then import that CSV into a BigQuery table named after `alias`.

today_date = datetime.datetime.today().strftime('%Y_%m_%d_%H_%M_%S')
alias = 'test'
bucket = 'acs_2018'
df = all_states_df #DF_NAME_HERE
dataset = 'acs_2018'
project_id = 'tech-access-276720'

# both steps must point at the same object, so build its location once
gcs_folder = alias+'/'
csv_name = f'{alias}_{today_date}.csv'

upload_csv_from_df(
    bucket_name=bucket,
    bucket_folder=gcs_folder,
    file_name=csv_name,
    dataframe=df,
)

### BEFORE UPLOADING, DECIDE WHETHER YOU WANT TO APPEND OR REPLACE:
### APPEND=TRUE APPENDS, APPEND=FALSE REPLACES
load_data_gbq(
    dataset_id=dataset,
    table_name=alias,
    bucket_name=bucket,
    bucket_folder=gcs_folder,
    file_name=csv_name,
    schema=None,
    autodetect=True,
    append=False,
    bad_records=0,
)


test_2020_05_15_18_46_27.csv uploaded to acs_2018 / test/
Starting job c62e1ce3-dfdd-449e-a242-4aff5ce07558
Job finished.
Loaded 3220 rows.


In [83]:
# read the freshly loaded table back from BigQuery to verify the upload
project_id = 'tech-access-276720'
readback_query = f"SELECT * FROM {dataset}.{alias}"
test_df = pd.read_gbq(
    readback_query,
    project_id=project_id,
    dialect='standard',
)

Downloading: 100%|██████████| 3220/3220 [00:00<00:00, 5218.66rows/s]


In [84]:
# preview the rows read back from BigQuery
test_df.head()

Unnamed: 0,computer_total,computer_oneplus,computer_oneplus_desktoplaptop,computer_oneplus_desktoplaptop_nooth,computer_oneplus_smartphone,computer_oneplus_smartphone_nooth,computer_oneplus_tablet,computer_oneplus_tablet_nooth,computer_oneplus_othercomp,computer_oneplus_othercomp_nooth,computer_none,county,state
0,16840,14220,11817,2043,11280,1134,8567,199,664,36,2620,Des Moines County,Iowa
1,183753,169229,149108,14249,147457,9206,112971,1269,4298,103,14524,Polk County,Iowa
2,38975,33580,28168,4077,27747,2633,21474,379,1009,49,5395,Woodbury County,Iowa
3,52976,46150,40657,5924,37996,2666,28661,355,1374,93,6826,Black Hawk County,Iowa
4,89807,80999,72733,7970,68658,3926,52490,697,5218,116,8808,Linn County,Iowa


In [91]:
# map the raw B28001 variable codes to readable column names
computer_column_names = {
    'B28001_001E': 'computer_total',
    'B28001_002E': 'computer_oneplus',
    'B28001_003E': 'computer_oneplus_desktoplaptop',
    'B28001_004E': 'computer_oneplus_desktoplaptop_nooth',
    'B28001_005E': 'computer_oneplus_smartphone',
    'B28001_006E': 'computer_oneplus_smartphone_nooth',
    'B28001_007E': 'computer_oneplus_tablet',
    'B28001_008E': 'computer_oneplus_tablet_nooth',
    'B28001_009E': 'computer_oneplus_othercomp',
    'B28001_010E': 'computer_oneplus_othercomp_nooth',
    'B28001_011E': 'computer_none',
}
wa_computer_household = wa_computer_household.rename(computer_column_names, axis='columns')

In [92]:
# show the first rows of the renamed county-level table
wa_computer_household.head()

Unnamed: 0,computer_total,computer_oneplus,computer_oneplus_desktoplaptop,computer_oneplus_desktoplaptop_nooth,computer_oneplus_smartphone,computer_oneplus_smartphone_nooth,computer_oneplus_tablet,computer_oneplus_tablet_nooth,computer_oneplus_othercomp,computer_oneplus_othercomp_nooth,computer_none
"Kitsap County, Washington: Summary level: 050, state:53> county:035",101662,95844,89891,9122,81944,2600,65839,550,2622,0,5818
"King County, Washington: Summary level: 050, state:53> county:033",865627,821382,769844,57238,732772,23297,570467,3976,48635,660,44245
"Pend Oreille County, Washington: Summary level: 050, state:53> county:051",5782,5110,4365,733,4032,363,3067,33,297,6,672
"San Juan County, Washington: Summary level: 050, state:53> county:055",8025,7460,7093,1118,5880,156,4050,52,306,7,565
"Whitman County, Washington: Summary level: 050, state:53> county:075",17910,16957,16023,1761,14466,432,10877,82,471,14,953


In [None]:
# TODO: parse the index into county name and state name


In [93]:
# check that the total equals the sum of households with one or more devices
# and households with none (one True/False per county)
households_accounted_for = wa_computer_household['computer_oneplus'].add(wa_computer_household['computer_none'])
wa_computer_household['computer_total'].eq(households_accounted_for)

Kitsap County, Washington: Summary level: 050, state:53> county:035          True
King County, Washington: Summary level: 050, state:53> county:033            True
Pend Oreille County, Washington: Summary level: 050, state:53> county:051    True
San Juan County, Washington: Summary level: 050, state:53> county:055        True
Whitman County, Washington: Summary level: 050, state:53> county:075         True
Yakima County, Washington: Summary level: 050, state:53> county:077          True
Cowlitz County, Washington: Summary level: 050, state:53> county:015         True
Ferry County, Washington: Summary level: 050, state:53> county:019           True
Lewis County, Washington: Summary level: 050, state:53> county:041           True
Grays Harbor County, Washington: Summary level: 050, state:53> county:027    True
Island County, Washington: Summary level: 050, state:53> county:029          True
Wahkiakum County, Washington: Summary level: 050, state:53> county:069       True
Franklin County,