In [1]:
import altair as alt
from vega_datasets import data
import pandas as pd
import numpy as np

In [2]:
# Data Source: https://broadbandusa.maps.arcgis.com/home/item.html?id=2d370cfdbbdf447880127994e63b7c20
# Importing data on county-level broadband access

county_bb_data = pd.read_csv('CountyBroadband2021.csv').dropna()
county_bb_data.head()

Unnamed: 0,OBJECTID,FIPS,Name,State,Households,"Percent of Households that Have No Computer, Smartphone, or Tablet",Percent of Households with No Internet Access,Percent of Population whose income in the past 12 months is below poverty level,Ookla Median Download Speed (Mbps),Ookla Median Upload Speed (Mbps),Population 2019 (FCC Estimate),M-Lab Median Download Speed (Mbps),M-Lab Median Upload Speed (Mbps),Microsoft Broadband Usage Percentage,Microsoft Broadband Usage,ShapeArea,ShapeLength
0,1,1001,Autauga County,Alabama,21397.0,10.8,17.2,15.2,72.884,11.212,55869,40.80152,10.77479,28.0,0.28,2208588000.0,249970.1915
1,2,1003,Baldwin County,Alabama,80930.0,9.1,13.8,10.4,25.996,9.529,223230,16.93789,4.648978,30.0,0.3,5909266000.0,536067.2537
2,3,1005,Barbour County,Alabama,9345.0,22.8,32.6,30.7,30.183,10.045,24686,24.93791,6.325272,18.0,0.18,3257907000.0,320882.8479
3,4,1007,Bibb County,Alabama,6891.0,21.9,26.7,18.1,15.956,4.931,22394,13.15805,6.211988,7.0,0.07,2311990000.0,227919.4181
4,5,1009,Blount County,Alabama,20847.0,19.0,23.9,13.6,15.635,5.02,57826,8.286291,2.143121,9.0,0.09,2456904000.0,292630.2818


In [3]:
# Renaming columns 
county_bb_data = county_bb_data.rename(columns={'Percent of Households that Have No Computer, Smartphone, or Tablet': 'pct_nodevices',
                                                'Percent of Households with No Internet Access': 'pct_nointaccess', 
                                                'Percent of Population whose income in the past 12 months is below poverty level':'pct_poverty', 
                                                'Ookla Median Download Speed (Mbps)':'mediandownloadspeed', 
                                                'Ookla Median Upload Speed (Mbps)':'medianuploadspeed', 
                                                'Microsoft Broadband Usage Percentage':'bb_usepercent', 
                                                'Microsoft Broadband Usage':'bb_use', 
                                                'FIPS':'id', 
                                                'Name':'county',
                                                'Population 2019 (FCC Estimate)':'population'})
county_bb_data.head()


Unnamed: 0,OBJECTID,id,county,State,Households,pct_nodevices,pct_nointaccess,pct_poverty,mediandownloadspeed,medianuploadspeed,population,M-Lab Median Download Speed (Mbps),M-Lab Median Upload Speed (Mbps),bb_usepercent,bb_use,ShapeArea,ShapeLength
0,1,1001,Autauga County,Alabama,21397.0,10.8,17.2,15.2,72.884,11.212,55869,40.80152,10.77479,28.0,0.28,2208588000.0,249970.1915
1,2,1003,Baldwin County,Alabama,80930.0,9.1,13.8,10.4,25.996,9.529,223230,16.93789,4.648978,30.0,0.3,5909266000.0,536067.2537
2,3,1005,Barbour County,Alabama,9345.0,22.8,32.6,30.7,30.183,10.045,24686,24.93791,6.325272,18.0,0.18,3257907000.0,320882.8479
3,4,1007,Bibb County,Alabama,6891.0,21.9,26.7,18.1,15.956,4.931,22394,13.15805,6.211988,7.0,0.07,2311990000.0,227919.4181
4,5,1009,Blount County,Alabama,20847.0,19.0,23.9,13.6,15.635,5.02,57826,8.286291,2.143121,9.0,0.09,2456904000.0,292630.2818


In [4]:
state_region_key = pd.read_csv('https://raw.githubusercontent.com/cphalpert/census-regions/master/us%20census%20bureau%20regions%20and%20divisions.csv')
state_region_key.head()
    


Unnamed: 0,State,State Code,Region,Division
0,Alaska,AK,West,Pacific
1,Alabama,AL,South,East South Central
2,Arkansas,AR,South,West South Central
3,Arizona,AZ,West,Mountain
4,California,CA,West,Pacific


In [5]:
countybb = pd.merge(state_region_key, county_bb_data, how='left', on='State')
countybb.head()

Unnamed: 0,State,State Code,Region,Division,OBJECTID,id,county,Households,pct_nodevices,pct_nointaccess,pct_poverty,mediandownloadspeed,medianuploadspeed,population,M-Lab Median Download Speed (Mbps),M-Lab Median Upload Speed (Mbps),bb_usepercent,bb_use,ShapeArea,ShapeLength
0,Alaska,AK,West,Pacific,68,2013,Aleutians East Borough,890.0,12.7,33.6,13.8,1.871,0.947,3337,3.557595,2.434108,1.0,0.01,57988810000.0,5512876.0
1,Alaska,AK,West,Pacific,69,2016,Aleutians West Census Area,1187.0,7.2,18.2,6.8,1.436,0.824,5634,1.03882,0.48355,2.0,0.02,33090200000.0,5411096.0
2,Alaska,AK,West,Pacific,70,2020,Anchorage Municipality,106567.0,4.1,7.5,9.0,67.467,15.416,288000,15.85771,6.084588,55.0,0.55,19380080000.0,672638.8
3,Alaska,AK,West,Pacific,71,2050,Bethel Census Area,4489.0,12.6,23.4,28.3,5.682,1.873,18386,3.096083,1.351883,3.0,0.03,463000000000.0,6120351.0
4,Alaska,AK,West,Pacific,72,2060,Bristol Bay Borough,314.0,7.6,15.6,7.2,5.053,0.959,836,4.036467,1.048883,4.0,0.04,4947585000.0,395185.4


In [6]:
# Estimate of how many people in the U.S. have no home internet access
pop_nointaccess = countybb['pct_nointaccess'] * countybb['population']/100
sum(pop_nointaccess)

45333287.498999946

In [7]:
# Estimate of how many people in the U.S. are not using internet at broadband speeds
no_bb_use = countybb['bb_use'] * countybb['population']
sum(no_bb_use)

170197856.54999954

In [8]:
out = pd.read_csv('out.csv')
out.head()

Unnamed: 0,Midwest,Northeast,South,West
0,22.9,9.0,17.2,33.6
1,19.6,13.1,13.8,18.2
2,23.7,10.6,32.6,7.5
3,25.9,10.6,26.7,23.4
4,19.2,13.5,23.9,15.6


In [9]:
vert = alt.Chart(countybb).mark_bar().encode(
    x='count()',
    y=alt.Y('pct_poverty:Q', bin=alt.Bin(maxbins=30)),
    color='Region:O'
).properties(
    height=300,
    width=100)

In [10]:
flat = alt.Chart(countybb[countybb["Region"] == 'Northeast']).mark_bar().encode(
    alt.X('pct_nointaccess', title='Percent of Households with No Internet Access', bin=alt.Bin(maxbins=30)),
    alt.Y('count()'),
    color='Region'
).properties(
    height=300,
    width=500)

flat

In [11]:
countybb['Division'].value_counts()

West North Central    616
South Atlantic        581
West South Central    470
East North Central    437
East South Central    363
Mountain              281
Pacific               165
Middle Atlantic       150
New England            67
Name: Division, dtype: int64

In [12]:
# Higher income counties are more likely to have higher rates of household home internet access

base = alt.Chart(countybb)
# Configure the scatterplot
points = base.mark_point().encode(
    x=alt.X('pct_poverty', title=''),
    y=alt.Y('pct_nointaccess', title=''),
    color='Region:O',
    size='population'
).properties(
    title = "Internet Access vs. Poverty Levels Across U.S. Counties"
).properties(
    height=300,
    width=400)

# Configure the ticks
tick_axis = alt.Axis(labels=False, domain=False, ticks=False)

x_ticks = base.mark_bar().encode(
    alt.X('pct_poverty', bin=alt.Bin(maxbins=30), axis=tick_axis, title='% of Population Under Poverty Line'),
    alt.Y('count()', title='',axis=tick_axis),
    color='Region:O'
).properties(
    height=100,
    width=400)

y_ticks = base.mark_bar().encode(
    alt.X('count()', title='', axis=tick_axis),
    alt.Y('pct_nointaccess', bin=alt.Bin(maxbins=30), axis=tick_axis, title='% of Households with No Home Internet Access'),
    color='Region:O'
).properties(
    height=300,
    width=100)
    
# Build the chart
y_ticks | (points & x_ticks)


In [13]:
# Higher income counties are more likely to have higher rates of household home internet access

brush = alt.selection(type='interval')
base = alt.Chart(countybb).add_selection(brush)
# Configure the points
points = base.mark_circle(
    opacity=0.5
).encode(
    x=alt.X('pct_poverty', title=''),
    y=alt.Y('pct_nointaccess', title=''),
    color=alt.condition(brush, 'Region', alt.value('grey')),
).properties(
    title = "Internet Access vs. Poverty Levels Across U.S. Counties"
)

# Configure the ticks
tick_axis = alt.Axis(labels=False, domain=False, ticks=False)

x_ticks = base.mark_tick(
    opacity=0.2
).encode(
    alt.X('pct_poverty', axis=tick_axis, title='% of Population Under the Poverty Line'),
    alt.Y('Region', axis=tick_axis),
    color=alt.condition(brush, 'Region', alt.value('lightgrey'))
)

y_ticks = base.mark_tick(
    opacity=0.2
).encode(
    alt.X('Region', axis=tick_axis),
    alt.Y('pct_nointaccess', axis=tick_axis, title='% of Households with No Home Internet Access'),
    color=alt.condition(brush, 'Region', alt.value('lightgrey'))
)

# Build the chart
y_ticks | (points & x_ticks)



In [14]:
pct_noint = countybb['pct_nointaccess']
pct_noint_binned = []

for i in pct_noint:
    if i < 10.0:
        pct_noint_binned.append(10)
    elif i < 20.0:
        pct_noint_binned.append(20)
    elif i < 30.0:
        pct_noint_binned.append(30)
    elif i < 40.0:
        pct_noint_binned.append(40)
    elif i < 50.0:
        pct_noint_binned.append(50)
    elif i < 60.0:
        pct_noint_binned.append(60)
    elif i < 70.0:
        pct_noint_binned.append(70)
    elif i >= 70.0:
        pct_noint_binned.append(80)

In [15]:
countybb['pct_nointaccess_binned'] = pct_noint_binned

In [16]:
counties = alt.topo_feature(data.us_10m.url, 'counties')
states = alt.topo_feature(data.us_10m.url, feature='states')

# Filling in empty counties grey
null = alt.Chart(states).mark_geoshape(
    strokeWidth=0,
    fill='grey'
)
# Coloring in county-level data
counties = alt.Chart(counties).mark_geoshape(
    stroke='white',
    strokeWidth=0.3
).encode(
    color=alt.Color('mediandownloadspeed:Q', 
                    legend=alt.Legend(title="Median Download Speed (mbps)", tickCount=7), 
                    scale=alt.Scale(scheme='redyellowblue', type='log'))
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(countybb, 'id', ['mediandownloadspeed'])
).project(
    type='albersUsa'
).properties(
    width=700,
    height=500,
    title='Broadband Use Across U.S. Counties (2021)'
)

# Overlaying state outlines
states = alt.Chart(states).mark_geoshape(
    stroke='white',
    strokeWidth=2,
    fillOpacity=0
)

alt.layer(null, counties, states).configure_view(strokeWidth=0).configure_legend(
  orient='bottom'
)




In [17]:
# https://broadbandusa.maps.arcgis.com/home/search.html?q=county%20csv

counties = alt.topo_feature(data.us_10m.url, 'counties')
states = alt.topo_feature(data.us_10m.url, feature='states')

# Filling in empty counties grey
null = alt.Chart(states).mark_geoshape(
    strokeWidth=0,
    fill='grey'
)
# Coloring in county-level data
counties = alt.Chart(counties).mark_geoshape(
    stroke='white',
    strokeWidth=0.3
).encode(
    color=alt.Color('pct_nointaccess:Q', 
                    legend=alt.Legend(title="% HHs Without Internet Access", tickCount=7), 
                    scale=alt.Scale(scheme='redyellowblue', reverse=True))
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(countybb, 'id', ['pct_nointaccess'])
).project(
    type='albersUsa'
).properties(
    width=700,
    height=500,
    title='Home Internet Access Across U.S. Counties (2021)'
)

# Overlaying state outlines
states = alt.Chart(states).mark_geoshape(
    stroke='white',
    strokeWidth=2,
    fillOpacity=0
)

alt.layer(null, counties, states).configure_view(strokeWidth=0).configure_legend(
  orient='bottom'
)




In [18]:
# https://broadbandusa.maps.arcgis.com/home/search.html?q=county%20csv

counties = alt.topo_feature(data.us_10m.url, 'counties')
states = alt.topo_feature(data.us_10m.url, feature='states')

# Filling in empty counties grey
null = alt.Chart(states).mark_geoshape(
    strokeWidth=0,
    fill='grey'
)

# Coloring in county-level data
counties = alt.Chart(counties).mark_geoshape(
    stroke='white',
    strokeWidth=0.3
).encode(
    color=alt.Color('bb_usepercent:Q', 
                    legend=alt.Legend(title="% Broadband Use", tickCount=6), 
                    scale=alt.Scale(scheme='yellowgreenblue')),
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(countybb, 'id', ['bb_usepercent'])
).project(
    type='albersUsa'
).properties(
    width=700,
    height=500,
    title='Broadband Use Across U.S. Counties (2021)'
)

# Overlaying state outlines
states = alt.Chart(states).mark_geoshape(
    stroke='white',
    strokeWidth=2,
    fillOpacity=0
)

alt.layer(null, counties, states).configure_view(strokeWidth=0).configure_legend(
  orient='bottom'
)


In [19]:
community = pd.read_csv('pew_broadband_rural.csv')

base = alt.Chart(community).encode(
    alt.X('Date:T', axis=alt.Axis(title="Year", grid=False))
).properties(
    width=350,
    height=150,
    title="Access By Community"
)

g1_c = base.mark_line(color='#425ba8').encode(alt.Y('Suburban', axis = alt.Axis(title='% of Adults with Home Broadband', grid=False)))
g2_c = base.mark_line(color='#a4d5ef').encode(alt.Y('Urban'))
g3_c = base.mark_line(color='#7ac696').encode(alt.Y('Rural'))


comm_bb = (g1_c + g2_c + g3_c).resolve_scale(y="shared")
comm_bb


In [20]:
race = pd.read_csv('pew_broadband_race.csv')

base = alt.Chart(race).encode(
    alt.X('Date:T', axis=alt.Axis(title="Year", grid=False))
).properties(
    width=350,
    height=150,
    title="Access By Race"
)

g1_race = base.mark_line(color='#425ba8').encode(alt.Y('White', axis = alt.Axis(title='% of Adults with Home Broadband', grid=False)))
g2_race = base.mark_line(color='#a4d5ef').encode(alt.Y('Black'))
g3_race = base.mark_line(color='#7ac696').encode(alt.Y('Hispanic'))

race_bb = (g1_race + g2_race + g3_race).resolve_scale(y="shared")

In [33]:
edu = pd.read_csv('pew_broadband_education.csv')

base = alt.Chart(edu).encode(
    alt.X('Date:T', axis=alt.Axis(title="Year", grid=False))
).properties(
    width=350,
    height=150,
    title="Access By Educational Attainment"
)

g1_edu = base.mark_line(color='#eeca3b').encode(alt.Y('Less than high school graduate', axis = alt.Axis(title='% of Adults with Home Broadband', grid=False)))
g2_edu = base.mark_line(color='#7ac696').encode(alt.Y('High school graduate'))
g3_edu = base.mark_line(color='#a4d5ef').encode(alt.Y('Some college'))
g4_edu = base.mark_line(color='#425ba8').encode(alt.Y('College graduate'))


edu_bb = (g1_edu + g2_edu + g3_edu + g4_edu).resolve_scale(y="shared")


In [22]:
income = pd.read_csv('pew_broadband_income.csv')

base = alt.Chart(income).encode(
    alt.X('Date:T', axis=alt.Axis(title="Year", grid=False))
).properties(
    width=350,
    height=150,
    title="Access By Annual Household Income Level"
)

g1_inc = base.mark_line(color='#eeca3b').encode(alt.Y('Less than $30,000', axis = alt.Axis(title='% of Adults with Home Broadband', grid=False)))
g2_inc = base.mark_line(color='#7ac696').encode(alt.Y('$30,000-$49,999'))
g3_inc = base.mark_line(color='#a4d5ef').encode(alt.Y('$50,000-$74,999'))
g4_inc = base.mark_line(color='#425ba8').encode(alt.Y('$75,000+'))


income_bb = (g1_inc + g2_inc + g3_inc + g4_inc).resolve_scale(y="shared")


In [27]:
# https://www.pewresearch.org/internet/fact-sheet/internet-broadband/?menuItem=2ab2b0be-6364-4d3a-8db7-ae134dbc05cd

left = alt.vconcat(edu_bb, comm_bb)
right = alt.vconcat(income_bb, race_bb)

alt.hconcat(right, left).configure_view(stroke=None).configure_axis(
    labelFontSize=12,
    labelOverlap='parity',
)



In [24]:
demographics = pd.read_csv('broadband_demographics.csv')
demographics

Unnamed: 0,Demographic,Home Broadband Access
0,U.S. Adults,77
1,"Less than $30,000",57
2,"$30,000-$49,000",74
3,"$50,000-$99,000",87
4,"$100,000+",92
5,Less than HS degree,46
6,High School Graduate,59
7,Some College,80
8,College Graduate,94
9,Urban,77


In [25]:
# Provenance
# Pew Research Center, 2019
# https://www.pewresearch.org/internet/fact-sheet/internet-broadband/?menuItem=3109350c-8dba-4b7f-ad52-a3e976ab8c8f

order = ['U.S. Adults',
'Less than $30,000',
'$30,000-$49,000',
'$50,000-$99,000',
'$100,000+',
'Less than HS degree',
'High School Graduate',
'Some College',
'College Graduate',
'Rural',
'Urban',
'Suburban',
'Hispanic',
'Black',
'White']

bars = alt.Chart(demographics).mark_bar().encode(
    alt.Y('Demographic:O', axis=alt.Axis(grid=False), sort=order),
    alt.X('Home Broadband Access:Q', axis=alt.Axis(title='% Adults with Home Broadband Access', grid=False)),
)

text = bars.mark_text(
    align='left',
    baseline='middle',
    dx=3
).encode(
    text='Home Broadband Access:Q'
)

(bars + text).configure_view(strokeWidth=0).properties(
    title = 'U.S. Broadband Access Across Demographic Groups (2021)'
)