### Imports and Read Data

In [30]:
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
census_data = pd.read_csv('/home/ec2-user/capstone/broadband-capstone/data/relabeled_census.csv')

In [3]:
census_data.columns

Index(['NAME', 'median_age_overall', 'median_age_male', 'median_age_female',
       'state', 'county', 'tract', 'employment_rate', 'median_income',
       'total_households', 'ave_household_size', 'ave_family_size',
       'total_population', 'median_house_value', 'pct_white',
       'pct_hisp_latino', 'pct_black', 'pct_native', 'pct_asian', 'pct_hi_pi',
       'pct_other_race', 'pct_two+_race', 'pct_rent_burdened', 'poverty_rate',
       'pct_pop_bachelors+', 'pct_pop_hs+', 'pct_internet',
       'pct_internet_dial_up', 'pct_internet_broadband_any_type',
       'pct_internet_cellular', 'pct_only_cellular',
       'pct_internet_broadband_fiber', 'pct_internet_broadband_satellite',
       'pct_internet_only_satellite', 'pct_internet_other',
       'pct_internet_no_subscrp', 'pct_internet_none', 'pct_computer',
       'pct_computer_with_dialup', 'pct_computer_with_broadband',
       'pct_computer_no_internet', 'pct_no_computer'],
      dtype='object')

### Visualize demographics against broadband accessibility, grouped by zip code

Relevant fields:
- pct_white, pct_hisp_latino, pct_black, pct_native, pct_asian, pct_hi_pi, pct_other_race, pct_two+_race
- pct_internet_broadband_any_type, pct_internet, pct_internet_dial_up, pct_internet_cellular, pct_only_cellular, pct_internet_broadband_fiber, pct_internet_broadband_satellite, pct_internet_only_satellite, pct_internet_other, pct_internet_no_subscrp, pct_internet_none

Expected Output:

| Zipcode | Dominant Ethnic Group | Percentage with Broadband Access |
| --- | --- | ---| 
| 63021 | White | 70% |
| 60654 | Black | 60% |


In [4]:
broadband_by_demographics = census_data[['NAME',
                                         'pct_white',
                                         'pct_hisp_latino', 
                                         'pct_black', 
                                         'pct_native', 
                                         'pct_asian', 
                                         'pct_hi_pi',
                                         'pct_other_race', 
                                         'pct_two+_race',
                                         'pct_internet_broadband_any_type']]

broadband_by_demographics.rename(columns = {'pct_white':'White',
                                          'pct_hisp_latino':'Hispanic/Latino', 
                                          'pct_black':'Black', 
                                          'pct_native':'Native American', 
                                          'pct_asian':'Asian', 
                                          'pct_hi_pi':'Hawaiian/Pacific Islander',
                                          'pct_other_race': 'Other', 
                                          'pct_two+_race': '2+ Races'}, 
                                 inplace = True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [5]:
broadband_by_demographics['primary_ethnicity'] = broadband_by_demographics[['White',
                                                                           'Hispanic/Latino',
                                                                           'Black',
                                                                           'Native American',
                                                                           'Asian',
                                                                           'Hawaiian/Pacific Islander',
                                                                           'Other',
                                                                           '2+ Races']].idxmax(axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [16]:
bins = [0, 0.50, 0.75, 0.9, 1, np.inf]
names = ['<50%', '50-75%', '75-90%', '90-100%', 'impossible']

broadband_by_demographics['Broadband Accessibility'] = pd.cut(broadband_by_demographics['pct_internet_broadband_any_type'], 
                                                              bins, 
                                                              labels=names)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [81]:
grouped_demographics = broadband_by_demographics[['primary_ethnicity','Broadband Accessibility','NAME']].groupby(['primary_ethnicity','Broadband Accessibility'], observed=True).count()
grouped_demographics.rename(columns = {'NAME': 'counts'}, inplace=True)

grouped_demographics_pct = grouped_demographics.groupby(level=0).apply(lambda x: 100 * x / float(x.sum())).unstack(level=1)
grouped_demographics_pct.columns = grouped_demographics_pct.columns.droplevel(0)

grouped_demographics_pct


Broadband Accessibility,50-75%,75-90%,90-100%,<50%
primary_ethnicity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2+ Races,23.076923,69.230769,7.692308,
Asian,5.059733,21.363317,73.295854,0.281096
Black,33.44464,24.657293,36.063269,5.834798
Hawaiian/Pacific Islander,44.444444,50.0,5.555556,
Hispanic/Latino,21.010333,22.812859,53.869116,2.307692
Native American,33.606557,4.918033,24.590164,36.885246
Other,,100.0,,
White,15.432471,34.989365,48.88692,0.691244


In [83]:
fig = px.bar(grouped_demographics_pct, 
             barmode='group', 
             color_discrete_sequence=px.colors.qualitative.D3,
             title='Broadband Accesibility by Demographic Group')
fig.show()