[Census Documentation](https://www2.census.gov/programs-surveys/acs/tech_docs/subject_definitions/2019_ACSSubjectDefinitions.pdf)

In [1]:
%config Completer.use_jedi = False

In [2]:
from census.getCensus import getCensus
from census.variables.models import GroupCode
from census.models import GeoDomain
import pandas as pd
from pprint import pprint

We'll use these since these are the primary geographies we care about

In [3]:
geoDomains = [GeoDomain('congressional district'), GeoDomain('state')]

In [4]:
c = getCensus(2019, shouldCacheOnDisk=True, shouldLoadFromExistingCache=True, shouldReplaceColumnHeaders=True)

Get the group data

In [5]:
_ = c.getGroups()

# Education statistics

In [6]:
educVars = c.getVariablesByGroup(c.groups.EducationalAttainmentForThePopulation25YearsAndOver.code)

  0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
educData = c.getStats(educVars['code'].tolist(), *geoDomains)


|          | 0/? [00:00<?, ?it/s]

In [8]:
educPcts = educData.loc[:,['NAME', 'congressional district', 'state']]

In [9]:
educPcts['pctNoHs'] = (educData.Estimate_Total_NoSchoolingCompleted \
                    + educData.Estimate_Total_NurseryTo4thGrade \
                    + educData.Estimate_Total_5thAnd6thGrade \
                    + educData.Estimate_Total_7thAnd8thGrade \
                    + educData.Estimate_Total_9thGrade \
                    + educData.Estimate_Total_10thGrade \
                    + educData.Estimate_Total_11thGrade \
                    + educData.Estimate_Total_12thGradeNoDiploma) / educData.Estimate_Total
educPcts['pctHs'] = (educData.Estimate_Total_RegularHighSchoolDiploma \
                     + educData.Estimate_Total_GedOrAlternativeCredential) \
                        / educData.Estimate_Total
educPcts['pctBach'] = educData.Estimate_Total_BachelorsDegree / educData.Estimate_Total
educPcts['pctSomeCollege1YrNoDegree'] = educData.Estimate_Total_SomeCollegeLessThan1Year / educData.Estimate_Total
educPcts['pctSomeCollegeManyYrsNoDegree'] = educData.Estimate_Total_SomeCollege1OrMoreYearsNoDegree / educData.Estimate_Total
educPcts['pctGradPlus'] = (educData.Estimate_Total_DoctorateDegree + \
                          educData.Estimate_Total_MastersDegree ) / educData.Estimate_Total

# Employment statistics

In [10]:
emplVars = c.getVariablesByGroup(c.groups.EmploymentStatusForThePopulation16YearsAndOver.code)
emplData = c.getStats(emplVars['code'].tolist(), *geoDomains)

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [11]:
emplPcts = emplData.loc[:,['NAME', 'state', 'congressional district']]

In [12]:
emplPcts['pctInLaborForce'] = (emplData.Estimate_Total_InLaborForce_CivilianLaborForce / 
                             emplData.Estimate_Total)
emplPcts['pctEmployed'] = (emplData.Estimate_Total_InLaborForce_CivilianLaborForce_Employed /
                         emplData.Estimate_Total_InLaborForce_CivilianLaborForce)
emplPcts['pctUnemployed'] = (emplData.Estimate_Total_InLaborForce_CivilianLaborForce_Unemployed /
                         emplData.Estimate_Total_InLaborForce_CivilianLaborForce)

In [13]:
healthInsVars = c.getVariablesByGroup(c.groups.HealthInsuranceCoverageStatusBySexByAge.code)
healthInsData = c.getStats(healthInsVars['code'].tolist(), *geoDomains)
healthInsPct = healthInsData.loc[:,['NAME', 'state', 'congressional district']]

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [14]:
healthInsPct['pctMaleWithIns'] = ( healthInsData.Estimate_Total_Male_19To64Years_WithHealthInsuranceCoverage /
                                    healthInsData.Estimate_Total_Male_19To64Years)
healthInsPct['pctFemWithIns'] = ( healthInsData.Estimate_Total_Female_19To64Years_WithHealthInsuranceCoverage /
                                    healthInsData.Estimate_Total_Female_19To64Years)
healthInsPct['pctMaleWithoutIns'] = ( healthInsData.Estimate_Total_Male_19To64Years_NoHealthInsuranceCoverage /
                                    healthInsData.Estimate_Total_Male_19To64Years)
healthInsPct['pctFemWithoutIns'] = ( healthInsData.Estimate_Total_Female_19To64Years_NoHealthInsuranceCoverage /
                                    healthInsData.Estimate_Total_Female_19To64Years)
healthInsPct['pctWithIns'] = (( healthInsData.Estimate_Total_Male_19To64Years_WithHealthInsuranceCoverage + 
                               healthInsData.Estimate_Total_Female_19To64Years_WithHealthInsuranceCoverage) /
                                (healthInsData.Estimate_Total_Male_19To64Years +
                                    healthInsData.Estimate_Total_Female_19To64Years))
healthInsPct['pctWithoutIns'] = (( healthInsData.Estimate_Total_Male_19To64Years_NoHealthInsuranceCoverage +
                                  healthInsData.Estimate_Total_Female_19To64Years_NoHealthInsuranceCoverage) /
                                (healthInsData.Estimate_Total_Male_19To64Years +
                                    healthInsData.Estimate_Total_Female_19To64Years))


In [15]:
healthInsPct.head(n=2)

Unnamed: 0,NAME,state,congressional district,pctMaleWithIns,pctFemWithIns,pctMaleWithoutIns,pctFemWithoutIns,pctWithIns,pctWithoutIns
0,"Congressional District (at Large) (116th Congress), Alaska",2,0,0.824244,0.859597,0.175756,0.140403,0.841665,0.158335
1,"Congressional District (at Large) (116th Congress), Delaware",10,0,0.885137,0.926468,0.114863,0.073532,0.906714,0.093286


# Gini Index

In [16]:
giniVars = c.getVariablesByGroup(c.groups.GiniIndexOfIncomeInequality.code)
giniData = c.getStats(giniVars['code'].tolist(), *geoDomains)

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [17]:
giniData.Estimate_GiniIndex.describe()

count    437.000000
mean       0.459643
std        0.031104
min        0.392500
25%        0.439100
50%        0.456200
75%        0.475000
max        0.581900
Name: Estimate_GiniIndex, dtype: float64

# Income

In [18]:
c.searchGroups('aggregate income')

incomeVars = c.getVariablesByGroup(c.groups.AggregateIncomeInThePast12MonthsIn2019InflationAdjustedDollars.code)
incomeData = c.getStats(incomeVars['code'].tolist(), *geoDomains)

populationVars = c.getVariablesByGroup(c.groups.TotalPopulation.code)
populationData = c.getStats(populationVars['code'].tolist(), *geoDomains)

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [19]:
avgIncome = incomeData.loc[:,['NAME', 'state', 'congressional district']]

avgIncome['avgIncome'] = (incomeData.Estimate_AggregateIncomeInThePast12MonthsIn2019InflationAdjustedDollars /
                         populationData.Estimate_Total)

In [20]:
avgIncome.head(n=2)

Unnamed: 0,NAME,state,congressional district,avgIncome
0,"Congressional District (at Large) (116th Congress), Alaska",2,0,36977.864246
1,"Congressional District (at Large) (116th Congress), Delaware",10,0,36857.973698


# Internet access

In [21]:
c.searchGroups('internet')
internetVars = c.getVariablesByGroup(c.groups.PresenceAndTypesOfInternetSubscriptionsInHousehold.code)
internetData = c.getStats(internetVars['code'].tolist(), *geoDomains)

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [22]:
[col for col in internetData.columns.tolist() if col.startswith('Estimate')]

['Estimate_Total',
 'Estimate_Total_WithAnInternetSubscription',
 'Estimate_Total_WithAnInternetSubscription_DialUpWithNoOtherTypeOfInternetSubscription',
 'Estimate_Total_WithAnInternetSubscription_BroadbandOfAnyType',
 'Estimate_Total_WithAnInternetSubscription_CellularDataPlan',
 'Estimate_Total_WithAnInternetSubscription_CellularDataPlan_CellularDataPlanWithNoOtherTypeOfInternetSubscription',
 'Estimate_Total_WithAnInternetSubscription_BroadbandSuchAsCableFiberOpticOrDsl',
 'Estimate_Total_WithAnInternetSubscription_BroadbandSuchAsCableFiberOpticOrDsl_BroadbandSuchAsCableFiberOpticOrDslWithNoOtherTypeOfInternetSubscription',
 'Estimate_Total_WithAnInternetSubscription_SatelliteInternetService',
 'Estimate_Total_WithAnInternetSubscription_SatelliteInternetService_SatelliteInternetServiceWithNoOtherTypeOfInternetSubscription',
 'Estimate_Total_WithAnInternetSubscription_OtherServiceWithNoOtherTypeOfInternetSubscription',
 'Estimate_Total_InternetAccessWithoutASubscription',
 'Estimat

In [23]:
pctInternetAccess = internetData.loc[:,['NAME', 'state', 'congressional district']]

pctInternetAccess['pctHasInternet'] = (internetData.Estimate_Total_WithAnInternetSubscription /
                                      internetData.Estimate_Total)
pctInternetAccess['pctHasNoInternet'] = (internetData.Estimate_Total_NoInternetAccess /
                                        internetData.Estimate_Total)
pctInternetAccess['pctCellData'] = (internetData.Estimate_Total_WithAnInternetSubscription_CellularDataPlan /
                                   internetData.Estimate_Total_WithAnInternetSubscription) * pctInternetAccess.pctHasInternet

In [24]:
pctInternetAccess.head(n=2)

Unnamed: 0,NAME,state,congressional district,pctHasInternet,pctHasNoInternet,pctCellData
0,"Congressional District (at Large) (116th Congress), Alaska",2,0,0.879718,0.102407,0.808909
1,"Congressional District (at Large) (116th Congress), Delaware",10,0,0.889004,0.086169,0.820859


# Tech access

In [25]:
computerVars = c.getVariablesByGroup(c.groups.TypesOfComputersInHousehold.code)

  0%|          | 0/1 [00:00<?, ?it/s]

In [26]:
[var for var in computerVars['name'].tolist() if var.startswith('Estimate')]

['Estimate!!Total:',
 'Estimate!!Total:!!Has one or more types of computing devices:',
 'Estimate!!Total:!!Has one or more types of computing devices:!!Desktop or laptop',
 'Estimate!!Total:!!Has one or more types of computing devices:!!Desktop or laptop!!Desktop or laptop with no other type of computing device',
 'Estimate!!Total:!!Has one or more types of computing devices:!!Smartphone',
 'Estimate!!Total:!!Has one or more types of computing devices:!!Smartphone!!Smartphone with no other type of computing device',
 'Estimate!!Total:!!Has one or more types of computing devices:!!Tablet or other portable wireless computer',
 'Estimate!!Total:!!Has one or more types of computing devices:!!Tablet or other portable wireless computer!!Tablet or other portable wireless computer with no other type of computing device',
 'Estimate!!Total:!!Has one or more types of computing devices:!!Other computer',
 'Estimate!!Total:!!Has one or more types of computing devices:!!Other computer!!Other comput

In [27]:
computerStats = c.getStats(computerVars['code'].tolist(), *geoDomains)

|          | 0/? [00:00<?, ?it/s]

In [28]:
computerPcts = computerStats.loc[:,["NAME", "state", "congressional district"]]

computerPcts['pctHasTech'] = (computerStats.Estimate_Total_HasOneOrMoreTypesOfComputingDevices /
                                 computerStats.Estimate_Total)
computerPcts['pctNoTech'] = (computerStats.Estimate_Total_NoComputer / 
                                computerStats.Estimate_Total)
computerPcts['pctSmartphone'] = (computerStats.Estimate_Total_HasOneOrMoreTypesOfComputingDevices_Smartphone /
                               computerStats.Estimate_Total)
computerPcts['pctNoSmartphone'] = 1 - computerPcts.pctSmartphone

In [29]:
computerPcts.head(n=2)

Unnamed: 0,NAME,state,congressional district,pctHasTech,pctNoTech,pctSmartphone,pctNoSmartphone
0,"Congressional District (at Large) (116th Congress), Alaska",2,0,0.952728,0.047272,0.907712,0.092288
1,"Congressional District (at Large) (116th Congress), Delaware",10,0,0.941136,0.058864,0.877841,0.122159


# Language spoken at home

In [30]:
 'LanguageSpokenAtHomeForThePopulation5YearsAndOver',
 'HouseholdLanguageByHouseholdLimitedEnglishSpeakingStatus',

('HouseholdLanguageByHouseholdLimitedEnglishSpeakingStatus',)

# Work commute

In [31]:
transportationVars = c.getVariablesByGroup(c.groups.MeansOfTransportationToWork.code)
transportationStats = c.getStats(transportationVars['code'].tolist(), *geoDomains)

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [32]:
[stat for stat in transportationStats.columns.tolist() if stat.startswith('Estimate')]

['Estimate_Total',
 'Estimate_Total_CarTruckOrVan',
 'Estimate_Total_CarTruckOrVan_DroveAlone',
 'Estimate_Total_CarTruckOrVan_Carpooled',
 'Estimate_Total_CarTruckOrVan_Carpooled_In2PersonCarpool',
 'Estimate_Total_CarTruckOrVan_Carpooled_In3PersonCarpool',
 'Estimate_Total_CarTruckOrVan_Carpooled_In4OrMorePersonCarpool',
 'Estimate_Total_PublicTransportationExcludingTaxicab',
 'Estimate_Total_Walked',
 'Estimate_Total_TaxicabMotorcycleBicycleOrOtherMeans',
 'Estimate_Total_WorkedFromHome']

# Commute time

In [33]:
c.searchGroups('travel')

Unnamed: 0,code,description
0,B08012,SEX OF WORKERS BY TRAVEL TIME TO WORK
1,B08013,AGGREGATE TRAVEL TIME TO WORK (IN MINUTES) OF WORKERS BY SEX
2,B08131,AGGREGATE TRAVEL TIME TO WORK (IN MINUTES) OF WORKERS BY PLACE OF WORK--STATE AND COUNTY LEVEL
3,B08133,AGGREGATE TRAVEL TIME TO WORK (IN MINUTES) OF WORKERS BY TIME OF DEPARTURE TO GO TO WORK
4,B08134,MEANS OF TRANSPORTATION TO WORK BY TRAVEL TIME TO WORK
5,B08135,AGGREGATE TRAVEL TIME TO WORK (IN MINUTES) OF WORKERS BY TRAVEL TIME TO WORK
6,B08136,AGGREGATE TRAVEL TIME TO WORK (IN MINUTES) OF WORKERS BY MEANS OF TRANSPORTATION TO WORK
7,B08303,TRAVEL TIME TO WORK
8,B08412,SEX OF WORKERS BY TRAVEL TIME TO WORK FOR WORKPLACE GEOGRAPHY
9,B08534,MEANS OF TRANSPORTATION TO WORK BY TRAVEL TIME TO WORK FOR WORKPLACE GEOGRAPHY


# Means of transportation by commute time

In [34]:
transByCommuteTimeVars = c.getVariablesByGroup(c.groups.MeansOfTransportationToWorkByTravelTimeToWork.code)
transByCommuteTimeStats = c.getStats(transByCommuteTimeVars['code'].tolist(), *geoDomains)

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [35]:
cols = [col for col in transByCommuteTimeStats.columns.tolist() if col.startswith('Estimate')]
transByCommuteTimeStats = transByCommuteTimeStats[['NAME', 'state', 'congressional district'] + cols]

In [36]:
transByCommuteTimePcts = transByCommuteTimeStats.loc[:,['NAME', 'state', 'congressional district']]

transByCommuteTimePcts['pctLt10'] = (transByCommuteTimeStats.Estimate_Total_LessThan10Minutes /
                                    transByCommuteTimeStats.Estimate_Total)
transByCommuteTimePcts['pct10to14'] = (transByCommuteTimeStats.Estimate_Total_10To14Minutes /
                                    transByCommuteTimeStats.Estimate_Total)
transByCommuteTimePcts['pct15to19'] = (transByCommuteTimeStats.Estimate_Total_15To19Minutes /
                                    transByCommuteTimeStats.Estimate_Total)
transByCommuteTimePcts['pct20to24'] = (transByCommuteTimeStats.Estimate_Total_20To24Minutes /
                                    transByCommuteTimeStats.Estimate_Total)
transByCommuteTimePcts['pct25to29'] = (transByCommuteTimeStats.Estimate_Total_25To29Minutes /
                                    transByCommuteTimeStats.Estimate_Total)
transByCommuteTimePcts['pct30to34'] = (transByCommuteTimeStats.Estimate_Total_30To34Minutes /
                                    transByCommuteTimeStats.Estimate_Total)
transByCommuteTimePcts['pct35to44'] = (transByCommuteTimeStats.Estimate_Total_35To44Minutes /
                                    transByCommuteTimeStats.Estimate_Total)
transByCommuteTimePcts['pct44to59'] = (transByCommuteTimeStats.Estimate_Total_45To59Minutes /
                                    transByCommuteTimeStats.Estimate_Total)
transByCommuteTimePcts['pct60plus'] = (transByCommuteTimeStats.Estimate_Total_60OrMoreMinutes /
                                    transByCommuteTimeStats.Estimate_Total)

In [37]:
transByCommuteTimePcts.describe()

Unnamed: 0,pctLt10,pct10to14,pct15to19,pct20to24,pct25to29,pct30to34,pct35to44,pct44to59,pct60plus
count,437.0,437.0,437.0,437.0,437.0,437.0,437.0,437.0,437.0
mean,0.121342,0.130172,0.149963,0.140375,0.065557,0.137498,0.072203,0.084572,0.098318
std,0.055188,0.034026,0.03029,0.026597,0.016654,0.031528,0.021628,0.032521,0.05798
min,0.020903,0.036667,0.03538,0.061593,0.02322,0.056777,0.019706,0.025861,0.024628
25%,0.079812,0.108312,0.131845,0.122912,0.055837,0.116477,0.056266,0.060898,0.058075
50%,0.113645,0.130965,0.150303,0.139941,0.065057,0.133991,0.070505,0.080233,0.081621
75%,0.148323,0.152541,0.169617,0.154432,0.074629,0.15677,0.08623,0.104302,0.122141
max,0.377546,0.224908,0.246341,0.220081,0.224424,0.251859,0.147998,0.216932,0.384638


# Agg Traveltime

In [38]:
aggTravTimeVars = c.getVariablesByGroup(c.groups.AggregateTravelTimeToWorkInMinutesOfWorkersByTravelTimeToWork.code)
aggTravTimeStats = c.getStats(aggTravTimeVars['code'].tolist(), *geoDomains)

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [39]:
avgTravelTime = aggTravTimeStats.loc[:,['NAME', 'state', 'congressional district']]
avgTravelTime['avgTravelTime'] = (aggTravTimeStats.Estimate_AggregateTravelTimeToWorkInMinutes /
                                 populationData.Estimate_Total)

In [40]:
avgTravelTime.describe()

Unnamed: 0,avgTravelTime
count,437.0
mean,12.401007
std,2.651061
min,7.789928
25%,10.325869
50%,11.974308
75%,14.145483
max,21.859793


# Time of departure to work

In [41]:
depTimeVars = c.getVariablesByGroup(c.groups.TimeOfDepartureToGoToWork.code)
filteredVars = [rec['code'] for rec in depTimeVars.to_dict('records') if rec['name'].startswith('Estimate')]
depTimeStats = c.getStats(filteredVars, *geoDomains)

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [42]:
depTimeStats

Unnamed: 0,NAME,congressional district,state,Estimate_Total,Estimate_Total_1200AMTo459AM,Estimate_Total_500AMTo529AM,Estimate_Total_530AMTo559AM,Estimate_Total_600AMTo629AM,Estimate_Total_630AMTo659AM,Estimate_Total_700AMTo729AM,Estimate_Total_730AMTo759AM,Estimate_Total_800AMTo829AM,Estimate_Total_830AMTo859AM,Estimate_Total_900AMTo959AM,Estimate_Total_1000AMTo1059AM,Estimate_Total_1100AMTo1159AM,Estimate_Total_1200PMTo359PM,Estimate_Total_400PMTo1159PM
0,"Congressional District (at Large) (116th Congress), Alaska",00,02,334670.0,12751.0,11993.0,16398.0,24971.0,29944.0,41146.0,53250.0,38581.0,20031.0,24427.0,9451.0,5084.0,23001.0,23642.0
1,"Congressional District (at Large) (116th Congress), Delaware",00,10,439954.0,18151.0,13422.0,19031.0,35151.0,41635.0,71341.0,60534.0,49165.0,25932.0,32465.0,12411.0,4435.0,29416.0,26865.0
2,"Congressional District (at Large) (116th Congress), Montana",00,30,480953.0,18113.0,12502.0,18016.0,34709.0,44808.0,70479.0,96016.0,56001.0,27274.0,27442.0,14875.0,6683.0,26640.0,27395.0
3,"Congressional District (at Large) (116th Congress), North Dakota",00,38,390897.0,15705.0,10453.0,19501.0,24652.0,43926.0,58746.0,81355.0,40659.0,15764.0,19342.0,8250.0,3592.0,24636.0,24316.0
4,"Congressional District (at Large) (116th Congress), South Dakota",00,46,420853.0,14519.0,11674.0,19970.0,29725.0,44003.0,68122.0,88280.0,44644.0,14452.0,15968.0,9132.0,6269.0,29727.0,24368.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,"Congressional District 51 (116th Congress), California",51,06,307126.0,26855.0,21414.0,17910.0,32457.0,21033.0,45338.0,29053.0,27124.0,10954.0,20193.0,8672.0,4690.0,23705.0,17728.0
433,"Congressional District 52 (116th Congress), California",52,06,369142.0,11378.0,10295.0,16263.0,23232.0,30745.0,53677.0,44138.0,54561.0,27633.0,41829.0,15001.0,7229.0,18213.0,14948.0
434,"Congressional District 53 (116th Congress), California",53,06,380471.0,19020.0,20046.0,20374.0,39043.0,33099.0,48958.0,42388.0,40847.0,21952.0,26952.0,12357.0,5472.0,27564.0,22399.0
435,"Delegate District (at Large) (116th Congress), District of Columbia",98,11,357476.0,5305.0,6899.0,6534.0,16589.0,18110.0,45522.0,46706.0,73467.0,55654.0,41803.0,9599.0,3919.0,16067.0,11302.0


In [43]:
mgd = pd.merge(left=educPcts, right=emplPcts, on=['state', 'congressional district', 'NAME'])
mgd = pd.merge(left=mgd, right=computerPcts, on=['state', 'congressional district', 'NAME'])
mgd = pd.merge(left=mgd, right=giniData, on=['state', 'congressional district', 'NAME'])
mgd = pd.merge(left=mgd, right=avgIncome, on=['state', 'congressional district', 'NAME'])

In [44]:
mgd.corr()

Unnamed: 0,pctNoHs,pctHs,pctBach,pctSomeCollege1YrNoDegree,pctSomeCollegeManyYrsNoDegree,pctGradPlus,pctInLaborForce,pctEmployed,pctUnemployed,pctHasTech,pctNoTech,pctSmartphone,pctNoSmartphone,Estimate_GiniIndex,MarginOfError_GiniIndex,avgIncome
pctNoHs,1.0,0.178613,-0.559658,-0.198253,0.105307,-0.531119,-0.245193,-0.502975,0.502975,-0.345541,0.345541,-0.079459,0.079459,0.0913,0.291348,-0.525402
pctHs,0.178613,1.0,-0.820182,0.419146,0.099233,-0.725833,-0.576897,-0.217133,0.217133,-0.709414,0.709414,-0.767809,0.767809,-0.190939,0.153568,-0.714676
pctBach,-0.559658,-0.820182,1.0,-0.456921,-0.388264,0.84596,0.677671,0.471912,-0.471912,0.665961,-0.665961,0.620782,-0.620782,0.176874,-0.263496,0.860153
pctSomeCollege1YrNoDegree,-0.198253,0.419146,-0.456921,1.0,0.607048,-0.521287,-0.390142,0.017666,-0.017666,-0.06008,0.06008,-0.236031,0.236031,-0.468974,-0.010577,-0.432199
pctSomeCollegeManyYrsNoDegree,0.105307,0.099233,-0.388264,0.607048,1.0,-0.526781,-0.292324,-0.300941,0.300941,0.022361,-0.022361,0.07735,-0.07735,-0.251097,0.115139,-0.485254
pctGradPlus,-0.531119,-0.725833,0.84596,-0.521287,-0.526781,1.0,0.538701,0.36949,-0.36949,0.51153,-0.51153,0.442343,-0.442343,0.258362,-0.229895,0.881395
pctInLaborForce,-0.245193,-0.576897,0.677671,-0.390142,-0.292324,0.538701,1.0,0.45196,-0.45196,0.582327,-0.582327,0.604844,-0.604844,-0.112872,-0.203314,0.538522
pctEmployed,-0.502975,-0.217133,0.471912,0.017666,-0.300941,0.36949,0.45196,1.0,-1.0,0.432663,-0.432663,0.2635,-0.2635,-0.287001,-0.311713,0.4444
pctUnemployed,0.502975,0.217133,-0.471912,-0.017666,0.300941,-0.36949,-0.45196,-1.0,1.0,-0.432663,0.432663,-0.2635,0.2635,0.287001,0.311713,-0.4444
pctHasTech,-0.345541,-0.709414,0.665961,-0.06008,0.022361,0.51153,0.582327,0.432663,-0.432663,1.0,-1.0,0.899346,-0.899346,-0.234855,-0.163076,0.578956


# Poverty

In [45]:
povertyVars = c.getVariablesByGroup(c.groups.PovertyStatusInThePast12MonthsByAge.code)
filteredVars = [var['code'] for var in povertyVars.to_dict('records') if var['name'].startswith('Estimate')]
povertyStats = c.getStats(filteredVars, *geoDomains)

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [46]:
povertyPcts = povertyStats.loc[:,['NAME', 'state', 'congressional district']]

povertyPcts['atOrAbove'] = (povertyStats.Estimate_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel /
             povertyStats.Estimate_Total)

povertyPcts['below'] = (povertyStats.Estimate_Total_IncomeInThePast12MonthsBelowPovertyLevel /
                       povertyStats.Estimate_Total)

In [47]:
povertyPcts.describe()

Unnamed: 0,atOrAbove,below
count,437.0,437.0
mean,0.875172,0.124828
std,0.047464,0.047464
min,0.56529,0.03463
25%,0.852099,0.091802
50%,0.881482,0.118518
75%,0.908198,0.147901
max,0.96537,0.43471


In [48]:
geoCodes = c.getGeographyCodes(GeoDomain('state'))

In [49]:
geoCodes[geoCodes['NAME'] == 'New York']

Unnamed: 0,NAME,state
32,New York,36


In [50]:
congDistCodes = c.getGeographyCodes(GeoDomain('congressional district'),
                                   GeoDomain('state', '36'))

In [51]:
povertyPcts[(povertyPcts.state == '36') & (povertyPcts['congressional district'] == '14')]

Unnamed: 0,NAME,state,congressional district,atOrAbove,below
334,"Congressional District 14 (116th Congress), New York",36,14,0.871396,0.128604


In [52]:
blackPovVars = c.getVariablesByGroup(c.groups.PovertyStatusInThePast12MonthsByAgeBlackOrAfricanAmericanAlone.code)
blackPovStats = c.getStats(blackPovVars['code'].tolist(), *geoDomains)

  0%|          | 0/1 [00:00<?, ?it/s]

|          | 0/? [00:00<?, ?it/s]

In [53]:
blackPovStats

Unnamed: 0,NAME,congressional district,state,Estimate_Total,AnnotationOfEstimate_Total,MarginOfError_Total,AnnotationOfMarginOfError_Total,Estimate_Total_IncomeInThePast12MonthsBelowPovertyLevel,AnnotationOfEstimate_Total_IncomeInThePast12MonthsBelowPovertyLevel,MarginOfError_Total_IncomeInThePast12MonthsBelowPovertyLevel,...,MarginOfError_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel_60To74Years,AnnotationOfMarginOfError_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel_60To74Years,Estimate_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel_75To84Years,AnnotationOfEstimate_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel_75To84Years,MarginOfError_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel_75To84Years,AnnotationOfMarginOfError_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel_75To84Years,Estimate_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel_85YearsAndOver,AnnotationOfEstimate_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel_85YearsAndOver,MarginOfError_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel_85YearsAndOver,AnnotationOfMarginOfError_Total_IncomeInThePast12MonthsAtOrAbovePovertyLevel_85YearsAndOver
0,"Congressional District (at Large) (116th Congress), Alaska",00,02,20213.0,,1817.0,,3107.0,,2003.0,...,836.0,,465.0,,369.0,,0.0,,163.0,
1,"Congressional District (at Large) (116th Congress), Delaware",00,10,209888.0,,4154.0,,36392.0,,5758.0,...,1984.0,,6485.0,,1160.0,,2124.0,,849.0,
2,"Congressional District (at Large) (116th Congress), Montana",00,30,,,,,,,,...,,,,,,,,,,
3,"Congressional District (at Large) (116th Congress), North Dakota",00,38,,,,,,,,...,,,,,,,,,,
4,"Congressional District (at Large) (116th Congress), South Dakota",00,46,19537.0,,2167.0,,3891.0,,2675.0,...,442.0,,284.0,,292.0,,73.0,,120.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,"Congressional District 51 (116th Congress), California",51,06,41515.0,,7317.0,,9697.0,,3492.0,...,1462.0,,1522.0,,645.0,,408.0,,315.0,
433,"Congressional District 52 (116th Congress), California",52,06,20868.0,,4958.0,,2137.0,,933.0,...,691.0,,274.0,,241.0,,20.0,,34.0,
434,"Congressional District 53 (116th Congress), California",53,06,60956.0,,6895.0,,13268.0,,5016.0,...,1442.0,,2095.0,,849.0,,295.0,,269.0,
435,"Delegate District (at Large) (116th Congress), District of Columbia",98,11,310435.0,,4883.0,,67097.0,,7197.0,...,2574.0,,12551.0,,1335.0,,5276.0,,1103.0,
