## This notebook handles mapping srprec to census tract, and then adding on census data

In [1]:
# imports
import pandas as pd
import numpy as np
import requests
import pickle as pkl
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [2]:
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

### Converter file

In [3]:
convert18 = pd.read_pickle('./data/df_convert18.pkl')

In [4]:
convert18.head(3)

Unnamed: 0,subindex,srprec_orig,tract,pctsrprec_tract
0,0,7701,110304,100.0
1,0,75702,75809,97.095134
2,1,75702,75814,2.904866


In [5]:
convert18.shape

(2011, 4)

### Merge the full dataframes
#### Steps:
1. Attach census data on 'tract', apply weights by 'tract', aggregate to 'srprec'.
2. Attach vote data on 'srprec'

#### Step 1:  Attach census data on 'tract', apply weights by 'tract', aggregate to 'srprec'.

#### Census data set

In [6]:
census18_DP03 = pd.read_pickle('./census_data/DP03_subset.pkl')
census18_DP03.shape

(583, 12)

In [8]:
census18_DP03.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 583 entries, 1 to 583
Data columns (total 12 columns):
 #   Column                                                                                                                           Non-Null Count  Dtype 
---  ------                                                                                                                           --------------  ----- 
 0   Geographic Area Name                                                                                                             583 non-null    object
 1   Geographic Area Name                                                                                                             583 non-null    object
 2   GEO_ID                                                                                                                           583 non-null    object
 3   tract                                                                                                    

In [11]:
census18_DP03.drop(['Geographic Area Name', 'GEO_ID'], axis=1, inplace=True)

In [12]:

census18_DP03.head(3)

Unnamed: 0,tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars)
1,62644,6767,3967,2800,2547,262,819,91,146953
2,62640,3062,2446,616,1202,287,563,63,84632
3,63008,833,431,402,255,22,104,0,100396


In [13]:
census18_DP03.shape

(583, 9)

#### Merge census data into convert18 df on 'tract' to create combo df

In [15]:
# pd.merge(left=c, right=b, how='outer', left_on=['tract'], right_on=['tract'], indicator=True, suffixes=('_c', '_b'))
combo = pd.merge(left=convert18, right=census18_DP03, how='outer', left_on=['tract'], right_on=['tract'], indicator=True)

In [16]:
combo.head(10)

Unnamed: 0,subindex,srprec_orig,tract,pctsrprec_tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars),_merge
0,0.0,7701.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both
1,0.0,7140.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both
2,0.0,7138.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both
3,0.0,11075.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both
4,2.0,11043.0,110304,25.630594,4243,2801,1442,875,621,697,195,105746,both
5,0.0,75702.0,75809,97.095134,2486,1295,1191,725,133,238,71,165278,both
6,0.0,75116.0,75809,70.290635,2486,1295,1191,725,133,238,71,165278,both
7,0.0,75110.0,75809,16.946309,2486,1295,1191,725,133,238,71,165278,both
8,0.0,63710.0,75809,4.524089,2486,1295,1191,725,133,238,71,165278,both
9,1.0,75702.0,75814,2.904866,2397,1681,716,890,199,388,0,130438,both


In [18]:
combo.shape

(2012, 13)

#### Inspect resulting dataframe:  'srprec' with multiple tracts, nulls

In [19]:
#stackoverflow:  https://stackoverflow.com/questions/14247586/how-to-select-rows-with-one-or-more-nulls-from-a-pandas-dataframe-without-listin
combo[pd.isnull(combo).any(axis=1)]

Unnamed: 0,subindex,srprec_orig,tract,pctsrprec_tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars),_merge
2011,,,990100,,0,0,0,0,0,0,0,-666666666,right_only


In [27]:
combo.loc[combo['srprec_orig'] == 14082]

Unnamed: 0,subindex,srprec_orig,tract,pctsrprec_tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars),_merge
1567,0.0,14082.0,88101,0.127389,1737,1185,552,356,275,306,98,74665,both
1878,1.0,14082.0,110003,99.872611,2487,1557,930,731,116,372,150,86176,both


In [29]:
combo.loc[combo['_merge'] == 'right_only']

Unnamed: 0,subindex,srprec_orig,tract,pctsrprec_tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars),_merge
2011,,,990100,,0,0,0,0,0,0,0,-666666666,right_only


In [30]:
combo.drop(index=2011, inplace=True)

In [43]:
combo['pctsrprec_tract'] = round(combo['pctsrprec_tract'], 2)

In [44]:
combo.head(10)

Unnamed: 0,subindex,srprec_orig,tract,pctsrprec_tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars),_merge,test,hh_me_inc_wgt
0,0.0,7701.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both,105746.0,105746.0
1,0.0,7140.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both,105746.0,105746.0
2,0.0,7138.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both,105746.0,105746.0
3,0.0,11075.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both,105746.0,105746.0
4,2.0,11043.0,110304,25.63,4243,2801,1442,875,621,697,195,105746,both,27103.33,27102.7
5,0.0,75702.0,75809,97.1,2486,1295,1191,725,133,238,71,165278,both,160476.9,160484.94
6,0.0,75116.0,75809,70.29,2486,1295,1191,725,133,238,71,165278,both,116174.96,116173.91
7,0.0,75110.0,75809,16.95,2486,1295,1191,725,133,238,71,165278,both,28008.52,28014.62
8,0.0,63710.0,75809,4.52,2486,1295,1191,725,133,238,71,165278,both,7477.32,7470.57
9,1.0,75702.0,75814,2.9,2397,1681,716,890,199,388,0,130438,both,3789.05,3782.7


#### Calculate weighted values for each tract

In [64]:
combo['hh_med_inc_wgt'] = round((combo['pctsrprec_tract']/100 * 
                      combo['Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars)'])
                      , 2)

In [65]:
combo.head(10)

Unnamed: 0,subindex,srprec_orig,tract,pctsrprec_tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars),prec_merge,test,hh_me_inc_wgt,hh_med_inc_wgt
0,0.0,7701.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both,105746.0,105746.0,105746.0
1,0.0,7140.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both,105746.0,105746.0,105746.0
2,0.0,7138.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both,105746.0,105746.0,105746.0
3,0.0,11075.0,110304,100.0,4243,2801,1442,875,621,697,195,105746,both,105746.0,105746.0,105746.0
4,2.0,11043.0,110304,25.63,4243,2801,1442,875,621,697,195,105746,both,27103.33,27102.7,27102.7
5,0.0,75702.0,75809,97.1,2486,1295,1191,725,133,238,71,165278,both,160476.9,160484.94,160484.94
6,0.0,75116.0,75809,70.29,2486,1295,1191,725,133,238,71,165278,both,116174.96,116173.91,116173.91
7,0.0,75110.0,75809,16.95,2486,1295,1191,725,133,238,71,165278,both,28008.52,28014.62,28014.62
8,0.0,63710.0,75809,4.52,2486,1295,1191,725,133,238,71,165278,both,7477.32,7470.57,7470.57
9,1.0,75702.0,75814,2.9,2397,1681,716,890,199,388,0,130438,both,3789.05,3782.7,3782.7


In [66]:
combo.loc[combo['srprec_orig'] == 75116]

Unnamed: 0,subindex,srprec_orig,tract,pctsrprec_tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars),prec_merge,test,hh_me_inc_wgt,hh_med_inc_wgt
6,0.0,75116.0,75809,70.29,2486,1295,1191,725,133,238,71,165278,both,116174.96,116173.91,116173.91
10,4.0,75116.0,75814,0.43,2397,1681,716,890,199,388,0,130438,both,561.63,560.88,560.88
18,1.0,75116.0,75810,21.1,2621,1260,1361,615,91,387,87,122188,both,25779.17,25781.67,25781.67
23,2.0,75116.0,75811,6.24,2571,1665,906,514,357,291,201,71402,both,4457.82,4455.48,4455.48
26,3.0,75116.0,75813,1.94,4234,2642,1592,1146,390,673,103,121667,both,2357.38,2360.34,2360.34


#### Aggregate results to 'srprec'

In [67]:
srprec_agg = combo.groupby('srprec_orig').sum()
srprec_agg

Unnamed: 0_level_0,subindex,tract,pctsrprec_tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars),test,hh_me_inc_wgt,hh_med_inc_wgt
srprec_orig,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2001.0,3.0,184605,100.0,11832,7801,4031,847,1765,1960,1061,158776,54480.52,54480.48,54480.48
2002.0,0.0,11720,100.0,4632,3107,1525,363,573,383,569,44136,44136.00,44136.00,44136.00
2008.0,0.0,86702,100.0,5664,4226,1438,675,796,1064,368,66970,66970.00,66970.00,66970.00
2009.0,3.0,284005,100.0,17313,9816,7497,2179,2147,2757,843,162333,54967.18,54967.05,54967.05
2011.0,0.0,21905,100.0,4466,3065,1401,1364,331,1011,151,119595,119595.00,119595.00,119595.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75116.0,10.0,379057,100.0,14309,8543,5766,3890,1170,1977,462,610973,149330.96,149332.28,149332.28
75117.0,0.0,75810,100.0,2621,1260,1361,615,91,387,87,122188,122188.00,122188.00,122188.00
75122.0,0.0,75810,100.0,2621,1260,1361,615,91,387,87,122188,122188.00,122188.00,122188.00
75701.0,0.0,75810,100.0,2621,1260,1361,615,91,387,87,122188,122188.00,122188.00,122188.00


In [68]:
srprec_agg.reset_index(inplace=True)

In [69]:
srprec_agg.head()

Unnamed: 0,srprec_orig,subindex,tract,pctsrprec_tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars),test,hh_me_inc_wgt,hh_med_inc_wgt
0,2001.0,3.0,184605,100.0,11832,7801,4031,847,1765,1960,1061,158776,54480.52,54480.48,54480.48
1,2002.0,0.0,11720,100.0,4632,3107,1525,363,573,383,569,44136,44136.0,44136.0,44136.0
2,2008.0,0.0,86702,100.0,5664,4226,1438,675,796,1064,368,66970,66970.0,66970.0,66970.0
3,2009.0,3.0,284005,100.0,17313,9816,7497,2179,2147,2757,843,162333,54967.18,54967.05,54967.05
4,2011.0,0.0,21905,100.0,4466,3065,1401,1364,331,1011,151,119595,119595.0,119595.0,119595.0


#### Inspect

In [70]:
srprec_agg.loc[srprec_agg['srprec_orig'] == 75116]

Unnamed: 0,srprec_orig,subindex,tract,pctsrprec_tract,Estimate EMPLOYMENT STATUS Population 16 years and over,Estimate EMPLOYMENT STATUS Population 16 years and over In labor force,Estimate EMPLOYMENT STATUS Population 16 years and over Not in labor force,"Estimate OCCUPATION Civilian employed population 16 years and over Management, business, science, and arts occupations",Estimate OCCUPATION Civilian employed population 16 years and over Service occupations,Estimate OCCUPATION Civilian employed population 16 years and over Sales and office occupations,"Estimate OCCUPATION Civilian employed population 16 years and over Natural resources, construction, and maintenance occupations",Estimate INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS) Total households Median household income (dollars),test,hh_me_inc_wgt,hh_med_inc_wgt
1329,75116.0,10.0,379057,100.0,14309,8543,5766,3890,1170,1977,462,610973,149330.96,149332.28,149332.28


#### **DROP TRACT DETAIL**

In [86]:
# cols that don't make sense in aggregate:
# subindex
# tract
# test


In [87]:
srprec_census = srprec_agg[['srprec_orig', 'subindex', 'pctsrprec_tract', 'hh_med_inc_wgt']]
srprec_census

Unnamed: 0,srprec_orig,subindex,pctsrprec_tract,hh_med_inc_wgt
0,2001.0,3.0,100.0,54480.48
1,2002.0,0.0,100.0,44136.00
2,2008.0,0.0,100.0,66970.00
3,2009.0,3.0,100.0,54967.05
4,2011.0,0.0,100.0,119595.00
...,...,...,...,...
1329,75116.0,10.0,100.0,149332.28
1330,75117.0,0.0,100.0,122188.00
1331,75122.0,0.0,100.0,122188.00
1332,75701.0,0.0,100.0,122188.00


In [88]:
srprec_census.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1334 entries, 0 to 1333
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   srprec_orig      1334 non-null   float64
 1   subindex         1334 non-null   float64
 2   pctsrprec_tract  1334 non-null   float64
 3   hh_med_inc_wgt   1334 non-null   float64
dtypes: float64(4)
memory usage: 41.8 KB


### Merge in Vote data on 'srprec'

In [89]:
#rename traceability column in advance of second merge
#should be dropped already by .groupby() aggregation step, but a reminder just in case:

# df.rename(columns={'_merge':'prec_merge'}, inplace=True)

#### Vote dataset

In [90]:
vote18 = pd.read_pickle('./data/trend18.pkl')
vote18.shape

(1334, 26)

In [98]:
vote18['srprec'] = vote18['srprec'].astype('int64')

In [99]:
vote18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1334 entries, 0 to 1545
Data columns (total 26 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   county    1334 non-null   int64  
 1   srprec    1334 non-null   int64  
 2   cddist    1334 non-null   int64  
 3   TOTREG    1334 non-null   int64  
 4   TOTVOTE   1334 non-null   int64  
 5   CNGDEM01  1334 non-null   int64  
 6   CNGREP01  1334 non-null   int64  
 7   election  1334 non-null   object 
 8   type      1334 non-null   object 
 9   totreg_r  1334 non-null   float64
 10  dem       1334 non-null   float64
 11  rep       1334 non-null   float64
 12  aip       1334 non-null   float64
 13  paf       1334 non-null   float64
 14  msc       1334 non-null   float64
 15  lib       1334 non-null   float64
 16  nlp       1334 non-null   float64
 17  grn       1334 non-null   float64
 18  ref       1334 non-null   float64
 19  dcl       1334 non-null   float64
 20  male      1334 non-null   floa

In [92]:
vote18.head(3)

Unnamed: 0,county,srprec,cddist,TOTREG,TOTVOTE,CNGDEM01,CNGREP01,election,type,totreg_r,...,nlp,grn,ref,dcl,male,female,hispdem,hisprep,hispdcl,hispoth
0,30,10316,47,1735,1278,630,592,g18,V,1238.0,...,0.0,2.0,0.0,265.0,582.0,656.0,55.0,43.0,30.0,6.0
1,30,10317,47,2079,1214,638,513,g18,V,1184.0,...,0.0,4.0,0.0,286.0,588.0,596.0,165.0,60.0,71.0,10.0
2,30,10319,47,1448,999,495,462,g18,V,945.0,...,0.0,2.0,2.0,256.0,443.0,502.0,68.0,37.0,49.0,5.0


#### Merge vote data into srprec_census df on 'srprec'

In [100]:
combo = pd.merge(left=srprec_census, right=vote18, how='outer', left_on=['srprec_orig'], right_on=['srprec'], indicator=True)
combo.head()

Unnamed: 0,srprec_orig,subindex,pctsrprec_tract,hh_med_inc_wgt,county,srprec,cddist,TOTREG,TOTVOTE,CNGDEM01,...,grn,ref,dcl,male,female,hispdem,hisprep,hispdcl,hispoth,_merge
0,2001.0,3.0,100.0,54480.48,30,2001,46,684,381,249,...,4.0,1.0,83.0,173.0,198.0,106.0,20.0,56.0,7.0,both
1,2002.0,0.0,100.0,44136.0,30,2002,46,295,123,100,...,0.0,0.0,39.0,46.0,63.0,52.0,0.0,23.0,0.0,both
2,2008.0,0.0,100.0,66970.0,30,2008,46,2341,1295,929,...,5.0,0.0,301.0,574.0,623.0,376.0,60.0,171.0,12.0,both
3,2009.0,3.0,100.0,54967.05,30,2009,46,1450,772,515,...,1.0,0.0,160.0,359.0,376.0,169.0,39.0,54.0,1.0,both
4,2011.0,0.0,100.0,119595.0,30,2011,45,1113,829,368,...,2.0,0.0,167.0,415.0,390.0,59.0,29.0,30.0,5.0,both


In [101]:
combo.shape

(1334, 31)

#### compare conversion fields

In [98]:
data18[['srprec', 'cddist']].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1334 entries, 0 to 1545
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   srprec  1334 non-null   Int64
 1   cddist  1334 non-null   int64
dtypes: Int64(1), int64(1)
memory usage: 32.6 KB


In [100]:
convert18[['srprec', 'tract', 'block']].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22985 entries, 0 to 22984
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   srprec  22984 non-null  float64
 1   tract   22985 non-null  int64  
 2   block   22985 non-null  int64  
dtypes: float64(1), int64(2)
memory usage: 538.8 KB


In [104]:
dp03_18sub[['tract']].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 583 entries, 1 to 583
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tract   583 non-null    object
dtypes: object(1)
memory usage: 9.1+ KB
