In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy as scp
from scipy import stats
import sklearn
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import statsmodels.formula.api as smf
import re
from functools import reduce

In [2]:
# Let wide Census tables render up to 150 columns before pandas truncates them.
pd.set_option('display.max_columns', 150)

## bring in datasets

In [3]:
# All inputs live under one shared project folder; most are in its Census subfolder.
PROJECT_DIR = '~/Dropbox/CDS-2019-AlbanyHub'
CENSUS_DIR = PROJECT_DIR + '/Census'
# Read the 'tract' column of the pre-built tables as text so pandas does not
# coerce the codes to numbers.
TRACT_AS_TEXT = {'tract': str}

df_inc = pd.read_csv(CENSUS_DIR + '/2017_block_income_ann.csv')
df_pop = pd.read_csv(CENSUS_DIR + '/2017_block_pop_with_ann.csv')
df_land_area = pd.read_csv(CENSUS_DIR + '/2010_tract_landarea_ann.csv')
df_medinc = pd.read_csv(PROJECT_DIR + '/ACS_17_5YR_S1901_with_ann.csv')
df_employment = pd.read_csv(CENSUS_DIR + '/employment_final.csv', dtype=TRACT_AS_TEXT)
df_owner_renter = pd.read_csv(CENSUS_DIR + '/owner_renter_final.csv', dtype=TRACT_AS_TEXT)
df_vacancy = pd.read_csv(CENSUS_DIR + '/vacancy_final.csv', dtype=TRACT_AS_TEXT)
df = pd.read_csv(PROJECT_DIR + '/ToDatabase/TotalHouse.csv')

## block group level household income clean-up

In [4]:
# Preview the raw block-group income table. Row 0 holds the Census column
# descriptions rather than data.
df_inc.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HD01_VD01,HD02_VD01,HD01_VD02,HD02_VD02,HD01_VD03,HD02_VD03,HD01_VD04,HD02_VD04,HD01_VD05,HD02_VD05,HD01_VD06,HD02_VD06,HD01_VD07,HD02_VD07,HD01_VD08,HD02_VD08,HD01_VD09,HD02_VD09,HD01_VD10,HD02_VD10,HD01_VD11,HD02_VD11,HD01_VD12,HD02_VD12,HD01_VD13,HD02_VD13,HD01_VD14,HD02_VD14,HD01_VD15,HD02_VD15,HD01_VD16,HD02_VD16,HD01_VD17,HD02_VD17
0,Id,Id2,Geography,Estimate; Total:,Margin of Error; Total:,"Estimate; Total: - Less than $10,000","Margin of Error; Total: - Less than $10,000","Estimate; Total: - $10,000 to $14,999","Margin of Error; Total: - $10,000 to $14,999","Estimate; Total: - $15,000 to $19,999","Margin of Error; Total: - $15,000 to $19,999","Estimate; Total: - $20,000 to $24,999","Margin of Error; Total: - $20,000 to $24,999","Estimate; Total: - $25,000 to $29,999","Margin of Error; Total: - $25,000 to $29,999","Estimate; Total: - $30,000 to $34,999","Margin of Error; Total: - $30,000 to $34,999","Estimate; Total: - $35,000 to $39,999","Margin of Error; Total: - $35,000 to $39,999","Estimate; Total: - $40,000 to $44,999","Margin of Error; Total: - $40,000 to $44,999","Estimate; Total: - $45,000 to $49,999","Margin of Error; Total: - $45,000 to $49,999","Estimate; Total: - $50,000 to $59,999","Margin of Error; Total: - $50,000 to $59,999","Estimate; Total: - $60,000 to $74,999","Margin of Error; Total: - $60,000 to $74,999","Estimate; Total: - $75,000 to $99,999","Margin of Error; Total: - $75,000 to $99,999","Estimate; Total: - $100,000 to $124,999","Margin of Error; Total: - $100,000 to $124,999","Estimate; Total: - $125,000 to $149,999","Margin of Error; Total: - $125,000 to $149,999","Estimate; Total: - $150,000 to $199,999","Margin of Error; Total: - $150,000 to $199,999","Estimate; Total: - $200,000 or more","Margin of Error; Total: - $200,000 or more"
1,1500000US130079601001,130079601001,"Block Group 1, Census Tract 9601, Baker County...",378,77,88,38,47,29,6,10,9,14,18,16,28,28,7,13,14,18,16,17,22,18,40,32,44,36,22,20,10,14,7,11,0,13
2,1500000US130079601002,130079601002,"Block Group 2, Census Tract 9601, Baker County...",426,71,88,59,7,10,7,11,33,25,8,10,7,9,9,10,44,41,14,21,79,46,53,39,12,14,45,36,13,17,3,5,4,6
3,1500000US130079602001,130079602001,"Block Group 1, Census Tract 9602, Baker County...",224,63,0,13,7,10,7,10,27,26,13,18,26,28,0,13,5,8,25,39,36,34,27,21,30,27,0,13,0,13,0,13,21,20
4,1500000US130079602002,130079602002,"Block Group 2, Census Tract 9602, Baker County...",293,64,55,48,0,13,10,11,23,23,44,39,24,22,14,15,0,13,18,33,30,32,5,8,36,25,0,13,0,13,34,30,0,13


In [5]:
# Split labels of the form "Block Group N, Census Tract T, <county>, ..." on
# ", " and keep the first three pieces as their own columns.
new = df_inc["GEO.display-label"].str.split(", ", expand=True)
for part_position, part_name in enumerate(["block", "tract", "county"]):
    df_inc[part_name] = new[part_position]

In [6]:
# The combined label is no longer needed once it has been split into parts.
df_inc = df_inc.drop(columns=['GEO.display-label'])

In [7]:
# Pull the tract number (e.g. "9601" or "5.01") out of labels such as
# "Census Tract 9601" into a working copy of the income table.
# Row 0 holds the Census column descriptions, not data, so it is left untouched.
df_inc1 = df_inc.copy()
# Raw string with an escaped dot: the decimal point must be a literal ".",
# and "\d" must not be treated as a (deprecated) string escape.
re_string = r'Census Tract (\d+(?:\.\d+)?)'
data_rows = df_inc.index[1:]
tract_labels = df_inc.loc[data_rows, 'tract']
tract_numbers = tract_labels.str.extract(re_string, expand=False)
# Fail loudly, naming the offending labels, instead of crashing on a None match.
unparsed_labels = tract_labels[tract_numbers.isna()]
if not unparsed_labels.empty:
    raise ValueError(
        'no tract number found in: {}'.format(unparsed_labels.head().tolist())
    )
df_inc1.loc[data_rows, 'tract'] = tract_numbers

In [8]:
# Pull the block-group number out of labels such as "Block Group 1" and store
# it in df_inc1. Row 0 (the Census description row) is left untouched.
# Raw string with an escaped dot so "." only matches a literal decimal point.
re_string = r'Block Group (\d+(?:\.\d+)?)'
data_rows = df_inc.index[1:]
block_labels = df_inc.loc[data_rows, 'block']
block_numbers = block_labels.str.extract(re_string, expand=False)
# Fail loudly, naming the offending labels, instead of crashing on a None match.
unparsed_labels = block_labels[block_numbers.isna()]
if not unparsed_labels.empty:
    raise ValueError(
        'no block group number found in: {}'.format(unparsed_labels.head().tolist())
    )
df_inc1.loc[data_rows, 'block'] = block_numbers

In [9]:
# Collapse tract numbers into the compact code used as a join key:
#   "5.01" -> "501"    (decimal suffix: drop the dot)
#   "9601" -> "960100" (no suffix: append "00")
# Row 0 is the Census description row and is skipped.
# NOTE(review): assumes decimal suffixes are always two digits - confirm.
data_rows = df_inc1.index[1:]
tracts = df_inc1.loc[data_rows, 'tract']
has_suffix = tracts.str.contains('.', regex=False, na=False)
# Only whole numbers of 1-4 digits are padded; longer values are left as-is.
needs_padding = ~has_suffix & tracts.str.len().between(1, 4)
# Dotted values are collapsed whatever the length of the integer part, so
# "9501.02" becomes "950102" instead of silently keeping its dot.
tract_codes = tracts.str.replace('.', '', regex=False).mask(needs_padding, tracts + '00')
# Write back through .loc on the frame: assigning into a Series pulled out of
# the frame is chained assignment and is not guaranteed to update the frame.
df_inc1.loc[data_rows, 'tract'] = tract_codes

In [10]:
# Check the cleaned income table: block, tract and county are now separate columns.
df_inc1.head()

Unnamed: 0,GEO.id,GEO.id2,HD01_VD01,HD02_VD01,HD01_VD02,HD02_VD02,HD01_VD03,HD02_VD03,HD01_VD04,HD02_VD04,HD01_VD05,HD02_VD05,HD01_VD06,HD02_VD06,HD01_VD07,HD02_VD07,HD01_VD08,HD02_VD08,HD01_VD09,HD02_VD09,HD01_VD10,HD02_VD10,HD01_VD11,HD02_VD11,HD01_VD12,HD02_VD12,HD01_VD13,HD02_VD13,HD01_VD14,HD02_VD14,HD01_VD15,HD02_VD15,HD01_VD16,HD02_VD16,HD01_VD17,HD02_VD17,block,tract,county
0,Id,Id2,Estimate; Total:,Margin of Error; Total:,"Estimate; Total: - Less than $10,000","Margin of Error; Total: - Less than $10,000","Estimate; Total: - $10,000 to $14,999","Margin of Error; Total: - $10,000 to $14,999","Estimate; Total: - $15,000 to $19,999","Margin of Error; Total: - $15,000 to $19,999","Estimate; Total: - $20,000 to $24,999","Margin of Error; Total: - $20,000 to $24,999","Estimate; Total: - $25,000 to $29,999","Margin of Error; Total: - $25,000 to $29,999","Estimate; Total: - $30,000 to $34,999","Margin of Error; Total: - $30,000 to $34,999","Estimate; Total: - $35,000 to $39,999","Margin of Error; Total: - $35,000 to $39,999","Estimate; Total: - $40,000 to $44,999","Margin of Error; Total: - $40,000 to $44,999","Estimate; Total: - $45,000 to $49,999","Margin of Error; Total: - $45,000 to $49,999","Estimate; Total: - $50,000 to $59,999","Margin of Error; Total: - $50,000 to $59,999","Estimate; Total: - $60,000 to $74,999","Margin of Error; Total: - $60,000 to $74,999","Estimate; Total: - $75,000 to $99,999","Margin of Error; Total: - $75,000 to $99,999","Estimate; Total: - $100,000 to $124,999","Margin of Error; Total: - $100,000 to $124,999","Estimate; Total: - $125,000 to $149,999","Margin of Error; Total: - $125,000 to $149,999","Estimate; Total: - $150,000 to $199,999","Margin of Error; Total: - $150,000 to $199,999","Estimate; Total: - $200,000 or more","Margin of Error; Total: - $200,000 or more",Geography,,
1,1500000US130079601001,130079601001,378,77,88,38,47,29,6,10,9,14,18,16,28,28,7,13,14,18,16,17,22,18,40,32,44,36,22,20,10,14,7,11,0,13,1,960100.0,Baker County
2,1500000US130079601002,130079601002,426,71,88,59,7,10,7,11,33,25,8,10,7,9,9,10,44,41,14,21,79,46,53,39,12,14,45,36,13,17,3,5,4,6,2,960100.0,Baker County
3,1500000US130079602001,130079602001,224,63,0,13,7,10,7,10,27,26,13,18,26,28,0,13,5,8,25,39,36,34,27,21,30,27,0,13,0,13,0,13,21,20,1,960200.0,Baker County
4,1500000US130079602002,130079602002,293,64,55,48,0,13,10,11,23,23,44,39,24,22,14,15,0,13,18,33,30,32,5,8,36,25,0,13,0,13,34,30,0,13,2,960200.0,Baker County


## block group level population clean-up

In [11]:
# Preview the raw block-group population table. Row 0 holds the Census column
# descriptions rather than data.
df_pop.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HD01_VD01,HD02_VD01
0,Id,Id2,Geography,Estimate; Total,Margin of Error; Total
1,1500000US130079601001,130079601001,"Block Group 1, Census Tract 9601, Baker County...",869,181
2,1500000US130079601002,130079601002,"Block Group 2, Census Tract 9601, Baker County...",1247,223
3,1500000US130079602001,130079602001,"Block Group 1, Census Tract 9602, Baker County...",487,149
4,1500000US130079602002,130079602002,"Block Group 2, Census Tract 9602, Baker County...",648,120


In [12]:
# Split labels of the form "Block Group N, Census Tract T, <county>, ..." on
# ", " and keep the first three pieces as their own columns.
new1 = df_pop["GEO.display-label"].str.split(", ", expand=True)
for part_position, part_name in enumerate(["block", "tract", "county"]):
    df_pop[part_name] = new1[part_position]

In [13]:
# The combined label is no longer needed once it has been split into parts.
df_pop = df_pop.drop(columns=['GEO.display-label'])

In [14]:
# Pull the tract number (e.g. "9601" or "5.01") out of labels such as
# "Census Tract 9601" into a working copy of the population table.
# Row 0 holds the Census column descriptions, not data, so it is left untouched.
df_pop1 = df_pop.copy()
# Raw string with an escaped dot: the decimal point must be a literal ".",
# and "\d" must not be treated as a (deprecated) string escape.
re_string = r'Census Tract (\d+(?:\.\d+)?)'
data_rows = df_pop.index[1:]
tract_labels = df_pop.loc[data_rows, 'tract']
tract_numbers = tract_labels.str.extract(re_string, expand=False)
# Fail loudly, naming the offending labels, instead of crashing on a None match.
unparsed_labels = tract_labels[tract_numbers.isna()]
if not unparsed_labels.empty:
    raise ValueError(
        'no tract number found in: {}'.format(unparsed_labels.head().tolist())
    )
df_pop1.loc[data_rows, 'tract'] = tract_numbers

In [15]:
# Pull the block-group number out of labels such as "Block Group 1" and store
# it in df_pop1. Row 0 (the Census description row) is left untouched.
# Raw string with an escaped dot so "." only matches a literal decimal point.
re_string = r'Block Group (\d+(?:\.\d+)?)'
data_rows = df_pop.index[1:]
block_labels = df_pop.loc[data_rows, 'block']
block_numbers = block_labels.str.extract(re_string, expand=False)
# Fail loudly, naming the offending labels, instead of crashing on a None match.
unparsed_labels = block_labels[block_numbers.isna()]
if not unparsed_labels.empty:
    raise ValueError(
        'no block group number found in: {}'.format(unparsed_labels.head().tolist())
    )
df_pop1.loc[data_rows, 'block'] = block_numbers

In [16]:
# Collapse tract numbers into the compact code used as a join key:
#   "5.01" -> "501"    (decimal suffix: drop the dot)
#   "9601" -> "960100" (no suffix: append "00")
# Row 0 is the Census description row and is skipped.
# NOTE(review): assumes decimal suffixes are always two digits - confirm.
data_rows = df_pop1.index[1:]
tracts = df_pop1.loc[data_rows, 'tract']
has_suffix = tracts.str.contains('.', regex=False, na=False)
# Only whole numbers of 1-4 digits are padded; longer values are left as-is.
needs_padding = ~has_suffix & tracts.str.len().between(1, 4)
# Dotted values are collapsed whatever the length of the integer part, so
# "9501.02" becomes "950102" instead of silently keeping its dot.
tract_codes = tracts.str.replace('.', '', regex=False).mask(needs_padding, tracts + '00')
# Write back through .loc on the frame: assigning into a Series pulled out of
# the frame is chained assignment and is not guaranteed to update the frame.
df_pop1.loc[data_rows, 'tract'] = tract_codes

In [17]:
# Check the cleaned population table: block, tract and county are now separate columns.
df_pop1.head()

Unnamed: 0,GEO.id,GEO.id2,HD01_VD01,HD02_VD01,block,tract,county
0,Id,Id2,Estimate; Total,Margin of Error; Total,Geography,,
1,1500000US130079601001,130079601001,869,181,1,960100.0,Baker County
2,1500000US130079601002,130079601002,1247,223,2,960100.0,Baker County
3,1500000US130079602001,130079602001,487,149,1,960200.0,Baker County
4,1500000US130079602002,130079602002,648,120,2,960200.0,Baker County


## tract land area clean-up

In [18]:
# Preview the raw land-area table. Row 0 holds the Census column descriptions
# and row 1 is the county-wide total, not a tract.
df_land_area.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,GCT_STUB.target-geo-id,GCT_STUB.target-geo-id2,GCT_STUB.display-label,GCT_STUB.display-label.1,HD01,HD02,SUBHD0301,SUBHD0302,SUBHD0303,SUBHD0401,SUBHD0402
0,Id,Id2,Geography,Target Geo Id,Target Geo Id2,Geographic area,Geographic area,Population,Housing units,Area in square miles - Total area,Area in square miles - Water area,Area in square miles - Land area,Density per square mile of land area - Population,Density per square mile of land area - Housing...
1,0500000US13095,13095,"Dougherty County, Georgia",0500000US13095,13095,Dougherty County,Dougherty County,94565,40801,334.63,5.94,328.69,287.7,124.1
2,0500000US13095,13095,"Dougherty County, Georgia",1400000US13095000100,13095000100,Dougherty County - Census Tract 1,Census Tract 1,5944,2545,3.17,0.02,3.15,1886.7,807.8
3,0500000US13095,13095,"Dougherty County, Georgia",1400000US13095000200,13095000200,Dougherty County - Census Tract 2,Census Tract 2,3123,1348,1.91,0.04,1.87,1667.0,719.5
4,0500000US13095,13095,"Dougherty County, Georgia",1400000US13095000400,13095000400,Dougherty County - Census Tract 4,Census Tract 4,6276,3302,2.90,0.05,2.84,2206.7,1161.0


In [19]:
# Labels look like "Dougherty County, Georgia"; the piece before the first
# ", " is kept as the county.
new2 = df_land_area["GEO.display-label"].str.split(pat=", ", expand=True)
county_names = new2.loc[:, 0]
df_land_area["county"] = county_names

In [20]:
# The combined label is no longer needed once the county has been pulled out.
df_land_area = df_land_area.drop(columns=['GEO.display-label'])

In [21]:
# Re-home the short geography label ("Census Tract 1", ...) under the name
# 'tract' as the last column, then remove the original column.
tract_label_column = 'GCT_STUB.display-label.1'
df_land_area = (
    df_land_area
    .assign(tract=df_land_area[tract_label_column])
    .drop(columns=[tract_label_column])
)

In [22]:
# Check the reshaped land-area table: 'county' and 'tract' are now the trailing columns.
df_land_area.head()

Unnamed: 0,GEO.id,GEO.id2,GCT_STUB.target-geo-id,GCT_STUB.target-geo-id2,GCT_STUB.display-label,HD01,HD02,SUBHD0301,SUBHD0302,SUBHD0303,SUBHD0401,SUBHD0402,county,tract
0,Id,Id2,Target Geo Id,Target Geo Id2,Geographic area,Population,Housing units,Area in square miles - Total area,Area in square miles - Water area,Area in square miles - Land area,Density per square mile of land area - Population,Density per square mile of land area - Housing...,Geography,Geographic area
1,0500000US13095,13095,0500000US13095,13095,Dougherty County,94565,40801,334.63,5.94,328.69,287.7,124.1,Dougherty County,Dougherty County
2,0500000US13095,13095,1400000US13095000100,13095000100,Dougherty County - Census Tract 1,5944,2545,3.17,0.02,3.15,1886.7,807.8,Dougherty County,Census Tract 1
3,0500000US13095,13095,1400000US13095000200,13095000200,Dougherty County - Census Tract 2,3123,1348,1.91,0.04,1.87,1667.0,719.5,Dougherty County,Census Tract 2
4,0500000US13095,13095,1400000US13095000400,13095000400,Dougherty County - Census Tract 4,6276,3302,2.90,0.05,2.84,2206.7,1161.0,Dougherty County,Census Tract 4


In [23]:
# Confirm the tract labels are plain Python strings before parsing them with a regex.
type(df_land_area['tract'][0])

str

In [24]:
# Pull the tract number (e.g. "1" or "5.01") out of labels such as
# "Census Tract 1" into a working copy of the land-area table.
# Rows 0 and 1 are skipped: row 0 holds the Census column descriptions and
# row 1 is the county-wide total ("Dougherty County"), which has no tract number.
df_land_area1 = df_land_area.copy()
# Raw string with an escaped dot: the decimal point must be a literal ".",
# and "\d" must not be treated as a (deprecated) string escape.
re_string = r'Census Tract (\d+(?:\.\d+)?)'
tract_rows = df_land_area.index[2:]
tract_labels = df_land_area.loc[tract_rows, 'tract']
tract_numbers = tract_labels.str.extract(re_string, expand=False)
# Fail loudly, naming the offending labels, instead of crashing on a None match.
unparsed_labels = tract_labels[tract_numbers.isna()]
if not unparsed_labels.empty:
    raise ValueError(
        'no tract number found in: {}'.format(unparsed_labels.head().tolist())
    )
df_land_area1.loc[tract_rows, 'tract'] = tract_numbers

In [25]:
# Collapse tract numbers into the compact code used as a join key:
#   "5.01" -> "501" (decimal suffix: drop the dot)
#   "1"    -> "100" (no suffix: append "00")
# Row 0 is the Census description row and is skipped. The county-total row
# ("Dougherty County") has no dot and is longer than four characters, so it
# passes through unchanged.
# NOTE(review): assumes decimal suffixes are always two digits - confirm.
data_rows = df_land_area1.index[1:]
tracts = df_land_area1.loc[data_rows, 'tract']
has_suffix = tracts.str.contains('.', regex=False, na=False)
# Only whole numbers of 1-4 characters are padded; longer values are left as-is.
needs_padding = ~has_suffix & tracts.str.len().between(1, 4)
# Dotted values are collapsed whatever the length of the integer part, so
# "9501.02" becomes "950102" instead of silently keeping its dot.
tract_codes = tracts.str.replace('.', '', regex=False).mask(needs_padding, tracts + '00')
# Write back through .loc on the frame: assigning into a Series pulled out of
# the frame is chained assignment and is not guaranteed to update the frame.
df_land_area1.loc[data_rows, 'tract'] = tract_codes

In [26]:
# Check the cleaned land-area table: 'tract' now holds compact codes such as "100".
df_land_area1.head()

Unnamed: 0,GEO.id,GEO.id2,GCT_STUB.target-geo-id,GCT_STUB.target-geo-id2,GCT_STUB.display-label,HD01,HD02,SUBHD0301,SUBHD0302,SUBHD0303,SUBHD0401,SUBHD0402,county,tract
0,Id,Id2,Target Geo Id,Target Geo Id2,Geographic area,Population,Housing units,Area in square miles - Total area,Area in square miles - Water area,Area in square miles - Land area,Density per square mile of land area - Population,Density per square mile of land area - Housing...,Geography,Geographic area
1,0500000US13095,13095,0500000US13095,13095,Dougherty County,94565,40801,334.63,5.94,328.69,287.7,124.1,Dougherty County,Dougherty County
2,0500000US13095,13095,1400000US13095000100,13095000100,Dougherty County - Census Tract 1,5944,2545,3.17,0.02,3.15,1886.7,807.8,Dougherty County,100
3,0500000US13095,13095,1400000US13095000200,13095000200,Dougherty County - Census Tract 2,3123,1348,1.91,0.04,1.87,1667.0,719.5,Dougherty County,200
4,0500000US13095,13095,1400000US13095000400,13095000400,Dougherty County - Census Tract 4,6276,3302,2.90,0.05,2.84,2206.7,1161.0,Dougherty County,400


## new column for merge

In [27]:
# Build the merge key "<block>_<tract>" (e.g. "1_960100") from the cleaned parts.
block_part = df_inc1['block']
tract_part = df_inc1['tract']
df_inc1['block_tract'] = block_part + '_' + tract_part
df_inc1.head()

Unnamed: 0,GEO.id,GEO.id2,HD01_VD01,HD02_VD01,HD01_VD02,HD02_VD02,HD01_VD03,HD02_VD03,HD01_VD04,HD02_VD04,HD01_VD05,HD02_VD05,HD01_VD06,HD02_VD06,HD01_VD07,HD02_VD07,HD01_VD08,HD02_VD08,HD01_VD09,HD02_VD09,HD01_VD10,HD02_VD10,HD01_VD11,HD02_VD11,HD01_VD12,HD02_VD12,HD01_VD13,HD02_VD13,HD01_VD14,HD02_VD14,HD01_VD15,HD02_VD15,HD01_VD16,HD02_VD16,HD01_VD17,HD02_VD17,block,tract,county,block_tract
0,Id,Id2,Estimate; Total:,Margin of Error; Total:,"Estimate; Total: - Less than $10,000","Margin of Error; Total: - Less than $10,000","Estimate; Total: - $10,000 to $14,999","Margin of Error; Total: - $10,000 to $14,999","Estimate; Total: - $15,000 to $19,999","Margin of Error; Total: - $15,000 to $19,999","Estimate; Total: - $20,000 to $24,999","Margin of Error; Total: - $20,000 to $24,999","Estimate; Total: - $25,000 to $29,999","Margin of Error; Total: - $25,000 to $29,999","Estimate; Total: - $30,000 to $34,999","Margin of Error; Total: - $30,000 to $34,999","Estimate; Total: - $35,000 to $39,999","Margin of Error; Total: - $35,000 to $39,999","Estimate; Total: - $40,000 to $44,999","Margin of Error; Total: - $40,000 to $44,999","Estimate; Total: - $45,000 to $49,999","Margin of Error; Total: - $45,000 to $49,999","Estimate; Total: - $50,000 to $59,999","Margin of Error; Total: - $50,000 to $59,999","Estimate; Total: - $60,000 to $74,999","Margin of Error; Total: - $60,000 to $74,999","Estimate; Total: - $75,000 to $99,999","Margin of Error; Total: - $75,000 to $99,999","Estimate; Total: - $100,000 to $124,999","Margin of Error; Total: - $100,000 to $124,999","Estimate; Total: - $125,000 to $149,999","Margin of Error; Total: - $125,000 to $149,999","Estimate; Total: - $150,000 to $199,999","Margin of Error; Total: - $150,000 to $199,999","Estimate; Total: - $200,000 or more","Margin of Error; Total: - $200,000 or more",Geography,,,
1,1500000US130079601001,130079601001,378,77,88,38,47,29,6,10,9,14,18,16,28,28,7,13,14,18,16,17,22,18,40,32,44,36,22,20,10,14,7,11,0,13,1,960100.0,Baker County,1_960100
2,1500000US130079601002,130079601002,426,71,88,59,7,10,7,11,33,25,8,10,7,9,9,10,44,41,14,21,79,46,53,39,12,14,45,36,13,17,3,5,4,6,2,960100.0,Baker County,2_960100
3,1500000US130079602001,130079602001,224,63,0,13,7,10,7,10,27,26,13,18,26,28,0,13,5,8,25,39,36,34,27,21,30,27,0,13,0,13,0,13,21,20,1,960200.0,Baker County,1_960200
4,1500000US130079602002,130079602002,293,64,55,48,0,13,10,11,23,23,44,39,24,22,14,15,0,13,18,33,30,32,5,8,36,25,0,13,0,13,34,30,0,13,2,960200.0,Baker County,2_960200


In [28]:
# Build the merge key "<block>_<tract>" (e.g. "1_960100") from the cleaned parts.
block_part = df_pop1['block']
tract_part = df_pop1['tract']
df_pop1['block_tract'] = block_part + '_' + tract_part
df_pop1.head()

Unnamed: 0,GEO.id,GEO.id2,HD01_VD01,HD02_VD01,block,tract,county,block_tract
0,Id,Id2,Estimate; Total,Margin of Error; Total,Geography,,,
1,1500000US130079601001,130079601001,869,181,1,960100.0,Baker County,1_960100
2,1500000US130079601002,130079601002,1247,223,2,960100.0,Baker County,2_960100
3,1500000US130079602001,130079602001,487,149,1,960200.0,Baker County,1_960200
4,1500000US130079602002,130079602002,648,120,2,960200.0,Baker County,2_960200


## tract level median household income clean-up

In [29]:
# Preview the raw tract-level income table. Row 0 holds the Census column
# descriptions rather than data.
df_medinc.head()

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,HC01_EST_VC01,HC01_MOE_VC01,HC02_EST_VC01,HC02_MOE_VC01,HC03_EST_VC01,HC03_MOE_VC01,HC04_EST_VC01,HC04_MOE_VC01,HC01_EST_VC02,HC01_MOE_VC02,HC02_EST_VC02,HC02_MOE_VC02,HC03_EST_VC02,HC03_MOE_VC02,HC04_EST_VC02,HC04_MOE_VC02,HC01_EST_VC03,HC01_MOE_VC03,HC02_EST_VC03,HC02_MOE_VC03,HC03_EST_VC03,HC03_MOE_VC03,HC04_EST_VC03,HC04_MOE_VC03,HC01_EST_VC04,HC01_MOE_VC04,HC02_EST_VC04,HC02_MOE_VC04,HC03_EST_VC04,HC03_MOE_VC04,HC04_EST_VC04,HC04_MOE_VC04,HC01_EST_VC05,HC01_MOE_VC05,HC02_EST_VC05,HC02_MOE_VC05,HC03_EST_VC05,HC03_MOE_VC05,HC04_EST_VC05,HC04_MOE_VC05,HC01_EST_VC06,HC01_MOE_VC06,HC02_EST_VC06,HC02_MOE_VC06,HC03_EST_VC06,HC03_MOE_VC06,HC04_EST_VC06,HC04_MOE_VC06,HC01_EST_VC07,HC01_MOE_VC07,HC02_EST_VC07,HC02_MOE_VC07,HC03_EST_VC07,HC03_MOE_VC07,HC04_EST_VC07,HC04_MOE_VC07,HC01_EST_VC08,HC01_MOE_VC08,HC02_EST_VC08,HC02_MOE_VC08,HC03_EST_VC08,HC03_MOE_VC08,HC04_EST_VC08,HC04_MOE_VC08,HC01_EST_VC09,HC01_MOE_VC09,HC02_EST_VC09,HC02_MOE_VC09,HC03_EST_VC09,HC03_MOE_VC09,HC04_EST_VC09,HC04_MOE_VC09,HC01_EST_VC10,HC01_MOE_VC10,HC02_EST_VC10,HC02_MOE_VC10,HC03_EST_VC10,HC03_MOE_VC10,HC04_EST_VC10,HC04_MOE_VC10,HC01_EST_VC11,HC01_MOE_VC11,HC02_EST_VC11,HC02_MOE_VC11,HC03_EST_VC11,HC03_MOE_VC11,HC04_EST_VC11,HC04_MOE_VC11,HC01_EST_VC13,HC01_MOE_VC13,HC02_EST_VC13,HC02_MOE_VC13,HC03_EST_VC13,HC03_MOE_VC13,HC04_EST_VC13,HC04_MOE_VC13,HC01_EST_VC15,HC01_MOE_VC15,HC02_EST_VC15,HC02_MOE_VC15,HC03_EST_VC15,HC03_MOE_VC15,HC04_EST_VC15,HC04_MOE_VC15,HC01_EST_VC18,HC01_MOE_VC18,HC02_EST_VC18,HC02_MOE_VC18,HC03_EST_VC18,HC03_MOE_VC18,HC04_EST_VC18,HC04_MOE_VC18,HC01_EST_VC19,HC01_MOE_VC19,HC02_EST_VC19,HC02_MOE_VC19,HC03_EST_VC19,HC03_MOE_VC19,HC04_EST_VC19,HC04_MOE_VC19,HC01_EST_VC20,HC01_MOE_VC20,HC02_EST_VC20,HC02_MOE_VC20,HC03_EST_VC20,HC03_MOE_VC20,HC04_EST_VC20,HC04_MOE_VC20
0,Id,Id2,Geography,Households; Estimate; Total,Households; Margin of Error; Total,Families; Estimate; Total,Families; Margin of Error; Total,Married-couple families; Estimate; Total,Married-couple families; Margin of Error; Total,Nonfamily households; Estimate; Total,Nonfamily households; Margin of Error; Total,"Households; Estimate; Less than $10,000","Households; Margin of Error; Less than $10,000","Families; Estimate; Less than $10,000","Families; Margin of Error; Less than $10,000",Married-couple families; Estimate; Less than $...,Married-couple families; Margin of Error; Less...,"Nonfamily households; Estimate; Less than $10,000",Nonfamily households; Margin of Error; Less th...,"Households; Estimate; $10,000 to $14,999","Households; Margin of Error; $10,000 to $14,999","Families; Estimate; $10,000 to $14,999","Families; Margin of Error; $10,000 to $14,999","Married-couple families; Estimate; $10,000 to ...","Married-couple families; Margin of Error; $10,...","Nonfamily households; Estimate; $10,000 to $14...","Nonfamily households; Margin of Error; $10,000...","Households; Estimate; $15,000 to $24,999","Households; Margin of Error; $15,000 to $24,999","Families; Estimate; $15,000 to $24,999","Families; Margin of Error; $15,000 to $24,999","Married-couple families; Estimate; $15,000 to ...","Married-couple families; Margin of Error; $15,...","Nonfamily households; Estimate; $15,000 to $24...","Nonfamily households; Margin of Error; $15,000...","Households; Estimate; $25,000 to $34,999","Households; Margin of Error; $25,000 to $34,999","Families; Estimate; $25,000 to $34,999","Families; Margin of Error; $25,000 to $34,999","Married-couple families; Estimate; $25,000 to ...","Married-couple families; Margin of Error; $25,...","Nonfamily households; Estimate; $25,000 to $34...","Nonfamily households; Margin of Error; $25,000...","Households; Estimate; $35,000 to $49,999","Households; Margin of Error; $35,000 to $49,999","Families; Estimate; $35,000 to 
$49,999","Families; Margin of Error; $35,000 to $49,999","Married-couple families; Estimate; $35,000 to ...","Married-couple families; Margin of Error; $35,...","Nonfamily households; Estimate; $35,000 to $49...","Nonfamily households; Margin of Error; $35,000...","Households; Estimate; $50,000 to $74,999","Households; Margin of Error; $50,000 to $74,999","Families; Estimate; $50,000 to $74,999","Families; Margin of Error; $50,000 to $74,999","Married-couple families; Estimate; $50,000 to ...","Married-couple families; Margin of Error; $50,...","Nonfamily households; Estimate; $50,000 to $74...","Nonfamily households; Margin of Error; $50,000...","Households; Estimate; $75,000 to $99,999","Households; Margin of Error; $75,000 to $99,999","Families; Estimate; $75,000 to $99,999","Families; Margin of Error; $75,000 to $99,999","Married-couple families; Estimate; $75,000 to ...","Married-couple families; Margin of Error; $75,...","Nonfamily households; Estimate; $75,000 to $99...","Nonfamily households; Margin of Error; $75,000...","Households; Estimate; $100,000 to $149,999","Households; Margin of Error; $100,000 to $149,999","Families; Estimate; $100,000 to $149,999","Families; Margin of Error; $100,000 to $149,999","Married-couple families; Estimate; $100,000 to...",Married-couple families; Margin of Error; $100...,"Nonfamily households; Estimate; $100,000 to $1...","Nonfamily households; Margin of Error; $100,00...","Households; Estimate; $150,000 to $199,999","Households; Margin of Error; $150,000 to $199,999","Families; Estimate; $150,000 to $199,999","Families; Margin of Error; $150,000 to $199,999","Married-couple families; Estimate; $150,000 to...",Married-couple families; Margin of Error; $150...,"Nonfamily households; Estimate; $150,000 to $1...","Nonfamily households; Margin of Error; $150,00...","Households; Estimate; $200,000 or more","Households; Margin of Error; $200,000 or more","Families; Estimate; $200,000 or more","Families; Margin of Error; 
$200,000 or more","Married-couple families; Estimate; $200,000 or...",Married-couple families; Margin of Error; $200...,"Nonfamily households; Estimate; $200,000 or more","Nonfamily households; Margin of Error; $200,00...",Households; Estimate; Median income (dollars),Households; Margin of Error; Median income (do...,Families; Estimate; Median income (dollars),Families; Margin of Error; Median income (doll...,Married-couple families; Estimate; Median inco...,Married-couple families; Margin of Error; Medi...,Nonfamily households; Estimate; Median income ...,Nonfamily households; Margin of Error; Median ...,Households; Estimate; Mean income (dollars),Households; Margin of Error; Mean income (doll...,Families; Estimate; Mean income (dollars),Families; Margin of Error; Mean income (dollars),Married-couple families; Estimate; Mean income...,Married-couple families; Margin of Error; Mean...,Nonfamily households; Estimate; Mean income (d...,Nonfamily households; Margin of Error; Mean in...,Households; Estimate; PERCENT ALLOCATED - Hous...,Households; Margin of Error; PERCENT ALLOCATED...,Families; Estimate; PERCENT ALLOCATED - Househ...,Families; Margin of Error; PERCENT ALLOCATED -...,Married-couple families; Estimate; PERCENT ALL...,Married-couple families; Margin of Error; PERC...,Nonfamily households; Estimate; PERCENT ALLOCA...,Nonfamily households; Margin of Error; PERCENT...,Households; Estimate; PERCENT ALLOCATED - Fami...,Households; Margin of Error; PERCENT ALLOCATED...,Families; Estimate; PERCENT ALLOCATED - Family...,Families; Margin of Error; PERCENT ALLOCATED -...,Married-couple families; Estimate; PERCENT ALL...,Married-couple families; Margin of Error; PERC...,Nonfamily households; Estimate; PERCENT ALLOCA...,Nonfamily households; Margin of Error; PERCENT...,Households; Estimate; PERCENT ALLOCATED - Nonf...,Households; Margin of Error; PERCENT ALLOCATED...,Families; Estimate; PERCENT ALLOCATED - Nonfam...,Families; Margin of Error; PERCENT ALLOCATED 
-...,Married-couple families; Estimate; PERCENT ALL...,Married-couple families; Margin of Error; PERC...,Nonfamily households; Estimate; PERCENT ALLOCA...,Nonfamily households; Margin of Error; PERCENT...
1,1400000US13007960100,13007960100,"Census Tract 9601, Baker County, Georgia",804,99,528,69,345,78,276,94,21.9,8.2,11.0,7.5,10.1,10.2,42.8,16.1,6.7,3.9,7.4,5.1,4.9,4.2,5.4,5.2,6.8,3.5,3.0,3.2,4.6,4.8,14.1,9.2,7.6,4.3,10.4,6.4,7.0,6.1,3.3,4.1,12.9,6.3,12.1,7.1,13.3,10.4,13.4,12.5,24.1,8.4,30.5,10.6,28.4,10.9,13.4,11.7,7.0,4.9,10.6,7.0,16.2,10.5,0.0,12.7,11.2,5.0,12.3,6.1,11.3,8.0,7.6,10.2,1.2,1.5,1.9,2.3,2.9,3.7,0.0,12.7,0.5,0.8,0.8,1.1,1.2,1.7,0.0,12.7,43594,8322,52614,4041,54844,11041,17083,12413,48346,6931,57024,8985,N,N,31070,11981,59.1,(X),(X),(X),(X),(X),(X),(X),(X),(X),58.1,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),59.8,(X)
2,1400000US13007960200,13007960200,"Census Tract 9602, Baker County, Georgia",517,83,311,66,253,61,206,80,10.6,9.1,1.6,2.7,0.0,13.7,24.3,20.8,1.4,1.9,0.0,11.3,0.0,13.7,3.4,4.9,13.0,7.3,9.3,9.2,11.5,10.9,18.4,14.9,20.7,10.5,7.7,8.0,9.5,9.7,40.3,19.8,12.0,10.2,12.5,12.5,12.3,14.7,11.2,15.1,19.0,10.5,31.5,15.0,32.4,17.6,0.0,16.6,12.8,7.2,19.6,12.2,21.3,14.5,2.4,4.2,0.0,7.0,0.0,11.3,0.0,13.7,0.0,16.6,6.6,5.8,10.9,9.5,4.7,6.9,0.0,16.6,4.1,3.9,6.8,6.4,8.3,7.9,0.0,16.6,45486,18438,54926,22075,52574,17530,28106,11326,58316,13713,80776,20162,N,N,24407,6978,55.9,(X),(X),(X),(X),(X),(X),(X),(X),(X),57.9,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),52.9,(X)
3,1400000US13095000100,13095000100,"Census Tract 1, Dougherty County, Georgia",1905,220,911,173,244,117,994,210,19.0,6.9,17.9,9.2,0.0,14.2,23.9,10.7,6.3,4.2,3.5,4.5,0.0,14.2,8.9,6.8,19.6,8.1,23.3,12.9,5.3,8.4,19.8,11.7,21.6,8.7,11.0,9.3,5.7,10.0,27.5,14.2,17.4,8.4,15.9,8.6,20.1,20.2,18.7,11.8,9.6,5.6,16.1,9.5,49.2,23.9,0.0,3.7,3.6,3.0,7.5,6.2,19.7,17.5,0.0,3.7,0.6,1.0,0.0,4.0,0.0,14.2,1.2,1.9,2.3,3.5,4.8,7.0,0.0,14.2,0.0,3.7,0.0,1.9,0.0,4.0,0.0,14.2,0.0,3.7,26681,4099,30806,10054,54808,21554,21597,7978,32589,5350,40319,9670,N,N,23157,4282,46.5,(X),(X),(X),(X),(X),(X),(X),(X),(X),38.5,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),49.4,(X)
4,1400000US13095000200,13095000200,"Census Tract 2, Dougherty County, Georgia",1006,141,643,118,200,90,363,127,25.0,9.4,25.7,11.1,0.0,17.0,26.2,17.2,14.9,6.9,6.7,6.7,0.0,17.0,29.5,15.6,20.6,9.4,20.1,12.4,21.5,18.2,19.0,13.5,18.0,8.7,22.9,11.5,42.0,20.3,9.4,11.8,11.7,7.6,14.8,11.1,16.5,18.1,6.3,8.0,8.0,6.4,7.9,5.9,13.5,15.4,8.0,12.9,1.0,1.8,1.6,2.8,5.0,8.9,0.0,9.8,0.6,1.1,0.0,5.7,0.0,17.0,1.7,3.0,0.0,3.7,0.0,5.7,0.0,17.0,0.0,9.8,0.3,1.0,0.5,1.6,1.5,5.4,0.0,9.8,20789,3843,23750,4682,30571,5690,12351,7963,24533,4489,26745,5892,N,N,20391,6997,45.5,(X),(X),(X),(X),(X),(X),(X),(X),(X),43.1,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),47.4,(X)


In [30]:
# Split labels of the form "Census Tract T, <county>, Georgia" on ", " and
# keep the first two pieces as their own columns.
new = df_medinc["GEO.display-label"].str.split(", ", expand=True)
for part_position, part_name in enumerate(["tract", "county"]):
    df_medinc[part_name] = new[part_position]

In [31]:
# The combined label is no longer needed once it has been split into parts.
df_medinc = df_medinc.drop(columns=['GEO.display-label'])

In [32]:
# Pull the tract number (e.g. "9601" or "5.01") out of labels such as
# "Census Tract 9601" into a working copy of the tract-level income table.
# Row 0 holds the Census column descriptions, not data, so it is left untouched.
df_medinc1 = df_medinc.copy()
# Raw string with an escaped dot: the decimal point must be a literal ".",
# and "\d" must not be treated as a (deprecated) string escape.
re_string = r'Census Tract (\d+(?:\.\d+)?)'
data_rows = df_medinc.index[1:]
tract_labels = df_medinc.loc[data_rows, 'tract']
tract_numbers = tract_labels.str.extract(re_string, expand=False)
# Fail loudly, naming the offending labels, instead of crashing on a None match.
unparsed_labels = tract_labels[tract_numbers.isna()]
if not unparsed_labels.empty:
    raise ValueError(
        'no tract number found in: {}'.format(unparsed_labels.head().tolist())
    )
df_medinc1.loc[data_rows, 'tract'] = tract_numbers

In [33]:
# Collapse tract numbers into the compact code used as a join key:
#   "5.01" -> "501"    (decimal suffix: drop the dot)
#   "9601" -> "960100" (no suffix: append "00")
# Row 0 is the Census description row and is skipped.
# NOTE(review): assumes decimal suffixes are always two digits - confirm.
data_rows = df_medinc1.index[1:]
tracts = df_medinc1.loc[data_rows, 'tract']
has_suffix = tracts.str.contains('.', regex=False, na=False)
# Only whole numbers of 1-4 digits are padded; longer values are left as-is.
needs_padding = ~has_suffix & tracts.str.len().between(1, 4)
# Dotted values are collapsed whatever the length of the integer part, so
# "9501.02" becomes "950102" instead of silently keeping its dot.
tract_codes = tracts.str.replace('.', '', regex=False).mask(needs_padding, tracts + '00')
# Write back through .loc on the frame: assigning into a Series pulled out of
# the frame is chained assignment and is not guaranteed to update the frame.
df_medinc1.loc[data_rows, 'tract'] = tract_codes

In [34]:
# Reorder the columns so that 'tract' comes last. Appending after the pop
# avoids hard-coding the table width (previously a literal 132).
cols = list(df_medinc1)
cols.append(cols.pop(cols.index('tract')))
df_medinc1 = df_medinc1.loc[:, cols]

len(cols)

132

In [35]:
# Reorder the columns so that 'tract' comes last. Appending after the pop
# avoids hard-coding the table width (previously a literal 14).
cols = list(df_land_area1)
cols.append(cols.pop(cols.index('tract')))

# Row label 1 is the county-wide total ("Dougherty County"), not a tract, so it
# is removed. A new frame is bound rather than dropping in place.
df_land_area1 = df_land_area1.loc[:, cols].drop(index=1)
df_land_area1.head()

Unnamed: 0,GEO.id,GEO.id2,GCT_STUB.target-geo-id,GCT_STUB.target-geo-id2,GCT_STUB.display-label,HD01,HD02,SUBHD0301,SUBHD0302,SUBHD0303,SUBHD0401,SUBHD0402,county,tract
0,Id,Id2,Target Geo Id,Target Geo Id2,Geographic area,Population,Housing units,Area in square miles - Total area,Area in square miles - Water area,Area in square miles - Land area,Density per square mile of land area - Population,Density per square mile of land area - Housing...,Geography,Geographic area
2,0500000US13095,13095,1400000US13095000100,13095000100,Dougherty County - Census Tract 1,5944,2545,3.17,0.02,3.15,1886.7,807.8,Dougherty County,100
3,0500000US13095,13095,1400000US13095000200,13095000200,Dougherty County - Census Tract 2,3123,1348,1.91,0.04,1.87,1667.0,719.5,Dougherty County,200
4,0500000US13095,13095,1400000US13095000400,13095000400,Dougherty County - Census Tract 4,6276,3302,2.90,0.05,2.84,2206.7,1161.0,Dougherty County,400
5,0500000US13095,13095,1400000US13095000501,13095000501,Dougherty County - Census Tract 5.01,4798,2365,1.76,0.02,1.74,2764.5,1362.6,Dougherty County,501


## cleaning merged data (block group)

In [36]:
# Disabled scratch code (nothing in this cell executes).
# NOTE(review): if re-enabled as written, the first line would rebind df_block_full
# to None, because DataFrame.drop(..., inplace=True) returns None.
#df_block_full = df_block_full.drop(['HD02_VD01_x', 'HD02_VD02_x'], axis=1, inplace=True)
#df_block_full.head()
#df_block_full['HD02_VD01_x']

## prefix column names by source table (before merging)

In [37]:
# Tag every income column with 'i_' so its source stays identifiable after the
# merges; the 'block_tract' join key keeps its name and is moved to the front.
# One reorder + rename replaces the per-column copy/drop loop (which copied the
# whole frame once per column and relied on the key being the last column).
# Columns that already carry the prefix are skipped, so re-running the cell does
# not produce 'i_i_...' names.
merge_key, prefix = 'block_tract', 'i_'
data_cols = [c for c in df_inc1.columns if c != merge_key]
df_inc1 = df_inc1[[merge_key] + data_cols].rename(
    columns={c: prefix + c for c in data_cols if not c.startswith(prefix)})

df_inc1.columns

Index(['block_tract', 'i_GEO.id', 'i_GEO.id2', 'i_HD01_VD01', 'i_HD02_VD01',
       'i_HD01_VD02', 'i_HD02_VD02', 'i_HD01_VD03', 'i_HD02_VD03',
       'i_HD01_VD04', 'i_HD02_VD04', 'i_HD01_VD05', 'i_HD02_VD05',
       'i_HD01_VD06', 'i_HD02_VD06', 'i_HD01_VD07', 'i_HD02_VD07',
       'i_HD01_VD08', 'i_HD02_VD08', 'i_HD01_VD09', 'i_HD02_VD09',
       'i_HD01_VD10', 'i_HD02_VD10', 'i_HD01_VD11', 'i_HD02_VD11',
       'i_HD01_VD12', 'i_HD02_VD12', 'i_HD01_VD13', 'i_HD02_VD13',
       'i_HD01_VD14', 'i_HD02_VD14', 'i_HD01_VD15', 'i_HD02_VD15',
       'i_HD01_VD16', 'i_HD02_VD16', 'i_HD01_VD17', 'i_HD02_VD17', 'i_block',
       'i_tract', 'i_county'],
      dtype='object')

In [38]:
# Tag every population column with 'p_' so its source stays identifiable after the
# merges; the 'block_tract' join key keeps its name and is moved to the front.
# One reorder + rename replaces the per-column copy/drop loop (which relied on the
# key being the last column). Already-prefixed columns are skipped so the cell can
# be re-run safely.
merge_key, prefix = 'block_tract', 'p_'
data_cols = [c for c in df_pop1.columns if c != merge_key]
df_pop1 = df_pop1[[merge_key] + data_cols].rename(
    columns={c: prefix + c for c in data_cols if not c.startswith(prefix)})

df_pop1.columns

Index(['block_tract', 'p_GEO.id', 'p_GEO.id2', 'p_HD01_VD01', 'p_HD02_VD01',
       'p_block', 'p_tract', 'p_county'],
      dtype='object')

In [39]:
# Tag every employment column with 'e_' so its source stays identifiable after the
# merges; the 'block_tract' join key keeps its name and is moved to the front.
# One reorder + rename replaces the per-column copy/drop loop (which relied on the
# key being the last column). Already-prefixed columns are skipped so the cell can
# be re-run safely.
merge_key, prefix = 'block_tract', 'e_'
data_cols = [c for c in df_employment.columns if c != merge_key]
df_employment = df_employment[[merge_key] + data_cols].rename(
    columns={c: prefix + c for c in data_cols if not c.startswith(prefix)})

df_employment.columns

Index(['block_tract', 'e_GEO.id', 'e_GEO.id2', 'e_HD01_VD01', 'e_HD02_VD01',
       'e_HD01_VD02', 'e_HD02_VD02', 'e_HD01_VD03', 'e_HD02_VD03',
       'e_HD01_VD04', 'e_HD02_VD04', 'e_HD01_VD05', 'e_HD02_VD05',
       'e_HD01_VD06', 'e_HD02_VD06', 'e_HD01_VD07', 'e_HD02_VD07', 'e_block',
       'e_tract', 'e_county'],
      dtype='object')

In [40]:
# Tag every owner/renter column with 'or_' so its source stays identifiable after
# the merges; the 'block_tract' join key keeps its name and is moved to the front.
# One reorder + rename replaces the per-column copy/drop loop (which relied on the
# key being the last column). Already-prefixed columns are skipped so the cell can
# be re-run safely.
merge_key, prefix = 'block_tract', 'or_'
data_cols = [c for c in df_owner_renter.columns if c != merge_key]
df_owner_renter = df_owner_renter[[merge_key] + data_cols].rename(
    columns={c: prefix + c for c in data_cols if not c.startswith(prefix)})

df_owner_renter.columns

Index(['block_tract', 'or_GEO.id', 'or_GEO.id2', 'or_HD01_VD01',
       'or_HD02_VD01', 'or_HD01_VD02', 'or_HD02_VD02', 'or_HD01_VD03',
       'or_HD02_VD03', 'or_block', 'or_tract', 'or_county'],
      dtype='object')

In [41]:
# Tag every vacancy column with 'v_' so its source stays identifiable after the
# merges; the 'block_tract' join key keeps its name and is moved to the front.
# One reorder + rename replaces the per-column copy/drop loop (which relied on the
# key being the last column). Already-prefixed columns are skipped so the cell can
# be re-run safely.
merge_key, prefix = 'block_tract', 'v_'
data_cols = [c for c in df_vacancy.columns if c != merge_key]
df_vacancy = df_vacancy[[merge_key] + data_cols].rename(
    columns={c: prefix + c for c in data_cols if not c.startswith(prefix)})

df_vacancy.columns

Index(['block_tract', 'v_GEO.id', 'v_GEO.id2', 'v_HD01_VD01', 'v_HD02_VD01',
       'v_HD01_VD02', 'v_HD02_VD02', 'v_HD01_VD03', 'v_HD02_VD03', 'v_block',
       'v_tract', 'v_county'],
      dtype='object')

In [42]:
# Tag every median-income column with 'm_' so its source stays identifiable after
# the tract-level merge; the 'tract' join key keeps its name and is moved to the front.
# One reorder + rename replaces the per-column copy/drop loop, which copied this
# 132-column frame once per column and relied on the key being the last column.
# Already-prefixed columns are skipped so the cell can be re-run safely.
merge_key, prefix = 'tract', 'm_'
data_cols = [c for c in df_medinc1.columns if c != merge_key]
df_medinc1 = df_medinc1[[merge_key] + data_cols].rename(
    columns={c: prefix + c for c in data_cols if not c.startswith(prefix)})

df_medinc1.columns

Index(['tract', 'm_GEO.id', 'm_GEO.id2', 'm_HC01_EST_VC01', 'm_HC01_MOE_VC01',
       'm_HC02_EST_VC01', 'm_HC02_MOE_VC01', 'm_HC03_EST_VC01',
       'm_HC03_MOE_VC01', 'm_HC04_EST_VC01',
       ...
       'm_HC04_MOE_VC19', 'm_HC01_EST_VC20', 'm_HC01_MOE_VC20',
       'm_HC02_EST_VC20', 'm_HC02_MOE_VC20', 'm_HC03_EST_VC20',
       'm_HC03_MOE_VC20', 'm_HC04_EST_VC20', 'm_HC04_MOE_VC20', 'm_county'],
      dtype='object', length=132)

In [43]:
# Tag every land-area column with 'la_' so its source stays identifiable after the
# tract-level merge; the 'tract' join key keeps its name and is moved to the front.
# One reorder + rename replaces the per-column copy/drop loop (which relied on the
# key being the last column). Already-prefixed columns are skipped so the cell can
# be re-run safely.
merge_key, prefix = 'tract', 'la_'
data_cols = [c for c in df_land_area1.columns if c != merge_key]
df_land_area1 = df_land_area1[[merge_key] + data_cols].rename(
    columns={c: prefix + c for c in data_cols if not c.startswith(prefix)})

df_land_area1.columns

Index(['tract', 'la_GEO.id', 'la_GEO.id2', 'la_GCT_STUB.target-geo-id',
       'la_GCT_STUB.target-geo-id2', 'la_GCT_STUB.display-label', 'la_HD01',
       'la_HD02', 'la_SUBHD0301', 'la_SUBHD0302', 'la_SUBHD0303',
       'la_SUBHD0401', 'la_SUBHD0402', 'la_county'],
      dtype='object')

## merging! (block group level)

In [44]:
# Start the block-group table: inner join of income and population on the shared key.
df_block_full = pd.merge(df_inc1, df_pop1, how='inner', on='block_tract')
df_block_full.shape

(112, 47)

In [45]:
# Add the employment columns (inner join on the block-group key).
df_block_full = pd.merge(df_block_full, df_employment, how='inner', on='block_tract')
df_block_full.shape

(112, 66)

In [46]:
# Add the vacancy columns (inner join on the block-group key).
df_block_full = pd.merge(df_block_full, df_vacancy, how='inner', on='block_tract')
df_block_full.shape

(112, 77)

In [47]:
# Add the owner/renter columns (inner join on the block-group key).
df_block_full = pd.merge(df_block_full, df_owner_renter, how='inner', on='block_tract')
df_block_full.shape

(112, 88)

In [48]:
# Sanity check: total column count of the five source tables. Each table carries its
# own copy of the join key, so the merged frame should have this total minus four.
sum(len(frame.columns)
    for frame in (df_pop1, df_inc1, df_employment, df_vacancy, df_owner_renter))

92

In [49]:
# Preview the merged block-group table (first five rows).
df_block_full.head(n=5)

Unnamed: 0,block_tract,i_GEO.id,i_GEO.id2,i_HD01_VD01,i_HD02_VD01,i_HD01_VD02,i_HD02_VD02,i_HD01_VD03,i_HD02_VD03,i_HD01_VD04,i_HD02_VD04,i_HD01_VD05,i_HD02_VD05,i_HD01_VD06,i_HD02_VD06,i_HD01_VD07,i_HD02_VD07,i_HD01_VD08,i_HD02_VD08,i_HD01_VD09,i_HD02_VD09,i_HD01_VD10,i_HD02_VD10,i_HD01_VD11,i_HD02_VD11,i_HD01_VD12,i_HD02_VD12,i_HD01_VD13,i_HD02_VD13,i_HD01_VD14,i_HD02_VD14,i_HD01_VD15,i_HD02_VD15,i_HD01_VD16,i_HD02_VD16,i_HD01_VD17,i_HD02_VD17,i_block,i_tract,i_county,p_GEO.id,p_GEO.id2,p_HD01_VD01,p_HD02_VD01,p_block,p_tract,p_county,e_GEO.id,e_GEO.id2,e_HD01_VD01,e_HD02_VD01,e_HD01_VD02,e_HD02_VD02,e_HD01_VD03,e_HD02_VD03,e_HD01_VD04,e_HD02_VD04,e_HD01_VD05,e_HD02_VD05,e_HD01_VD06,e_HD02_VD06,e_HD01_VD07,e_HD02_VD07,e_block,e_tract,e_county,v_GEO.id,v_GEO.id2,v_HD01_VD01,v_HD02_VD01,v_HD01_VD02,v_HD02_VD02,v_HD01_VD03,v_HD02_VD03,v_block,v_tract,v_county,or_GEO.id,or_GEO.id2,or_HD01_VD01,or_HD02_VD01,or_HD01_VD02,or_HD02_VD02,or_HD01_VD03,or_HD02_VD03,or_block,or_tract,or_county
0,,Id,Id2,Estimate; Total:,Margin of Error; Total:,"Estimate; Total: - Less than $10,000","Margin of Error; Total: - Less than $10,000","Estimate; Total: - $10,000 to $14,999","Margin of Error; Total: - $10,000 to $14,999","Estimate; Total: - $15,000 to $19,999","Margin of Error; Total: - $15,000 to $19,999","Estimate; Total: - $20,000 to $24,999","Margin of Error; Total: - $20,000 to $24,999","Estimate; Total: - $25,000 to $29,999","Margin of Error; Total: - $25,000 to $29,999","Estimate; Total: - $30,000 to $34,999","Margin of Error; Total: - $30,000 to $34,999","Estimate; Total: - $35,000 to $39,999","Margin of Error; Total: - $35,000 to $39,999","Estimate; Total: - $40,000 to $44,999","Margin of Error; Total: - $40,000 to $44,999","Estimate; Total: - $45,000 to $49,999","Margin of Error; Total: - $45,000 to $49,999","Estimate; Total: - $50,000 to $59,999","Margin of Error; Total: - $50,000 to $59,999","Estimate; Total: - $60,000 to $74,999","Margin of Error; Total: - $60,000 to $74,999","Estimate; Total: - $75,000 to $99,999","Margin of Error; Total: - $75,000 to $99,999","Estimate; Total: - $100,000 to $124,999","Margin of Error; Total: - $100,000 to $124,999","Estimate; Total: - $125,000 to $149,999","Margin of Error; Total: - $125,000 to $149,999","Estimate; Total: - $150,000 to $199,999","Margin of Error; Total: - $150,000 to $199,999","Estimate; Total: - $200,000 or more","Margin of Error; Total: - $200,000 or more",Geography,,,Id,Id2,Estimate; Total,Margin of Error; Total,Geography,,,Id,Id2,Estimate; Total:,Margin of Error; Total:,Estimate; In labor force:,Margin of Error; In labor force:,Estimate; In labor force: - Civilian labor force:,Margin of Error; In labor force: - Civilian la...,Estimate; In labor force: - Civilian labor for...,Margin of Error; In labor force: - Civilian la...,Estimate; In labor force: - Civilian labor for...,Margin of Error; In labor force: - Civilian la...,Estimate; In labor force: - Armed Forces,Margin of Error; In labor force: 
- Armed Forces,Estimate; Not in labor force,Margin of Error; Not in labor force,Geography,,,Id,Id2,Estimate; Total:,Margin of Error; Total:,Estimate; Total: - Occupied,Margin of Error; Total: - Occupied,Estimate; Total: - Vacant,Margin of Error; Total: - Vacant,Geography,,,Id,Id2,Estimate; Total:,Margin of Error; Total:,Estimate; Total: - Owner occupied,Margin of Error; Total: - Owner occupied,Estimate; Total: - Renter occupied,Margin of Error; Total: - Renter occupied,Geography,,
1,1_960100,1500000US130079601001,130079601001,378,77,88,38,47,29,6,10,9,14,18,16,28,28,7,13,14,18,16,17,22,18,40,32,44,36,22,20,10,14,7,11,0,13,1,960100.0,Baker County,1500000US130079601001,130079601001,869,181,1,960100.0,Baker County,1500000US130079601001,130079601001,684,144,350,113,350,113,350,113,0,13,0,13,334,87,1,960100.0,Baker County,1500000US130079601001,130079601001,524,91,378,77,146,58,1,960100.0,Baker County,1500000US130079601001,130079601001,869,181,457,159,412,154,1,960100.0,Baker County
2,2_960100,1500000US130079601002,130079601002,426,71,88,59,7,10,7,11,33,25,8,10,7,9,9,10,44,41,14,21,79,46,53,39,12,14,45,36,13,17,3,5,4,6,2,960100.0,Baker County,1500000US130079601002,130079601002,1247,223,2,960100.0,Baker County,1500000US130079601002,130079601002,1038,170,488,124,488,124,461,126,27,22,0,13,550,156,2,960100.0,Baker County,1500000US130079601002,130079601002,595,94,426,71,169,68,2,960100.0,Baker County,1500000US130079601002,130079601002,1247,223,764,170,483,210,2,960100.0,Baker County
3,1_960200,1500000US130079602001,130079602001,224,63,0,13,7,10,7,10,27,26,13,18,26,28,0,13,5,8,25,39,36,34,27,21,30,27,0,13,0,13,0,13,21,20,1,960200.0,Baker County,1500000US130079602001,130079602001,487,149,1,960200.0,Baker County,1500000US130079602001,130079602001,450,129,262,106,262,106,262,106,0,13,0,13,188,70,1,960200.0,Baker County,1500000US130079602001,130079602001,298,65,224,63,74,47,1,960200.0,Baker County,1500000US130079602001,130079602001,487,149,386,133,101,74,1,960200.0,Baker County
4,2_960200,1500000US130079602002,130079602002,293,64,55,48,0,13,10,11,23,23,44,39,24,22,14,15,0,13,18,33,30,32,5,8,36,25,0,13,0,13,34,30,0,13,2,960200.0,Baker County,1500000US130079602002,130079602002,648,120,2,960200.0,Baker County,1500000US130079602002,130079602002,507,98,219,69,219,69,213,69,6,9,0,13,288,81,2,960200.0,Baker County,1500000US130079602002,130079602002,353,69,293,64,60,40,2,960200.0,Baker County,1500000US130079602002,130079602002,645,120,489,120,156,91,2,960200.0,Baker County


## cleaning up merged table (block group level)

In [50]:
# Drop the income HD02 columns (i_HD02_VD01 .. i_HD02_VD17); the matching HD01
# columns stay in the table.
income_hd02_cols = ['i_HD02_VD{:02d}'.format(n) for n in range(1, 18)]
df_block_full.drop(columns=income_hd02_cols, inplace=True)

In [51]:
# From the population table keep only p_HD01_VD01: drop its HD02 column and the
# identifier columns that repeat what the income table already provides.
population_extras = ['p_GEO.id', 'p_GEO.id2', 'p_HD02_VD01',
                     'p_block', 'p_tract', 'p_county']
df_block_full.drop(columns=population_extras, inplace=True)

In [52]:
# From the employment table drop the identifier columns and every HD02 column
# (e_HD02_VD01 .. e_HD02_VD07), leaving the HD01 columns.
employment_extras = (
    ['e_GEO.id', 'e_GEO.id2']
    + ['e_HD02_VD{:02d}'.format(n) for n in range(1, 8)]
    + ['e_block', 'e_tract', 'e_county']
)
df_block_full.drop(columns=employment_extras, inplace=True)

In [53]:
# From the vacancy table drop the identifier columns and every HD02 column
# (v_HD02_VD01 .. v_HD02_VD03), leaving the HD01 columns.
vacancy_extras = (
    ['v_GEO.id', 'v_GEO.id2']
    + ['v_HD02_VD{:02d}'.format(n) for n in range(1, 4)]
    + ['v_block', 'v_tract', 'v_county']
)
df_block_full.drop(columns=vacancy_extras, inplace=True)

In [54]:
# From the owner/renter table drop the identifier columns and every HD02 column
# (or_HD02_VD01 .. or_HD02_VD03), leaving the HD01 columns.
owner_renter_extras = (
    ['or_GEO.id', 'or_GEO.id2']
    + ['or_HD02_VD{:02d}'.format(n) for n in range(1, 4)]
    + ['or_block', 'or_tract', 'or_county']
)
df_block_full.drop(columns=owner_renter_extras, inplace=True)

In [55]:
# Preview the table after the column drops (first five rows).
df_block_full.head(n=5)

Unnamed: 0,block_tract,i_GEO.id,i_GEO.id2,i_HD01_VD01,i_HD01_VD02,i_HD01_VD03,i_HD01_VD04,i_HD01_VD05,i_HD01_VD06,i_HD01_VD07,i_HD01_VD08,i_HD01_VD09,i_HD01_VD10,i_HD01_VD11,i_HD01_VD12,i_HD01_VD13,i_HD01_VD14,i_HD01_VD15,i_HD01_VD16,i_HD01_VD17,i_block,i_tract,i_county,p_HD01_VD01,e_HD01_VD01,e_HD01_VD02,e_HD01_VD03,e_HD01_VD04,e_HD01_VD05,e_HD01_VD06,e_HD01_VD07,v_HD01_VD01,v_HD01_VD02,v_HD01_VD03,or_HD01_VD01,or_HD01_VD02,or_HD01_VD03
0,,Id,Id2,Estimate; Total:,"Estimate; Total: - Less than $10,000","Estimate; Total: - $10,000 to $14,999","Estimate; Total: - $15,000 to $19,999","Estimate; Total: - $20,000 to $24,999","Estimate; Total: - $25,000 to $29,999","Estimate; Total: - $30,000 to $34,999","Estimate; Total: - $35,000 to $39,999","Estimate; Total: - $40,000 to $44,999","Estimate; Total: - $45,000 to $49,999","Estimate; Total: - $50,000 to $59,999","Estimate; Total: - $60,000 to $74,999","Estimate; Total: - $75,000 to $99,999","Estimate; Total: - $100,000 to $124,999","Estimate; Total: - $125,000 to $149,999","Estimate; Total: - $150,000 to $199,999","Estimate; Total: - $200,000 or more",Geography,,,Estimate; Total,Estimate; Total:,Estimate; In labor force:,Estimate; In labor force: - Civilian labor force:,Estimate; In labor force: - Civilian labor for...,Estimate; In labor force: - Civilian labor for...,Estimate; In labor force: - Armed Forces,Estimate; Not in labor force,Estimate; Total:,Estimate; Total: - Occupied,Estimate; Total: - Vacant,Estimate; Total:,Estimate; Total: - Owner occupied,Estimate; Total: - Renter occupied
1,1_960100,1500000US130079601001,130079601001,378,88,47,6,9,18,28,7,14,16,22,40,44,22,10,7,0,1,960100.0,Baker County,869,684,350,350,350,0,0,334,524,378,146,869,457,412
2,2_960100,1500000US130079601002,130079601002,426,88,7,7,33,8,7,9,44,14,79,53,12,45,13,3,4,2,960100.0,Baker County,1247,1038,488,488,461,27,0,550,595,426,169,1247,764,483
3,1_960200,1500000US130079602001,130079602001,224,0,7,7,27,13,26,0,5,25,36,27,30,0,0,0,21,1,960200.0,Baker County,487,450,262,262,262,0,0,188,298,224,74,487,386,101
4,2_960200,1500000US130079602002,130079602002,293,55,0,10,23,44,24,14,0,18,30,5,36,0,0,34,0,2,960200.0,Baker County,648,507,219,219,213,6,0,288,353,293,60,645,489,156


In [56]:
# Put the three geography columns first (block, tract, county, in that order);
# all remaining columns keep their current relative order.
leading = ['i_block', 'i_tract', 'i_county']
cols = leading + [name for name in df_block_full.columns if name not in leading]
df_block_full = df_block_full.loc[:, cols]
df_block_full.head()

Unnamed: 0,i_block,i_tract,i_county,block_tract,i_GEO.id,i_GEO.id2,i_HD01_VD01,i_HD01_VD02,i_HD01_VD03,i_HD01_VD04,i_HD01_VD05,i_HD01_VD06,i_HD01_VD07,i_HD01_VD08,i_HD01_VD09,i_HD01_VD10,i_HD01_VD11,i_HD01_VD12,i_HD01_VD13,i_HD01_VD14,i_HD01_VD15,i_HD01_VD16,i_HD01_VD17,p_HD01_VD01,e_HD01_VD01,e_HD01_VD02,e_HD01_VD03,e_HD01_VD04,e_HD01_VD05,e_HD01_VD06,e_HD01_VD07,v_HD01_VD01,v_HD01_VD02,v_HD01_VD03,or_HD01_VD01,or_HD01_VD02,or_HD01_VD03
0,Geography,,,,Id,Id2,Estimate; Total:,"Estimate; Total: - Less than $10,000","Estimate; Total: - $10,000 to $14,999","Estimate; Total: - $15,000 to $19,999","Estimate; Total: - $20,000 to $24,999","Estimate; Total: - $25,000 to $29,999","Estimate; Total: - $30,000 to $34,999","Estimate; Total: - $35,000 to $39,999","Estimate; Total: - $40,000 to $44,999","Estimate; Total: - $45,000 to $49,999","Estimate; Total: - $50,000 to $59,999","Estimate; Total: - $60,000 to $74,999","Estimate; Total: - $75,000 to $99,999","Estimate; Total: - $100,000 to $124,999","Estimate; Total: - $125,000 to $149,999","Estimate; Total: - $150,000 to $199,999","Estimate; Total: - $200,000 or more",Estimate; Total,Estimate; Total:,Estimate; In labor force:,Estimate; In labor force: - Civilian labor force:,Estimate; In labor force: - Civilian labor for...,Estimate; In labor force: - Civilian labor for...,Estimate; In labor force: - Armed Forces,Estimate; Not in labor force,Estimate; Total:,Estimate; Total: - Occupied,Estimate; Total: - Vacant,Estimate; Total:,Estimate; Total: - Owner occupied,Estimate; Total: - Renter occupied
1,1,960100.0,Baker County,1_960100,1500000US130079601001,130079601001,378,88,47,6,9,18,28,7,14,16,22,40,44,22,10,7,0,869,684,350,350,350,0,0,334,524,378,146,869,457,412
2,2,960100.0,Baker County,2_960100,1500000US130079601002,130079601002,426,88,7,7,33,8,7,9,44,14,79,53,12,45,13,3,4,1247,1038,488,488,461,27,0,550,595,426,169,1247,764,483
3,1,960200.0,Baker County,1_960200,1500000US130079602001,130079602001,224,0,7,7,27,13,26,0,5,25,36,27,30,0,0,0,21,487,450,262,262,262,0,0,188,298,224,74,487,386,101
4,2,960200.0,Baker County,2_960200,1500000US130079602002,130079602002,293,55,0,10,23,44,24,14,0,18,30,5,36,0,0,34,0,648,507,219,219,213,6,0,288,353,293,60,645,489,156


In [57]:
# Replace the Census column codes with readable names, grouped by source table.
readable_names = {
    # geography / identifiers
    'i_block': 'block_group',
    'i_tract': 'tract',
    'i_county': 'county',
    'i_GEO.id': 'geo_id',
    'i_GEO.id2': 'geo_id2',
    # household income brackets
    'i_HD01_VD01': 'total_inc',
    'i_HD01_VD02': 'inc_less_10k',
    'i_HD01_VD03': 'inc_10k_14999',
    'i_HD01_VD04': 'inc_15k_19999',
    'i_HD01_VD05': 'inc_20k_24999',
    'i_HD01_VD06': 'inc_25k_29999',
    'i_HD01_VD07': 'inc_30k_34999',
    'i_HD01_VD08': 'inc_35k_39999',
    'i_HD01_VD09': 'inc_40k_44999',
    'i_HD01_VD10': 'inc_45k_49999',
    'i_HD01_VD11': 'inc_50k_59999',
    'i_HD01_VD12': 'inc_60k_74999',
    'i_HD01_VD13': 'inc_75k_99999',
    'i_HD01_VD14': 'inc_100k_124999',
    'i_HD01_VD15': 'inc_125k_149999',
    'i_HD01_VD16': 'inc_150k_199999',
    'i_HD01_VD17': 'inc_200k_more',
    # population
    'p_HD01_VD01': 'total_pop',
    # employment
    'e_HD01_VD01': 'total_labor',
    'e_HD01_VD02': 'total_in_laborforce',
    'e_HD01_VD03': 'total_civil_labor',
    'e_HD01_VD04': 'emp_civil_labor',
    'e_HD01_VD05': 'unemp_civil_labor',
    'e_HD01_VD06': 'total_armedforces',
    'e_HD01_VD07': 'total_notin_laborforce',
    # vacancy
    'v_HD01_VD01': 'total_vacdata',
    'v_HD01_VD02': 'total_occ_homes',
    'v_HD01_VD03': 'total_vac_homes',
    # owner / renter
    'or_HD01_VD01': 'total_ordata',
    'or_HD01_VD02': 'total_owner',
    'or_HD01_VD03': 'total_renter',
}
df_block_full = df_block_full.rename(columns=readable_names)
df_block_full.head()

Unnamed: 0,block_group,tract,county,block_tract,geo_id,geo_id2,total_inc,inc_less_10k,inc_10k_14999,inc_15k_19999,inc_20k_24999,inc_25k_29999,inc_30k_34999,inc_35k_39999,inc_40k_44999,inc_45k_49999,inc_50k_59999,inc_60k_74999,inc_75k_99999,inc_100k_124999,inc_125k_149999,inc_150k_199999,inc_200k_more,total_pop,total_labor,total_in_laborforce,total_civil_labor,emp_civil_labor,unemp_civil_labor,total_armedforces,total_notin_laborforce,total_vacdata,total_occ_homes,total_vac_homes,total_ordata,total_owner,total_renter
0,Geography,,,,Id,Id2,Estimate; Total:,"Estimate; Total: - Less than $10,000","Estimate; Total: - $10,000 to $14,999","Estimate; Total: - $15,000 to $19,999","Estimate; Total: - $20,000 to $24,999","Estimate; Total: - $25,000 to $29,999","Estimate; Total: - $30,000 to $34,999","Estimate; Total: - $35,000 to $39,999","Estimate; Total: - $40,000 to $44,999","Estimate; Total: - $45,000 to $49,999","Estimate; Total: - $50,000 to $59,999","Estimate; Total: - $60,000 to $74,999","Estimate; Total: - $75,000 to $99,999","Estimate; Total: - $100,000 to $124,999","Estimate; Total: - $125,000 to $149,999","Estimate; Total: - $150,000 to $199,999","Estimate; Total: - $200,000 or more",Estimate; Total,Estimate; Total:,Estimate; In labor force:,Estimate; In labor force: - Civilian labor force:,Estimate; In labor force: - Civilian labor for...,Estimate; In labor force: - Civilian labor for...,Estimate; In labor force: - Armed Forces,Estimate; Not in labor force,Estimate; Total:,Estimate; Total: - Occupied,Estimate; Total: - Vacant,Estimate; Total:,Estimate; Total: - Owner occupied,Estimate; Total: - Renter occupied
1,1,960100.0,Baker County,1_960100,1500000US130079601001,130079601001,378,88,47,6,9,18,28,7,14,16,22,40,44,22,10,7,0,869,684,350,350,350,0,0,334,524,378,146,869,457,412
2,2,960100.0,Baker County,2_960100,1500000US130079601002,130079601002,426,88,7,7,33,8,7,9,44,14,79,53,12,45,13,3,4,1247,1038,488,488,461,27,0,550,595,426,169,1247,764,483
3,1,960200.0,Baker County,1_960200,1500000US130079602001,130079602001,224,0,7,7,27,13,26,0,5,25,36,27,30,0,0,0,21,487,450,262,262,262,0,0,188,298,224,74,487,386,101
4,2,960200.0,Baker County,2_960200,1500000US130079602002,130079602002,293,55,0,10,23,44,24,14,0,18,30,5,36,0,0,34,0,648,507,219,219,213,6,0,288,353,293,60,645,489,156


In [58]:
# Remove the Census descriptive-label row that sits beneath the CSV header; its
# geo_id is the literal string 'Id'. Filtering on that marker instead of dropping
# index label 0 keeps the cell idempotent: after reset_index the first *data* row is
# labelled 0, so re-running a positional drop would silently delete real data.
is_label_row = df_block_full['geo_id'] == 'Id'
df_block_full = df_block_full.loc[~is_label_row].reset_index(drop=True)
df_block_full.head()

Unnamed: 0,block_group,tract,county,block_tract,geo_id,geo_id2,total_inc,inc_less_10k,inc_10k_14999,inc_15k_19999,inc_20k_24999,inc_25k_29999,inc_30k_34999,inc_35k_39999,inc_40k_44999,inc_45k_49999,inc_50k_59999,inc_60k_74999,inc_75k_99999,inc_100k_124999,inc_125k_149999,inc_150k_199999,inc_200k_more,total_pop,total_labor,total_in_laborforce,total_civil_labor,emp_civil_labor,unemp_civil_labor,total_armedforces,total_notin_laborforce,total_vacdata,total_occ_homes,total_vac_homes,total_ordata,total_owner,total_renter
0,1,960100,Baker County,1_960100,1500000US130079601001,130079601001,378,88,47,6,9,18,28,7,14,16,22,40,44,22,10,7,0,869,684,350,350,350,0,0,334,524,378,146,869,457,412
1,2,960100,Baker County,2_960100,1500000US130079601002,130079601002,426,88,7,7,33,8,7,9,44,14,79,53,12,45,13,3,4,1247,1038,488,488,461,27,0,550,595,426,169,1247,764,483
2,1,960200,Baker County,1_960200,1500000US130079602001,130079602001,224,0,7,7,27,13,26,0,5,25,36,27,30,0,0,0,21,487,450,262,262,262,0,0,188,298,224,74,487,386,101
3,2,960200,Baker County,2_960200,1500000US130079602002,130079602002,293,55,0,10,23,44,24,14,0,18,30,5,36,0,0,34,0,648,507,219,219,213,6,0,288,353,293,60,645,489,156
4,1,100,Dougherty County,1_100,1500000US130950001001,130950001001,280,0,0,0,9,50,13,15,88,35,26,44,0,0,0,0,0,669,475,282,282,282,0,0,193,433,280,153,669,452,217


In [59]:
# Final column order: identifiers first, total_pop right after the geography
# columns, and total_notin_laborforce next to total_in_laborforce.
# Each (position, name) move is applied in sequence to the running column list.
cols = list(df_block_full)
moves = [(0, 'geo_id'), (1, 'geo_id2'), (6, 'total_pop'), (26, 'total_notin_laborforce')]
for position, name in moves:
    cols.insert(position, cols.pop(cols.index(name)))

df_block_full = df_block_full.loc[:, cols]
df_block_full.head()

Unnamed: 0,geo_id,geo_id2,block_group,tract,county,block_tract,total_pop,total_inc,inc_less_10k,inc_10k_14999,inc_15k_19999,inc_20k_24999,inc_25k_29999,inc_30k_34999,inc_35k_39999,inc_40k_44999,inc_45k_49999,inc_50k_59999,inc_60k_74999,inc_75k_99999,inc_100k_124999,inc_125k_149999,inc_150k_199999,inc_200k_more,total_labor,total_in_laborforce,total_notin_laborforce,total_civil_labor,emp_civil_labor,unemp_civil_labor,total_armedforces,total_vacdata,total_occ_homes,total_vac_homes,total_ordata,total_owner,total_renter
0,1500000US130079601001,130079601001,1,960100,Baker County,1_960100,869,378,88,47,6,9,18,28,7,14,16,22,40,44,22,10,7,0,684,350,334,350,350,0,0,524,378,146,869,457,412
1,1500000US130079601002,130079601002,2,960100,Baker County,2_960100,1247,426,88,7,7,33,8,7,9,44,14,79,53,12,45,13,3,4,1038,488,550,488,461,27,0,595,426,169,1247,764,483
2,1500000US130079602001,130079602001,1,960200,Baker County,1_960200,487,224,0,7,7,27,13,26,0,5,25,36,27,30,0,0,0,21,450,262,188,262,262,0,0,298,224,74,487,386,101
3,1500000US130079602002,130079602002,2,960200,Baker County,2_960200,648,293,55,0,10,23,44,24,14,0,18,30,5,36,0,0,34,0,507,219,288,219,213,6,0,353,293,60,645,489,156
4,1500000US130950001001,130950001001,1,100,Dougherty County,1_100,669,280,0,0,0,9,50,13,15,88,35,26,44,0,0,0,0,0,475,282,193,282,282,0,0,433,280,153,669,452,217


## merging! (tract level)

In [60]:
# Build the tract-level table: inner join of land area and median income on 'tract',
# then remove the merged row labelled 22.
# NOTE(review): the label 22 is hard-coded -- confirm which tract it refers to.
df_tract_full = pd.merge(df_land_area1, df_medinc1, how='inner', on='tract').drop(index=22)

In [61]:
# Preview the merged tract-level table (first five rows).
df_tract_full.head(n=5)

Unnamed: 0,tract,la_GEO.id,la_GEO.id2,la_GCT_STUB.target-geo-id,la_GCT_STUB.target-geo-id2,la_GCT_STUB.display-label,la_HD01,la_HD02,la_SUBHD0301,la_SUBHD0302,la_SUBHD0303,la_SUBHD0401,la_SUBHD0402,la_county,m_GEO.id,m_GEO.id2,m_HC01_EST_VC01,m_HC01_MOE_VC01,m_HC02_EST_VC01,m_HC02_MOE_VC01,m_HC03_EST_VC01,m_HC03_MOE_VC01,m_HC04_EST_VC01,m_HC04_MOE_VC01,m_HC01_EST_VC02,m_HC01_MOE_VC02,m_HC02_EST_VC02,m_HC02_MOE_VC02,m_HC03_EST_VC02,m_HC03_MOE_VC02,m_HC04_EST_VC02,m_HC04_MOE_VC02,m_HC01_EST_VC03,m_HC01_MOE_VC03,m_HC02_EST_VC03,m_HC02_MOE_VC03,m_HC03_EST_VC03,m_HC03_MOE_VC03,m_HC04_EST_VC03,m_HC04_MOE_VC03,m_HC01_EST_VC04,m_HC01_MOE_VC04,m_HC02_EST_VC04,m_HC02_MOE_VC04,m_HC03_EST_VC04,m_HC03_MOE_VC04,m_HC04_EST_VC04,m_HC04_MOE_VC04,m_HC01_EST_VC05,m_HC01_MOE_VC05,m_HC02_EST_VC05,m_HC02_MOE_VC05,m_HC03_EST_VC05,m_HC03_MOE_VC05,m_HC04_EST_VC05,m_HC04_MOE_VC05,m_HC01_EST_VC06,m_HC01_MOE_VC06,m_HC02_EST_VC06,m_HC02_MOE_VC06,m_HC03_EST_VC06,m_HC03_MOE_VC06,m_HC04_EST_VC06,m_HC04_MOE_VC06,m_HC01_EST_VC07,m_HC01_MOE_VC07,m_HC02_EST_VC07,m_HC02_MOE_VC07,m_HC03_EST_VC07,m_HC03_MOE_VC07,m_HC04_EST_VC07,m_HC04_MOE_VC07,m_HC01_EST_VC08,m_HC01_MOE_VC08,m_HC02_EST_VC08,m_HC02_MOE_VC08,m_HC03_EST_VC08,m_HC03_MOE_VC08,m_HC04_EST_VC08,m_HC04_MOE_VC08,m_HC01_EST_VC09,m_HC01_MOE_VC09,m_HC02_EST_VC09,m_HC02_MOE_VC09,m_HC03_EST_VC09,m_HC03_MOE_VC09,m_HC04_EST_VC09,m_HC04_MOE_VC09,m_HC01_EST_VC10,m_HC01_MOE_VC10,m_HC02_EST_VC10,m_HC02_MOE_VC10,m_HC03_EST_VC10,m_HC03_MOE_VC10,m_HC04_EST_VC10,m_HC04_MOE_VC10,m_HC01_EST_VC11,m_HC01_MOE_VC11,m_HC02_EST_VC11,m_HC02_MOE_VC11,m_HC03_EST_VC11,m_HC03_MOE_VC11,m_HC04_EST_VC11,m_HC04_MOE_VC11,m_HC01_EST_VC13,m_HC01_MOE_VC13,m_HC02_EST_VC13,m_HC02_MOE_VC13,m_HC03_EST_VC13,m_HC03_MOE_VC13,m_HC04_EST_VC13,m_HC04_MOE_VC13,m_HC01_EST_VC15,m_HC01_MOE_VC15,m_HC02_EST_VC15,m_HC02_MOE_VC15,m_HC03_EST_VC15,m_HC03_MOE_VC15,m_HC04_EST_VC15,m_HC04_MOE_VC15,m_HC01_EST_VC18,m_HC01_MOE_VC18,m_HC02_EST_VC18,m_HC02_MOE_VC18,m_HC03_EST_VC18,m_HC03_MOE_VC18,m_HC04_EST_VC
18,m_HC04_MOE_VC18,m_HC01_EST_VC19,m_HC01_MOE_VC19,m_HC02_EST_VC19,m_HC02_MOE_VC19,m_HC03_EST_VC19,m_HC03_MOE_VC19,m_HC04_EST_VC19,m_HC04_MOE_VC19,m_HC01_EST_VC20,m_HC01_MOE_VC20,m_HC02_EST_VC20,m_HC02_MOE_VC20,m_HC03_EST_VC20,m_HC03_MOE_VC20,m_HC04_EST_VC20,m_HC04_MOE_VC20,m_county
0,100,0500000US13095,13095,1400000US13095000100,13095000100,Dougherty County - Census Tract 1,5944,2545,3.17,0.02,3.15,1886.7,807.8,Dougherty County,1400000US13095000100,13095000100,1905,220,911,173,244,117,994,210,19.0,6.9,17.9,9.2,0.0,14.2,23.9,10.7,6.3,4.2,3.5,4.5,0.0,14.2,8.9,6.8,19.6,8.1,23.3,12.9,5.3,8.4,19.8,11.7,21.6,8.7,11.0,9.3,5.7,10.0,27.5,14.2,17.4,8.4,15.9,8.6,20.1,20.2,18.7,11.8,9.6,5.6,16.1,9.5,49.2,23.9,0.0,3.7,3.6,3.0,7.5,6.2,19.7,17.5,0.0,3.7,0.6,1.0,0.0,4.0,0.0,14.2,1.2,1.9,2.3,3.5,4.8,7.0,0.0,14.2,0.0,3.7,0.0,1.9,0.0,4.0,0.0,14.2,0.0,3.7,26681,4099,30806,10054,54808,21554,21597,7978,32589,5350,40319,9670,N,N,23157,4282,46.5,(X),(X),(X),(X),(X),(X),(X),(X),(X),38.5,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),49.4,(X),Dougherty County
1,200,0500000US13095,13095,1400000US13095000200,13095000200,Dougherty County - Census Tract 2,3123,1348,1.91,0.04,1.87,1667.0,719.5,Dougherty County,1400000US13095000200,13095000200,1006,141,643,118,200,90,363,127,25.0,9.4,25.7,11.1,0.0,17.0,26.2,17.2,14.9,6.9,6.7,6.7,0.0,17.0,29.5,15.6,20.6,9.4,20.1,12.4,21.5,18.2,19.0,13.5,18.0,8.7,22.9,11.5,42.0,20.3,9.4,11.8,11.7,7.6,14.8,11.1,16.5,18.1,6.3,8.0,8.0,6.4,7.9,5.9,13.5,15.4,8.0,12.9,1.0,1.8,1.6,2.8,5.0,8.9,0.0,9.8,0.6,1.1,0.0,5.7,0.0,17.0,1.7,3.0,0.0,3.7,0.0,5.7,0.0,17.0,0.0,9.8,0.3,1.0,0.5,1.6,1.5,5.4,0.0,9.8,20789,3843,23750,4682,30571,5690,12351,7963,24533,4489,26745,5892,N,N,20391,6997,45.5,(X),(X),(X),(X),(X),(X),(X),(X),(X),43.1,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),47.4,(X),Dougherty County
2,400,0500000US13095,13095,1400000US13095000400,13095000400,Dougherty County - Census Tract 4,6276,3302,2.9,0.05,2.84,2206.7,1161.0,Dougherty County,1400000US13095000400,13095000400,2786,200,1446,202,623,191,1340,223,11.5,5.2,10.6,6.3,0.0,5.8,12.5,8.4,7.4,4.5,7.2,6.2,0.0,5.8,9.8,6.9,14.0,5.1,10.2,7.1,10.6,10.9,18.1,9.0,17.7,6.7,23.6,10.1,15.9,15.3,11.4,6.8,16.1,5.6,11.5,6.1,2.4,3.6,18.7,9.4,17.2,5.2,16.1,7.6,35.6,16.1,18.4,7.7,9.8,3.9,13.5,5.7,18.5,9.2,5.7,4.9,3.2,2.9,1.1,1.4,2.6,3.4,5.4,5.6,0.4,0.8,0.7,1.5,1.6,3.5,0.0,2.8,2.9,2.9,5.5,5.4,12.8,12.0,0.0,2.8,32372,13920,32139,13807,59688,5592,31223,17196,45856,6785,50867,10806,N,N,39857,7452,35.5,(X),(X),(X),(X),(X),(X),(X),(X),(X),34.3,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),34.6,(X),Dougherty County
3,501,0500000US13095,13095,1400000US13095000501,13095000501,Dougherty County - Census Tract 5.01,4798,2365,1.76,0.02,1.74,2764.5,1362.6,Dougherty County,1400000US13095000501,13095000501,2281,156,1239,199,564,130,1042,186,1.3,1.6,1.7,2.7,0.0,6.4,0.9,1.4,4.7,3.5,2.2,3.3,0.0,6.4,7.8,6.5,8.1,4.2,5.8,5.0,0.0,6.4,12.1,7.2,13.2,6.6,13.1,8.7,1.4,3.7,16.2,11.5,14.1,6.8,12.4,9.1,0.0,6.4,22.0,11.7,24.4,7.5,24.9,11.3,27.8,15.6,13.6,7.2,17.1,6.9,20.4,10.4,29.8,16.5,13.2,9.1,9.5,5.3,13.5,8.0,27.7,13.8,4.8,4.3,3.8,3.2,2.3,4.0,5.1,9.1,5.6,5.4,3.8,3.0,3.7,4.1,8.2,9.0,3.8,4.4,55366,4802,57303,17544,90182,16215,42694,3658,72521,9997,72277,10807,N,N,70103,20186,44.9,(X),(X),(X),(X),(X),(X),(X),(X),(X),46.8,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),40.4,(X),Dougherty County
4,502,0500000US13095,13095,1400000US13095000502,13095000502,Dougherty County - Census Tract 5.02,4151,1938,2.68,0.03,2.65,1565.4,730.8,Dougherty County,1400000US13095000502,13095000502,1811,121,1198,107,969,103,613,97,3.8,2.8,1.5,2.5,0.0,3.8,8.3,6.5,4.5,2.9,0.0,3.1,0.0,3.8,13.4,7.9,3.4,2.1,2.0,2.0,2.5,2.4,6.0,4.5,9.7,4.1,7.1,5.2,5.8,3.8,14.8,7.1,14.9,4.8,14.6,6.1,8.6,6.0,15.3,7.9,17.3,4.8,15.7,6.3,13.4,6.4,20.4,7.5,10.0,4.3,11.4,5.5,13.3,6.5,7.3,7.0,18.4,5.6,23.7,7.5,27.5,8.7,9.3,5.9,11.3,3.7,14.4,5.1,17.1,6.0,3.9,3.5,6.7,3.2,9.6,4.8,11.9,5.6,1.1,1.9,64750,20495,95313,19181,112026,15385,37146,8219,88223,8964,105355,12096,N,N,54071,9951,37.3,(X),(X),(X),(X),(X),(X),(X),(X),(X),40.3,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),31.5,(X),Dougherty County


In [62]:
# Trim the tract table by dropping:
#   * four label/identifier columns,
#   * every margin-of-error (MOE) column,
#   * all VC18-VC20 columns (estimates as well as MOEs).
# The labels are generated from the naming pattern m_HC<group>_<EST|MOE>_VC<row>.
# The hand-typed list this replaces named the twelve VC18-VC20 MOE columns twice;
# the set of dropped columns is unchanged.
hc_groups = ['HC{:02d}'.format(g) for g in range(1, 5)]
moe_rows = list(range(1, 12)) + [13, 15, 18, 19, 20]   # VC rows named in the original list
moe_cols = ['m_{}_MOE_VC{:02d}'.format(hc, vc) for vc in moe_rows for hc in hc_groups]
est_cols = ['m_{}_EST_VC{:02d}'.format(hc, vc) for vc in (18, 19, 20) for hc in hc_groups]
label_cols = ['la_GCT_STUB.display-label', 'm_GEO.id', 'm_GEO.id2', 'm_county']

df_tract_full.drop(columns=label_cols + moe_cols + est_cols, inplace=True)

In [63]:
# Preview the median-income table with its original 'm_'-prefixed column codes
# (first five rows; the first row holds the human-readable column labels).
df_medinc1.head(n=5)

Unnamed: 0,tract,m_GEO.id,m_GEO.id2,m_HC01_EST_VC01,m_HC01_MOE_VC01,m_HC02_EST_VC01,m_HC02_MOE_VC01,m_HC03_EST_VC01,m_HC03_MOE_VC01,m_HC04_EST_VC01,m_HC04_MOE_VC01,m_HC01_EST_VC02,m_HC01_MOE_VC02,m_HC02_EST_VC02,m_HC02_MOE_VC02,m_HC03_EST_VC02,m_HC03_MOE_VC02,m_HC04_EST_VC02,m_HC04_MOE_VC02,m_HC01_EST_VC03,m_HC01_MOE_VC03,m_HC02_EST_VC03,m_HC02_MOE_VC03,m_HC03_EST_VC03,m_HC03_MOE_VC03,m_HC04_EST_VC03,m_HC04_MOE_VC03,m_HC01_EST_VC04,m_HC01_MOE_VC04,m_HC02_EST_VC04,m_HC02_MOE_VC04,m_HC03_EST_VC04,m_HC03_MOE_VC04,m_HC04_EST_VC04,m_HC04_MOE_VC04,m_HC01_EST_VC05,m_HC01_MOE_VC05,m_HC02_EST_VC05,m_HC02_MOE_VC05,m_HC03_EST_VC05,m_HC03_MOE_VC05,m_HC04_EST_VC05,m_HC04_MOE_VC05,m_HC01_EST_VC06,m_HC01_MOE_VC06,m_HC02_EST_VC06,m_HC02_MOE_VC06,m_HC03_EST_VC06,m_HC03_MOE_VC06,m_HC04_EST_VC06,m_HC04_MOE_VC06,m_HC01_EST_VC07,m_HC01_MOE_VC07,m_HC02_EST_VC07,m_HC02_MOE_VC07,m_HC03_EST_VC07,m_HC03_MOE_VC07,m_HC04_EST_VC07,m_HC04_MOE_VC07,m_HC01_EST_VC08,m_HC01_MOE_VC08,m_HC02_EST_VC08,m_HC02_MOE_VC08,m_HC03_EST_VC08,m_HC03_MOE_VC08,m_HC04_EST_VC08,m_HC04_MOE_VC08,m_HC01_EST_VC09,m_HC01_MOE_VC09,m_HC02_EST_VC09,m_HC02_MOE_VC09,m_HC03_EST_VC09,m_HC03_MOE_VC09,m_HC04_EST_VC09,m_HC04_MOE_VC09,m_HC01_EST_VC10,m_HC01_MOE_VC10,m_HC02_EST_VC10,m_HC02_MOE_VC10,m_HC03_EST_VC10,m_HC03_MOE_VC10,m_HC04_EST_VC10,m_HC04_MOE_VC10,m_HC01_EST_VC11,m_HC01_MOE_VC11,m_HC02_EST_VC11,m_HC02_MOE_VC11,m_HC03_EST_VC11,m_HC03_MOE_VC11,m_HC04_EST_VC11,m_HC04_MOE_VC11,m_HC01_EST_VC13,m_HC01_MOE_VC13,m_HC02_EST_VC13,m_HC02_MOE_VC13,m_HC03_EST_VC13,m_HC03_MOE_VC13,m_HC04_EST_VC13,m_HC04_MOE_VC13,m_HC01_EST_VC15,m_HC01_MOE_VC15,m_HC02_EST_VC15,m_HC02_MOE_VC15,m_HC03_EST_VC15,m_HC03_MOE_VC15,m_HC04_EST_VC15,m_HC04_MOE_VC15,m_HC01_EST_VC18,m_HC01_MOE_VC18,m_HC02_EST_VC18,m_HC02_MOE_VC18,m_HC03_EST_VC18,m_HC03_MOE_VC18,m_HC04_EST_VC18,m_HC04_MOE_VC18,m_HC01_EST_VC19,m_HC01_MOE_VC19,m_HC02_EST_VC19,m_HC02_MOE_VC19,m_HC03_EST_VC19,m_HC03_MOE_VC19,m_HC04_EST_VC19,m_HC04_MOE_VC19,m_HC01_EST_VC20,m_HC01_MOE_VC20,m_HC02_EST_V
C20,m_HC02_MOE_VC20,m_HC03_EST_VC20,m_HC03_MOE_VC20,m_HC04_EST_VC20,m_HC04_MOE_VC20,m_county
0,Geography,Id,Id2,Households; Estimate; Total,Households; Margin of Error; Total,Families; Estimate; Total,Families; Margin of Error; Total,Married-couple families; Estimate; Total,Married-couple families; Margin of Error; Total,Nonfamily households; Estimate; Total,Nonfamily households; Margin of Error; Total,"Households; Estimate; Less than $10,000","Households; Margin of Error; Less than $10,000","Families; Estimate; Less than $10,000","Families; Margin of Error; Less than $10,000",Married-couple families; Estimate; Less than $...,Married-couple families; Margin of Error; Less...,"Nonfamily households; Estimate; Less than $10,000",Nonfamily households; Margin of Error; Less th...,"Households; Estimate; $10,000 to $14,999","Households; Margin of Error; $10,000 to $14,999","Families; Estimate; $10,000 to $14,999","Families; Margin of Error; $10,000 to $14,999","Married-couple families; Estimate; $10,000 to ...","Married-couple families; Margin of Error; $10,...","Nonfamily households; Estimate; $10,000 to $14...","Nonfamily households; Margin of Error; $10,000...","Households; Estimate; $15,000 to $24,999","Households; Margin of Error; $15,000 to $24,999","Families; Estimate; $15,000 to $24,999","Families; Margin of Error; $15,000 to $24,999","Married-couple families; Estimate; $15,000 to ...","Married-couple families; Margin of Error; $15,...","Nonfamily households; Estimate; $15,000 to $24...","Nonfamily households; Margin of Error; $15,000...","Households; Estimate; $25,000 to $34,999","Households; Margin of Error; $25,000 to $34,999","Families; Estimate; $25,000 to $34,999","Families; Margin of Error; $25,000 to $34,999","Married-couple families; Estimate; $25,000 to ...","Married-couple families; Margin of Error; $25,...","Nonfamily households; Estimate; $25,000 to $34...","Nonfamily households; Margin of Error; $25,000...","Households; Estimate; $35,000 to $49,999","Households; Margin of Error; $35,000 to $49,999","Families; Estimate; $35,000 to 
$49,999","Families; Margin of Error; $35,000 to $49,999","Married-couple families; Estimate; $35,000 to ...","Married-couple families; Margin of Error; $35,...","Nonfamily households; Estimate; $35,000 to $49...","Nonfamily households; Margin of Error; $35,000...","Households; Estimate; $50,000 to $74,999","Households; Margin of Error; $50,000 to $74,999","Families; Estimate; $50,000 to $74,999","Families; Margin of Error; $50,000 to $74,999","Married-couple families; Estimate; $50,000 to ...","Married-couple families; Margin of Error; $50,...","Nonfamily households; Estimate; $50,000 to $74...","Nonfamily households; Margin of Error; $50,000...","Households; Estimate; $75,000 to $99,999","Households; Margin of Error; $75,000 to $99,999","Families; Estimate; $75,000 to $99,999","Families; Margin of Error; $75,000 to $99,999","Married-couple families; Estimate; $75,000 to ...","Married-couple families; Margin of Error; $75,...","Nonfamily households; Estimate; $75,000 to $99...","Nonfamily households; Margin of Error; $75,000...","Households; Estimate; $100,000 to $149,999","Households; Margin of Error; $100,000 to $149,999","Families; Estimate; $100,000 to $149,999","Families; Margin of Error; $100,000 to $149,999","Married-couple families; Estimate; $100,000 to...",Married-couple families; Margin of Error; $100...,"Nonfamily households; Estimate; $100,000 to $1...","Nonfamily households; Margin of Error; $100,00...","Households; Estimate; $150,000 to $199,999","Households; Margin of Error; $150,000 to $199,999","Families; Estimate; $150,000 to $199,999","Families; Margin of Error; $150,000 to $199,999","Married-couple families; Estimate; $150,000 to...",Married-couple families; Margin of Error; $150...,"Nonfamily households; Estimate; $150,000 to $1...","Nonfamily households; Margin of Error; $150,00...","Households; Estimate; $200,000 or more","Households; Margin of Error; $200,000 or more","Families; Estimate; $200,000 or more","Families; Margin of Error; 
$200,000 or more","Married-couple families; Estimate; $200,000 or...",Married-couple families; Margin of Error; $200...,"Nonfamily households; Estimate; $200,000 or more","Nonfamily households; Margin of Error; $200,00...",Households; Estimate; Median income (dollars),Households; Margin of Error; Median income (do...,Families; Estimate; Median income (dollars),Families; Margin of Error; Median income (doll...,Married-couple families; Estimate; Median inco...,Married-couple families; Margin of Error; Medi...,Nonfamily households; Estimate; Median income ...,Nonfamily households; Margin of Error; Median ...,Households; Estimate; Mean income (dollars),Households; Margin of Error; Mean income (doll...,Families; Estimate; Mean income (dollars),Families; Margin of Error; Mean income (dollars),Married-couple families; Estimate; Mean income...,Married-couple families; Margin of Error; Mean...,Nonfamily households; Estimate; Mean income (d...,Nonfamily households; Margin of Error; Mean in...,Households; Estimate; PERCENT ALLOCATED - Hous...,Households; Margin of Error; PERCENT ALLOCATED...,Families; Estimate; PERCENT ALLOCATED - Househ...,Families; Margin of Error; PERCENT ALLOCATED -...,Married-couple families; Estimate; PERCENT ALL...,Married-couple families; Margin of Error; PERC...,Nonfamily households; Estimate; PERCENT ALLOCA...,Nonfamily households; Margin of Error; PERCENT...,Households; Estimate; PERCENT ALLOCATED - Fami...,Households; Margin of Error; PERCENT ALLOCATED...,Families; Estimate; PERCENT ALLOCATED - Family...,Families; Margin of Error; PERCENT ALLOCATED -...,Married-couple families; Estimate; PERCENT ALL...,Married-couple families; Margin of Error; PERC...,Nonfamily households; Estimate; PERCENT ALLOCA...,Nonfamily households; Margin of Error; PERCENT...,Households; Estimate; PERCENT ALLOCATED - Nonf...,Households; Margin of Error; PERCENT ALLOCATED...,Families; Estimate; PERCENT ALLOCATED - Nonfam...,Families; Margin of Error; PERCENT ALLOCATED 
-...,Married-couple families; Estimate; PERCENT ALL...,Married-couple families; Margin of Error; PERC...,Nonfamily households; Estimate; PERCENT ALLOCA...,Nonfamily households; Margin of Error; PERCENT...,
1,960100,1400000US13007960100,13007960100,804,99,528,69,345,78,276,94,21.9,8.2,11.0,7.5,10.1,10.2,42.8,16.1,6.7,3.9,7.4,5.1,4.9,4.2,5.4,5.2,6.8,3.5,3.0,3.2,4.6,4.8,14.1,9.2,7.6,4.3,10.4,6.4,7.0,6.1,3.3,4.1,12.9,6.3,12.1,7.1,13.3,10.4,13.4,12.5,24.1,8.4,30.5,10.6,28.4,10.9,13.4,11.7,7.0,4.9,10.6,7.0,16.2,10.5,0.0,12.7,11.2,5.0,12.3,6.1,11.3,8.0,7.6,10.2,1.2,1.5,1.9,2.3,2.9,3.7,0.0,12.7,0.5,0.8,0.8,1.1,1.2,1.7,0.0,12.7,43594,8322,52614,4041,54844,11041,17083,12413,48346,6931,57024,8985,N,N,31070,11981,59.1,(X),(X),(X),(X),(X),(X),(X),(X),(X),58.1,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),59.8,(X),Baker County
2,960200,1400000US13007960200,13007960200,517,83,311,66,253,61,206,80,10.6,9.1,1.6,2.7,0.0,13.7,24.3,20.8,1.4,1.9,0.0,11.3,0.0,13.7,3.4,4.9,13.0,7.3,9.3,9.2,11.5,10.9,18.4,14.9,20.7,10.5,7.7,8.0,9.5,9.7,40.3,19.8,12.0,10.2,12.5,12.5,12.3,14.7,11.2,15.1,19.0,10.5,31.5,15.0,32.4,17.6,0.0,16.6,12.8,7.2,19.6,12.2,21.3,14.5,2.4,4.2,0.0,7.0,0.0,11.3,0.0,13.7,0.0,16.6,6.6,5.8,10.9,9.5,4.7,6.9,0.0,16.6,4.1,3.9,6.8,6.4,8.3,7.9,0.0,16.6,45486,18438,54926,22075,52574,17530,28106,11326,58316,13713,80776,20162,N,N,24407,6978,55.9,(X),(X),(X),(X),(X),(X),(X),(X),(X),57.9,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),52.9,(X),Baker County
3,100,1400000US13095000100,13095000100,1905,220,911,173,244,117,994,210,19.0,6.9,17.9,9.2,0.0,14.2,23.9,10.7,6.3,4.2,3.5,4.5,0.0,14.2,8.9,6.8,19.6,8.1,23.3,12.9,5.3,8.4,19.8,11.7,21.6,8.7,11.0,9.3,5.7,10.0,27.5,14.2,17.4,8.4,15.9,8.6,20.1,20.2,18.7,11.8,9.6,5.6,16.1,9.5,49.2,23.9,0.0,3.7,3.6,3.0,7.5,6.2,19.7,17.5,0.0,3.7,0.6,1.0,0.0,4.0,0.0,14.2,1.2,1.9,2.3,3.5,4.8,7.0,0.0,14.2,0.0,3.7,0.0,1.9,0.0,4.0,0.0,14.2,0.0,3.7,26681,4099,30806,10054,54808,21554,21597,7978,32589,5350,40319,9670,N,N,23157,4282,46.5,(X),(X),(X),(X),(X),(X),(X),(X),(X),38.5,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),49.4,(X),Dougherty County
4,200,1400000US13095000200,13095000200,1006,141,643,118,200,90,363,127,25.0,9.4,25.7,11.1,0.0,17.0,26.2,17.2,14.9,6.9,6.7,6.7,0.0,17.0,29.5,15.6,20.6,9.4,20.1,12.4,21.5,18.2,19.0,13.5,18.0,8.7,22.9,11.5,42.0,20.3,9.4,11.8,11.7,7.6,14.8,11.1,16.5,18.1,6.3,8.0,8.0,6.4,7.9,5.9,13.5,15.4,8.0,12.9,1.0,1.8,1.6,2.8,5.0,8.9,0.0,9.8,0.6,1.1,0.0,5.7,0.0,17.0,1.7,3.0,0.0,3.7,0.0,5.7,0.0,17.0,0.0,9.8,0.3,1.0,0.5,1.6,1.5,5.4,0.0,9.8,20789,3843,23750,4682,30571,5690,12351,7963,24533,4489,26745,5892,N,N,20391,6997,45.5,(X),(X),(X),(X),(X),(X),(X),(X),(X),43.1,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),47.4,(X),Dougherty County


In [64]:
# Give the remaining S1901 estimate columns and the land-area columns readable
# names. A raw estimate column is 'm_<HCxx>_EST_<VCyy>': HCxx picks the
# household type (new-name prefix h / f / m / nf) and VCyy picks the table row
# (VC01 = total count, the others an income bracket, median or mean income).
_household_kinds = [
    # (raw code, prefix for bracket columns, name of the VC01 total column)
    ('HC01', 'h', 'total_household'),
    ('HC02', 'f', 'total_family'),
    ('HC03', 'm', 'total_married'),
    ('HC04', 'nf', 'total_nonfamily'),
]
_income_rows = [
    ('VC02', 'less_10k'),
    ('VC03', '10k_14999'),
    ('VC04', '15k_24999'),
    ('VC05', '25k_34999'),
    ('VC06', '35k_49999'),
    ('VC07', '50k_74999'),
    ('VC08', '75k_99999'),
    ('VC09', '100k_149999'),
    ('VC10', '150k_199999'),
    ('VC11', '200k_more'),
    ('VC13', 'medinc'),
    ('VC15', 'meaninc'),
]

# Land-area ('la_') columns have no regular pattern, so they are listed directly.
_tract_column_names = {
    'la_HD01': 'tract_pop',
    'la_HD02': 'tract_total_housingunits',
    'la_SUBHD0301': 'tract_totalarea',
    'la_SUBHD0302': 'tract_totalarea_water',
    'la_SUBHD0303': 'tract_totalarea_land',
    'la_SUBHD0401': 'tract_pop_density',
    'la_SUBHD0402': 'tract_housing_density',
}
for _hc, _prefix, _total_name in _household_kinds:
    _tract_column_names['m_' + _hc + '_EST_VC01'] = _total_name
    for _vc, _suffix in _income_rows:
        _tract_column_names['m_' + _hc + '_EST_' + _vc] = _prefix + '_' + _suffix

df_tract_full = df_tract_full.rename(columns=_tract_column_names)
df_tract_full.head()

Unnamed: 0,tract,la_GEO.id,la_GEO.id2,la_GCT_STUB.target-geo-id,la_GCT_STUB.target-geo-id2,tract_pop,tract_total_housingunits,tract_totalarea,tract_totalarea_water,tract_totalarea_land,tract_pop_density,tract_housing_density,la_county,total_household,total_family,total_married,total_nonfamily,h_less_10k,f_less_10k,m_less_10k,nf_less_10k,h_10k_14999,f_10k_14999,m_10k_14999,nf_10k_14999,h_15k_24999,f_15k_24999,m_15k_24999,nf_15k_24999,h_25k_34999,f_25k_34999,m_25k_34999,nf_25k_34999,h_35k_49999,f_35k_49999,m_35k_49999,nf_35k_49999,h_50k_74999,f_50k_74999,m_50k_74999,nf_50k_74999,h_75k_99999,f_75k_99999,m_75k_99999,nf_75k_99999,h_100k_149999,f_100k_149999,m_100k_149999,nf_100k_149999,h_150k_199999,f_150k_199999,m_150k_199999,nf_150k_199999,h_200k_more,f_200k_more,m_200k_more,nf_200k_more,h_medinc,f_medinc,m_medinc,nf_medinc,h_meaninc,f_meaninc,m_meaninc,nf_meaninc
0,100,0500000US13095,13095,1400000US13095000100,13095000100,5944,2545,3.17,0.02,3.15,1886.7,807.8,Dougherty County,1905,911,244,994,19.0,17.9,0.0,23.9,6.3,3.5,0.0,8.9,19.6,23.3,5.3,19.8,21.6,11.0,5.7,27.5,17.4,15.9,20.1,18.7,9.6,16.1,49.2,0.0,3.6,7.5,19.7,0.0,0.6,0.0,0.0,1.2,2.3,4.8,0.0,0.0,0.0,0.0,0.0,0.0,26681,30806,54808,21597,32589,40319,N,23157
1,200,0500000US13095,13095,1400000US13095000200,13095000200,3123,1348,1.91,0.04,1.87,1667.0,719.5,Dougherty County,1006,643,200,363,25.0,25.7,0.0,26.2,14.9,6.7,0.0,29.5,20.6,20.1,21.5,19.0,18.0,22.9,42.0,9.4,11.7,14.8,16.5,6.3,8.0,7.9,13.5,8.0,1.0,1.6,5.0,0.0,0.6,0.0,0.0,1.7,0.0,0.0,0.0,0.0,0.3,0.5,1.5,0.0,20789,23750,30571,12351,24533,26745,N,20391
2,400,0500000US13095,13095,1400000US13095000400,13095000400,6276,3302,2.9,0.05,2.84,2206.7,1161.0,Dougherty County,2786,1446,623,1340,11.5,10.6,0.0,12.5,7.4,7.2,0.0,9.8,14.0,10.2,10.6,18.1,17.7,23.6,15.9,11.4,16.1,11.5,2.4,18.7,17.2,16.1,35.6,18.4,9.8,13.5,18.5,5.7,3.2,1.1,2.6,5.4,0.4,0.7,1.6,0.0,2.9,5.5,12.8,0.0,32372,32139,59688,31223,45856,50867,N,39857
3,501,0500000US13095,13095,1400000US13095000501,13095000501,4798,2365,1.76,0.02,1.74,2764.5,1362.6,Dougherty County,2281,1239,564,1042,1.3,1.7,0.0,0.9,4.7,2.2,0.0,7.8,8.1,5.8,0.0,12.1,13.2,13.1,1.4,16.2,14.1,12.4,0.0,22.0,24.4,24.9,27.8,13.6,17.1,20.4,29.8,13.2,9.5,13.5,27.7,4.8,3.8,2.3,5.1,5.6,3.8,3.7,8.2,3.8,55366,57303,90182,42694,72521,72277,N,70103
4,502,0500000US13095,13095,1400000US13095000502,13095000502,4151,1938,2.68,0.03,2.65,1565.4,730.8,Dougherty County,1811,1198,969,613,3.8,1.5,0.0,8.3,4.5,0.0,0.0,13.4,3.4,2.0,2.5,6.0,9.7,7.1,5.8,14.8,14.9,14.6,8.6,15.3,17.3,15.7,13.4,20.4,10.0,11.4,13.3,7.3,18.4,23.7,27.5,9.3,11.3,14.4,17.1,3.9,6.7,9.6,11.9,1.1,64750,95313,112026,37146,88223,105355,N,54071


## export data

In [65]:
# Block-group export is left disabled; uncomment the next line to write
# df_block_full to 'full_blockgroup.csv' (without the index column).
#df_block_full.to_csv('full_blockgroup.csv', index = False)

In [70]:
# Write the cleaned tract-level table to the working directory, omitting the
# row index so the CSV holds only the data columns.
df_tract_full.to_csv(path_or_buf='full_tract.csv', index=False)

## Work for Shelena

In [67]:
# Number of rows in df for each distinct 'Project' value, most frequent first.
# (A bare `df.head()` used to precede this line; only the last expression of a
# cell is displayed, so its result was discarded and the call has been removed.)
df['Project'].value_counts()

Emergency Repairs                      267
Tenant Based Rental Assistance         160
Public Service                          80
Energy Efficiency                       68
New Construction                        34
Homeowner Rehabilitation                32
Rental Rehabilitation                   29
AHOP Financing                          23
CHDO                                    22
Commercial Development Program          21
Section 108 Loan Program                21
Economic Development                    16
Commercial Facade                        9
Downtown Incubator                       9
Public Facilities                        9
Demolition                               7
Disposition                              6
Acquisition/Rehabilitation               6
Acquisition                              5
Hudson Lane Sewer Connections            5
Beautification                           4
Acquisition/Rental Rehabilitation        3
Relocation                               3
Lead-Based 

In [68]:
def years(projects, project_name='Tenant Based Rental Assistance', plan_years=None):
    """Print the plan year of every row whose project equals ``project_name``.

    Parameters
    ----------
    projects : pandas.Series or sequence
        Project name for each row. A plain sequence is treated as being
        labelled 0..n-1.
    project_name : str, optional
        Project to report on. Defaults to 'Tenant Based Rental Assistance',
        the value that was previously hard-coded.
    plan_years : pandas.Series or sequence, optional
        Plan year for each row, labelled like ``projects``. When omitted, the
        notebook-level ``df['Plan Year']`` column is used, as before.

    Returns
    -------
    None
        Matching years are printed one per line, in row order.

    Raises
    ------
    KeyError
        If a matching row label of ``projects`` is missing from ``plan_years``.
    """
    if plan_years is None:
        # Backward-compatible default: read the years from the global frame.
        plan_years = df['Plan Year']
    projects = pd.Series(projects)
    plan_years = pd.Series(plan_years)

    # Select by index label rather than by position 0..len-1, so a filtered
    # (non-contiguous) Series works instead of raising KeyError.
    is_match = projects == project_name
    matching_labels = is_match[is_match].index
    for year in plan_years.loc[matching_labels]:
        print(year)
        
# Print the plan year of each matching row in the 'Project' column.
years(projects=df['Project'])

2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2012
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2013
2014
2014
2014
2014
2014
2014
2014
2014
2014
2014
2014
2014
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2015
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2016
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017
2017


In [69]:
# Column names of df as a plain Python list.
df.columns.tolist()

['Parcel ID',
 'Address',
 'City',
 'State',
 'XY Coordinates',
 'Plan Year',
 'IDIS Project',
 'Project',
 'IDIS Activity ID',
 'Activity Status',
 'Program',
 'Funded Amount',
 'Draw Amount',
 'Balance',
 'Multihome_Type',
 'Multihome']