In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Distribution of population within Hennepin County Census Tracts
We know that the Census Bureau defines target size ranges for census tracts: [1200, 8000] residents and [480, 3200] housing units. But does our data follow this guideline?
1. What is the mean population per census tract?
2. What is the standard deviation?
3. Can we plot a smooth density over the distribution of populations for census tracts?
4. Can we do the same for block groups?
5. Can we do the same for blocks?

While we are at it, let's plot the distribution of incomes as well.
1. What was the mean household income per census tract? Median? Standard Deviation?
2. Can we plot the distribution of average household income per census tract?
3. Can we do this for block groups?
4. Can we do this for blocks? How different does this look, given that income points are randomly distributed within block groups?

In [5]:
# function which, given a dataframe with aggregated data to plot as data['x'], will plot a kernel density 

def plot_kernel_density(data, plt_title='', plt_xlabel='', plt_ylabel=''):
    sns.distplot(data['x'],
                 hist=False,
                 kde=True,
                 rug=True,
                 kde_kws={'linewidth': 3},
                 rug_kws={'color': 'black'},
                 color='darkblue')
    
    plt.title(plt_title)
    plt.xlabel(plt_xlabel)
    plt.ylabel(plt_ylabel)
    
    figname=plt_title+".png"
    plt.savefig(figname)
    
    plt.show()
    return

In [7]:
# Load only the columns used below: the income column, the POP100 count and
# the four geography identifiers.
fpath = "./outputs/hennepin_incomes.csv"
hennepin_usecols = ['hh_income', 'POP100', 'COUNTY', 'TRACT', 'BLKGRP', 'BLOCK']

# index_col=False tells pandas not to use the first column as the index.
hennepin = pd.read_csv(fpath, header=0, index_col=False, usecols=hennepin_usecols)

In [17]:
# Keep the first row for every (COUNTY, TRACT, BLKGRP, BLOCK) combination so
# each block contributes its POP100 once to the sums below.
# NOTE(review): presumably POP100 repeats on every row of a block -- confirm.
hennepin_block_pop = hennepin.drop_duplicates(subset=['COUNTY', 'TRACT', 'BLKGRP', 'BLOCK'])

# Total POP100 per tract, exposed as column 'x' for plot_kernel_density.
hennepin_tract_pop = (
    hennepin_block_pop
    .groupby(by=['COUNTY', 'TRACT'])
    .agg({'POP100': 'sum'})
    .rename(columns={'POP100': 'x'})
)

# Summary statistics, in the order the format string consumes them.
tract_pop_x = hennepin_tract_pop['x']
tract_pop_stats = (
    tract_pop_x.mean(),
    tract_pop_x.std(),
    tract_pop_x.median(),
    tract_pop_x.min(),
    tract_pop_x.max(),
    tract_pop_x.skew(),
    tract_pop_x.kurtosis(),
)
print("Hennepin census tract population\n\tmean:\t{0}\n\tstdev:\t{1}\n\tmedian:\t{2}\n\tmin:\t{3}\n\tmax:\t{4}\n\tskew:\t{5}\n\tkurt:\t{6}\n\t ".format(*tract_pop_stats))

plot_kernel_density(
    hennepin_tract_pop,
    plt_title="Hennepin County Census Tract Populations",
    plt_xlabel="Tract Population",
    plt_ylabel="Density",
)

Hennepin census tract population
	mean:	3768.073578595318
	stdev:	1640.0685515027797
	median:	3515.0
	min:	145.0
	max:	12433.0
	skew:	1.5806542625798057
	kurt:	5.223498256277633
	 


In [19]:
# Keep the first row for every (COUNTY, TRACT, BLKGRP, BLOCK) combination so
# each block contributes its POP100 once to the sums below.
hennepin_block_pop = hennepin.drop_duplicates(subset=['COUNTY', 'TRACT', 'BLKGRP', 'BLOCK'])

# Total POP100 per block group, exposed as column 'x' for plot_kernel_density.
hennepin_block_grp_pop = (
    hennepin_block_pop
    .groupby(by=['COUNTY', 'TRACT', 'BLKGRP'])
    .agg({'POP100': 'sum'})
    .rename(columns={'POP100': 'x'})
)

# Summary statistics, in the order the format string consumes them.
block_grp_pop_x = hennepin_block_grp_pop['x']
block_grp_pop_stats = (
    block_grp_pop_x.mean(),
    block_grp_pop_x.std(),
    block_grp_pop_x.median(),
    block_grp_pop_x.min(),
    block_grp_pop_x.max(),
    block_grp_pop_x.skew(),
    block_grp_pop_x.kurtosis(),
)
print("Hennepin census block group population\n\tmean:\t{0}\n\tstdev:\t{1}\n\tmedian:\t{2}\n\tmin:\t{3}\n\tmax:\t{4}\n\tskew:\t{5}\n\tkurt:\t{6}\n\t ".format(*block_grp_pop_stats))

plot_kernel_density(
    hennepin_block_grp_pop,
    plt_title="Hennepin County Census Block Group Populations",
    plt_xlabel="Block Group Population",
    plt_ylabel="Density",
)

Hennepin census block group population
	mean:	1157.9177800616649
	stdev:	570.7401351533832
	median:	1031.0
	min:	31.0
	max:	6494.0
	skew:	3.6575662552088075
	kurt:	22.838591284179046
	 


In [20]:
# De-duplicate on the full block key, then group on that same key: every
# group is built from rows that are already unique per key, and the summed
# POP100 is exposed as column 'x' for plot_kernel_density.
hennepin_block_pop = (
    hennepin
    .drop_duplicates(subset=['COUNTY', 'TRACT', 'BLKGRP', 'BLOCK'])
    .groupby(by=['COUNTY', 'TRACT', 'BLKGRP', 'BLOCK'])
    .agg({'POP100': 'sum'})
    .rename(columns={'POP100': 'x'})
)

# Summary statistics, in the order the format string consumes them.
block_pop_x = hennepin_block_pop['x']
block_pop_stats = (
    block_pop_x.mean(),
    block_pop_x.std(),
    block_pop_x.median(),
    block_pop_x.min(),
    block_pop_x.max(),
    block_pop_x.skew(),
    block_pop_x.kurtosis(),
)
print("Hennepin census block population\n\tmean:\t{0}\n\tstdev:\t{1}\n\tmedian:\t{2}\n\tmin:\t{3}\n\tmax:\t{4}\n\tskew:\t{5}\n\tkurt:\t{6}\n\t ".format(*block_pop_stats))

plot_kernel_density(
    hennepin_block_pop,
    plt_title="Hennepin County Census Block Populations",
    plt_xlabel="Block Population",
    plt_ylabel="Density",
)

Hennepin census block population
	mean:	69.88735190124682
	stdev:	99.97792542679582
	median:	49.0
	min:	0.0
	max:	3043.0
	skew:	6.561999452663443
	kurt:	89.02392445901091
	 


In [22]:
# Mean hh_income over all rows of each tract, exposed as column 'x' for
# plot_kernel_density.
hennepin_tract_incs = (
    hennepin
    .groupby(by=['COUNTY', 'TRACT'])
    .agg({'hh_income': 'mean'})
    .rename(columns={'hh_income': 'x'})
)

# Summary statistics, in the order the format string consumes them.
tract_inc_x = hennepin_tract_incs['x']
tract_inc_stats = (
    tract_inc_x.mean(),
    tract_inc_x.std(),
    tract_inc_x.median(),
    tract_inc_x.min(),
    tract_inc_x.max(),
    tract_inc_x.skew(),
    tract_inc_x.kurtosis(),
)
print("Hennepin census tract income\n\tmean:\t{0}\n\tstdev:\t{1}\n\tmedian:\t{2}\n\tmin:\t{3}\n\tmax:\t{4}\n\tskew:\t{5}\n\tkurt:\t{6}\n\t ".format(*tract_inc_stats))

plot_kernel_density(
    hennepin_tract_incs,
    plt_title="Hennepin County Census Tract Incomes",
    plt_xlabel="Tract Average Household Income",
    plt_ylabel="Density",
)

Hennepin census tract income
	mean:	78729.96440028196
	stdev:	30323.468625499016
	median:	74916.83644859813
	min:	18899.53488372093
	max:	163509.37420814479
	skew:	0.39261938016800774
	kurt:	-0.5861147599017333
	 


In [23]:
# Mean hh_income over all rows of each block group, exposed as column 'x'
# for plot_kernel_density.
hennepin_block_grp_incs = (
    hennepin
    .groupby(by=['COUNTY', 'TRACT', 'BLKGRP'])
    .agg({'hh_income': 'mean'})
    .rename(columns={'hh_income': 'x'})
)

# Summary statistics, in the order the format string consumes them.
block_grp_inc_x = hennepin_block_grp_incs['x']
block_grp_inc_stats = (
    block_grp_inc_x.mean(),
    block_grp_inc_x.std(),
    block_grp_inc_x.median(),
    block_grp_inc_x.min(),
    block_grp_inc_x.max(),
    block_grp_inc_x.skew(),
    block_grp_inc_x.kurtosis(),
)
print("Hennepin census block group income\n\tmean:\t{0}\n\tstdev:\t{1}\n\tmedian:\t{2}\n\tmin:\t{3}\n\tmax:\t{4}\n\tskew:\t{5}\n\tkurt:\t{6}\n\t ".format(*block_grp_inc_stats))

plot_kernel_density(
    hennepin_block_grp_incs,
    plt_title="Hennepin County Census Block Group Incomes",
    plt_xlabel="Block Group Household Income",
    plt_ylabel="Density",
)

Hennepin census block group income
	mean:	81334.00026144038
	stdev:	34201.44446722149
	median:	77368.95182724252
	min:	8022.5904059040595
	max:	198311.22253521127
	skew:	0.4875637713156396
	kurt:	-0.2912669098995275
	 


In [24]:
# Mean hh_income over all rows of each block, exposed as column 'x' for
# plot_kernel_density.
hennepin_block_incs = (
    hennepin
    .groupby(by=['COUNTY', 'TRACT', 'BLKGRP', 'BLOCK'])
    .agg({'hh_income': 'mean'})
    .rename(columns={'hh_income': 'x'})
)

# Summary statistics, in the order the format string consumes them.
block_inc_x = hennepin_block_incs['x']
block_inc_stats = (
    block_inc_x.mean(),
    block_inc_x.std(),
    block_inc_x.median(),
    block_inc_x.min(),
    block_inc_x.max(),
    block_inc_x.skew(),
    block_inc_x.kurtosis(),
)
print("Hennepin census block income\n\tmean:\t{0}\n\tstdev:\t{1}\n\tmedian:\t{2}\n\tmin:\t{3}\n\tmax:\t{4}\n\tskew:\t{5}\n\tkurt:\t{6}\n\t ".format(*block_inc_stats))

plot_kernel_density(
    hennepin_block_incs,
    plt_title="Hennepin County Census Block Incomes",
    plt_xlabel="Block Household Income",
    plt_ylabel="Density",
)

Hennepin census block income
	mean:	87184.27052255278
	stdev:	45296.42273897957
	median:	79793.33333333333
	min:	0.0
	max:	801000.0
	skew:	1.950546017132531
	kurt:	11.90990254694749
	 
