In [None]:
import arcpy
import os
import numpy
import pandas
from datetime import datetime, timedelta

Load FRED/RTI microdata into database from files.

    Assumes a folder called "data/" containing one subfolder per county. Each county subfolder should contain a file "households.txt" (and a file "people.txt" for the household-size join). The household records of all counties are concatenated and the full list is saved as a single CSV file.

In [None]:
# Set the arcpy workspace. Later cells build data paths from this value with
# os.path.join and pass bare shapefile names to arcpy tools.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
arcpy.env.workspace = "C:\\Users\\Jayant\\Desktop\\SSTD\\workspace"
print(arcpy.env.workspace)

In [56]:
def read_households_file(data_root_folder, save_filename):
    """Merge every county's ``households.txt`` into one CSV file.

    Each sub-directory of the data folder is treated as one county. Its
    tab-delimited ``households.txt`` is read (columns ``sp_id``, ``hh_race``,
    ``hh_income``, ``latitude``, ``longitude``; ``sp_id`` becomes the index)
    and all counties are concatenated and written to ``save_filename`` inside
    the data folder.

    Parameters
    ----------
    data_root_folder : str
        Data folder, relative to ``arcpy.env.workspace``.
    save_filename : str
        Name of the CSV to write inside the data folder.

    Returns
    -------
    None
    """
    data_root_folder = os.path.join(arcpy.env.workspace, data_root_folder)
    county_frames = []   # one frame per county; concatenated once after the loop
    numrecs = 0

    # sorted() makes the row order of the export independent of directory order
    for county_foldername in sorted(os.listdir(data_root_folder)):
        folderpath = os.path.join(data_root_folder, county_foldername)
        # Ignore anything that isn't a folder (archives, earlier exports, stray files);
        # checking the file type instead of the extension keeps os.listdir from failing.
        if not os.path.isdir(folderpath):
            continue

        filepath = os.path.join(folderpath, "households.txt")
        if not os.path.isfile(filepath):
            continue

        print("Loading file: {0}".format(filepath))
        file_data = pandas.read_csv(filepath_or_buffer=filepath,
                                    sep='\t',                      # tab-delimited
                                    header=0,
                                    index_col='sp_id',
                                    usecols=['sp_id', 'hh_race', 'hh_income', 'latitude', 'longitude'],
                                    dtype={
                                        'sp_id': str,
                                        'hh_race': int,
                                        'hh_income': int,
                                        # coordinates stay text so they are written back unchanged
                                        'latitude': str,
                                        'longitude': str
                                    },
                                    engine='c')
        nrows = len(file_data)
        print("Read {0} income records. Merging with dataset".format(nrows))
        numrecs += nrows
        county_frames.append(file_data)

    # A single concat is linear in the number of rows; concatenating inside the
    # loop re-copies everything read so far on every iteration.
    full_dataset = pandas.concat(county_frames) if county_frames else pandas.DataFrame()

    # write dataset to file
    print("\nFinished merging {0} income records together. Writing full file to disk...".format(numrecs))
    save_fpath = os.path.join(data_root_folder, save_filename)
    full_dataset.to_csv(path_or_buf=save_fpath)
    print("Done.")
    return

In [101]:
def read_join_people_with_households(data_root_folder, save_filename):
    """Merge every county's households with their household size into one CSV.

    Each sub-directory of the data folder is treated as one county. For every
    county the household size is computed as the number of rows in
    ``people.txt`` per ``sp_hh_id`` and inner-joined onto ``households.txt``
    (whose ``sp_id`` column is renamed to ``sp_hh_id``). The per-county results
    are concatenated and written to ``save_filename`` inside the data folder.

    Counties missing either input file are reported and skipped, so that one
    county's data can never be joined with another county's leftovers.

    Parameters
    ----------
    data_root_folder : str
        Data folder, relative to ``arcpy.env.workspace``.
    save_filename : str
        Name of the CSV to write inside the data folder.

    Returns
    -------
    None
    """
    data_root_folder = os.path.join(arcpy.env.workspace, data_root_folder)
    county_frames = []   # one joined frame per county; concatenated once after the loop
    numrecs = 0

    # sorted() makes the row order of the export independent of directory order
    for county_foldername in sorted(os.listdir(data_root_folder)):
        folderpath = os.path.join(data_root_folder, county_foldername)
        # Ignore anything that isn't a folder (archives, earlier exports, stray files)
        if not os.path.isdir(folderpath):
            continue

        households_path = os.path.join(folderpath, "households.txt")
        people_path = os.path.join(folderpath, "people.txt")
        if not (os.path.isfile(households_path) and os.path.isfile(people_path)):
            print("Skipping {0}: households.txt and people.txt are both required".format(folderpath))
            continue

        # read households file
        print("Loading file: {0}".format(households_path))
        hh_file_data = pandas.read_csv(filepath_or_buffer=households_path,
                                       sep='\t',                      # tab-delimited
                                       header=0,
                                       usecols=['sp_id', 'hh_race', 'hh_income', 'latitude', 'longitude'],
                                       dtype={
                                           'sp_id': str,
                                           'hh_race': int,
                                           'hh_income': int,
                                           'latitude': str,
                                           'longitude': str
                                       },
                                       engine='c')
        print("Read {0} income records. joining with household size".format(len(hh_file_data)))

        # read people file
        print("Loading file: {0}".format(people_path))
        people_file_data = pandas.read_csv(filepath_or_buffer=people_path,
                                           sep='\t',                      # tab-delimited
                                           header=0,
                                           usecols=['sp_id', 'sp_hh_id'],
                                           dtype={
                                               'sp_id': str,
                                               'sp_hh_id': str,
                                           },
                                           engine='c')
        print("Read {0} people records. Aggregating to get the house size".format(len(people_file_data)))

        # Number of people rows per household id, stored in a column named hh_size
        hh_size = (people_file_data
                   .groupby('sp_hh_id', as_index=False)
                   .count()
                   .rename(columns={'sp_id': 'hh_size'}))

        # The household's own id is what people.txt calls sp_hh_id
        hh_file_data = hh_file_data.rename(columns={'sp_id': 'sp_hh_id'})

        # Explicit key and join type; households without any people row are dropped
        file_data = hh_file_data.merge(hh_size, on='sp_hh_id', how='inner')
        county_frames.append(file_data)
        numrecs += len(file_data)

    # A single concat is linear in the number of rows; concatenating inside the
    # loop re-copies everything read so far on every iteration.
    full_dataset = pandas.concat(county_frames) if county_frames else pandas.DataFrame()

    # write dataset to file
    print("\nFinished merging {0} income records together. Writing full file to disk...".format(numrecs))
    save_fpath = os.path.join(data_root_folder, save_filename)
    full_dataset.to_csv(path_or_buf=save_fpath)
    print("Done.")
    return


# Condense the households and people files of every county into one CSV
# (households joined with their household size) by iterating through the data folder.
data_folder = "..\\data"            # data folder; the function joins this onto arcpy.env.workspace
save_fname = "rti_race_incomes.csv"     # filename to export merged csv as (within data_folder directory)
 
read_join_people_with_households(data_folder, save_fname)

Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27001\households.txt
Read 7299 income records. joining with household size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27001\people.txt
Read 15902 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27003\households.txt
Read 121227 income records. joining with household size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27003\people.txt
Read 328700 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27005\households.txt
Read 13224 income records. joining with household size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27005\people.txt
Read 31760 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27007\households.txt
Read 16846 income records. joining with household size
Loading file: C:\Users\Jayant\Desk

Read 37363 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27061\households.txt
Read 18773 income records. joining with household size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27061\people.txt
Read 44047 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27063\households.txt
Read 4429 income records. joining with household size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27063\people.txt
Read 10240 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27065\households.txt
Read 6413 income records. joining with household size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27065\people.txt
Read 16097 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27067\households.txt
Read 16732 income records. join

Read 30408 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27121\households.txt
Read 4736 income records. joining with household size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27121\people.txt
Read 10725 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27123\households.txt
Read 202691 income records. joining with household size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27123\people.txt
Read 487647 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27125\households.txt
Read 1737 income records. joining with household size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27125\people.txt
Read 4075 people records. Aggregating to get the house size
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27127\households.txt
Read 6580 income records. join

In [57]:
# Condense many households files into one dataframe by iterating through data folder
# (household records only, without the household-size join).
data_folder = "..\\data"            # data folder; the function joins this onto arcpy.env.workspace
# Exported under its own name: writing "rti_race_incomes.csv" here would overwrite the
# joined export produced by read_join_people_with_households when the notebook is run
# top to bottom. The overall-Gini cell reads "rti_incomes.csv" from this same folder.
save_fname = "rti_incomes.csv"     # filename to export merged csv as (within data_folder directory)

read_households_file(data_folder, save_fname)

Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27001\households.txt
Read 7299 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27003\households.txt
Read 121227 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27005\households.txt
Read 13224 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27007\households.txt
Read 16846 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27009\households.txt
Read 15079 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27011\households.txt
Read 2293 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27013\households.txt
Read 24445 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27015\households.txt
Read 1078

Read 6580 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27129\households.txt
Read 6564 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27131\households.txt
Read 22315 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27133\households.txt
Read 3918 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27135\households.txt
Read 6300 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27137\households.txt
Read 84783 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27139\households.txt
Read 45108 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\workspace\..\data\27141\households.txt
Read 30212 income records. Merging with dataset
Loading file: C:\Users\Jayant\Desktop\SSTD\wor

In [None]:
# Geocode table

# data_folder = "../data"            # position of data folder relative to this notebook file
# save_fname = "rti_incomes.csv"     # filename to export merged csv as (within data_folder directory)
# out_tablename = "rti_income_table"

# rti_incomes_path = os.fsdecode(os.path.join(data_folder, save_fname))

# outtable = arcpy.TableToTable_conversion(in_rows = rti_incomes_path, 
 #                    out_path = arcpy.env.workspace, 
  #                   out_name = out_tablename)

In [102]:
# Build a feature class from the merged CSV, using its longitude/latitude columns
# as the x/y fields.
# NOTE(review): absolute, machine-specific input path.
# NOTE(review): the output name used here is not one of the shapefiles counted in
# the following cell - confirm which dataset later steps are meant to use.
rti_income_feature_class = "rti_race_income_hhsize_feature_class"
arcpy.management.XYTableToPoint(in_table = "C:\\Users\\Jayant\\Desktop\\SSTD\\data\\rti_race_incomes.csv", 
                       out_feature_class = rti_income_feature_class, 
                       x_field = "longitude", 
                       y_field = "latitude")

In [103]:
# Sanity check: print the record count of each point shapefile in the workspace.
# These shapefiles are expected to exist already; no cell shown here creates them by these names.
print(arcpy.GetCount_management('rti_joined_feature_class.shp'))
print(arcpy.GetCount_management('rti_race_income_feature_class.shp'))
print(arcpy.GetCount_management('rti_joined_ri_feature_class.shp'))
print(arcpy.GetCount_management('rti_joined_rih_feature_class.shp'))

2087218
2087218
2087218
2087218


In [9]:
# Computationally efficient Gini function (from https://github.com/oliviaguest/gini) 
#    and https://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm
def gini(array):
    """Return the Gini coefficient of the values in ``array``.

    Parameters
    ----------
    array : array-like
        Numeric values of any shape (ndarray, list, ...). The input is
        flattened into a copy and is never modified.

    Returns
    -------
    float
        ``sum((2*i - n - 1) * x_i) / (n * sum(x))`` over the values sorted in
        ascending order with ``i = 1..n``. If any value is negative, all values
        are first shifted so that the minimum becomes 0. Returns ``0.0`` when
        the (shifted) values sum to zero, i.e. when they are all identical
        zeros, instead of dividing by zero.

    Raises
    ------
    ValueError
        If ``array`` contains no values.
    """
    values = numpy.asarray(array).flatten()   # flatten() copies, so the caller's data is safe
    if values.size == 0:
        raise ValueError("gini() requires at least one value")

    print("Calculating Gini. Example of data: ")
    print(values[:5])

    # Work in float64: the weighted sum grows with n * sum(values) and integer
    # arithmetic could overflow where the default integer is 32 bits wide.
    values = values.astype(numpy.float64)

    # Values cannot be negative:
    lowest = values.min()
    if lowest < 0:
        values -= lowest

    # Sort values:
    values.sort()

    n = values.shape[0]
    total = values.sum()
    if total == 0:
        # every value is zero -> perfectly equal distribution
        return 0.0

    # Rank of every sorted value, 1..n:
    index = numpy.arange(1, n + 1, dtype=numpy.float64)

    # Gini coefficient:
    return numpy.sum((2 * index - n - 1) * values) / (n * total)


In [10]:
# Gini coefficient over all individual household incomes.
# Expects ..\data\rti_incomes.csv (relative to the arcpy workspace) to already exist
# and to contain an hh_income column.
incomefile = os.path.join(arcpy.env.workspace, "..\\data\\rti_incomes.csv")
incomelist = pandas.read_csv(filepath_or_buffer=incomefile,
                             header=0,
                             index_col=False,
                             usecols=['hh_income'],      # only the income column is loaded
                             dtype={'hh_income': numpy.longlong},
                             engine='c')
print("Read in file. Calculating Gini...")
# to_numpy() yields a 2-D (rows x 1) array; gini() flattens it
g = gini(incomelist.to_numpy())
print("Gini is {0}".format(g))

Read in file. Calculating Gini...
Calculating Gini. Example of data: 
[20000 35600  7800 19900 13100]
Gini is 0.3146037264674286


In [50]:
# Make slimmed-down copies of the 2020 block-group and block shapefiles that keep
# only the identifier fields needed for the later spatial join.
arcpy.env.workspace = "C:\\Users\\Jayant\\Desktop\\SSTD\\workspace"
# Show the fields of both source shapefiles before filtering
fields = [field.name for field in arcpy.ListFields('tl_2020_27_bg20.shp')]
print(fields)
print("\n")
fields = [field.name for field in arcpy.ListFields('tl_2020_27_tabblock20.shp')]
print(fields)

print(arcpy.GetCount_management('tl_2020_27_bg20.shp'))
print(arcpy.GetCount_management('tl_2020_27_tabblock20.shp'))

# Block groups: copy, then drop the descriptive fields from the copy
# NOTE(review): presumably fails on re-run if the *_filter.shp copies already exist
# and arcpy.env.overwriteOutput is not enabled - confirm.
arcpy.CopyFeatures_management('tl_2020_27_bg20.shp','bg_20_filter.shp')
dropFields = ['NAMELSAD20', 'MTFCC20', 'FUNCSTAT20', 'ALAND20', 'AWATER20', 'INTPTLAT20', 'INTPTLON20']
arcpy.DeleteField_management('bg_20_filter.shp', dropFields)

# Blocks: copy, then also drop the state/county/tract fields from the copy
arcpy.CopyFeatures_management('tl_2020_27_tabblock20.shp','tabblock_20_filter.shp')
dropFields = ['STATEFP20', 'COUNTYFP20', 'TRACTCE20', 'NAME20', 'MTFCC20', 'UR20', 'UACE20', 'UATYPE20', 'FUNCSTAT20', 'ALAND20', 'AWATER20', 'INTPTLAT20', 'INTPTLON20']
arcpy.DeleteField_management('tabblock_20_filter.shp', dropFields)
# Show the fields that remain in each filtered copy
fields = [field.name for field in arcpy.ListFields('bg_20_filter.shp')]
print(fields)
fields = [field.name for field in arcpy.ListFields('tabblock_20_filter.shp')]
print(fields)

['FID', 'Shape', 'STATEFP20', 'COUNTYFP20', 'TRACTCE20', 'BLKGRPCE20', 'GEOID20', 'NAMELSAD20', 'MTFCC20', 'FUNCSTAT20', 'ALAND20', 'AWATER20', 'INTPTLAT20', 'INTPTLON20']


['FID', 'Shape', 'STATEFP20', 'COUNTYFP20', 'TRACTCE20', 'BLOCKCE20', 'GEOID20', 'NAME20', 'MTFCC20', 'UR20', 'UACE20', 'UATYPE20', 'FUNCSTAT20', 'ALAND20', 'AWATER20', 'INTPTLAT20', 'INTPTLON20']
4706
198705
['FID', 'Shape', 'STATEFP20', 'COUNTYFP20', 'TRACTCE20', 'BLKGRPCE20', 'GEOID20']
['FID', 'Shape', 'BLOCKCE20', 'GEOID20']


In [None]:
# Spatially join the filtered census blocks (target) with the filtered block
# groups (join features) to build the block -> block group hierarchy.
arcpy.env.workspace = "C:\\Users\\Jayant\\Desktop\\SSTD\\workspace"
print(arcpy.env.workspace)
t1 = datetime.now()
# The result is written as 'hierarchy_2020.shp', which is the name the inspection
# cell that follows reads; the previous output name did not match it.
arcpy.SpatialJoin_analysis('tabblock_20_filter.shp', # target_features
                           'bg_20_filter.shp', # join_features 
                           'hierarchy_2020.shp', # out_feature_class
                           'JOIN_ONE_TO_ONE', # join_operation
                           'KEEP_ALL', # join_type
                           None, # field_mapping
                           'WITHIN', # match_option
                           None, # search_radius
                           None # distance_field_name
                           )
t2 = datetime.now()
t2_delta = t2-t1
# Report the elapsed time, as the other join cells do
print("Join completed in {0} seconds ({1} minutes)".format(t2_delta.total_seconds(), t2_delta.total_seconds()/60))

In [51]:
# Inspect the joined hierarchy shapefile: list its fields and print its record count.
fields = [field.name for field in arcpy.ListFields('hierarchy_2020.shp')]
print(fields)

print(arcpy.GetCount_management('hierarchy_2020.shp'))



['FID', 'Shape', 'Join_Count', 'TARGET_FID', 'BLOCKCE20', 'GEOID20', 'STATEFP20', 'COUNTYFP20', 'TRACTCE20', 'BLKGRPCE20', 'GEOID20_1', 'Shape_Leng', 'Shape_Area']
198705


In [47]:
# Small-scale trial of the block -> block group join, run in a separate test workspace
# on the tl_2020_27001_* shapefiles (presumably a single county - confirm).
# NOTE(review): this switches arcpy.env.workspace; later cells set it back explicitly.
arcpy.env.workspace = "C:\\Users\\Jayant\\Desktop\\SSTD\\test_workspace"
print(arcpy.env.workspace)
print(arcpy.GetCount_management('tl_2020_27001_bg20.shp'))
print(arcpy.GetCount_management('tl_2020_27001_tabblock20.shp'))
# Work on copies so the source shapefiles keep all of their fields
arcpy.CopyFeatures_management('tl_2020_27001_bg20.shp','bg_20_filter.shp')
arcpy.CopyFeatures_management('tl_2020_27001_tabblock20.shp','tablock_20_filter.shp')
# Unlike the full-state filter cell, GEOID20 is dropped from both copies here
dropFields = ['NAMELSAD20', 'MTFCC20', 'FUNCSTAT20', 'ALAND20', 'AWATER20', 'INTPTLAT20', 'INTPTLON20', 'GEOID20']
arcpy.DeleteField_management('bg_20_filter.shp', dropFields)
dropFields = ['MTFCC20', 'UR20', 'UACE20', 'UATYPE20', 'FUNCSTAT20', 'ALAND20', 'AWATER20', 'INTPTLAT20', 
              'INTPTLON20', 'STATEFP20', 'COUNTYFP20', 'TRACTCE20', 'GEOID20', 'NAME20']
arcpy.DeleteField_management('tablock_20_filter.shp', dropFields)
# Time the spatial join itself
t1 = datetime.now()
arcpy.SpatialJoin_analysis('tablock_20_filter.shp', # target_features
                           'bg_20_filter.shp', # join_features 
                           '2020_small_hierarchy.shp', # out_feature_class, 'STATEFP20', 'COUNTYFP20', 'TRACTCE20'
                           'JOIN_ONE_TO_ONE', # join_operation
                           'KEEP_ALL', # join_type
                           None, # field_mapping
                           'WITHIN', # match_option
                           None, # search_radius
                           None # distance_field_name
                           )
t2 = datetime.now()
t2_delta = t2-t1
print("Join completed in {0} seconds ({1} minutes)".format(t2_delta.total_seconds(), t2_delta.total_seconds()/60))

C:\Users\Jayant\Desktop\SSTD\test_workspace
19
1746
Join completed in 4.307947 seconds (0.07179911666666668 minutes)


In [None]:
# Spatially join the household income points onto census geometry and time the join.
arcpy.env.workspace = "C:\\Users\\Jayant\\Desktop\\SSTD\\workspace"
print(arcpy.env.workspace)
# Create income means by block, block group, tract, county
# (this cell only performs the join; no means are computed here)
# NOTE(review): the target is a tabblock10 shapefile while the output name and the
# log message below say "block groups" - confirm which geography is intended.
t1 = datetime.now()
arcpy.SpatialJoin_analysis('tl_2020_27001_tabblock10.shp', # target_features
                           'rti_income_feature_class.shp', # join_features 
                           'rti_income_by_block_groups_2010_27001.shp', # out_feature_class
                           'JOIN_ONE_TO_MANY', # join_operation
                           'KEEP_ALL', # join_type
                           None, # field_mapping
                           'CONTAINS', # match_option
                           None, # search_radius
                           None # distance_field_name
                           )
t2 = datetime.now()
t2_delta = t2-t1
print("Assigned income points to block groups in {0} seconds ({1} minutes)".format(t2_delta.total_seconds(), t2_delta.total_seconds()/60))


In [104]:
# export grouped feature class to table for Gini calculation
# The attribute table of the joined shapefile is written as a CSV into out_location.
# NOTE(review): absolute, machine-specific output directory.
in_feature_class = "rti_joined_rih_feature_class.shp"
out_location = 'C:\\Users\\Jayant\\Desktop\\SSTD\\outputs\\'
out_filename = "rti_joined_race_income_hhsize_features.csv"

outtable = arcpy.TableToTable_conversion(in_rows = in_feature_class, 
                     out_path = out_location, 
                     out_name = out_filename)

In [None]:
# NOTE(review): absolute, machine-specific path to the per-block income table.
in_filename = 'C:\\Users\\alex\\Documents\\code\\maup_income_inequality\\maup_inequality\\outputs\\rti_income_grouped_by_block.csv'
def getCountyIoD(in_filename, level_of_aggregation):
    """Load the ``hh_income`` and ``GEOID`` columns of the CSV at ``in_filename``.

    Unfinished: the aggregation and Gini steps below are commented out, so the
    function currently only reads the file into a local DataFrame and returns
    None. ``level_of_aggregation`` is accepted but not used.
    """
    incomelist = pandas.read_csv(filepath_or_buffer=in_filename,
                                 header=0,
                                 index_col=False,
                                 usecols=['hh_income', 'GEOID'],
                                 engine='c')

    # drop rows with NA - if there's no data for a census block, we will ignore it
    # incomelist = incomelist.dropna()

    # calculate mean income of each census block
    # mean_income = incomelist.groupby('GEOID').mean()
    # mean_income.hh_income = mean_income.hh_income.astype(numpy.longlong)
    # mean_incomes = mean_income['hh_income']
    # print(mean_income.head)

    # print("Read in file. Calculating Gini...")
    # g = gini(mean_incomes.to_numpy())
    # print("Gini is {0}".format(g))
    


In [None]:
def Gini_From_Shape_File(input_shape_feature, shape_group_name, input_shape_statefips_name, input_shape_countyfips_name,
                         out_location='C:\\Users\\alex\\Documents\\code\\maup_income_inequality\\maup_inequality\\outputs\\'):
    """Compute and print the Gini coefficient of mean household income per group.

    Steps:
      1. Select the features of ``input_shape_feature`` whose state FIPS field
         equals '27' and copy them to ``<shape_group_name>_mn``.
      2. Spatially join ``rti_income_feature_class`` onto that subset, writing
         ``rti_income_by_<shape_group_name>``.
      3. Export the joined table to
         ``rti_income_grouped_by_<shape_group_name>.csv`` in ``out_location``.
      4. Read the CSV back, drop rows with missing values, average
         ``hh_income`` per value of ``input_shape_countyfips_name`` and pass
         the (integer-truncated) means to ``gini``.

    Parameters
    ----------
    input_shape_feature : str
        Layer / feature class holding the grouping polygons.
    shape_group_name : str
        Short name used to build the names of all intermediate outputs.
    input_shape_statefips_name : str
        Name of the state FIPS field used for the selection.
    input_shape_countyfips_name : str
        Name of the field the incomes are averaged by.
    out_location : str, optional
        Directory the CSV is exported to. Defaults to the original hard-coded
        (machine-specific) output folder.

    Returns
    -------
    None
        The coefficient is printed.
    """
    ## Filter shape file down to just Minnesota for speed
    print("Creating subset of shape file just for Minnesota data...")
    t1 = datetime.now()
    # Select only census blocks in MN
    arcpy.SelectLayerByAttribute_management(input_shape_feature, 
                                            'NEW_SELECTION', 
                                            input_shape_statefips_name+" = '27'")

    # Write the selected features to a new featureclass
    arcpy.CopyFeatures_management(input_shape_feature, shape_group_name+"_mn")
    t2 = datetime.now()
    t2_delta = t2-t1
    print("Created Subset of {0} in Minnesota only in {1} seconds".format(
        shape_group_name,
        t2_delta.total_seconds()))
    
    print("Assigning income points to groups...")
    ## Create income means by block, block group, tract, county
    t1 = datetime.now()
    arcpy.SpatialJoin_analysis(shape_group_name+"_mn", # target_features
                               'rti_income_feature_class', # join_features 
                               'rti_income_by_'+shape_group_name, # out_feature_class
                               'JOIN_ONE_TO_MANY', # join_operation
                               'KEEP_ALL', # join_type
                               None, # field_mapping
                               'CONTAINS', # match_option
                               None, # search_radius
                               None # distance_field_name
                               )
    t2 = datetime.now()
    t2_delta = t2-t1
    print("Assigned income points to {0} in {1} seconds ({2} minutes)".format(
        shape_group_name,
        t2_delta.total_seconds(), 
        t2_delta.total_seconds()/60))
    
    print("Saving feature layer and reading in as raster...")
    ## export grouped feature class to table for Gini calculation
    out_filename = "rti_income_grouped_by_" + shape_group_name + ".csv"
    outtable = arcpy.TableToTable_conversion(
                        in_rows = 'rti_income_by_'+shape_group_name, 
                        out_path = out_location, 
                        out_name = out_filename)

    ## Calculate GINI
    # os.path.join works whether or not out_location ends with a separator
    incomelist = pandas.read_csv(filepath_or_buffer=os.path.join(out_location, out_filename),
                                 header=0,
                                 index_col=False,
                                 usecols=['hh_income', input_shape_countyfips_name],
                                 engine='c')

    # drop rows with NA - groups without any income data are ignored
    incomelist = incomelist.dropna()

    print("Calcuating Gini...")
    # Mean income per group. After groupby() the grouping field is the index, so
    # the averaged values have to be taken from the hh_income column.
    mean_income = incomelist.groupby(input_shape_countyfips_name).mean()
    mean_incomes = mean_income['hh_income'].astype(numpy.longlong)

    g = gini(mean_incomes.to_numpy())
    print("Gini is {0}".format(g))
    return
    
    
    
# Run the pipeline on the 'USA Block Groups' layer, averaging incomes by the STCOFIPS field.
Gini_From_Shape_File('USA Block Groups', 'census_block_group', 'STATE_FIPS', 'STCOFIPS')

In [None]:
# Recompute the Gini of mean household income per STCOFIPS value from the CSV that
# was already exported for the census block groups, without redoing the spatial join.
input_shape_feature = 'USA Block Groups'
shape_group_name = 'census_block_group'
input_shape_statefips_name = 'STATE_FIPS'
input_shape_countyfips_name = 'STCOFIPS'
# NOTE(review): absolute, machine-specific output directory.
out_location = 'C:\\Users\\alex\\Documents\\code\\maup_income_inequality\\maup_inequality\\outputs\\'
out_filename = "rti_income_grouped_by_" + shape_group_name + ".csv"

## Calculate GINI
incomelist = pandas.read_csv(filepath_or_buffer=out_location+out_filename,
                             header=0,
                             index_col=False,
                             usecols=['hh_income', input_shape_countyfips_name],
                             engine='c')

# drop rows with NA - groups without any income data are ignored
incomelist = incomelist.dropna()

print("Calcuating Gini...")
# calculate mean income per value of the grouping field
mean_income = incomelist.groupby(input_shape_countyfips_name).mean()
mean_income.hh_income = mean_income.hh_income.astype(numpy.longlong)
# Defined in this cell: left undefined, the next line either fails with a NameError
# or silently reuses a value that another cell happened to leave in the kernel.
mean_incomes = mean_income['hh_income']

g = gini(mean_incomes.to_numpy())
print("Gini is {0}".format(g))

In [None]:
# Gini coefficient of mean household income at four nested levels of census geography,
# all computed from the same per-block income table.
in_filename = 'C:\\Users\\alex\\Documents\\code\\maup_income_inequality\\maup_inequality\\outputs\\rti_income_grouped_by_block.csv'

incomelist = pandas.read_csv(filepath_or_buffer=in_filename,
                             header=0,
                             index_col=False,
                             usecols=['hh_income', 'GEOID', 'COUNTY', 'TRACT', 'BLKGRP', 'BLOCK'],
                             engine='c')

# drop rows with NA - if there's no data for a census block, we will ignore it
incomelist = incomelist.dropna()

# (report template, grouping columns), ordered from the finest to the coarsest level
aggregation_levels = [
    ("Block Gini is {0}", ['COUNTY', 'TRACT', 'BLKGRP', 'BLOCK']),   # census block
    ("Block Group Gini is {0}", ['COUNTY', 'TRACT', 'BLKGRP']),      # census block group
    ("Tract Gini is {0}", ['COUNTY', 'TRACT']),                      # census tract
    ("County Gini is {0}", ['COUNTY']),                              # county
]

for report_template, group_columns in aggregation_levels:
    # mean income of every group at this level, truncated to whole numbers
    mean_income = incomelist.groupby(group_columns).mean()
    mean_income.hh_income = mean_income.hh_income.astype(numpy.longlong)
    mean_incomes = mean_income['hh_income']
    g = gini(mean_incomes.to_numpy())
    print(report_template.format(g))