# CENSUS DATA: READING AND PRELIMINARY CLEANING
### This notebook reads the census files, selects columns, and performs preliminary cleaning



### NOTE: the following files must be downloaded from the US CENSUS BUREAU before running this notebook
### ACSDP5Y2021.DP05-Data.csv
### ACSST5Y2021.S1701-Data.csv
### ACSDP5Y2021.DP03-Data.csv

In [8]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [9]:
# Load the raw ACS 2021 5-year CSV exports into DataFrames.
# Each file is decoded as latin-1; low_memory=False turns off pandas'
# chunked parsing of the file.

# DP05 data profile (source of the race columns used in this notebook)
DP05_ALL = pd.read_csv(
    './ACSDP5Y2021.DP05-Data.csv',
    encoding='latin-1',
    low_memory=False,
)

# S1701 subject table: poverty status in the past 12 months
S1701_ALL = pd.read_csv(
    './ACSST5Y2021.S1701-Data.csv',
    encoding='latin-1',
    low_memory=False,
)

# DP03 data profile (source of the economic columns used in this notebook)
DP03_ALL = pd.read_csv(
    './ACSDP5Y2021.DP03-Data.csv',
    encoding='latin-1',
    low_memory=False,
)

In [11]:
# Tracts are identified by FIPS: State (2) + County (3) + Tract (6)

# EPA data: 'ID' = 11 digit integer (FIPS number)
# ACS data: 'GEO_ID' - stored as string and has a 9 character prefix

# function converts ACS GEO_ID string to EPA int64 ID
def convert_ID(text):
    """Convert an ACS GEO_ID string to an integer ID.

    The first 9 characters of ``text`` are discarded and the remainder is
    parsed with ``int``. Because the result is an integer, leading zeros in
    the remaining digits are not preserved.
    """
    fips_digits = text[9:]
    return int(fips_digits)

In [4]:
# cols_DP05 = np.array(DP05_ALL.columns)
# cols_S1701 = np.array(S1701_ALL.columns)
# cols_DP03 = np.array(DP03_ALL.columns)

In [11]:
# #  select only the 'Estimate' columns

# # define function to check sub-string at str_index
# def is_Estimate(text):
#     return text[str_index:]=='E'

# cols = cols_DP05
# str_index = 9
# cols_DP05_data_only = cols[list(map(is_Estimate, cols))]

# cols = cols_S1701
# str_index = 13
# cols_S1701_data_only = cols[list(map(is_Estimate, cols))]

# cols = cols_DP03
# str_index = 9
# cols_DP03_data_only = cols[list(map(is_Estimate, cols))]

## RACE DATA

In [10]:
# Pick the tract identifiers and the race / ethnicity estimates out of DP05.

cols_acs_race = [
    'GEO_ID',
    'NAME',
    # RACE > Total population
    'DP05_0033E',  # total population
    # RACE > Total population > One race
    'DP05_0037E',  # White
    'DP05_0038E',  # Black or African American
    'DP05_0039E',  # American Indian and Alaska Native
    'DP05_0044E',  # Asian
    'DP05_0052E',  # Native Hawaiian and Other Pacific Islander
    # Race alone or in combination with one or more other races > Total population
    'DP05_0064E',  # White
    'DP05_0065E',  # Black or African American
    'DP05_0066E',  # American Indian and Alaska Native
    'DP05_0067E',  # Asian
    'DP05_0068E',  # Native Hawaiian and Other Pacific Islander
    # HISPANIC OR LATINO AND RACE > Total population
    'DP05_0071E',  # Hispanic or Latino (of any race)
]

# Column subset of the raw table; row 0 still carries the column definitions.
acs_race = DP05_ALL[cols_acs_race]
# Keep the definition row (index label 0) as its own one-row frame ...
acs_race_define = acs_race.loc[acs_race.index.isin([0])]
# ... and remove it from the data rows.
acs_race = acs_race.drop(index=0)

acs_race.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85395 entries, 1 to 85395
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   GEO_ID      85395 non-null  object
 1   NAME        85395 non-null  object
 2   DP05_0033E  85395 non-null  object
 3   DP05_0037E  85395 non-null  object
 4   DP05_0038E  85395 non-null  object
 5   DP05_0039E  85395 non-null  object
 6   DP05_0044E  85395 non-null  object
 7   DP05_0052E  85395 non-null  object
 8   DP05_0064E  85395 non-null  object
 9   DP05_0065E  85395 non-null  object
 10  DP05_0066E  85395 non-null  object
 11  DP05_0067E  85395 non-null  object
 12  DP05_0068E  85395 non-null  object
 13  DP05_0071E  85395 non-null  object
dtypes: object(14)
memory usage: 9.1+ MB


In [17]:
# Cleaning race data: mark missing values, convert datatypes, rename columns

# define new column names
new_col_names = {
    'GEO_ID' : 'ID',
    'DP05_0033E' : 'pop_tot',
    'DP05_0037E' : 'pop_white_only',
    'DP05_0038E' : 'pop_black_only',
    'DP05_0039E' : 'pop_amInd_alNat_only',
    'DP05_0044E' : 'pop_asian_only',
    'DP05_0052E' : 'pop_natHI_PI_only',
    'DP05_0064E' : 'pop_white',
    'DP05_0065E' : 'pop_black',
    'DP05_0066E' : 'pop_amInd_alNat',
    'DP05_0067E' : 'pop_asian',
    'DP05_0068E' : 'pop_natHI_PI',
    'DP05_0071E' : 'pop_hispanic_latino_any',
}

# numeric estimate columns: everything after GEO_ID and NAME
num_data = cols_acs_race[2:]

# Build the cleaned frame in one chain so that re-running this cell always
# starts again from the untouched `acs_race`.
clean_acs_race = (
    acs_race
    # Missing values are stored as '-'. Use np.nan rather than None:
    # before pandas 1.4, replace('-', None) ignores the explicit None and
    # forward-fills from the previous row instead of producing a null.
    .replace('-', np.nan)
    # cast numeric data stored as strings to float
    .astype({col: float for col in num_data})
    # convert the GEO_ID string to the integer ID
    .assign(GEO_ID=lambda frame: frame['GEO_ID'].apply(convert_ID))
    # rename columns
    .rename(columns=new_col_names)
)

clean_acs_race.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85395 entries, 1 to 85395
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   ID                       85395 non-null  int64  
 1   NAME                     85395 non-null  object 
 2   pop_tot                  85395 non-null  float64
 3   pop_white_only           85395 non-null  float64
 4   pop_black_only           85395 non-null  float64
 5   pop_amInd_alNat_only     85395 non-null  float64
 6   pop_asian_only           85395 non-null  float64
 7   pop_natHI_PI_only        85395 non-null  float64
 8   pop_white                85395 non-null  float64
 9   pop_black                85395 non-null  float64
 10  pop_amInd_alNat          85395 non-null  float64
 11  pop_asian                85395 non-null  float64
 12  pop_natHI_PI             85395 non-null  float64
 13  pop_hispanic_latino_any  85395 non-null  float64
dtypes: float64(12), int64(

In [13]:
# for col in cols_S1701_data_only : print('\'{}\', #'.format(col), S1701_ALL[col][0].split("!!")[1], S1701_ALL[col][0].split("!!")[3:])

## POVERTY DATA

In [15]:
# Pick the poverty-status estimates out of S1701.
# For every group below, three measures are taken:
#   C01 = Total (people for whom poverty status was determined in the tract)
#   C02 = Below poverty level (count)
#   C03 = Percent below poverty level
# Row suffixes (section 'RACE AND HISPANIC OR LATINO ORIGIN' except 001):
#   001 = all people
#   013 = White alone
#   014 = Black or African American alone
#   015 = American Indian and Alaska Native alone
#   016 = Asian alone
#   017 = Native Hawaiian and Other Pacific Islander alone
#   020 = Hispanic or Latino origin (of any race)

cols_poverty = [
    'GEO_ID',
    # Total
    'S1701_C01_001E',
    'S1701_C01_013E',
    'S1701_C01_014E',
    'S1701_C01_015E',
    'S1701_C01_016E',
    'S1701_C01_017E',
    'S1701_C01_020E',
    # Below poverty level
    'S1701_C02_001E',
    'S1701_C02_013E',
    'S1701_C02_014E',
    'S1701_C02_015E',
    'S1701_C02_016E',
    'S1701_C02_017E',
    'S1701_C02_020E',
    # Percent below poverty level
    'S1701_C03_001E',
    'S1701_C03_013E',
    'S1701_C03_014E',
    'S1701_C03_015E',
    'S1701_C03_016E',
    'S1701_C03_017E',
    'S1701_C03_020E',
]

# Column subset of the raw table, including the row with index label 0.
df = S1701_ALL[cols_poverty]
# Keep the row with index label 0 as a separate one-row header frame ...
acs_poverty_header = df.loc[df.index.isin([0])]
# ... and leave only the remaining rows in the data frame.
acs_poverty = df.drop(index=0)

acs_poverty.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85395 entries, 1 to 85395
Data columns (total 22 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   GEO_ID          85395 non-null  object
 1   S1701_C01_001E  85395 non-null  object
 2   S1701_C01_013E  85395 non-null  object
 3   S1701_C01_014E  85395 non-null  object
 4   S1701_C01_015E  85395 non-null  object
 5   S1701_C01_016E  85395 non-null  object
 6   S1701_C01_017E  85395 non-null  object
 7   S1701_C01_020E  85395 non-null  object
 8   S1701_C02_001E  85395 non-null  object
 9   S1701_C02_013E  85395 non-null  object
 10  S1701_C02_014E  85395 non-null  object
 11  S1701_C02_015E  85395 non-null  object
 12  S1701_C02_016E  85395 non-null  object
 13  S1701_C02_017E  85395 non-null  object
 14  S1701_C02_020E  85395 non-null  object
 15  S1701_C03_001E  85395 non-null  object
 16  S1701_C03_013E  85395 non-null  object
 17  S1701_C03_014E  85395 non-null  object
 18  S1701_

In [36]:
# df = acs_poverty_by_race_header
# cols = cols_poverty[1:]

# for col in cols : 
#     print('{} :'.format(col), df[col][0].split("!!")[1:3])
#     print('\t\t', df[col][0].split("!!")[3:])

In [18]:
# Cleaning poverty data: mark missing values, convert GEO_ID, datatypes, rename columns

# define new col names
new_col_names = {
    'GEO_ID' : 'ID',
    'S1701_C01_001E' : 'poverty_tot',
    'S1701_C01_013E' : 'poverty_tot_white_only',
    'S1701_C01_014E' : 'poverty_tot_black_only',
    'S1701_C01_015E' : 'poverty_tot_amInd_alNat_only',
    'S1701_C01_016E' : 'poverty_tot_asian_only',
    'S1701_C01_017E' : 'poverty_tot_natHI_PI_only',
    'S1701_C01_020E' : 'poverty_tot_hispanic_latino_any',
    'S1701_C02_001E' : 'poverty_below',
    'S1701_C02_013E' : 'poverty_below_white_only',
    'S1701_C02_014E' : 'poverty_below_black_only',
    'S1701_C02_015E' : 'poverty_below_amInd_alNat_only',
    'S1701_C02_016E' : 'poverty_below_asian_only',
    'S1701_C02_017E' : 'poverty_below_natHI_PI_only',
    'S1701_C02_020E' : 'poverty_below_hispanic_latino_any',
    'S1701_C03_001E' : 'poverty_perc',
    'S1701_C03_013E' : 'poverty_perc_white_only',
    'S1701_C03_014E' : 'poverty_perc_black_only',
    'S1701_C03_015E' : 'poverty_perc_amInd_alNat_only',
    'S1701_C03_016E' : 'poverty_perc_asian_only',
    'S1701_C03_017E' : 'poverty_perc_natHI_PI_only',
    'S1701_C03_020E' : 'poverty_perc_hispanic_latino_any',
}

# numeric estimate columns: everything after GEO_ID
num_data = cols_poverty[1:]

# Build the cleaned frame in one chain so that re-running this cell always
# starts again from the untouched `acs_poverty`.
clean_acs_poverty = (
    acs_poverty
    # Missing values are stored as '-'. Use np.nan rather than None:
    # before pandas 1.4, replace('-', None) ignores the explicit None and
    # forward-fills from the previous row instead of producing a null.
    .replace('-', np.nan)
    # cast numeric data stored as strings to float
    .astype({col: float for col in num_data})
    # convert the GEO_ID string to the integer ID
    .assign(GEO_ID=lambda frame: frame['GEO_ID'].apply(convert_ID))
    # rename columns
    .rename(columns=new_col_names)
)

clean_acs_poverty.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85395 entries, 1 to 85395
Data columns (total 22 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   ID                                 85395 non-null  int64  
 1   poverty_tot                        85395 non-null  float64
 2   poverty_tot_white_only             85395 non-null  float64
 3   poverty_tot_black_only             85395 non-null  float64
 4   poverty_tot_amInd_alNat_only       85395 non-null  float64
 5   poverty_tot_asian_only             85395 non-null  float64
 6   poverty_tot_natHI_PI_only          85395 non-null  float64
 7   poverty_tot_hispanic_latino_any    85395 non-null  float64
 8   poverty_below                      85395 non-null  float64
 9   poverty_below_white_only           85395 non-null  float64
 10  poverty_below_black_only           85395 non-null  float64
 11  poverty_below_amInd_alNat_only     85395 non-null  flo

## ECONOMIC DATA

In [19]:
# Pick the industry, household-income and health-insurance estimates out of DP03.

cols_economic = [
    'GEO_ID',
    # INDUSTRY > Civilian employed population 16 years and over
    'DP03_0032E',  # (section total)
    'DP03_0033E',  # Agriculture, forestry, fishing and hunting, and mining
    'DP03_0034E',  # Construction
    'DP03_0035E',  # Manufacturing
    'DP03_0036E',  # Wholesale trade
    'DP03_0037E',  # Retail trade
    'DP03_0038E',  # Transportation and warehousing, and utilities
    'DP03_0039E',  # Information
    'DP03_0040E',  # Finance and insurance, and real estate and rental and leasing
    'DP03_0041E',  # Professional, scientific, and management, and administrative and waste management services
    'DP03_0042E',  # Educational services, and health care and social assistance
    'DP03_0043E',  # Arts, entertainment, and recreation, and accommodation and food services
    'DP03_0044E',  # Other services, except public administration
    'DP03_0045E',  # Public administration
    # INCOME AND BENEFITS (IN 2021 INFLATION-ADJUSTED DOLLARS) > Total households
    'DP03_0051E',  # (section total)
    'DP03_0052E',  # Less than $10,000
    'DP03_0053E',  # $10,000 to $14,999
    'DP03_0054E',  # $15,000 to $24,999
    'DP03_0055E',  # $25,000 to $34,999
    'DP03_0056E',  # $35,000 to $49,999
    'DP03_0057E',  # $50,000 to $74,999
    'DP03_0058E',  # $75,000 to $99,999
    'DP03_0059E',  # $100,000 to $149,999
    'DP03_0060E',  # $150,000 to $199,999
    'DP03_0061E',  # $200,000 or more
    'DP03_0062E',  # Median household income (dollars)
    # HEALTH INSURANCE COVERAGE > Civilian noninstitutionalized population
    'DP03_0095E',  # (section total)
    'DP03_0096E',  # With health insurance coverage
    'DP03_0099E',  # No health insurance coverage
]

# Column subset of the raw table, including the row with index label 0.
df = DP03_ALL[cols_economic]
# Keep the row with index label 0 as a separate one-row header frame ...
acs_economic_header = df.loc[df.index.isin([0])]
# ... and leave only the remaining rows in the data frame.
acs_economic = df.drop(index=0)

# for col in cols_economic: print('\'{}\', #'.format(col), df[col][0].split("!!")[1:])
# acs_economic.info()

In [20]:
# Cleaning economic data: mark missing values, convert GEO_ID, datatypes, rename columns

# define new column names
new_col_names = {
    'GEO_ID' : 'ID',
    # NOTE(review): DP03_0009E is not in cols_economic, so this entry is
    # currently a no-op and no 'unemployment_rate' column is produced. Add the
    # column to the selection if the unemployment rate is wanted.
    'DP03_0009E' : 'unemployment_rate',
    'DP03_0032E' : 'industry_tot',
    'DP03_0033E' : 'industry_ag_for',
    'DP03_0034E' : 'industry_const',
    'DP03_0035E' : 'industry_manu',
    'DP03_0036E' : 'industry_wtrade',
    'DP03_0037E' : 'industry_rtrade',
    'DP03_0038E' : 'industry_transp',
    'DP03_0039E' : 'industry_info',
    'DP03_0040E' : 'industry_fin',
    'DP03_0041E' : 'industry_prof',
    'DP03_0042E' : 'industry_ed_hc',
    'DP03_0043E' : 'industry_arts_ent',
    'DP03_0044E' : 'industry_other',
    'DP03_0045E' : 'industry_pub_admin',
    'DP03_0051E' : 'income_tot',
    'DP03_0052E' : 'income_under10k',
    'DP03_0053E' : 'income_10to15k',
    'DP03_0054E' : 'income_15to25k',
    'DP03_0055E' : 'income_25to35k',
    'DP03_0056E' : 'income_35to50k',
    'DP03_0057E' : 'income_50to75k',
    'DP03_0058E' : 'income_75to100k',
    'DP03_0059E' : 'income_100to150k',
    'DP03_0060E' : 'income_150to200k',
    'DP03_0061E' : 'income_over200k',
    'DP03_0062E' : 'income_median',
    'DP03_0095E' : 'health_ins_tot',
    'DP03_0096E' : 'health_ins_yes',
    'DP03_0099E' : 'health_ins_no',
}

# numeric estimate columns: everything after GEO_ID
num_data = cols_economic[1:]

# Build the cleaned frame in one chain so that re-running this cell always
# starts again from the untouched `acs_economic`.
clean_acs_economic = (
    acs_economic
    # Missing values are stored as '-'. Use np.nan rather than None:
    # before pandas 1.4, replace('-', None) ignores the explicit None and
    # forward-fills from the previous row instead of producing a null.
    .replace('-', np.nan)
    # Rewrite the entries '250,000+' and '2,500-' as plain digit strings so
    # that the float cast below can parse them.
    .replace('250,000+', '250000')
    .replace('2,500-', '2500')
    # cast numeric data stored as strings to float
    .astype({col: float for col in num_data})
    # convert the GEO_ID string to the integer ID
    .assign(GEO_ID=lambda frame: frame['GEO_ID'].apply(convert_ID))
    # rename columns
    .rename(columns=new_col_names)
)

clean_acs_economic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85395 entries, 1 to 85395
Data columns (total 30 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  85395 non-null  int64  
 1   industry_tot        85395 non-null  float64
 2   industry_ag_for     85395 non-null  float64
 3   industry_const      85395 non-null  float64
 4   industry_manu       85395 non-null  float64
 5   industry_wtrade     85395 non-null  float64
 6   industry_rtrade     85395 non-null  float64
 7   industry_transp     85395 non-null  float64
 8   industry_info       85395 non-null  float64
 9   industry_fin        85395 non-null  float64
 10  industry_prof       85395 non-null  float64
 11  industry_ed_hc      85395 non-null  float64
 12  industry_arts_ent   85395 non-null  float64
 13  industry_other      85395 non-null  float64
 14  industry_pub_admin  85395 non-null  float64
 15  income_tot          85395 non-null  float64
 16  inco

## Merge

In [21]:
# Combine the three cleaned tables into one row per ID.
# The inner joins keep only IDs present in every table. validate='one_to_one'
# makes pandas raise a MergeError if an ID is duplicated on either side,
# instead of silently multiplying rows.
merged = pd.merge(
    clean_acs_race,
    clean_acs_poverty,
    how='inner',
    on='ID',
    validate='one_to_one',
)
merged = pd.merge(
    merged,
    clean_acs_economic,
    how='inner',
    on='ID',
    validate='one_to_one',
)
merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85395 entries, 0 to 85394
Data columns (total 64 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   ID                                 85395 non-null  int64  
 1   NAME                               85395 non-null  object 
 2   pop_tot                            85395 non-null  float64
 3   pop_white_only                     85395 non-null  float64
 4   pop_black_only                     85395 non-null  float64
 5   pop_amInd_alNat_only               85395 non-null  float64
 6   pop_asian_only                     85395 non-null  float64
 7   pop_natHI_PI_only                  85395 non-null  float64
 8   pop_white                          85395 non-null  float64
 9   pop_black                          85395 non-null  float64
 10  pop_amInd_alNat                    85395 non-null  float64
 11  pop_asian                          85395 non-null  flo

In [27]:
# Write the merged table to CSV; index=False leaves the row index out of the file.
merged.to_csv(
    "./acs_all_tracts_cleaned_new-dec1.csv",
    index=False,
)

In [29]:
# Load two saved versions of the cleaned table.
# NOTE(review): the save cell in this notebook writes
# './acs_all_tracts_cleaned_new-dec1.csv', while the "new" file read here is
# './acs_all_tracts_cleaned.csv' -- confirm this is the intended file.

# new version
new = pd.read_csv(
    './acs_all_tracts_cleaned.csv',
    encoding='latin-1',
    low_memory=False,
)
# old version
old = pd.read_csv(
    './acs_all_tracts_cleaned_original_copy_01-12-2023.csv',
    encoding='latin-1',
    low_memory=False,
)

In [31]:
# Show the column names, non-null counts and dtypes of the `new` table.
new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85395 entries, 0 to 85394
Data columns (total 64 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   ID                                 85395 non-null  int64  
 1   NAME                               85395 non-null  object 
 2   pop_tot                            85395 non-null  float64
 3   pop_white_only                     85395 non-null  float64
 4   pop_black_only                     85395 non-null  float64
 5   pop_amInd_alNat_only               85395 non-null  float64
 6   pop_asian_only                     85395 non-null  float64
 7   pop_natHI_PI_only                  85395 non-null  float64
 8   pop_white                          85395 non-null  float64
 9   pop_black                          85395 non-null  float64
 10  pop_amInd_alNat                    85395 non-null  float64
 11  pop_asian                          85395 non-null  flo

In [32]:
# Show the column names, non-null counts and dtypes of the `old` table.
old.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85395 entries, 0 to 85394
Data columns (total 61 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   ID                                 85395 non-null  int64  
 1   NAME                               85395 non-null  object 
 2   pop_tot                            85395 non-null  float64
 3   pop_white_only                     85395 non-null  float64
 4   pop_black_only                     85395 non-null  float64
 5   pop_amInd_alNat_only               85395 non-null  float64
 6   pop_asian_only                     85395 non-null  float64
 7   pop_natHI_PI_only                  85395 non-null  float64
 8   pop_white                          85395 non-null  float64
 9   pop_black                          85395 non-null  float64
 10  pop_amInd_alNat                    85395 non-null  float64
 11  pop_asian                          85395 non-null  flo