# Data Gathering and Geoenabling

In [2]:
# Import the requisite packages
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
from census import Census 
from us import states 

In [3]:
from helpers import CENSUS_KEY

In [4]:
# Create the Census API client. The key is imported from the local `helpers`
# module, so it is never written into the notebook itself.
c = Census(key=CENSUS_KEY)

In [36]:
# ACS 5-year variable codes requested for every tract, chosen as indicators of
# an area's economic vitality. The trailing comments are working labels.
# NOTE(review): a few labels look looser than the official ACS definitions --
# B25008_* appear to count *people* in owner-/renter-occupied units (not units),
# B19059_* *households* with/without retirement income, B17013_002E *families*
# below the poverty level, and B08013_001E *aggregate* travel time. Confirm
# against the ACS variable list before interpreting the derived columns.
fields = [
    "NAME",
    # -- population and housing --
    "B01003_001E",  # total population
    "B25077_001E",  # median value of owner-occupied units
    "B25026_001E",  # total population in occupied housing units
    "B25008_002E",  # owner occupied (labelled "number of owner occupied units")
    "B25008_003E",  # renter occupied (labelled "number of renter occupied units")
    # -- educational attainment --
    "B06009_002E",  # less than a high school diploma
    "B06009_003E",  # high school diploma or equivalent
    "B06009_004E",  # some college / associate's degree
    "B06009_005E",  # bachelor's degree
    "B06009_006E",  # graduate degree
    # -- age --
    "B01002_001E",  # median age
    # -- individual income brackets --
    "B06010_004E",  # income less than 9999
    "B06010_005E",  # income between 10000 and 14999
    "B06010_006E",  # income between 15000 and 24999
    "B06010_007E",  # income between 25000 and 34999
    "B06010_008E",  # income between 35000 and 49999
    "B06010_009E",  # income between 50000 and 64999
    "B06010_010E",  # income between 65000 and 74999
    "B06010_011E",  # income of 75000 or more
    # -- labor force, retirement, commute, poverty --
    "B28007_009E",  # in labor force and unemployed
    "B19059_002E",  # with retirement income
    "B19059_003E",  # without retirement income
    "B08013_001E",  # travel time to work in minutes
    "B17013_002E",  # income below poverty level in past 12 months
    # -- race tables --
    "C02003_001E",  # detailed race
    "B99021_001E",  # allocation of race
    "B02001_001E",  # race
]

> ACS 5-year data for 2022, 2019, and 2016

In [62]:
# Pull the 2022 ACS 5-year estimates for every census tract in New Mexico.
# Variable list: https://api.census.gov/data/2022/acs/acs5/variables.html
# Client library: https://pypi.org/project/census/
nm_census_2022 = c.acs5.state_county_tract(
    fields=fields, state_fips=states.NM.fips, county_fips="*", tract="*", year=2022
)

# Tabulate the API response; besides the requested fields it carries the
# state/county/tract identifier columns used later to build GEOID.
nm_df_2022 = pd.DataFrame(nm_census_2022)
nm_df_2022.shape

(612, 31)

In [63]:
# Pull the 2019 ACS 5-year estimates for every census tract in New Mexico.
# Variable list: https://api.census.gov/data/2019/acs/acs5/variables.html
# Client library: https://pypi.org/project/census/
nm_census_2019 = c.acs5.state_county_tract(
    fields=fields, state_fips=states.NM.fips, county_fips="*", tract="*", year=2019
)

# Tabulate the API response; besides the requested fields it carries the
# state/county/tract identifier columns used later to build GEOID.
nm_df_2019 = pd.DataFrame(nm_census_2019)
nm_df_2019.shape

(499, 31)

In [64]:
# Pull the 2016 ACS 5-year estimates for every census tract in New Mexico.
# Variable list: https://api.census.gov/data/2016/acs/acs5/variables.html
# Client library: https://pypi.org/project/census/
# B28007_009E is excluded from this request, so the 2016 frame has one column
# fewer than the other years.
# NOTE(review): presumably that table is not published for the 2016 vintage -- confirm.
nm_census_2016 = c.acs5.state_county_tract(
    fields=[code for code in fields if code != 'B28007_009E'],
    state_fips=states.NM.fips, county_fips="*", tract="*", year=2016
)

# Tabulate the API response; besides the requested fields it carries the
# state/county/tract identifier columns used later to build GEOID.
nm_df_2016 = pd.DataFrame(nm_census_2016)
nm_df_2016.shape

(499, 30)

> New Mexico state FIPS code: 35
> Projected CRS used for all layers: EPSG:2258 (NAD83 / New Mexico Central, ftUS)

In [65]:
# Download the 2022 TIGER/Line tract boundaries for New Mexico (state FIPS 35)
# and reproject them to EPSG:2258 in one chained expression.
nm_tract_2022 = gpd.read_file(
    "https://www2.census.gov/geo/tiger/TIGER2022/TRACT/tl_2022_35_tract.zip"
).to_crs(epsg=2258)
nm_tract_2022.shape

(612, 13)

In [66]:
# Download the 2019 TIGER/Line tract boundaries for New Mexico (state FIPS 35)
# and reproject them to EPSG:2258 in one chained expression.
nm_tract_2019 = gpd.read_file(
    "https://www2.census.gov/geo/tiger/TIGER2019/TRACT/tl_2019_35_tract.zip"
).to_crs(epsg=2258)
nm_tract_2019.shape

(499, 13)

In [67]:
# Download the 2016 TIGER/Line tract boundaries for New Mexico (state FIPS 35)
# and reproject them to EPSG:2258 in one chained expression.
nm_tract_2016 = gpd.read_file(
    "https://www2.census.gov/geo/tiger/TIGER2016/TRACT/tl_2016_35_tract.zip"
).to_crs(epsg=2258)
nm_tract_2016.shape

(499, 13)

>Create GEOID variable to merge with census data

In [68]:
# Concatenate the state, county and tract codes into a single GEOID column,
# which is the key later used to join each table to the tract geometries.
for acs_frame in (nm_df_2022, nm_df_2019, nm_df_2016):
    acs_frame['GEOID'] = acs_frame['state'] + acs_frame['county'] + acs_frame['tract']

In [69]:
# The separate identifier parts are redundant once GEOID exists, so drop them.
geoid_parts = ['state', 'county', 'tract']
nm_df_2022 = nm_df_2022.drop(columns=geoid_parts)
nm_df_2019 = nm_df_2019.drop(columns=geoid_parts)
nm_df_2016 = nm_df_2016.drop(columns=geoid_parts)

In [70]:
# Attach the ACS attributes to the tract geometries. The geometry frame is on
# the left so each result keeps the tract geometries; the join is the default
# inner join, spelled out here so the row-dropping behaviour is explicit.
nm_merge_2022 = nm_tract_2022.merge(right=nm_df_2022, how='inner', on='GEOID')
nm_merge_2019 = nm_tract_2019.merge(right=nm_df_2019, how='inner', on='GEOID')
nm_merge_2016 = nm_tract_2016.merge(right=nm_df_2016, how='inner', on='GEOID')

# Row counts should match the tract layers if every tract found its ACS record.
print(f'{nm_merge_2022.shape=}\n{nm_merge_2019.shape=}\n{nm_merge_2016.shape=}')

nm_merge_2022.shape=(612, 41)
nm_merge_2019.shape=(499, 41)
nm_merge_2016.shape=(499, 40)


In [71]:
# Map raw ACS variable codes to short, readable column names.
# NOTE(review): the descriptions are the notebook's working labels; confirm the
# exact universes (people vs. units vs. households) against the ACS variable list.
acs_column_names = {
    "B01003_001E": "TotPop",             # total population
    "B25077_001E": "MedVal_OwnOccUnit",  # median value of owner-occupied units
    "B25026_001E": "TotPopOccUnits",     # total population in occupied housing units
    "B25008_002E": "TotNumOwnOccUnit",   # owner occupied
    "B25008_003E": "TotNumRentOccUnit",  # renter occupied
    "B06009_002E": "PopLTHSDip",         # less than a high school diploma
    "B06009_003E": "PopHSDip",           # high school diploma or equivalent
    "B06009_004E": "PopAssoc",           # some college / associate's degree
    "B06009_005E": "PopBA",              # bachelor's degree
    "B06009_006E": "PopGrad",            # graduate degree
    "B01002_001E": "MedAge",             # median age
    "B06010_004E": "PopIncLT10",         # income less than 9999
    "B06010_005E": "PopInc1015",         # income between 10000 and 14999
    "B06010_006E": "PopInc1525",         # income between 15000 and 24999
    "B06010_007E": "PopInc2535",         # income between 25000 and 34999
    "B06010_008E": "PopInc3550",         # income between 35000 and 49999
    "B06010_009E": "PopInc5065",         # income between 50000 and 64999
    "B06010_010E": "PopInc6575",         # income between 65000 and 74999
    "B06010_011E": "PopIncGT75",         # income of 75000 or more
    "B28007_009E": "UnempPop",           # in labor force and unemployed
    "B19059_002E": "RetPop",             # with retirement income
    "B19059_003E": "RetPopNoRetInc",     # without retirement income
    "B08013_001E": "TrvTimWrk",          # travel time to work in minutes
    "B17013_002E": "PopBlwPovLvl",       # income below poverty level in past 12 months
    "C02003_001E": "RaceDet",            # detailed race
    "B99021_001E": "RaceAlloc",          # allocation of race
    "B02001_001E": "Race",               # race
}

# Rename in place so the three existing frames are updated. The 2016 frame was
# fetched without B28007_009E and simply gets no UnempPop column, because
# rename skips mapping keys that are not present.
for df in (nm_merge_2022, nm_merge_2019, nm_merge_2016):
    df.rename(columns=acs_column_names, inplace=True)

>Create a subset of variables of interest

In [72]:
# Renamed attribute columns carried forward into the analysis subset.
var_list = [
    # -- population and tenure --
    "TotPop",             # total population
    "TotPopOccUnits",     # total population in occupied housing units
    "TotNumOwnOccUnit",   # owner occupied
    "TotNumRentOccUnit",  # renter occupied
    # -- educational attainment --
    "PopLTHSDip",         # less than a high school diploma
    "PopHSDip",           # high school diploma or equivalent
    "PopAssoc",           # some college / associate's degree
    "PopBA",              # bachelor's degree
    "PopGrad",            # graduate degree
    # -- individual income brackets --
    "PopIncLT10",         # income less than 9999
    "PopInc1015",         # income between 10000 and 14999
    "PopInc1525",         # income between 15000 and 24999
    "PopInc2535",         # income between 25000 and 34999
    "PopInc3550",         # income between 35000 and 49999
    "PopInc5065",         # income between 50000 and 64999
    "PopInc6575",         # income between 65000 and 74999
    "PopIncGT75",         # income of 75000 or more
    # -- labor force, retirement, poverty --
    "UnempPop",           # in labor force and unemployed (absent from the 2016 frame)
    "RetPop",             # with retirement income
    "RetPopNoRetInc",     # without retirement income
    "PopBlwPovLvl",       # income below poverty level in past 12 months
    # -- medians and commute --
    "MedVal_OwnOccUnit",  # median value of owner-occupied units
    "MedAge",             # median age
    "TrvTimWrk",          # travel time to work in minutes
    # -- race tables --
    "RaceDet",            # detailed race
    "RaceAlloc",          # allocation of race
    "Race",               # race
]

In [73]:
# Temporarily add the geometry column to the selection list so the subset
# frames keep their geometries. Guarded so that re-running this cell cannot
# append 'geometry' twice, which would duplicate the geometry column in the
# later column selection.
if 'geometry' not in var_list:
    var_list.append('geometry')

In [74]:
# Preview the column list that applies to 2016, i.e. everything except "UnempPop".
[name for name in var_list if name != "UnempPop"]

['TotPop',
 'TotPopOccUnits',
 'TotNumOwnOccUnit',
 'TotNumRentOccUnit',
 'PopLTHSDip',
 'PopHSDip',
 'PopAssoc',
 'PopBA',
 'PopGrad',
 'PopIncLT10',
 'PopInc1015',
 'PopInc1525',
 'PopInc2535',
 'PopInc3550',
 'PopInc5065',
 'PopInc6575',
 'PopIncGT75',
 'RetPop',
 'RetPopNoRetInc',
 'PopBlwPovLvl',
 'MedVal_OwnOccUnit',
 'MedAge',
 'TrvTimWrk',
 'RaceDet',
 'RaceAlloc',
 'Race',
 'geometry']

In [75]:
# Keep only the analysis columns plus the geometry.
# The column list is built so that 'geometry' appears exactly once and last,
# whether or not var_list currently contains it. The original selection relied
# on the preceding cell having appended 'geometry' exactly once, so an
# out-of-order or repeated run could drop or duplicate the geometry column.
subset_cols = [col for col in var_list if col != 'geometry'] + ['geometry']

nm_merge2_2022 = nm_merge_2022[subset_cols]
nm_merge2_2019 = nm_merge_2019[subset_cols]
# The 2016 pull did not include the unemployment variable, so skip that column.
nm_merge2_2016 = nm_merge_2016[[col for col in subset_cols if col != "UnempPop"]]

In [76]:
# Restore var_list to attribute columns only. Filtering in place (instead of
# list.remove) strips every 'geometry' entry and does not raise ValueError if
# the cell is re-run after the entry is already gone.
var_list[:] = [col for col in var_list if col != 'geometry']

>Remove areas with no population

In [78]:
# Keep only tracts whose total population is greater than zero.
# reset_index() without drop=True keeps the pre-filter row labels as an extra
# `index` column, exactly as the in-place call did.
# NOTE(review): if that `index` column is not needed downstream, consider drop=True.
nm_merged_2022 = nm_merge2_2022.loc[nm_merge2_2022["TotPop"] > 0].reset_index()
nm_merged_2022.shape

(604, 29)

In [79]:
# Keep only tracts whose total population is greater than zero.
# reset_index() without drop=True keeps the pre-filter row labels as an extra
# `index` column, exactly as the in-place call did.
# NOTE(review): if that `index` column is not needed downstream, consider drop=True.
nm_merged_2019 = nm_merge2_2019.loc[nm_merge2_2019["TotPop"] > 0].reset_index()
nm_merged_2019.shape

(498, 29)

In [80]:
# Keep only tracts whose total population is greater than zero.
# reset_index() without drop=True keeps the pre-filter row labels as an extra
# `index` column, exactly as the in-place call did.
# NOTE(review): if that `index` column is not needed downstream, consider drop=True.
nm_merged_2016 = nm_merge2_2016.loc[nm_merge2_2016["TotPop"] > 0].reset_index()
nm_merged_2016.shape

(498, 28)

In [81]:
# Write the populated-tract layers (attributes + geometry) to GeoPackage
# files, one per ACS vintage.
census_exports = {
    '../../data/local/nm_census_2022.gpkg': nm_merged_2022,
    '../../data/local/nm_census_2019.gpkg': nm_merged_2019,
    '../../data/local/nm_census_2016.gpkg': nm_merged_2016,
}
for gpkg_path, census_layer in census_exports.items():
    census_layer.to_file(gpkg_path)

In [82]:
# Write the reprojected tract boundary layers (all tracts, no ACS attributes)
# to GeoPackage files, one per TIGER vintage.
tract_exports = {
    '../../data/local/nm_tract_2022.gpkg': nm_tract_2022,
    '../../data/local/nm_tract_2019.gpkg': nm_tract_2019,
    '../../data/local/nm_tract_2016.gpkg': nm_tract_2016,
}
for gpkg_path, tract_layer in tract_exports.items():
    tract_layer.to_file(gpkg_path)