In [1]:
import os
import re
import random
import string
import requests

import numpy as np
import pandas as pd
import geopandas as gpd

import arcpy
import arcpy.sa as sa
import arcpy.mp as mp
arcpy.CheckOutExtension("Spatial")

import time

import shutil

import jpl_spatial

Johann's spatial functions loaded successfully!


#### Define a function to clean text to make appropriate table names:

In [2]:
def func_clean_table_name(filename):
    # Replace forbidden characters with underscores
    cleaned_name = re.sub(r'[^a-zA-Z0-9_]', '_', os.path.splitext(filename)[0])
    # Ensure the name doesn't start with a number
    if cleaned_name[0].isdigit():
        cleaned_name = f"_{cleaned_name}"
    return cleaned_name

#### Get the path of this notebook and use it to generate path for input shapefile:

In [3]:
current_dir = os.getcwd()
shapefile_cbg = r"input\zones\cbg_kontur.shp"
shapefile_cbg_path = os.path.join(current_dir, shapefile_cbg)
output_folder = os.path.join(current_dir, "output")

print(f"Location of this notebook: {current_dir}")
print(f"Location of CBG SHP: {shapefile_cbg_path}")
print(f"Location for output: {output_folder}")

Location of this notebook: C:\github\CCSVI\Scripts\Combined
Location of CBG SHP: C:\github\CCSVI\Scripts\Combined\input\zones\cbg_kontur.shp
Location for output: C:\github\CCSVI\Scripts\Combined\output


#### Define output folder and temp folder and CREATE if don't exist. Then clean it in case it has files already:

In [4]:
os.makedirs(output_folder, exist_ok=True)
jpl_spatial.func_clean_folder(output_folder)
temp_folder = os.path.join(output_folder, "temp")
os.makedirs(temp_folder, exist_ok=True)
jpl_spatial.func_clean_folder(temp_folder)

#### The following lines run the zonal statistics function for each raster independently.
#### The outputs are all generated as DBF files with names originating from the rasters.
#### As many of these lines can be added as necessary, just change the inputs for the appropriate raster and statistic:

In [5]:
jpl_spatial.func_zonal_stats(shapefile_cbg_path, "GEOIDFQ", r"input\environmental\staterf_inann.tif", output_folder, "MEAN")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\staterf_inann_mean.dbf'

In [6]:
jpl_spatial.func_zonal_stats(shapefile_cbg_path, "GEOIDFQ", r"input\environmental\igtn_prob_test.tif", output_folder, "MAXIMUM")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\igtn_prob_test_maximum.dbf'

In [7]:
jpl_spatial.func_zonal_stats(shapefile_cbg_path, "GEOIDFQ", r"input\environmental\n10_landslide_susc.tif", output_folder, "MAXIMUM")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\n10_landslide_susc_maximum.dbf'

In [8]:
jpl_spatial.func_zonal_stats(shapefile_cbg_path, "GEOIDFQ", r"input\environmental\cat4_mom_slosh_hightide.tif", output_folder, "MAXIMUM")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\cat4_mom_slosh_hightide_maximum.dbf'

In [9]:
jpl_spatial.func_vector_acres(shapefile_cbg_path, "GEOIDFQ", r"input\environmental\slrxa_3pt2ft.shp", output_folder)

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\slrxa_3pt2ft_acres.shp'

In [10]:
jpl_spatial.func_vector_match(shapefile_cbg_path, "GEOIDFQ", r"input\environmental\FEMA_SFHA.shp", output_folder, "INTERSECT")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\FEMA_SFHA_intersect.dbf'

In [11]:
jpl_spatial.func_spatialcount(shapefile_cbg_path, "GEOIDFQ", r"input\facilities\emergency_shelters.shp", output_folder, "0.0 miles")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\emergency_shelters_count.shp'

In [12]:
jpl_spatial.func_spatialcount(shapefile_cbg_path, "GEOIDFQ", r"input\facilities\wastewater_plants.shp", output_folder, "0.0 miles")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\wastewater_plants_count.shp'

In [13]:
jpl_spatial.func_spatialcount(shapefile_cbg_path, "GEOIDFQ", r"input\facilities\police_stations.shp", output_folder, "0.0 miles")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\police_stations_count.shp'

In [14]:
jpl_spatial.func_spatialcount(shapefile_cbg_path, "GEOIDFQ", r"input\facilities\fire_stations.shp", output_folder, "0.0 miles")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\fire_stations_count.shp'

In [15]:
jpl_spatial.func_spatialcount(shapefile_cbg_path, "GEOIDFQ", r"input\infrastructure\onsite_sewage_disposal.shp", output_folder, "0.0 miles")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\temp\\onsite_sewage_disposal_count.shp'

#### Combine the analysis output tables into one big table:

In [16]:
jpl_spatial.func_combine_tables(temp_folder, output_folder, "GEOIDFQ", "combined_spatial_stats.csv")

'C:\\github\\CCSVI\\Scripts\\Combined\\output\\combined_spatial_stats.csv'

## The following section pulls Census data from the API:

#### Create variables for each part of the Census API URL. This makes it easy to alter the values later:

In [17]:
# Default values
census_base_url = "https://api.census.gov/"
census_dataset_url = "data/2022/acs/acs5"
decennial_dataset_url = "data/2020/dec/pl"
census_variables1 = "GEO_ID,NAME,B01001_001E,B25034_006E,B25034_007E,B25034_008E,B25034_009E,B25034_010E,B25034_011E,B25046_001E,B27010_017E,B27010_033E,B27010_050E,B27010_066E,B28002_013E,C16002_004E,C16002_007E,C16002_010E,C16002_013E,B09019_005E,B09019_008E,B19101_001E,C17002_002E,C17002_003E,C17002_004E,C17002_005E,C17002_006E,C17002_007E,B20005_002E,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_020E,B01001_021E"
census_variables2 = "GEO_ID,B05013_017E,B05013_018E,B05013_019E,B01001_017E,B01001_018E,B01001_019E,B01001_016E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_007E,B02001_008E,B25008_003E"
decennial_variables3 = "GEO_ID,P5_002N,P5_003N,P5_004N,P5_005N,P5_008N,P5_009N,P5_010N"
# Geography is very important to structure correctly. Sometimes if it's incorrect, the data pull still works but
# will generate an error for higher counts of variables (like we're pulling here)
in_geography = r"&for=block%20group:*&in=state:15%20county:*%20tract:*"

In [18]:
census_combined_url = f"{census_base_url}{census_dataset_url}"
decennial_combined_url = f"{census_base_url}{decennial_dataset_url}"

cbg_url1 = f"{census_combined_url}?get={census_variables1}{in_geography}"
cbg_url2 = f"{census_combined_url}?get={census_variables2}{in_geography}"
cbg_url3 = f"{decennial_combined_url}?get={decennial_variables3}{in_geography}"

#### Create variables for the Census variable lookup API URLs.  
#### Variable lookup links:  
##### https://api.census.gov/data/2020/dec/pl/variables.html  
##### https://api.census.gov/data/2022/acs/acs5/variables.html  
##### https://api.census.gov/data/2022/acs/acs5/subject/variables.html    
#### Right now we just need the first 3 but may need others if some variables are added:

In [19]:
census_fields_url1 = f"{census_combined_url}/variables.json" #detailed variable lookup
census_fields_url2 = f"{census_combined_url}/subject/variables.json" #subject variable lookup with stats
census_fields_url3 = f"{decennial_combined_url}/variables.json"
#census_fields_url7 = f"{census_combined_url}/profile/variables.json" #data profiles lookup for demographic/econ data
#census_fields_url8 = f"{census_combined_url}/cprofile/variables.json" #comparison profiles lookup for 5yr to 5yr comps
#census_fields_url9 = f"{census_combined_url}/spt/variables.json" #supplemental estimates lookup

print("Click link to test URL for first set of variables: " + cbg_url1 + "\r")
print("Click link to test URL for second set of variables: " + cbg_url2 + "\r")
print("Click link to test URL for third set of variables (Decennial Census 2020): " + cbg_url3 + "\r")
print("Click link to test URL for the field/variable list with descriptions: " + census_fields_url1 + "\r")
print("Click link to test URL for the field/variable list with descriptions: " + census_fields_url2 + "\r")
print("Click link to test URL for the field/variable list with descriptions: " + census_fields_url3 + "\r")

Click link to test URL for first set of variables: https://api.census.gov/data/2022/acs/acs5?get=GEO_ID,NAME,B01001_001E,B25034_006E,B25034_007E,B25034_008E,B25034_009E,B25034_010E,B25034_011E,B25046_001E,B27010_017E,B27010_033E,B27010_050E,B27010_066E,B28002_013E,C16002_004E,C16002_007E,C16002_010E,C16002_013E,B09019_005E,B09019_008E,B19101_001E,C17002_002E,C17002_003E,C17002_004E,C17002_005E,C17002_006E,C17002_007E,B20005_002E,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_020E,B01001_021E&for=block%20group:*&in=state:15%20county:*%20tract:*
Click link to test URL for second set of variables: https://api.census.gov/data/2022/acs/acs5?get=GEO_ID,B05013_017E,B05013_018E,B05013_019E,B01001_017E,B01001_018E,B01001_019E,B01001_016E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_007E,B02001_008E,B25008_003E&for=block%20group:*&in=state:15%20county:*%20tract:*
Click link to test URL for third

#### Pull multiple variable listings from Census API and store each in a dataframe:

In [20]:
fields_dict1 = requests.get(census_fields_url1).json()
fields_df1 = pd.DataFrame.from_dict(fields_dict1["variables"], orient="index").reset_index()
fields_df1.head()

Unnamed: 0,index,label,concept,predicateType,group,limit,predicateOnly,hasGeoCollectionSupport,attributes,required
0,for,Census API FIPS 'for' clause,Census API Geography Specification,fips-for,,0,True,,,
1,in,Census API FIPS 'in' clause,Census API Geography Specification,fips-in,,0,True,,,
2,ucgid,Uniform Census Geography Identifier clause,Census API Geography Specification,ucgid,,0,True,True,,
3,B24022_060E,Estimate!!Total:!!Female:!!Service occupations...,Sex by Occupation and Median Earnings in the P...,int,B24022,0,,,"B24022_060EA,B24022_060M,B24022_060MA",
4,B19001B_014E,"Estimate!!Total:!!$100,000 to $124,999",Household Income in the Past 12 Months (in 202...,int,B19001B,0,,,"B19001B_014EA,B19001B_014M,B19001B_014MA",


In [21]:
fields_dict2 = requests.get(census_fields_url2).json()
fields_df2 = pd.DataFrame.from_dict(fields_dict2["variables"], orient="index").reset_index()
fields_df2.head()

Unnamed: 0,index,label,concept,predicateType,group,limit,predicateOnly,hasGeoCollectionSupport,attributes,required
0,for,Census API FIPS 'for' clause,Census API Geography Specification,fips-for,,0,True,,,
1,in,Census API FIPS 'in' clause,Census API Geography Specification,fips-in,,0,True,,,
2,ucgid,Uniform Census Geography Identifier clause,Census API Geography Specification,ucgid,,0,True,True,,
3,S0804_C04_068E,Estimate!!Public transportation (excluding tax...,Means of Transportation to Work by Selected Ch...,float,S0804,0,,,"S0804_C04_068EA,S0804_C04_068M,S0804_C04_068MA",
4,S0503_C02_078E,Estimate!!Foreign born; Born in Europe!!Civili...,Selected Characteristics of the Foreign-Born P...,float,S0503,0,,,"S0503_C02_078EA,S0503_C02_078M,S0503_C02_078MA",


In [22]:
fields_dict3 = requests.get(census_fields_url3).json()
fields_df3 = pd.DataFrame.from_dict(fields_dict3["variables"], orient="index").reset_index()
fields_df3.head()

Unnamed: 0,index,label,concept,predicateType,group,limit,predicateOnly,hasGeoCollectionSupport,attributes,required
0,for,Census API FIPS 'for' clause,Census API Geography Specification,fips-for,,0,True,,,
1,in,Census API FIPS 'in' clause,Census API Geography Specification,fips-in,,0,True,,,
2,ucgid,Uniform Census Geography Identifier clause,Census API Geography Specification,ucgid,,0,True,True,,
3,P4_003N,!!Total:!!Not Hispanic or Latino:,"HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO...",int,P4,0,,,P4_003NA,
4,P4_015N,!!Total:!!Not Hispanic or Latino:!!Population...,"HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO...",int,P4,0,,,P4_015NA,


In [23]:
fields_df = pd.concat([fields_df1, fields_df2, fields_df3], ignore_index=True)
print("Fields aka variables data have been concatenated")

Fields aka variables data have been concatenated


#### Pull CBG level data from Census API and store in dataframe (first set of variables):

In [24]:
cbg_response = requests.get(cbg_url1)
cbg_data = cbg_response.json()
cbg_df = pd.DataFrame(cbg_data[1:], columns=cbg_data[0])
cbg_df.head()

Unnamed: 0,GEO_ID,NAME,B01001_001E,B25034_006E,B25034_007E,B25034_008E,B25034_009E,B25034_010E,B25034_011E,B25046_001E,...,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_020E,B01001_021E,state,county,tract,block group
0,1500000US150010201001,Block Group 1; Census Tract 201; Hawaii County...,1462,82,294,87,17,5,9,813,...,37,39,76,46,15,24,15,1,20100,1
1,1500000US150010201002,Block Group 2; Census Tract 201; Hawaii County...,602,24,7,57,28,29,76,505,...,9,9,26,3,14,16,15,1,20100,2
2,1500000US150010201003,Block Group 3; Census Tract 201; Hawaii County...,1339,148,53,70,141,67,49,1399,...,14,14,36,32,12,27,15,1,20100,3
3,1500000US150010201004,Block Group 4; Census Tract 201; Hawaii County...,1096,32,48,56,45,24,144,789,...,8,13,62,37,19,13,15,1,20100,4
4,1500000US150010202021,Block Group 1; Census Tract 202.02; Hawaii Cou...,1279,43,31,24,35,22,72,561,...,0,122,9,9,3,0,15,1,20202,1


In [25]:
print(cbg_response.status_code)

200


#### Pull CBG level data from Census API and store in dataframe (second set of variables):

In [26]:
cbg_response2 = requests.get(cbg_url2)
cbg_data2 = cbg_response2.json()
cbg_df2 = pd.DataFrame(cbg_data2[1:], columns=cbg_data2[0])
cbg_df2.head()

Unnamed: 0,GEO_ID,B05013_017E,B05013_018E,B05013_019E,B01001_017E,B01001_018E,B01001_019E,B01001_016E,B01001_044E,B01001_045E,...,B02001_004E,B02001_005E,B02001_006E,B02001_007E,B02001_008E,B25008_003E,state,county,tract,block group
0,1500000US150010201001,,,,32,8,27,63,3,0,...,14,450,418,21,381,720,15,1,20100,1
1,1500000US150010201002,,,,33,10,38,9,5,9,...,0,125,28,69,156,103,15,1,20100,2
2,1500000US150010201003,,,,140,2,41,36,22,54,...,0,272,65,1,563,150,15,1,20100,3
3,1500000US150010201004,,,,41,0,32,18,19,39,...,0,310,70,36,534,256,15,1,20100,4
4,1500000US150010202021,,,,79,4,33,70,20,1,...,1,166,254,22,238,113,15,1,20202,1


In [27]:
cbg_response3 = requests.get(cbg_url3)
#print(cbg_response3.status_code)

cbg_data3 = cbg_response3.json()
cbg_df3 = pd.DataFrame(cbg_data3[1:], columns=cbg_data3[0])
cbg_df3.head()

Unnamed: 0,GEO_ID,P5_002N,P5_003N,P5_004N,P5_005N,P5_008N,P5_009N,P5_010N,state,county,tract,block group
0,1500000US150010201002,0,0,0,0,0,0,0,15,1,20100,2
1,1500000US150010201001,0,0,0,0,0,0,0,15,1,20100,1
2,1500000US150010201003,0,0,0,0,0,0,13,15,1,20100,3
3,1500000US150010201004,0,0,0,0,0,0,0,15,1,20100,4
4,1500000US150010202021,320,320,0,0,0,0,0,15,1,20202,1


#### Find duplicate fields in the second DF and remove all except GEO_ID which is needed to join
#### Merge tables on GEO_ID and add suffix to duplicated field (should just be GEO_ID):

In [28]:
duplicate_columns = [col for col in cbg_df2.columns if col in cbg_df.columns and col != 'GEO_ID']

# Drop them from cbg_df2
cbg_df2 = cbg_df2.drop(columns=duplicate_columns)

# Now safe to join but still add a suffix if a field is duplicated
cbg_df = cbg_df.join(cbg_df2, how='left', lsuffix='', rsuffix='_2')

#cbg_df = cbg_df0

cbg_df.head()

Unnamed: 0,GEO_ID,NAME,B01001_001E,B25034_006E,B25034_007E,B25034_008E,B25034_009E,B25034_010E,B25034_011E,B25046_001E,...,B01001_048E,B01001_049E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_007E,B02001_008E,B25008_003E
0,1500000US150010201001,Block Group 1; Census Tract 201; Hawaii County...,1462,82,294,87,17,5,9,813,...,8,21,178,0,14,450,418,21,381,720
1,1500000US150010201002,Block Group 2; Census Tract 201; Hawaii County...,602,24,7,57,28,29,76,505,...,5,29,222,2,0,125,28,69,156,103
2,1500000US150010201003,Block Group 3; Census Tract 201; Hawaii County...,1339,148,53,70,141,67,49,1399,...,7,29,435,3,0,272,65,1,563,150
3,1500000US150010201004,Block Group 4; Census Tract 201; Hawaii County...,1096,32,48,56,45,24,144,789,...,4,2,146,0,0,310,70,36,534,256
4,1500000US150010202021,Block Group 1; Census Tract 202.02; Hawaii Cou...,1279,43,31,24,35,22,72,561,...,5,0,584,14,1,166,254,22,238,113


#### Find duplicate fields in third dataframe and remove all except GEO_ID which is needed to join
#### Merge tables on GEO_ID and add suffix to duplicated field (should just be GEO_ID):

In [29]:
duplicate_columns = [col for col in cbg_df3.columns if col in cbg_df.columns and col != 'GEO_ID']

# Drop them from cbg_df2
cbg_df3 = cbg_df3.drop(columns=duplicate_columns)

# Now safe to join but still add a suffix if a field is duplicated
cbg_df = cbg_df.join(cbg_df3, how='left', lsuffix='', rsuffix='_2')

cbg_df.head()

Unnamed: 0,GEO_ID,NAME,B01001_001E,B25034_006E,B25034_007E,B25034_008E,B25034_009E,B25034_010E,B25034_011E,B25046_001E,...,B02001_008E,B25008_003E,GEO_ID_2,P5_002N,P5_003N,P5_004N,P5_005N,P5_008N,P5_009N,P5_010N
0,1500000US150010201001,Block Group 1; Census Tract 201; Hawaii County...,1462,82,294,87,17,5,9,813,...,381,720,1500000US150010201002,0,0,0,0,0,0,0
1,1500000US150010201002,Block Group 2; Census Tract 201; Hawaii County...,602,24,7,57,28,29,76,505,...,156,103,1500000US150010201001,0,0,0,0,0,0,0
2,1500000US150010201003,Block Group 3; Census Tract 201; Hawaii County...,1339,148,53,70,141,67,49,1399,...,563,150,1500000US150010201003,0,0,0,0,0,0,13
3,1500000US150010201004,Block Group 4; Census Tract 201; Hawaii County...,1096,32,48,56,45,24,144,789,...,534,256,1500000US150010201004,0,0,0,0,0,0,0
4,1500000US150010202021,Block Group 1; Census Tract 202.02; Hawaii Cou...,1279,43,31,24,35,22,72,561,...,238,113,1500000US150010202021,320,320,0,0,0,0,0


#### Create path for output folder and store it as a variable:

In [30]:
output_folder_path = os.path.join(os.getcwd(), "output")
print ("Output folder: " + output_folder_path)

Output folder: C:\github\CCSVI\Scripts\Combined\output


#### Generate table names from the URL segments so that it's clear what year/data is included in the output table:

In [31]:
#general_table_name = func_clean_table_name(f"{census_dataset_url}")
#fields_table_name = f"{general_table_name}_Variables"
#cbg_table_name = f"{general_table_name}_CBG"

fields_table_name = f"census_variables"
cbg_table_name = f"census_cbg_data"

#### Generate output folder and use path to store CBG data pull as a CSV file

In [32]:
os.makedirs(output_folder_path, exist_ok=True)
cbg_csv_path = os.path.join(output_folder_path, f"{cbg_table_name}.csv")
cbg_df.to_csv(cbg_csv_path, index=False, encoding="utf-8")
print(f"CSV file created successfully: {cbg_csv_path}")

CSV file created successfully: C:\github\CCSVI\Scripts\Combined\output\census_cbg_data.csv


#### Generate path to store Census variable list as a CSV file:

In [33]:
fields_csv_path = os.path.join(output_folder_path, f"{fields_table_name}.csv")
fields_df.to_csv(fields_csv_path, index=False, encoding="utf-8")
print(f"CSV file created successfully: {fields_csv_path}")

CSV file created successfully: C:\github\CCSVI\Scripts\Combined\output\census_variables.csv


In [34]:
print("Census pull complete! Output files are located at: " + output_folder_path)

Census pull complete! Output files are located at: C:\github\CCSVI\Scripts\Combined\output


### Merge spatial statistics and Census data to a new CSV:

In [35]:

# Load the two CSV files
combined_stats = pd.read_csv("output/combined_spatial_stats.csv")
census_data = pd.read_csv("output/census_cbg_data.csv")

# Merge on GEOIDFQ
merged_df = pd.merge(
    combined_stats,
    census_data,
    left_on="GEOIDFQ",
    right_on="GEO_ID",
    how="left"
)

# Save the result to a new CSV file
merged_df.to_csv("output/spatial_stats_and_census.csv", index=False)

In [36]:
import shutil

In [37]:
shutil.move("output/combined_spatial_stats.csv", "output/temp/combined_spatial_stats.csv")
shutil.move("output/census_cbg_data.csv", "output/temp/census_cbg_data.csv")

'output/temp/census_cbg_data.csv'

In [38]:
print("Output files created at: " + output_folder_path)

Output files created at: C:\github\CCSVI\Scripts\Combined\output


### Cleanup the output CSV:

In [56]:
# Load the CSV into a DataFrame
input_path = "output/spatial_stats_and_census.csv"
workingdf = pd.read_csv(input_path)

# Move the original CSV to output/temp/
#temp_dir = "output/temp/"
#os.makedirs(temp_dir, exist_ok=True)  # Ensure the temp folder exists
#shutil.move(input_path, os.path.join(temp_dir, "census_cbg_data.csv"))

# Remove GEO_ID
#workingdf = workingdf.drop(columns=["GEOIDFQ"])

# Reorder columns
cols = workingdf.columns.tolist() #Create list of columns
#cols.remove("GEO_ID") #remove from the LIST
#cols.remove("NAME") #remove from the LIST

#choose front columns to move to beginning
front_cols = [col for col in ["GEO_ID", "NAME"] if col in cols]

# Build the remaining list, excluding front_cols
remaining_cols = [col for col in cols if col not in front_cols]

workingdf = workingdf[front_cols + remaining_cols]

# Drop unneeded fields
#workingdf = workingdf.drop(columns=["GEOIDFQ"])
#workingdf = workingdf.drop(columns=["GEO_ID2.1"])

# make list of fields to drop
pattern = re.compile(r"^GEO_ID_2(\..*)?$")
cols_to_drop = [col for col in workingdf.columns if re.match(pattern, col)]
workingdf = workingdf.drop(columns=cols_to_drop)

# drop additional fields
workingdf = workingdf.drop(columns=["GEOIDFQ"])

In [57]:
workingdf.head()

Unnamed: 0,GEO_ID,NAME,cat4_mom_slosh_hightide_maximum,emergency_shelters_count,FEMA_SFHA_intersect,fire_stations_count,igtn_prob_test_maximum,n10_landslide_susc_maximum,onsite_sewage_disposal_count,police_stations_count,...,B02001_007E,B02001_008E,B25008_003E,P5_002N,P5_003N,P5_004N,P5_005N,P5_008N,P5_009N,P5_010N
0,1500000US150030102051,Block Group 1; Census Tract 102.05; Honolulu C...,12.0,1,1,1,0.4,81.0,339,1,...,9,657,401,0,0,0,0,0,0,26
1,1500000US150030080011,Block Group 1; Census Tract 80.01; Honolulu Co...,12.0,1,1,1,0.84,17.0,1,1,...,5,619,1098,0,0,0,0,0,0,0
2,1500000US150070408001,Block Group 1; Census Tract 408; Kauai County;...,11.0,1,1,1,0.91,81.0,64,1,...,2,269,1925,0,0,0,0,0,0,30
3,1500000US150010202021,Block Group 1; Census Tract 202.02; Hawaii Cou...,13.0,1,1,1,0.87,81.0,210,1,...,22,238,113,320,320,0,0,0,0,0
4,1500000US150010221021,Block Group 1; Census Tract 221.02; Hawaii Cou...,9.0,1,0,1,0.97,81.0,418,1,...,0,146,141,0,0,0,0,0,0,0


In [58]:
# Write the DataFrame to a new CSV file
output_path = "output/cbg_demo_enviro.csv"
workingdf.to_csv(output_path, index=False)