In [1]:
import sys

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
from rasterstats import zonal_stats

# Define TIF File Paths

In [None]:
# Root folder that holds the suitability rasters, one candidate per platform.
main_path_mac = "/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Jenny's Regression/Data Sources/"
main_path_win = "G:/My Drive/Solar PV Lab/NIMBY Project/Jenny's Regression/Data Sources/"
# Use the Windows mount when running on Windows; every other platform keeps the
# macOS root, which is what this cell used unconditionally before.
main_path = main_path_win if sys.platform.startswith("win") else main_path_mac

# Raster file names, relative to the root folder above.
GHI = "GHI-09188ce2.tif"
protected_land = "Protected_Land-5745a356.tif"
habitat = "Habitat-32079c87.tif"
slope = "slope_only-2c1658fa.tif"
popl_dens = "Popl_Density-714f0a64.tif"
Substation = "distance_to_substation_only-f02c9129.tif"
land_cover = "Land_Cover-8a2691e6.tif"

# Define the TIF file paths as a list (order matters to downstream consumers).
tif_paths = [GHI, protected_land, habitat, slope, popl_dens, Substation, land_cover]
tif_paths_full = [main_path + path for path in tif_paths]
tif_paths_full

In [None]:
# Attribute table for the block groups; the identifier columns are forced to
# strings so they can be sliced and compared as text later on.
bg_id_dtypes = {'GEOID': str, 'STATEFP': str, 'COUNTYFP': str, 'TRACTCE': str, 'BLKGRPCE': str}
block_group_bounding_boxes = pd.read_csv(
    "../../data/block_group_clean/bounding_box_full_bg.csv",
    dtype=bg_id_dtypes,
)

# Geometry file is not in the repo due to size; the path below is blank and
# has to be filled in before this cell can run.
bg_bounding_box_file_path = ""
bg_bb_full = gpd.read_file(bg_bounding_box_file_path)
bg_bb_full = bg_bb_full[['GEOID', 'geometry']]

# Left-join the CSV attributes onto the geometries by GEOID, so every geometry
# row is kept even when the CSV has no matching record.
block_group_bounding_boxes = bg_bb_full.merge(
    block_group_bounding_boxes,
    how='left',
    on='GEOID',
)

block_group_bounding_boxes.head()

In [None]:
from utils import process_tif_files

# Labels listed in the same order as the rasters in tif_paths (not referenced
# again in this cell).
col_names = [
    "GHI",
    "Protected_Land",
    "Habitat",
    "Slope",
    "Population_Density",
    "Distance_to_Substation",
    "Land_Cover",
]

# Reproject the block groups to EPSG:4326, then pass only the geometry and
# identifying columns on to process_tif_files.
block_group_bounding_boxes_4326 = block_group_bounding_boxes.to_crs("EPSG:4326")
bg_columns = ['geometry', 'GEOID', 'County Name', 'State', 'TRACTCE', "BLKGRPCE"]
block_group_suitability_scores = process_tif_files(
    tif_paths_full,
    block_group_bounding_boxes_4326[bg_columns],
    nodata_value=np.nan,
    bg=True,
)

# Fix data errors for Connecticut block groups

In [1]:
# Lookup from the three-character code at GEOID[2:5] to the county label that is
# written onto Connecticut rows.
# NOTE(review): 'Middlefield' does not look like a Connecticut county name
# (possibly 'Middlesex' was intended) and 'New London' is listed for two codes;
# confirm these values before relying on the county labels.
mapper = {
    '110': 'Hartford', '190': 'Fairfield', '170': 'Litchfield', 
    '140': 'Middlefield', '120': 'New Haven', '130': 'Tolland',
    '160': 'Windham', '180': 'New London', '150': 'New London'
}

def fix_data_Connecticut(series):
    """Fill in State, County Name, TRACTCE and BLKGRPCE for a Connecticut row.

    Intended for ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    series : pandas.Series
        One row with at least a 'GEOID' entry. GEOIDs starting with '09' are
        treated as Connecticut.

    Returns
    -------
    pandas.Series
        For Connecticut rows, a modified copy; the input row is not mutated.
        Any other row (including one whose GEOID is missing or not a string)
        is returned unchanged.
    """
    geoid = series['GEOID']
    # A missing GEOID (NaN/None) cannot be sliced; leave such rows untouched.
    if not isinstance(geoid, str) or geoid[:2] != '09':
        return series

    # Work on a copy: mutating the row object handed out by `apply` is unsafe.
    fixed = series.copy()
    fixed['State'] = 'Connecticut'
    # Codes absent from `mapper` keep whatever county name the row already had
    # instead of raising KeyError and aborting the whole apply().
    fixed['County Name'] = mapper.get(geoid[2:5], series.get('County Name'))
    fixed['TRACTCE'] = geoid[5:11]
    fixed['BLKGRPCE'] = geoid[11:]
    return fixed

In [None]:
# Run the Connecticut correction over every row of the scores table.
block_group_suitability_scores = block_group_suitability_scores.apply(
    fix_data_Connecticut,
    axis="columns",
)

# Fix state, county, and tract fields that have code-matching errors, and drop states/tracts that are not needed

In [2]:
# FIPS reference table; both code columns are read as strings.
FIPS = pd.read_csv(
    "../../data/extras/US_FIPS_Codes.csv",
    dtype={'FIPS State': str, 'FIPS County': str},
)

# State code -> state name.
fips_by_state = FIPS.set_index('FIPS State')
state_dict = fips_by_state['State'].to_dict()

# (state code, county code) -> county name; the county code alone is not used
# as a key, so the lookup is always made together with the state code.
fips_by_state_county = FIPS.set_index(['FIPS State', 'FIPS County'])
county_dict = fips_by_state_county['County Name'].to_dict()
def fix_tract(series):
    """Rebuild State, County Name, TRACTCE and BLKGRPCE for one row from its GEOID.

    Intended for ``DataFrame.apply(..., axis=1)``. Relies on the module-level
    lookups ``state_dict`` (state code -> state name) and ``county_dict``
    ((state code, county code) -> county name).

    Parameters
    ----------
    series : pandas.Series
        One row with at least a 'GEOID' entry, sliced as
        [0:2] state, [2:5] county, [5:11] tract, [11:] block group.

    Returns
    -------
    pandas.Series
        A modified copy of the row; the input row is not mutated. State codes
        '02' and '15' are always labelled 'Alaska' and 'Hawaii'. Codes missing
        from the lookups yield NaN for the corresponding field. A row whose
        GEOID is missing or not a string is returned unchanged.
    """
    geoid = series['GEOID']
    # `geoid != np.nan` is always True (NaN never compares equal to anything),
    # so check the type instead: a missing GEOID cannot be sliced.
    if not isinstance(geoid, str):
        return series

    state_code = geoid[:2]
    # Work on a copy: mutating the row object handed out by `apply` is unsafe.
    fixed = series.copy()
    if state_code == '02':
        fixed['State'] = 'Alaska'
    elif state_code == '15':
        fixed['State'] = 'Hawaii'
    else:
        # Unknown state codes become NaN so the caller can drop them.
        fixed['State'] = state_dict.get(state_code, np.nan)
    fixed['TRACTCE'] = geoid[5:11]
    fixed['BLKGRPCE'] = geoid[11:]
    # County names are keyed by (state, county) because county codes repeat
    # across states; unknown pairs become NaN.
    fixed['County Name'] = county_dict.get((state_code, geoid[2:5]), np.nan)
    return fixed

In [None]:
# Re-derive the location fields for every row, then discard rows whose state or
# county could not be resolved.
block_group_suitability_scores = (
    block_group_suitability_scores
    .apply(fix_tract, axis="columns")
    .dropna(subset=['State', 'County Name'])
)

# Persist the cleaned scores next to the notebook, without the index column.
block_group_suitability_scores.to_csv('block_group_suitability_scores.csv', index=False)