In [1]:
import os
import pandas as pd
import rasterio
import rioxarray as rxr
import geopandas as gpd
import numpy as np
from rasterstats import zonal_stats

## Define all paths for the tif files

In [2]:
main_path = "/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Jenny's Regression/Data Sources/"

GHI= "GHI-09188ce2.tif"
protected_land = "Protected_Land-5745a356.tif"
habitat= "Habitat-32079c87.tif"
slope= "slope_only-2c1658fa.tif"
popl_dens= "Popl_Density-714f0a64.tif"
Substation = "distance_to_substation_only-f02c9129.tif"
land_cover="Land_Cover-8a2691e6.tif"

# Define the TIF file paths as a list
tif_paths = [GHI, protected_land, habitat, slope, popl_dens, Substation, land_cover]
tif_paths_full = [main_path + path for path in tif_paths]
tif_paths_full

["/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Jenny's Regression/Data Sources/GHI-09188ce2.tif",
 "/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Jenny's Regression/Data Sources/Protected_Land-5745a356.tif",
 "/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Jenny's Regression/Data Sources/Habitat-32079c87.tif",
 "/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Jenny's Regression/Data Sources/slope_only-2c1658fa.tif",
 "/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Jenny's Regression/Data Sources/Popl_Density-714f0a64.tif",
 "/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Jenny's Regression/Data Sources/distance_to_substation_only-f02c9129.tif",
 "/

## Load the Counties Bounding Boxes and get the state and county names

In [3]:
# Load FIPS and county data for names
county_bounding_boxes = pd.read_csv("/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Solar NIMBY Final/Solar-NIMBY/data cleaning/data/county_bounding_boxes.csv", dtype={"FIPS State": str, "FIPS County": str})

county_bounding_boxes.head()

Unnamed: 0,GEOID,State,County Name,area km2,area mi2,FIPS State,FIPS County
0,1001,Alabama,Autauga,1565.322757,604.374247,1,1
1,1003,Alabama,Baldwin,4352.548564,1680.527706,1,3
2,1005,Alabama,Barbour,2342.545642,904.461557,1,5
3,1007,Alabama,Bibb,1622.29567,626.371603,1,7
4,1009,Alabama,Blount,1685.09807,650.619735,1,9


In [4]:
# load the full county boxes 
county_bounding_boxes_full = gpd.read_file("/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Solar NIMBY Final/Solar-NIMBY/county_box/US County Boundary 2018/cb_2018_us_county_500k.shp", dtype={'STATEFP': str}).rename(columns={"STATEFP": "FIPS State", "COUNTYFP": "FIPS County"})
county_bounding_boxes_full.head()

Unnamed: 0,FIPS State,FIPS County,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,21,7,516850,0500000US21007,21007,Ballard,6,639387454,69473325,"POLYGON ((-89.18137 37.04630, -89.17938 37.053..."
1,21,17,516855,0500000US21017,21017,Bourbon,6,750439351,4829777,"POLYGON ((-84.44266 38.28324, -84.44114 38.283..."
2,21,31,516862,0500000US21031,21031,Butler,6,1103571974,13943044,"POLYGON ((-86.94486 37.07341, -86.94346 37.074..."
3,21,65,516879,0500000US21065,21065,Estill,6,655509930,6516335,"POLYGON ((-84.12662 37.64540, -84.12483 37.646..."
4,21,69,516881,0500000US21069,21069,Fleming,6,902727151,7182793,"POLYGON ((-83.98428 38.44549, -83.98246 38.450..."


In [5]:
# Merge the Files
county_bounding_boxes_full = county_bounding_boxes_full.merge(county_bounding_boxes, on=["FIPS State", "FIPS County"], how="left")

county_bounding_boxes_full.head()

Unnamed: 0,FIPS State,FIPS County,COUNTYNS,AFFGEOID,GEOID_x,NAME,LSAD,ALAND,AWATER,geometry,GEOID_y,State,County Name,area km2,area mi2
0,21,7,516850,0500000US21007,21007,Ballard,6,639387454,69473325,"POLYGON ((-89.18137 37.04630, -89.17938 37.053...",21007.0,Kentucky,Ballard,708.542173,273.56955
1,21,17,516855,0500000US21017,21017,Bourbon,6,750439351,4829777,"POLYGON ((-84.44266 38.28324, -84.44114 38.283...",21017.0,Kentucky,Bourbon,755.280071,291.615146
2,21,31,516862,0500000US21031,21031,Butler,6,1103571974,13943044,"POLYGON ((-86.94486 37.07341, -86.94346 37.074...",21031.0,Kentucky,Butler,1117.793121,431.58216
3,21,65,516879,0500000US21065,21065,Estill,6,655509930,6516335,"POLYGON ((-84.12662 37.64540, -84.12483 37.646...",21065.0,Kentucky,Estill,662.202105,255.677557
4,21,69,516881,0500000US21069,21069,Fleming,6,902727151,7182793,"POLYGON ((-83.98428 38.44549, -83.98246 38.450...",21069.0,Kentucky,Fleming,909.915069,351.320028


In [6]:
col_names = ["GHI", "Protected_Land", "Habitat", "Slope", "Population_Density", "Distance_to_Substation", "Land_Cover"]

def calculate_zonal_stats(tif_path, geodataframe, nodata_value):
    with rasterio.open(tif_path) as src:
        affine = src.transform
        array = src.read(1)  # Read the first band
        array = np.where(np.isnan(array), nodata_value, array)  # Replace NaNs with nodata_value
        # Debugging: Check raster data and affine transformation
        print(f"Raster data shape: {array.shape}")
        print(f"Affine transformation: {affine}")
        # Check the CRS of the raster
        raster_crs = src.crs
        print(f"Raster CRS: {raster_crs}")
        if geodataframe.crs != raster_crs:
            geodataframe = geodataframe.to_crs(raster_crs)
            
        # filter out values that are less than 100
        array = np.where(array > 101, nodata_value, array)

    # Calculate zonal statistics
    stats = zonal_stats(geodataframe, array, affine=affine, stats="mean", nodata=nodata_value, all_touched=True)
    print(stats)
    # Extract mean values and add to GeoDataFrame
    mean_values = [stat['mean'] for stat in stats]
    return mean_values

def process_tif_files(tif_filepaths, bounding_box, nodata_value=-9999):
    x = bounding_box.copy()

    # Initialize results DataFrame
    results = pd.DataFrame(index=x.index, columns=col_names)

    for tif_path, col_name in zip(tif_filepaths, col_names):
        print(f"Processing {tif_path} for {col_name}")

        # Calculate mean values using zonal stats
        mean_values = calculate_zonal_stats(tif_path, x, nodata_value)

        # Update results DataFrame
        results[col_name] = mean_values

    # Add county and state information
    results["County Name"] = bounding_box["County Name"]
    results["State"] = bounding_box["State"]

    return results

In [8]:
# project the bounding box to the same crs as the tif files
county_bounding_boxes_full = county_bounding_boxes_full.to_crs("EPSG:4326")
technoecon_suitability_scores = process_tif_files(tif_paths_full, county_bounding_boxes_full[['geometry', 'County Name', 'State']], nodata_value=np.nan)

Processing /Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Jenny's Regression/Data Sources/GHI-09188ce2.tif for GHI
Raster data shape: (14885, 27519)
Affine transformation: | 250.00, 0.00,-14099538.89|
| 0.00,-250.00, 6430122.05|
| 0.00, 0.00, 1.00|
Raster CRS: EPSG:3857
[{'mean': 15.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 5.0}, {'mean': 8.130010807006961}, {'mean': 5.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 20.0}, {'mean': 20.0}, {'mean': 20.0}, {'mean': 20.0}, {'mean': 20.0}, {'mean': 15.0}, {'mean': 20.0}, {'mean': 15.0}, {'mean': 20.0}, {'mean': 17.45108896271687}, {'mean': None}, {'mean': None}, {'mean': None}, {'mean': None}, {'mean': 20.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 18.340024580090127}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 15.0}, {'mean': 41.042323684791995}, {'mean': 42

In [9]:
technoecon_suitability_scores.head()

Unnamed: 0,GHI,Protected_Land,Habitat,Slope,Population_Density,Distance_to_Substation,Land_Cover,County Name,State
0,15.0,91.547835,33.748285,91.02953,98.634118,53.241671,67.860969,Ballard,Kentucky
1,15.0,99.583685,82.840678,71.72398,96.682895,60.58299,85.691192,Bourbon,Kentucky
2,15.0,99.896136,39.236428,42.759798,98.23951,50.0,61.839634,Butler,Kentucky
3,15.0,97.733883,29.82652,11.827322,93.3528,52.380053,56.737948,Estill,Kentucky
4,15.0,98.000821,49.732293,32.705892,99.466845,50.0,71.555286,Fleming,Kentucky


In [10]:
technoecon_suitability_scores[technoecon_suitability_scores['county'] == 'Autauga']

KeyError: 'county'

In [11]:
technoecon_suitability_scores.to_csv('technoecon_suitability_scores.csv', index=False)