In [54]:
import os
import pandas as pd
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

# Grid resolution used to pick the `*_{RESOLUTION}m.tif` inputs
# (presumably metres, judging by the file suffix -- confirm).
RESOLUTION = 250

# One entry per input raster:
#   (path relative to the data root, output column name, intended dtype).
# Paths always use "/" so that os.path.join / os.path.basename behave the same
# on POSIX and Windows; the NHDPlusID and DEM entries previously used "\",
# which POSIX treats as part of the file name.
raster_paths = [
    (f"Boundary/HUC8_{RESOLUTION}m.tif", "huc8", "int64"),
    (f"Boundary/HUC12_{RESOLUTION}m.tif", "huc12", "int64"),
    (f"Boundary/BaseRaster_{RESOLUTION}m.tif", "Domain", "int32"),
    (f"Boundary/COUNTY_{RESOLUTION}m.tif", "COUNTY", "int32"),
    (f"Geomorphology/Glacial_Landsystems_{RESOLUTION}m.tif", "GeoLandSy", "int32"),
    (f"Geomorphology/Aquifer_Characteristics_Of_Glacial_Drift_{RESOLUTION}m.tif", "AQU_CHAR", "int32"),
    (f"Geomorphology/MI_geol_poly_{RESOLUTION}m.tif", "GeologUnit", "int32"),
    (f"NHDPlusData/NHDPlusID_{RESOLUTION}m.tif", "NHDPlusID", "int64"),
    # The DEM is also the reference grid that every raster is aligned to,
    # so it is the base for cell indexing.
    (f"DEM/DEM_{RESOLUTION}m.tif", "Elevation", "float32"),
    (f"Geomorphology/landforms_{RESOLUTION}m_250Dis.tif", "landforms", "int32"),
    (f"Geomorphology/geomorphons_{RESOLUTION}m_250Dis.tif", "geomorph", "int64"),
    (f"LandUse/landuse_{RESOLUTION}m.tif", "landuse", "int32"),
    (f"Soil/Soil_STATSGO_{RESOLUTION}m.tif", "STATSGO", "int64"),
    (f"Soil/gSSURGO_swat_{RESOLUTION}m.tif", "gSSURGO", "int64"),
    # Kriging predictions.
    (f"Krigging_results/kriging_output_TRANSMSV_1_{RESOLUTION}m.tif", "S_TRSMV_1", "float64"),
    (f"Krigging_results/kriging_output_TRANSMSV_2_{RESOLUTION}m.tif", "S_TRSMV_2", "float64"),
    (f"Krigging_results/kriging_output_AQ_THK_1_{RESOLUTION}m.tif", "S_AQTHK_1", "float64"),
    (f"Krigging_results/kriging_output_AQ_THK_2_{RESOLUTION}m.tif", "S_AQTHK_2", "float64"),
    (f"Krigging_results/kriging_output_H_COND_1_{RESOLUTION}m.tif", "S_HCOND_1", "float64"),
    (f"Krigging_results/kriging_output_H_COND_2_{RESOLUTION}m.tif", "S_HCOND_2", "float64"),
    (f"Krigging_results/kriging_output_V_COND_1_{RESOLUTION}m.tif", "S_VCOND_1", "float64"),
    (f"Krigging_results/kriging_output_V_COND_2_{RESOLUTION}m.tif", "S_VCOND_2", "float64"),
    (f"Krigging_results/kriging_output_SWL_{RESOLUTION}m.tif", "S_SWL", "float64"),
    # Kriging standard errors.
    (f"Krigging_results/kriging_stderr_SWL_{RESOLUTION}m.tif", "er_SWL", "float64"),
    (f"Krigging_results/kriging_stderr_AQ_THK_1_{RESOLUTION}m.tif", "er_AQTHK_1", "float64"),
    (f"Krigging_results/kriging_stderr_AQ_THK_2_{RESOLUTION}m.tif", "er_AQTHK_2", "float64"),
    (f"Krigging_results/kriging_stderr_H_COND_1_{RESOLUTION}m.tif", "er_HCOND_1", "float64"),
    (f"Krigging_results/kriging_stderr_H_COND_2_{RESOLUTION}m.tif", "er_HCOND_2", "float64"),
    (f"Krigging_results/kriging_stderr_V_COND_1_{RESOLUTION}m.tif", "er_VCOND_1", "float64"),
    (f"Krigging_results/kriging_stderr_V_COND_2_{RESOLUTION}m.tif", "er_VCOND_2", "float64"),
    (f"Krigging_results/kriging_stderr_TRANSMSV_1_{RESOLUTION}m.tif", "er_TRSMV_1", "float64"),
    (f"Krigging_results/kriging_stderr_TRANSMSV_2_{RESOLUTION}m.tif", "er_TRSMV_2", "float64"),
]


# Root of the data tree; every path in `raster_paths` is relative to it.
dic = '/data/MyDataBase/SWATGenXAppData/'
output_dir = os.path.join(dic, "all_rasters")

# Create the output directory; exist_ok avoids the check-then-create race.
os.makedirs(output_dir, exist_ok=True)

# The DEM defines the target grid (CRS, affine transform and shape) that every
# other raster is resampled onto.
reference_raster = os.path.join(dic, f"DEM/DEM_{RESOLUTION}m.tif")
with rasterio.open(reference_raster) as src:
    min_transform = src.transform
    min_crs = src.crs
    min_height = src.height
    min_width = src.width

# Parallel lists describing the aligned rasters, in `raster_paths` order.
output_paths = []
col_names = []
cols_dtype = []

for raster_path, col_name, dtype in raster_paths:
    # Normalise separators so an entry written with "\" still resolves on POSIX,
    # where os.path.join/basename treat "\" as an ordinary character.
    relative_path = raster_path.replace("\\", "/")
    full_raster_path = os.path.join(dic, relative_path)
    output_path = os.path.join(output_dir, os.path.basename(relative_path))

    with rasterio.open(full_raster_path) as src:
        # Keep the source metadata but force the reference grid.
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': min_crs,
            'transform': min_transform,
            'width': min_width,
            'height': min_height
        })

        # Resample every band onto the reference grid.
        # NOTE(review): nearest-neighbour is applied to all rasters, including
        # the continuous float layers -- confirm this is intended for those.
        with rasterio.open(output_path, 'w', **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=min_transform,
                    dst_crs=min_crs,
                    resampling=Resampling.nearest
                )

    # Record the raster only once it has been written successfully.
    output_paths.append(output_path)
    col_names.append(col_name)
    cols_dtype.append(dtype)

    print(f"Raster {raster_path} has been clipped and aligned.")

Raster Boundary/HUC8_250m.tif has been clipped and aligned.
Raster Boundary/HUC12_250m.tif has been clipped and aligned.
Raster Boundary/BaseRaster_250m.tif has been clipped and aligned.
Raster Boundary/COUNTY_250m.tif has been clipped and aligned.
Raster Geomorphology/Glacial_Landsystems_250m.tif has been clipped and aligned.
Raster Geomorphology/Aquifer_Characteristics_Of_Glacial_Drift_250m.tif has been clipped and aligned.
Raster Geomorphology/MI_geol_poly_250m.tif has been clipped and aligned.
Raster NHDPlusData\NHDPlusID_250m.tif has been clipped and aligned.
Raster DEM\DEM_250m.tif has been clipped and aligned.
Raster Geomorphology/landforms_250m_250Dis.tif has been clipped and aligned.
Raster Geomorphology/geomorphons_250m_250Dis.tif has been clipped and aligned.
Raster LandUse/landuse_250m.tif has been clipped and aligned.
Raster Soil/Soil_STATSGO_250m.tif has been clipped and aligned.
Raster Soil/gSSURGO_swat_250m.tif has been clipped and aligned.
Raster Krigging_results/krigi

In [55]:
# Build one table with a row per grid cell and a column per aligned raster,
# attach the cell-centre point geometry, filter to valid cells, join the county
# names, normalise dtypes and pickle the result.

# Offset (in pixels) added to the integer row/col index before applying the
# affine transform. 0.5 yields the cell centre, which is what `centroid_df`
# and any later half-cell buffering expect; 0.0 would reproduce the previous
# behaviour, which returned the pixel's upper-left corner.
PIXEL_OFFSET = 0.5

band_columns = {}
grid_shape = None
grid_transform = None
grid_crs = None

for col_name, output_path in zip(col_names, output_paths):
    print(output_path)
    with rasterio.open(output_path) as src:
        band1 = src.read(1)  # only band 1 is used
        if grid_shape is None:
            # Capture the grid definition while the dataset is still open.
            grid_shape = band1.shape
            grid_transform = src.transform
            grid_crs = src.crs
            rows, cols = grid_shape
            print(f'number of rows:{rows}', f"number of cols: {cols}")
    if band1.shape != grid_shape:
        raise ValueError(
            f"{output_path} has shape {band1.shape}, expected {grid_shape}; "
            "all rasters must be aligned to the reference grid first"
        )
    band_columns[col_name] = band1.flatten()

if grid_shape is None:
    raise RuntimeError("output_paths is empty; run the raster alignment cell first")

# Vectorised affine transform of every (row, col) index to map coordinates.
# np.indices(...).ravel() is row-major, the same order as band1.flatten().
row_idx, col_idx = np.indices(grid_shape)
col_pos = col_idx.ravel() + PIXEL_OFFSET
row_pos = row_idx.ravel() + PIXEL_OFFSET
x_coords_transformed = grid_transform.a * col_pos + grid_transform.b * row_pos + grid_transform.c
y_coords_transformed = grid_transform.d * col_pos + grid_transform.e * row_pos + grid_transform.f

centroid_data_df = pd.DataFrame(band_columns).replace([np.inf, -np.inf], np.nan)

# Keep only cells that carry a value in every layer required downstream.
keep = (
    (centroid_data_df.NHDPlusID > 0)
    & (centroid_data_df.Domain == 1)
    & (centroid_data_df.gSSURGO > 0)
    & (centroid_data_df.STATSGO > 0)
    & (centroid_data_df.AQU_CHAR > 0)
    & (centroid_data_df.GeoLandSy > 0)
    & (centroid_data_df.S_AQTHK_1 > 0)
).to_numpy()

# Build point geometries only for the surviving cells.
centroid_df = gpd.GeoDataFrame(
    centroid_data_df[keep].reset_index(drop=True),
    geometry=gpd.points_from_xy(x_coords_transformed[keep], y_coords_transformed[keep]),
    crs=grid_crs,
)
print('number of unique NHDPlusIDs:', centroid_df.NHDPlusID.nunique())

# Replace the numeric county id from the raster with the county name from the
# shapefile (inner join: cells whose id has no matching OBJECTID are dropped).
# NOTE(review): this is a hard-coded Windows path while everything else lives
# under `dic` -- confirm where the shapefile should be read from.
county = gpd.read_file(r"D:\MyDataBase\NHDPlusData\Counties_(v17a)\Counties_(v17a).shp")
county = county.rename(columns={'COUNTY': 'COUNTY_shape'})
centroid_df = (
    centroid_df
    .merge(county[['OBJECTID', 'COUNTY_shape']], left_on='COUNTY', right_on='OBJECTID')
    .drop(columns=['OBJECTID', 'COUNTY'])
    .rename(columns={'COUNTY_shape': 'COUNTY'})
)

centroid_df = centroid_df.dropna(subset=['NHDPlusID', 'GeoLandSy', 'AQU_CHAR', 'GeologUnit', 'STATSGO', 'gSSURGO'])

# Normalise dtypes of the identifier / categorical columns.
centroid_df[['NHDPlusID', 'STATSGO', 'gSSURGO']] = centroid_df[['NHDPlusID', 'STATSGO', 'gSSURGO']].astype('int64')
centroid_df[['huc12', 'huc8']] = centroid_df[['huc12', 'huc8']].astype('int64')
centroid_df[['geomorph']] = centroid_df[['geomorph']].astype('int32')
centroid_df[['GeoLandSy', 'AQU_CHAR', 'GeologUnit', 'landuse']] = centroid_df[['GeoLandSy', 'AQU_CHAR', 'GeologUnit', 'landuse']].astype('int16')

# Continuous columns are stored as float rounded to 2 decimals.
# 'S_VCOND_2' was previously missing from this list although its siblings
# were all present.
cols_to_modify = ['Elevation', 'S_TRSMV_1', 'S_TRSMV_2', 'S_AQTHK_1', 'S_AQTHK_2', 'S_HCOND_1',
                  'S_HCOND_2', 'S_VCOND_1', 'S_VCOND_2', 'S_SWL', 'er_SWL', 'er_AQTHK_1', 'er_AQTHK_2',
                  'er_HCOND_1', 'er_HCOND_2', 'er_VCOND_1', 'er_VCOND_2', 'er_TRSMV_1', 'er_TRSMV_2']
centroid_df[cols_to_modify] = centroid_df[cols_to_modify].astype(float).round(2)

# Save DataFrame to pickle
centroid_df.to_pickle(os.path.join(dic, 'observations', f'rasters_{RESOLUTION}m.pk1'))

/data/MyDataBase/SWATGenXAppData/all_rasters\HUC8_250m.tif
number of rows:1849 number of cols: 1458
/data/MyDataBase/SWATGenXAppData/all_rasters\HUC12_250m.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\BaseRaster_250m.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\COUNTY_250m.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\Glacial_Landsystems_250m.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\Aquifer_Characteristics_Of_Glacial_Drift_250m.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\MI_geol_poly_250m.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\NHDPlusID_250m.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\DEM_250m.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\landforms_250m_250Dis.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\geomorphons_250m_250Dis.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\landuse_250m.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\Soil_STATSGO_250m.tif
/data/MyDataBase/SWATGenXAppData/all_rasters\gSSURGO_swat_250m.tif
/data/MyDataBase

In [56]:
# Distinct HUC12 codes that survived into the cell table.
# NOTE(review): the values printed below are all multiples of 4096, which is
# the spacing of float32 at this magnitude -- the HUC12 raster has probably
# lost precision upstream; verify its dtype.
print(centroid_df["huc12"].unique())

[40601051136 40700030976           0 40700039168 40700051456 40700059648
 40601038848 40601030656 40700071936 40601018368 40801009664 40601010176
 40802009088 40601022464 40802021376 40801021952 40801030144 40801038336
 40802050048 40900009984 40802058240 40802029568 40500060160 40802041856
 40500051968 40900030464 40500072448 40500068352 40500031488 40500019200
 40900022272 40500039680 41000128512 40900038656 41000009728 41000022016
 40500011008 41000058880 41000030208]


In [74]:
import rasterio
import numpy as np

# Diagnostic: how many distinct HUC12 codes does the source raster hold?
with rasterio.open('D:\\MyDataBase\\Boundary\\HUC12_250m.tif') as src:
    data = src.read(1)  # band 1 as a numpy array

    # Drop NaN cells, then compare the codes as 64-bit integers.
    not_nan = np.logical_not(np.isnan(data))
    unique_values = np.unique(data[not_nan].astype('int64'))

# Report only the count; the values can be listed with the line below.
print(f"The raster file has {unique_values.size} unique values.")

# Uncomment to list the unique values themselves:
# print(unique_values)


The raster file has 42 unique values.


In [68]:
# Show the huc12 column of `test` for comparison with the raster-derived codes.
# NOTE(review): `test` is not defined in any visible cell, so this relies on
# leftover kernel state.
test["huc12"]

0       40500010803
1       40500011201
2       40500011605
3       40500011701
4       40500011702
           ...     
1755    41000120304
1756    41000120405
1757    41000120503
1758    41000030403
1759    41000030601
Name: huc12, Length: 1760, dtype: int64

In [None]:
# Load the pickled DataFrames
import pandas as pd
import geopandas as gpd
import numpy as np

# Attach point observations to the grid cells: turn each cell-centre point into
# a RESOLUTION x RESOLUTION square, spatially join the observations that fall
# inside it, and pickle the result with and without geometry.
# Relies on `dic`, `RESOLUTION` and `os` from earlier cells.
obs_dir = os.path.join(dic, 'observations')

centroid_df = pd.read_pickle(os.path.join(obs_dir, f"rasters_{RESOLUTION}m.pk1"))
centroid_df = gpd.GeoDataFrame(centroid_df, geometry='geometry')
# Drop cells without a HUC8 and give the remaining cells a 1-based id in huc12 order.
centroid_df = centroid_df[centroid_df.huc8 != 0].sort_values('huc12').reset_index(drop=True)
centroid_df['OBJECTID'] = np.arange(1, len(centroid_df) + 1)

observation_df = pd.read_pickle(os.path.join(obs_dir, 'observations.pk1'))
observation_df = gpd.GeoDataFrame(observation_df, geometry='geometry')
# These columns are supplied by the grid side of the join instead.
observation_df = observation_df.drop(columns=['COUNTY', 'huc12', 'huc8', 'huc4'])

# Buffer each point by half a cell and take the bounding box to get a square cell.
# Use the exact half width: int() would truncate for odd resolutions and leave
# gaps between neighbouring cells.
half_cell = RESOLUTION / 2
centroid_df['geometry'] = centroid_df['geometry'].buffer(half_cell).envelope

# Right join keeps every grid cell, with or without observations inside it.
grids_observations = gpd.sjoin(observation_df, centroid_df, how="right", predicate="within")

# Collapse the squares back to their centre points (computed once, reused).
cell_centres = grids_observations.geometry.centroid
grids_observations['x'] = cell_centres.x
grids_observations['y'] = cell_centres.y
grids_observations['geometry'] = cell_centres
grids_observations.to_pickle(os.path.join(obs_dir, f'rasters_{RESOLUTION}m_with_observations.pk1'))

print('writing observations.....')
#grids_observations[['OBJECTID','geometry']].to_file(os.path.join(dic,'observations',f'rasters_{RESOLUTION}m_with_observations'))
grids_observations.drop(columns='geometry').to_pickle(os.path.join(obs_dir, f'rasters_{RESOLUTION}m_with_observations_without_geometry.pk1'))

In [4]:
# List the pickled tables available in the NHDPlusData folder.
nhd_pickle_pattern = os.path.join(dic, "NHDPlusData", "*.pk1")
glob.glob(nhd_pickle_pattern)

['/data/MyDataBase/SWATGenXAppData/NHDPlusData\\catchments.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\Flowlines.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\NHDFlowline.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\NHDPlusCatchment.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\NHDPlusEROMMA.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\NHDPlusFlowlineVAA.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\NHDPlusIncrPrecip.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\NHDPlusIncrPrecipMA.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\NHDPlusIncrTemp.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\NHDPlusIncrTempMA.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\NHDWaterbody.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\streams.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\subbasins.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData\\waterbodies.pk1',
 '/data/MyDataBase/SWATGenXAppData/NHDPlusData

In [53]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import glob
# Enrich the per-cell observation table with NHDPlus flowline attributes and
# pickle the merged table.
dic = "/data/MyDataBase/SWATGenXAppData/"
RESOLUTION = 250

nhd_dir = os.path.join(dic, "NHDPlusData")
observations_dir = os.path.join(dic, 'observations')

# QAMA: mean annual flow, cubic feet per second
NHDPlusEROMMA = pd.read_pickle(os.path.join(nhd_dir, "NHDPlusEROMMA.pk1"))[['NHDPlusID', 'QAMA']]

# The NHDPlusIncrTempMA ('TempMA') and NHDPlusIncrPrecipMA ('PrecipMA') tables
# are deliberately not merged at the moment.

vaa_columns = ['NHDPlusID', 'StreamLeve', 'StreamOrde', 'SlopeLenKm', 'AreaSqKm',
               'MaxElevSmo', 'MinElevSmo', 'Slope']
NHDPlusFlowlineVAA = pd.read_pickle(os.path.join(nhd_dir, "NHDPlusFlowlineVAA.pk1"))[vaa_columns]

grids_observations = pd.read_pickle(
    os.path.join(observations_dir, f'rasters_{RESOLUTION}m_with_observations_without_geometry.pk1')
).drop(columns='index_left')

# Default (inner) merges on NHDPlusID, applied in the same order as before.
for nhd_table in (NHDPlusEROMMA, NHDPlusFlowlineVAA):
    grids_observations = grids_observations.merge(nhd_table, on="NHDPlusID")

grids_observations.to_pickle(
    os.path.join(observations_dir, f'rasters_{RESOLUTION}m_with_observations_without_geometry_with_NHDPlusData.pk1')
)

In [None]:
### Test of geometry transform correctness.
import pandas as pd
import os
RESOLUTION = 250
dic = r"/data/MyDataBase/SWATGenXAppData/"

# Spot check: non-null TRANSMSV_1 values of the observations in Barry county.
df = pd.read_pickle(os.path.join(dic, 'observations', 'observations.pk1'))
is_barry = df.COUNTY == 'Barry'
df.loc[is_barry, 'TRANSMSV_1'].dropna()