In [22]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import rioxarray as rxr
import xarray as xr

from exactextract import exact_extract
from rasterio.crs import CRS
from rasterio.mask import mask
from shapely.geometry import box

### To save one file with correct CRS

In [4]:
# Location of the input NHD JSON file (hard-coded absolute path).
nhd_path = (
    '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/'
    'NHD_prepped_buffered_json/HUC2_01/'
    'NHDPLUS_H_0101_HU4_GDB_prepped_buffered_max.json'
)

In [5]:
# Load the NHD features into a GeoDataFrame.
nhd = gpd.read_file(filename=nhd_path)

In [6]:
# Reproject to WGS 84 (EPSG:4326) before saving as GeoJSON.
# The previous code, '4236', had two digits transposed: EPSG:4236 is a
# different geographic CRS (Hu Tzu Shan 1950), not WGS 84.
nhd = nhd.to_crs('EPSG:4326')

In [7]:
# Drop every attribute except the feature ID and the geometry.
keep_columns = ['NHDPlusID', 'geometry']
nhd = nhd[keep_columns]

In [None]:
# Destination directory and file name for the reprojected, trimmed copy.
save_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/'
save_file = 'NHDPLUS_H_0101_HU4_GDB_prepped_buffered_max_TEST.json'

# Write the GeoDataFrame out with the GeoJSON driver.
out_json = os.path.join(save_path, save_file)
nhd.to_file(filename=out_json, driver='GeoJSON')

### Project NLCD

In [28]:
# Re-load the saved test file, reading only the ID and geometry columns.
test_json = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHDPLUS_H_0101_HU4_GDB_prepped_buffered_max_TEST.json'
nhd = gpd.read_file(filename=test_json, columns=['NHDPlusID', 'geometry'])

In [18]:
# bounds = nhd.total_bounds
# bbox = box(bounds[0], bounds[1], bounds[2], bounds[3])
# geo = gpd.GeoDataFrame({'geometry': [bbox]}, crs="EPSG:4326") # WGS 84

In [29]:
# Location of the NLCD land-cover GeoTIFF (hard-coded absolute path).
nlcd_path = (
    '/nas/cee-water/cjgleason/data/NLCD/'
    'Annual_NLCD_LndCov_2023_CU_C1V0.tif'
)

In [23]:
# Open the raster only long enough to read its CRS, then reproject the
# NHD polygons into that CRS.
with rasterio.open(nlcd_path) as raster:
    raster_crs = raster.crs
nhd = nhd.to_crs(raster_crs)

In [26]:
# NOTE: the `with` block that opened `raster` has already exited, so this
# displays a closed DatasetReader (see the cell output).
raster

<closed DatasetReader name='/nas/cee-water/cjgleason/data/NLCD/Annual_NLCD_LndCov_2023_CU_C1V0.tif' mode='r'>

In [30]:
# Open the NLCD raster through rioxarray with masking enabled.
nlcd = rxr.open_rasterio(filename=nlcd_path, masked=True)

In [35]:
# CRS of the NLCD raster, read from rioxarray's `.rio` accessor.
nlcd_crs = nlcd.rio.crs

In [38]:
# Put the NHD polygons in the same CRS as the NLCD raster.
nhd = nhd.to_crs(crs=nlcd_crs)

In [18]:
# target_crs = CRS.from_string('EPSG:4326')

In [None]:
# nlcd_reproj = nlcd.rio.reproject(target_crs)

### Testing exactextract

In [13]:
# rast = '/nas/cee-water/cjgleason/data/NLCD/Annual_NLCD_LndCov_2023_CU_C1V0.tif'
# polys = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHDPLUS_H_0101_HU4_GDB_prepped_buffered_max_TEST.json'

In [45]:
# Summarise the raster values under each NHD polygon and return the
# result as a pandas DataFrame keyed by NHDPlusID.
summary_ops = ['variety', 'mode', 'minority']
test = exact_extract(
    rast=nlcd,
    vec=nhd,
    ops=summary_ops,
    include_cols=["NHDPlusID"],
    output='pandas',
)

In [46]:
# Display the exact_extract result table.
test

Unnamed: 0,NHDPlusID,variety,mode,minority
0,5.000100e+12,4,90.0,22.0
1,5.000100e+12,4,81.0,21.0
2,5.000100e+12,5,90.0,81.0
3,5.000100e+12,9,11.0,82.0
4,5.000100e+12,4,11.0,71.0
...,...,...,...,...
14481,5.000100e+12,1,90.0,90.0
14482,5.000100e+12,3,90.0,43.0
14483,5.000100e+12,7,90.0,95.0
14484,5.000100e+12,3,42.0,43.0


### Land Cover

In [None]:
# Inputs for the land-cover section: the NLCD GeoTIFF and the polygon
# parquet dataset (hard-coded absolute paths).
tif_path = (
    '/nas/cee-water/cjgleason/data/NLCD/'
    'Annual_NLCD_LndCov_2023_CU_C1V0.tif'
)
poly_path = (
    '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/'
    'all_wbd_no_great_lakes.parquet/'
)

In [None]:
# Load band 1 of the raster together with its profile, and report its CRS.
# NOTE(review): read(1) without a window pulls the entire band into memory;
# consider a windowed read if only a sub-region is needed.
with rasterio.open(tif_path) as src:
    raster_profile = src.profile
    raster_img = src.read(1)
    print(src.crs)

In [None]:
# Inspect the profile captured from the raster file.
raster_profile

In [None]:
# Cut a 1000 x 1000 sample out of the full band (rows and columns
# 10000-10999). Note that this rebinds the name `test`.
tile_span = slice(10000, 11000)
test = raster_img[tile_span, tile_span]

In [None]:
# Show the sampled sub-array.
test

In [None]:
# List the distinct values present in `test`.
np.unique(test)

In [None]:
# Quick visual check of the sample, drawn with the 'tab20' colormap and a
# colorbar for the plotted image.
tile_image = plt.imshow(test, cmap='tab20')
plt.colorbar(tile_image)

In [None]:
# Load the polygon dataset from parquet into a GeoDataFrame.
wbd = gpd.read_parquet(path=poly_path)

In [None]:
# Take only the first row; the list indexer keeps the result a one-row
# frame rather than a Series.
first_row = [0]
temp = wbd.iloc[first_row]

In [None]:
# Reproject the polygon only when its CRS differs from the raster's.
profile_crs = raster_profile['crs']
needs_reprojection = temp.crs != profile_crs
if needs_reprojection:
    temp = temp.to_crs(profile_crs)

In [None]:
# Plain list of the geometry objects, the form passed to mask() as `shapes`.
geoms = list(temp.geometry)

In [None]:
# Show the current working directory (IPython automagic); the relative
# output path used when writing the clipped raster is resolved against it.
pwd

In [None]:
# Clip the land-cover raster to the selected polygon(s) and write the result.
#
# Fixes over the previous version:
#   * mask() needs an open rasterio dataset, not the ndarray returned by
#     src.read(), so the source file is reopened here.
#   * The output profile must describe the cropped array (height, width and
#     transform); reusing the full-raster profile unchanged does not.
#   * mask() returns a (bands, rows, cols) array, so it is written without a
#     single-band index.
with rasterio.open(tif_path) as src:
    out_image, out_transform = mask(dataset=src, shapes=geoms, crop=True)
    clipped_profile = src.profile

clipped_profile.update(
    height=out_image.shape[1],
    width=out_image.shape[2],
    transform=out_transform,
)

with rasterio.open('../narrow_rivers_PIXC_data/clipped_raster.tif', 'w', **clipped_profile) as dest:
    dest.write(out_image)