## Reload library

In [1]:
import importlib
import sys

# Put the project root on the import path *before* importing the local
# packages, so the cell also works on a freshly restarted kernel. The guard
# keeps sys.path from accumulating duplicate entries when the cell is re-run.
if "../" not in sys.path:
    sys.path.append("../")

import geoai
import geoai.label_utils
import geoai.utils

# Re-execute the already-imported modules so on-disk edits are picked up
# without restarting the kernel: parent package first, then its submodules.
importlib.reload(geoai)
importlib.reload(geoai.label_utils)
importlib.reload(geoai.utils)

<module 'geoai.utils' from '/Users/dikaizm/Documents/PROGRAMMING/ml-ai/research-crop-mapping-thesis/geoai/geoai/utils.py'>

## Assign a nodata value to the S2 raster

In [1]:
import rasterio
import numpy as np

# Sentinel written to every invalid pixel and declared as the output's nodata
# value. Defined once so the written value and the declared value cannot drift.
NODATA = -9999.0

# NOTE(review): absolute, machine-specific input path; consider deriving it
# from a configurable data directory so the notebook runs elsewhere.
in_path = "/Users/dikaizm/Library/CloudStorage/GoogleDrive-dikamaah@gmail.com/My Drive/S2_Annual_15d_sm_stockton/S2H_2022_2022_05_16.tif"
out_path = "../data/raw/images/S2H_2022_2022_05_16_nodata.tif"

with rasterio.open(in_path) as src:
    # Reuse the source profile, changing only the sample type and nodata value.
    profile = src.profile
    profile.update(dtype="float64", nodata=NODATA)

    with rasterio.open(out_path, "w", **profile) as dst:
        # Stream the raster one block at a time (all bands per block) instead
        # of holding the full array, its mask and a corrected copy in memory.
        for _, window in src.block_windows(1):
            block = src.read(window=window).astype(np.float64)

            # Negative, NaN and +/-inf samples are all treated as invalid.
            invalid = (block < 0) | ~np.isfinite(block)
            dst.write(np.where(invalid, NODATA, block), window=window)

print("Saved:", out_path)

# Verify the output, again block by block, accumulating the statistics of
# the pixels that are not masked as nodata.
with rasterio.open(out_path) as written:
    valid_count = 0
    nodata_count = 0
    value_sum = 0.0
    value_min = np.inf
    value_max = -np.inf

    for _, window in written.block_windows(1):
        # masked=True hides pixels equal to the dataset's nodata value.
        block = written.read(window=window, masked=True)
        block_valid = int(block.count())
        nodata_count += block.size - block_valid
        if block_valid == 0:
            # Fully masked block: its min/max/sum would themselves be masked.
            continue
        valid_count += block_valid
        value_sum += float(block.sum())
        value_min = min(value_min, float(block.min()))
        value_max = max(value_max, float(block.max()))

    # Guard against a raster with no valid pixels at all.
    value_mean = value_sum / valid_count if valid_count else float("nan")

    print(f"\nStatistics (excluding nodata={written.nodata}):")
    print(f"Min: {value_min}")
    print(f"Max: {value_max}")
    print(f"Mean: {value_mean}")
    print(f"Valid pixels: {valid_count}")
    print(f"Nodata pixels: {nodata_count}")

Saved: ../data/raw/images/S2H_2022_2022_05_16_nodata.tif

Statistics (excluding nodata=-9999.0):
Min: 0.0
Max: 18440.0
Mean: 1841.4221456917764
Valid pixels: 390103569
Nodata pixels: 1637631


In [None]:
# Inspect the metadata and per-band statistics of the nodata-corrected S2 raster.
s2_path = "../data/raw/images/S2H_2022_2022_05_16_nodata.tif"

s2_info = geoai.get_raster_info(s2_path)
s2_info

{'driver': 'GTiff',
 'width': 7400,
 'height': 5882,
 'count': 9,
 'dtype': 'float64',
 'crs': 'EPSG:4326',
 'transform': Affine(8.983152841195215e-05, 0.0, -121.69063928936463,
        0.0, -8.983152841195215e-05, 38.24316810706468),
 'bounds': BoundingBox(left=-121.69063928936463, bottom=37.71477905694558, right=-121.02588597911618, top=38.24316810706468),
 'resolution': (8.983152841195215e-05, 8.983152841195215e-05),
 'nodata': -9999.0,
 'band_stats': [{'band': 1,
   'min': 0.0,
   'max': 18440.0,
   'mean': 736.397507075871,
   'std': 399.5078052658123},
  {'band': 2,
   'min': 0.0,
   'max': 17176.0,
   'mean': 999.1946307750904,
   'std': 434.1955835654333},
  {'band': 3,
   'min': 28.0,
   'max': 16408.0,
   'mean': 1160.9328011377409,
   'std': 612.636560165441},
  {'band': 4,
   'min': 0.0,
   'max': 16016.0,
   'mean': 1501.952374274484,
   'std': 562.0497565206477},
  {'band': 5,
   'min': 0.0,
   'max': 15845.0,
   'mean': 2257.6599684954435,
   'std': 692.8072474568361},
 

In [4]:
# Inspect the metadata and band statistics of the 2022 CDL label raster.
cdl_path = "../data/raw/cdl/stockton/2022_30m_cdls.tif"

cdl_info = geoai.get_raster_info(cdl_path)
cdl_info

{'driver': 'GTiff',
 'width': 3091,
 'height': 2418,
 'count': 1,
 'dtype': 'uint8',
 'crs': 'EPSG:4326',
 'transform': Affine(0.00032711129828534287, 0.0, -121.866409315,
        0.0, -0.00032711129818031263, 38.374180603),
 'bounds': BoundingBox(left=-121.866409315, bottom=37.583225484, right=-120.855308292, top=38.374180603),
 'resolution': (0.00032711129828534287, 0.00032711129818031263),
 'nodata': None,
 'band_stats': [{'band': 1,
   'min': 0.0,
   'max': 242.0,
   'mean': 63.62810852714423,
   'std': 65.9296354745592}]}

## Filter CDL labels

In [9]:
import sys

# Make the project root importable; the guard keeps sys.path free of
# duplicate entries when this cell is executed more than once.
if "../" not in sys.path:
    sys.path.append("../")

from utils import label

# The seven class codes to retain (hence the "_7c" suffix on the output file).
# NOTE(review): presumably USDA CDL codes (1 corn, 3 rice, 24 winter wheat,
# 54 tomatoes, 69 grapes, 75 almonds, 76 walnuts) - confirm against the CDL
# legend, and confirm how label_filtering treats the classes not listed here.
KEEP_CLASSES = [1, 3, 24, 54, 69, 75, 76]

label.label_filtering(
    in_path="../data/raw/cdl/sacramento_2/2023_30m_cdls.tif",
    out_path="../data/raw/cdl/sacramento_2/2023_30m_cdls_7c.tif",
    keep_classes=KEEP_CLASSES,
)

✅ Saved filtered raster: ../data/raw/cdl/sacramento_2/2023_30m_cdls_7c.tif
