In [23]:
import os
from os import path as osp 
from netCDF4 import Dataset
import netCDF4 as NC
import numpy as np
from pyproj import Transformer
from saveload import save,load
from ml_sample_generator import load_fire_detection, load_topography, load_meteorology


In [20]:
# The file paths
tif = osp.join('C:/', 'Users', 'T-Spe', 'OneDrive', 'School', "Fall '25", "Master's Project", 'test')
local = osp.join('C:/', 'Users', 'T-Spe', 'Downloads')

file_paths = {"elevation_path": osp.join(tif ,'LH20_Elev_220.tif'),
            "slope_path": osp.join(tif, 'LH20_SlpP_220.tif'),
            "aspect_path": osp.join(tif, 'LH20_Asp_220.tif'),
            "process_path": osp.join(local, 'processed_output.nc'),
            "fire_path": osp.join(local,'ml_data')
                }

In [21]:
def create_netcdf_with_row_col_and_mask(
    lon_array,
    lat_array,
    raster_crs,
    transform,
    raster_shape,
    output_file,
    debug=False
):
    """
    Create a NetCDF file with row, column indices, and a spatial mask for provided lon/lat data.
    
    Args:
        lon_array (np.ndarray): Array of longitudes in WGS84.
        lat_array (np.ndarray): Array of latitudes in WGS84.
        raster_crs (str): CRS of the raster (e.g., "EPSG:5070").
        transform (Affine): Rasterio affine transform of the raster.
        raster_shape (tuple): Shape of the raster (rows, cols).
        output_file (str): Path to the output NetCDF file.
        debug (bool): Whether to enable debug messages.
    """
    print(f"Creating NetCDF file at {output_file}...")

    # Initialize transformer
    transformer = Transformer.from_crs("EPSG:4326", raster_crs, always_xy=True)

    # Reproject lon/lat to raster CRS
    print("Transforming coordinates to raster CRS...")
    raster_lon, raster_lat = transformer.transform(lon_array, lat_array)

    # Calculate row and column indices
    inv_transform = ~transform
    cols, rows = inv_transform * (raster_lon, raster_lat)

    # Round indices and convert to integers
    rows = np.round(rows).astype(int)
    cols = np.round(cols).astype(int)

    # Create a valid mask based on raster shape
    valid_mask = (
        (rows >= 0) & (rows < raster_shape[0]) &
        (cols >= 0) & (cols < raster_shape[1])
    )
    rows_valid = rows[valid_mask]
    cols_valid = cols[valid_mask]

    if debug:
        print(f"Valid row indices: {rows_valid}")
        print(f"Valid col indices: {cols_valid}")
        print(f"Valid mask shape: {valid_mask.shape}")
        print(f"Number of valid points: {np.sum(valid_mask)}")

    # Write to NetCDF
    with Dataset(output_file, 'w', format='NETCDF4') as nc_file:
        # Define dimensions
        nc_file.createDimension('points', len(rows_valid))
        nc_file.createDimension('mask', len(valid_mask))

        # Create variables
        rows_var = nc_file.createVariable('rows', 'i4', ('points',), zlib=True)
        cols_var = nc_file.createVariable('cols', 'i4', ('points',), zlib=True)
        mask_var = nc_file.createVariable('valid_mask', 'i1', ('mask',), zlib=True)

        # Write data
        rows_var[:] = rows_valid
        cols_var[:] = cols_valid
        mask_var[:] = valid_mask.astype(int)

    print(f"NetCDF file saved: {output_file}")

In [22]:
# Load meteorology data
# meteorology = load_meteorology(file_paths)
# time_lb = meteorology['times'].min()
# time_ub = meteorology['times'].max()
# print(f"Meteorology time range: {time_lb} to {time_ub}")

# Load topography data
topography = load_topography(file_paths)

# Load and filter fire detection data
fire_detection_data = load_fire_detection(file_paths, time_lb, time_ub, confidence_threshold)

# Extract relevant data
lon_array = fire_detection_data['lon']
lat_array = fire_detection_data['lat']
#dates_fire = fire_detection_data['dates_fire']
#labels = fire_detection_data['labels']

Loading topography data...
Trying to open C:/Users\T-Spe\OneDrive\School\Fall '25\Master's Project\test\LH20_Elev_220.tif as <open DatasetReader name='C:/Users\T-Spe\OneDrive\School\Fall '25\Master's Project\test\LH20_Elev_220.tif' mode='r'>


NameError: name 'time_lb' is not defined