In [1]:
# Load packages
import glob
import os
import shutil
import zipfile

# Third-party packages (original relative order kept; geospatial stacks can be
# sensitive to the order in which GDAL-backed libraries are loaded).
import netCDF4 as nc
from netCDF4 import Dataset
import rasterio
from rasterio.transform import from_origin
import numpy as np
from osgeo import gdal, osr
import xarray as xr
import dask
import rioxarray as rio
from rioxarray.exceptions import NoDataInBounds
import matplotlib
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import geopandas as gpd
import pandas as pd
import pyproj
import shapefile
from shapely.geometry import shape
from shapely.geometry import mapping
import rasterio.mask
import fiona


In [5]:
# Step 1: Unzip every .ZIP archive in the base directory and delete the original
# archives, then rename the extracted .SEN3 folder and remove the files that are
# not in the keep-list defined further down in this cell.

# Set the base directory that is scanned for .ZIP archives.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
# This name is reassigned later in this cell to the renamed "<timestamp>_use" folder.
base_directory = "/Users/lopezama/Documents/Blackwood/MERIS/scripts/earth_data/test_data"

# Define function
def unzip_and_delete(directory):
    """Unzip every ZIP archive in ``directory`` and delete each archive afterwards.

    Archives are recognised by a ``.zip`` extension in any letter case
    (``.ZIP``, ``.zip``, ``.Zip`` ...). Each archive is extracted into
    ``directory`` itself and is removed only after extraction finished without
    raising. Invalid archives are reported and left in place.

    Args:
        directory (str): Folder that is scanned (non-recursively) for archives.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    # sorted() gives a reproducible processing order; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(".zip"):
            continue

        file_path = os.path.join(directory, filename)
        # A folder whose name happens to end in ".zip" is not an archive.
        if not os.path.isfile(file_path):
            continue

        try:
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(directory)
            # Delete only after the archive has been closed and fully extracted.
            os.remove(file_path)
            print(f"Unzipped and deleted: {filename}")
        except zipfile.BadZipFile:
            print(f"Skipping invalid zip file: {filename}")
        except Exception as e:
            # Best effort: report the problem and continue with the next archive.
            print(f"An error occurred processing {filename}: {e}")

# Execute step
unzip_and_delete(base_directory)

# Keep the parent folder in its own variable. base_directory is re-pointed at the
# renamed scene folder below, so it must not be used to build paths inside the
# loop (otherwise every folder after the first rename is looked up in the wrong place).
parent_directory = base_directory

# Characters 16-30 of the .SEN3 folder name are reused as the new folder name
# (e.g. "20110731T182333" in the recorded run).
scene_id_slice = slice(16, 31)

# Loop through all items in the parent directory
for item in sorted(os.listdir(parent_directory)):
    item_path = os.path.join(parent_directory, item)

    # Process only directories that end with ".SEN3" (case-insensitive)
    if not (os.path.isdir(item_path) and item.upper().endswith('.SEN3')):
        continue

    new_folder_name = item[scene_id_slice] + "_use"
    new_folder_path = os.path.join(parent_directory, new_folder_name)

    # Never overwrite an existing folder; say so instead of skipping silently.
    if os.path.exists(new_folder_path):
        print(f"Skipped rename, target already exists: {new_folder_path}")
        continue

    os.rename(item_path, new_folder_path)
    print(f"Renamed folder: {item} -> {new_folder_name}")

    # Update base_directory to the newly renamed folder (used by the following steps).
    # If several scenes are renamed, the last one becomes the working folder.
    base_directory = new_folder_path
    print(f"New working directory: {base_directory}")

files_to_keep = [
    "cloud.nc", "common_flags.nc", "cqsf.nc", "geo_coordinates.nc",
    "iop_nn.nc", "par.nc", "tie_geo_coordinates.nc", "tie_geometries.nc",
    "time_coordinates.nc", "tsm_nn.nc", "wqsf.nc"
]

# Only process the updated base_directory (not all folders in parent directory)
if os.path.isdir(base_directory) and base_directory.endswith("_use"):
    print(f"Processing folder for cleanup: {base_directory}")

    for file in os.listdir(base_directory):
        file_path = os.path.join(base_directory, file)

        if os.path.isfile(file_path) and file not in files_to_keep:
            os.remove(file_path)
            print(f"Deleted: {file_path}")
        else:
            print(f"Kept: {file_path}")
else:
    # Happens when no .SEN3 folder was renamed in this run (e.g. the cell was re-run).
    print(f"No renamed '_use' folder found; cleanup skipped for: {base_directory}")


# Explicitly change to the new working directory before Step 4
os.chdir(base_directory)
print(f"New working directory set for Step 4: {os.getcwd()}")

print("Cleanup process complete.")

Unzipped and deleted: EN1_MDSI_MER_FRS_2P_20110731T182333_20110731T182636_049251_0099_20180103T203937_0100.ZIP
Renamed folder: ENV_ME_2_FRG____20110731T182333_20110731T182636_________________0183_105_099______DSI_R_NT____.SEN3 -> 20110731T182333_use
New working directory: /Users/lopezama/Documents/Blackwood/MERIS/scripts/earth_data/test_data/20110731T182333_use
Processing folder for cleanup: /Users/lopezama/Documents/Blackwood/MERIS/scripts/earth_data/test_data/20110731T182333_use
Deleted: /Users/lopezama/Documents/Blackwood/MERIS/scripts/earth_data/test_data/20110731T182333_use/M03_rho_w.nc
Deleted: /Users/lopezama/Documents/Blackwood/MERIS/scripts/earth_data/test_data/20110731T182333_use/M02_rho_w.nc
Deleted: /Users/lopezama/Documents/Blackwood/MERIS/scripts/earth_data/test_data/20110731T182333_use/mgvi.nc
Deleted: /Users/lopezama/Documents/Blackwood/MERIS/scripts/earth_data/test_data/20110731T182333_use/M10_rho_TOA.nc
Deleted: /Users/lopezama/Documents/Blackwood/MERIS/scripts/earth_

In [6]:
# Add lat/lon as spatial dimensions to TSM netcdf from geo_coordinates netcdf

# A) Locate the NetCDF files

# NetCDF with data but missing lat/lon dimensions
tsm_nc = os.path.join(base_directory, "tsm_nn.nc")
# NetCDF containing lat/lon dimensions
geocoord_nc = os.path.join(base_directory, "geo_coordinates.nc")

if not os.path.exists(tsm_nc) or not os.path.exists(geocoord_nc):
    raise FileNotFoundError("One or both NetCDF files are missing in the directory.")

# Open both files in a context manager so the file handles are released as soon
# as the arrays have been read (ds_tsm / ds_geocoord are closed after this block).
with xr.open_dataset(tsm_nc) as ds_tsm, xr.open_dataset(geocoord_nc) as ds_geocoord:
    # B) Extract latitude & longitude as NumPy arrays
    lat = ds_geocoord["latitude"].values
    lon = ds_geocoord["longitude"].values

    # D) Extract the data of the first variable in the TSM file
    var_name = list(ds_tsm.data_vars.keys())[0]
    data_values = ds_tsm[var_name].values

# C) Reduce 2D coordinate grids to 1D axes
# NOTE(review): this keeps only the first column of latitude and the first row of
# longitude, i.e. it assumes latitude is constant along a row and longitude along
# a column - confirm this holds for the product before relying on the geolocation.
if lat.ndim == 2:
    lat = lat[:, 0]
if lon.ndim == 2:
    lon = lon[0, :]

print(f"Latitude shape after fix: {lat.shape}")  # 1D, one value per data row
print(f"Longitude shape after fix: {lon.shape}")  # 1D, one value per data column

# If data is 3D, select the first slice along the leading axis
# (presumably time - TODO confirm for this product)
if data_values.ndim == 3:
    data_values = data_values[0, :, :]

# Ensure the data shape matches (lat, lon)
if data_values.shape != (len(lat), len(lon)):
    raise ValueError(
        f"Mismatch: data shape {data_values.shape} vs expected ({len(lat)}, {len(lon)})"
    )

print(f"Final Data shape: {data_values.shape}")  # Equals (len(lat), len(lon))

# E) Create a new NetCDF dataset with correct dimensions
ds_new = xr.Dataset(
    {
        var_name: (["latitude", "longitude"], data_values)
    },
    coords={
        "latitude": ("latitude", lat),  # Assign dimensions explicitly
        "longitude": ("longitude", lon)
    }
)

# F) Save the updated dataset
tsm_nc_spdm = os.path.join(base_directory, "tsm_nn_spdm.nc")
ds_new.to_netcdf(tsm_nc_spdm)

Latitude shape after fix: (4161,)
Longitude shape after fix: (4481,)
Final Data shape: (4161, 4481)


In [None]:
# Confirm lat/lon were added as spatial dimensions to netCDF
# Re-open the file written by the previous step (tsm_nc_spdm) rather than a
# hard-coded scene path, load it into memory and close the handle again so the
# file is not kept locked if the previous cell is re-run.
with xr.open_dataset(tsm_nc_spdm) as ds_check:
    tsm_nc_spdm_view = ds_check.load()
tsm_nc_spdm_view

In [None]:
# Show min and max variable values in netCDF

def find_min_max(file_path, variable_name):
    """
    Finds the minimum and maximum values of a specified variable within a NetCDF file.

    Masked (fill-value) entries and NaN entries are ignored.

    Args:
        file_path (str): The path to the NetCDF file.
        variable_name (str): The name of the variable to analyze.

    Returns:
        tuple: A tuple containing the minimum and maximum values.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the variable is missing, is empty, or contains no valid
            (unmasked, non-NaN) value.
        Exception: For any other failure while reading; the original error is
            attached as ``__cause__``.
    """
    try:
        with Dataset(file_path, 'r') as nc_file:
            if variable_name not in nc_file.variables:
                raise ValueError(f"Variable '{variable_name}' not found in the file.")

            variable_data = nc_file.variables[variable_name][:]

            if np.size(variable_data) == 0:
                raise ValueError(f"Variable '{variable_name}' has no data.")

            # Mask NaN on top of any fill-value mask that is already present, so
            # "no usable value" can be detected with a simple count.
            valid_data = np.ma.masked_where(
                np.isnan(np.ma.getdata(variable_data)), variable_data
            )

            if valid_data.count() == 0:
                raise ValueError(f"Could not determine min/max values for '{variable_name}'. Ensure the data does not exclusively contain NaN values.")

            return valid_data.min(), valid_data.max()

    except FileNotFoundError as e:
        raise FileNotFoundError(f"File not found: {file_path}") from e
    except ValueError:
        # Deliberate validation errors keep their type and message.
        raise
    except Exception as e:
        raise Exception(f"An error occurred: {e}") from e

if __name__ == '__main__':
    # Analyse the file written by the lat/lon step (tsm_nc_spdm) so this cell
    # always follows the scene that was actually processed in this run.
    file_path = tsm_nc_spdm
    variable_name = 'TSM_NN'  # Replace with the name of the variable you want to analyze

    try:
        min_val, max_val = find_min_max(file_path, variable_name)
        print(f"Minimum value of '{variable_name}': {min_val}")
        print(f"Maximum value of '{variable_name}': {max_val}")
    except Exception as e:
        print(f"Error: {e}")

In [None]:
# Show values of first 20 pixels in netCDF

def print_netcdf_to_dataframe(file_path, variable_name, num_pixels=20):
    """
    Prints the leading pixels of a NetCDF variable as a Pandas DataFrame.

    The first ``num_pixels`` entries along the last dimension are read for every
    index of the leading dimensions; leading dimensions are flattened into rows.
    Errors are reported with ``print`` instead of being raised.

    Args:
        file_path (str): Path to the NetCDF file.
        variable_name (str): Name of the variable to extract.
        num_pixels (int): Maximum number of columns (entries along the last
            dimension) to show. Defaults to 20.
    """
    try:
        with nc.Dataset(file_path, 'r') as nc_file:
            variable = nc_file.variables[variable_name]

            if variable.ndim < 2:
                print("Variable must have at least 2 dimensions (e.g., latitude, longitude).")
                return

            n_cols = min(max(num_pixels, 0), variable.shape[-1])
            if n_cols == 0:
                print("No pixels to display.")
                return

            # Slice while reading so only the requested columns are loaded.
            data_values = variable[..., :n_cols]

            df = pd.DataFrame(data=data_values.reshape(-1, n_cols))
            print(df)

    except FileNotFoundError:
        print(f"Error: File not found: {file_path}")
    except KeyError:
        print(f"Error: Variable '{variable_name}' not found in the file.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


# Path to netcdf you want to look at: the file written by the lat/lon step
# (tsm_nc_spdm), so this cell follows the scene processed in this run.
file_path = tsm_nc_spdm

# Specify the variable you want to look at
variable_name = 'TSM_NN'

# Display the values
print_netcdf_to_dataframe(file_path, variable_name)

In [None]:
# Clip TSM netCDF using lat/lon ROI shapefile

def crop_netcdf_with_shapefile(nc_file, shapefile, output_nc, data_var="TSM_NN"):
    """
    Crops a NetCDF file using a shapefile and logs a message if no data is found.

    Relies on ``mapping`` (shapely.geometry) and ``NoDataInBounds``
    (rioxarray.exceptions) being imported at the top of the notebook.

    :param nc_file: Path to the input NetCDF file.
    :param shapefile: Path to the shapefile defining the crop region.
        (Inside this function the name hides the imported ``shapefile`` module.)
    :param output_nc: Path to save the cropped NetCDF file.
    :param data_var: The variable checked for valid data after clipping.
    :return: None. On success the clipped dataset is written to ``output_nc``;
        when no data lies inside the region, ``log.txt`` is written next to
        ``output_nc`` instead.
    """
    # Define log file path
    log_file = os.path.join(os.path.dirname(output_nc), "log.txt")

    # The context manager closes the input file even if clipping or writing fails.
    with xr.open_dataset(nc_file) as ds_in:
        # Ensure the dataset is georeferenced (EPSG:4326 is assumed when no CRS is stored)
        if "spatial_ref" not in ds_in:
            ds = ds_in.rio.write_crs("EPSG:4326")
        else:
            ds = ds_in

        # Load the shapefile
        gdf = gpd.read_file(shapefile)

        # Convert to dataset CRS if different
        if gdf.crs is not None and gdf.crs != ds.rio.crs:
            gdf = gdf.to_crs(ds.rio.crs)

        # GeoJSON-like dicts for every geometry in the shapefile
        geom = [mapping(geometry) for geometry in gdf.geometry]

        try:
            # Attempt to clip the NetCDF using the shapefile geometry
            clipped_ds = ds.rio.clip(geom, gdf.crs, drop=True)

            # Check if there is data inside the clipped region
            if data_var in clipped_ds and clipped_ds[data_var].count() == 0:
                raise NoDataInBounds(f"No data found in bounds. Data variable: {data_var}")

            # Save the cropped NetCDF while the input is still open
            clipped_ds.to_netcdf(output_nc)
            print(f"✅ Cropped NetCDF saved to: {output_nc}")

        except NoDataInBounds:
            # Write log file if NoDataInBounds error occurs
            with open(log_file, "w") as log:
                log.write(f"NoDataInBounds: No data found in ROI bounds. Data variable: {data_var}.\n")
            print(f"⚠️ No data found in ROI bounds. Log saved: {log_file}")

# Example Usage
# Scene paths are built from base_directory so this cell clips the scene that was
# processed above instead of a hard-coded folder.
tsm_nc_spdm = os.path.join(base_directory, "tsm_nn_spdm.nc")
# NOTE(review): absolute, machine-specific path to the ROI shapefile.
shp_ll = "/Users/lopezama/Documents/Blackwood/MERIS/ROI/west_us_poly_ll/west_us_poly_ll.shp"
tsm_nc_spdm_clip = os.path.join(base_directory, "tsm_nn_spdm_clip.nc")

crop_netcdf_with_shapefile(tsm_nc_spdm, shp_ll, tsm_nc_spdm_clip)