In [None]:
# Import packages

import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import pyart
import glob
import matplotlib.patheffects as path_effects
from matplotlib.colors import BoundaryNorm, LinearSegmentedColormap
from pyproj import Transformer
import radlib
import os
import h5py
import matplotlib.dates as mdates
from scipy.ndimage import gaussian_filter1d
from cartopy.feature import NaturalEarthFeature
import xmltodict, geopandas, geojson, xml, json #xml and json do not exist
from datetime import datetime, timedelta, timezone
import geopy.distance
import numpy.matlib as npm
import copy
from scipy.signal import convolve2d
from astropy.convolution import convolve
import scipy.ndimage as ndi
import re
from skimage.draw import polygon


from pprint import pprint
from pysteps import io, motion, rcparams
from pysteps.utils import conversion, transformation
from pysteps.visualization import plot_precip_field, quiver

import json

from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import matplotlib.font_manager as fm

import polars as pl

import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling

import geopandas as gpd
from shapely.geometry import Point

from mpl_toolkits.mplot3d import Axes3D

from shapely.geometry import Polygon


from geopandas import GeoDataFrame
import datetime


os.environ["library_metranet_path"] = "/store_new/mch/msrad/idl/lib/radlib4/" # needed for pyradlib
os.environ["METRANETLIB_PATH"] = "/store_new/mch/msrad/idl/lib/radlib4/" # needed for pyart_mch


# Calculate Swiss grid coordinates into composite raster points
def swiss_to_grid_index(swiss_x, swiss_y, clons, clats, zh_shape):
    # Initialize transformers
    transformer_swiss_to_3035 = Transformer.from_crs(21781, 3035, always_xy=True)
    
    # Transform Swiss coordinates to EPSG:3035
    x_3035, y_3035 = transformer_swiss_to_3035.transform(swiss_x, swiss_y)
    
    # Calculate distances
    distances = np.sqrt((clons - x_3035)**2 + (clats - y_3035)**2)
    
    # Find the index of the minimum distance
    y_idx, x_idx = np.unravel_index(np.argmin(distances), distances.shape)
    
    # Create a function to get values at specific vertical levels
    def get_value_at_level(zh_array, level):
        if 0 <= level < zh_shape[2]:
            return zh_array[y_idx, x_idx, level]
        else:
            raise ValueError(f"Level must be between 0 and {zh_shape[2]-1}")
    
    return y_idx, x_idx, get_value_at_level

# Function to retrieve cross sections for all variables of the wind composite
def load_and_create_cross_sections(year, month, day, valid_time):
    # Load the .npz file
    data = np.load(f'/scratch/mch/fackerma/orders/full_composite_npz/{year}{month}{day}{valid_time}00_conv_wind_composite_data.npz')
    
    # Access the specific arrays
    ZH = data['ZH_max']
    rad_shear = data['RAD_SHEAR_LLSD_max']
    az_shear = data['AZ_SHEAR_LLSD_abs_max']
    RVEL = data['RVEL_DE_abs_max']
    KDP = data['KDP_max']
    ZDR = data['ZH_max']
    
    # Create cross-sections and 2D projections for each array
    arrays = [ZH, rad_shear, az_shear, RVEL, KDP, ZDR]
    names = ['ZH', 'rad_shear', 'az_shear', 'RVEL', 'KDP', 'ZDR']
    
    results = {}
    
    for arr, name in zip(arrays, names):
        # Create cross-sections
        results[f'{name}_x_cross'] = arr[y_idx, x_start:x_end, :]
        results[f'{name}_y_cross'] = arr[y_start:y_end, x_idx, :]
        
        # Create 2D max projection
        results[f'{name}_2d'] = np.nanmax(arr, axis=2)
    
    return results
# All results are stored in a dictionary, with keys formatted as '{name}_x_cross', '{name}_y_cross', and '{name}_2d'


# Define the Swiss grid (adjusted to match data dimensions)
chx = np.arange(255000, 255000 + 710 * 1000, 1000)  # Easting values (710 points)
chy = sorted(np.arange(-160000, -160000 + 640 * 1000, 1000), reverse=True)  # Northing values (640 points)
X, Y = np.meshgrid(chx, chy)

# Initialize transformer for Swiss grid to WGS84 (EPSG:21781 to EPSG:4326 PlateCarree)
transformer = Transformer.from_crs(21781, 4326, always_xy=True)
clons, clats = transformer.transform(X, Y)





## You are using the Python ARM Radar Toolkit (Py-ART), an open source
## library for working with weather radar data. Py-ART is partly
## supported by the U.S. Department of Energy as part of the Atmospheric
## Radiation Measurement (ARM) Climate Research Facility, an Office of
## Science user facility.
##
## If you use this software to prepare a publication, please cite:
##
##     JJ Helmus and SM Collis, JORS 2016, doi: 10.5334/jors.119

Pysteps configuration file found at: /scratch/mch/fackerma/miniforge3/envs/testenv/lib/python3.12/site-packages/pysteps/pystepsrc



In [22]:
# Import the data
import os
import pandas as pd
import numpy as np

# Define the base directory and file names
base_dir = "/scratch/mch/fackerma/orders/TRT_processing_2/"
yearly_files = [
    "TRT_2019_05-10.pkl",
    "TRT_2020_05-10.pkl",
    "TRT_2021_05-10.pkl",
    "TRT_2022_05-10.pkl",
    "TRT_2023_05-10.pkl",
]

# Load and merge dataframes
dfs = []
for file_name in yearly_files:
    file_path = os.path.join(base_dir, file_name)
    if os.path.exists(file_path):
        print(f"Loading {file_name}...")
        df = pd.read_pickle(file_path)
        dfs.append(df)
    else:
        print(f"⚠️ File not found: {file_name}")

if not dfs:
    print("No data loaded. Exiting.")
    exit()

merged_df = pd.concat(dfs, ignore_index=True)
print(f"\nMerged dataframe shape: {merged_df.shape}")



Loading TRT_2019_05-10.pkl...
Loading TRT_2020_05-10.pkl...
Loading TRT_2021_05-10.pkl...
Loading TRT_2022_05-10.pkl...
Loading TRT_2023_05-10.pkl...


  return GeometryArray(data, crs=_get_common_crs(to_concat))



Merged dataframe shape: (2240266, 91)


In [71]:
import pandas as pd
import numpy as np
import numpy as np
import matplotlib.pyplot as plt
from pyproj import Transformer

from pyproj import Transformer
import numpy as np


# Define the Swiss grid (adjusted to match data dimensions)
chx = np.arange(255000, 255000 + 710 * 1000, 1000)  # Easting values (710 points)
chy = sorted(np.arange(-160000, -160000 + 640 * 1000, 1000), reverse=True)  # Northing values (640 points)
X, Y = np.meshgrid(chx, chy)

# Initialize transformer for Swiss grid to WGS84 (EPSG:21781 to EPSG:4326 PlateCarree)
transformer = Transformer.from_crs(21781, 4326, always_xy=True)
clons, clats = transformer.transform(X, Y)



def filter_rows_by_datetime(merged_df, datetime_str):
    # Ensure the 'timestamp' column is in datetime format
    merged_df['timestamp'] = pd.to_datetime(merged_df['timestamp'])
    
    # Convert input datetime string to a datetime object
    target_datetime = pd.to_datetime(datetime_str)
    
    # Filter rows for the exact datetime
    filtered_df = merged_df[merged_df['timestamp'] == target_datetime]
    return filtered_df

def load_npz_file(datetime_obj):
    # Extract components from datetime
    year = datetime_obj.strftime('%Y')
    month = datetime_obj.strftime('%m')
    day = datetime_obj.strftime('%d')
    valid_time = datetime_obj.strftime('%H%M')  # Format: '1535' for 15:35
    
    # Load .npz file
    file_path = f'/scratch/mch/maregger/hailclass/convective_wind/full_composite_npz/{year}{month}{day}{valid_time}00_conv_wind_composite_data.npz'
    return np.load(file_path)

# Example usage
if __name__ == "__main__":
    # Define your datetime (including time)
    datetime_str = '2019-06-15 15:40:00'
    
    # Filter rows by exact datetime
    filtered_df = filter_rows_by_datetime(merged_df, datetime_str)
    print("Filtered DataFrame:")
    print(filtered_df)
    
    # Load corresponding .npz file
    if not filtered_df.empty:
        data = load_npz_file(pd.to_datetime(datetime_str))
        ZH, rad_shear, KDP = data['ZH_max'], data['RAD_SHEAR_LLSD_max'], data['KDP_max']
        print("\nLoaded Arrays:")
        print("ZH:", ZH.shape)  # Check array dimensions instead of printing NaNs

    # Anchor the fields in the Swiss grid
    anchored_data = {
        'ZH': ZH,
        'rad_shear': rad_shear,
        'KDP': KDP,
        'easting': X,
        'northing': Y,
        'longitude': clons,
        'latitude': clats
    }



Filtered DataFrame:
                timestamp                                           geometry  \
65824 2019-06-15 15:40:00  POLYGON ((8.4515 45.9795, 8.4514 45.9705, 8.43...   
65825 2019-06-15 15:40:00  POLYGON ((7.3669 46.6047, 7.3669 46.5777, 7.35...   
65826 2019-06-15 15:40:00  POLYGON ((7.6538 46.5596, 7.6277 46.5416, 7.60...   
65827 2019-06-15 15:40:00  POLYGON ((8.356 46.4391, 8.3559 46.4301, 8.342...   
65828 2019-06-15 15:40:00  POLYGON ((8.1463 46.3237, 8.1201 46.3059, 8.11...   
65829 2019-06-15 15:40:00  POLYGON ((8.0921 46.1351, 8.0919 46.1171, 8.07...   
65830 2019-06-15 15:40:00  POLYGON ((8.1563 46.0987, 8.1562 46.0897, 8.14...   
65831 2019-06-15 15:40:00  POLYGON ((6.6971 45.9546, 6.6975 45.9187, 6.71...   
65832 2019-06-15 15:40:00  POLYGON ((5.2968 44.4882, 5.2845 44.479, 5.259...   
65833 2019-06-15 15:40:00  POLYGON ((7.947 45.839, 7.934 45.8301, 7.9212 ...   
65834 2019-06-15 15:40:00  POLYGON ((7.4965 45.8041, 7.4965 45.7862, 7.53...   
65835 2019-06-15 15:

In [41]:
from shapely.geometry import Point, Polygon

# Example: Iterate over polygons in filtered_df
for index, row in filtered_df.iterrows():
    polygon = row['geometry']  # Assuming 'geometry' contains shapely Polygon objects
    
    # Create a mask for grid points inside the polygon
    mask = np.array([
        polygon.contains(Point(lon, lat))
        for lon, lat in zip(clons.flatten(), clats.flatten())
    ]).reshape(clons.shape)  # Reshape mask to match grid dimensions
    
    # Extract values from ZH, rad_shear, KDP within the polygon
    ZH_in_polygon = ZH[mask]
    rad_shear_in_polygon = rad_shear[mask]
    KDP_in_polygon = KDP[mask]
    
    # Aggregate or process values as needed (e.g., mean, max)
    ZH_mean = np.nanmean(ZH_in_polygon)
    rad_shear_mean = np.nanmean(rad_shear_in_polygon)
    KDP_mean = np.nanmean(KDP_in_polygon)
    
    print(f"Polygon {index}: ZH_mean={ZH_mean}, rad_shear_mean={rad_shear_mean}, KDP_mean={KDP_mean}")


Polygon 65824: ZH_mean=33.42625899280576, rad_shear_mean=0.8936572488584474, KDP_mean=0.2168462150155302
Polygon 65825: ZH_mean=30.273853211009175, rad_shear_mean=1.264913480168181, KDP_mean=0.2577336938752685
Polygon 65826: ZH_mean=30.45323696565357, rad_shear_mean=1.4282164039450904, KDP_mean=0.36946686962852826
Polygon 65827: ZH_mean=32.52226093689508, rad_shear_mean=1.0485383174016945, KDP_mean=0.2083675434631018
Polygon 65828: ZH_mean=31.240995957368614, rad_shear_mean=1.0089569160997733, KDP_mean=0.2543439922548533
Polygon 65829: ZH_mean=29.94159511974695, rad_shear_mean=0.7412602045496238, KDP_mean=0.11283771152843185
Polygon 65830: ZH_mean=27.208668197474168, rad_shear_mean=0.5035538222582387, KDP_mean=0.06948033516390824
Polygon 65831: ZH_mean=24.098255569567044, rad_shear_mean=0.373651704940849, KDP_mean=0.28318756170629866
Polygon 65832: ZH_mean=nan, rad_shear_mean=nan, KDP_mean=nan
Polygon 65833: ZH_mean=28.680182599355533, rad_shear_mean=0.6422176839293054, KDP_mean=0.1436

In [47]:
import numpy as np
from shapely.geometry import Point
from scipy.ndimage import center_of_mass

def calculate_metrics(filtered_df, clons, clats, ZH, rad_shear, KDP):
    # Initialize lists to store results
    ZH_com_height = []
    ZH_percent_above_45 = []
    KDP_com_height = []
    KDP_percent_above_2 = []
    rad_shear_max = []
    rad_shear_percent_above_2 = []

    # Iterate over rows in filtered_df
    for index, row in filtered_df.iterrows():
        polygon = row['geometry']  # Shapely Polygon object
        
        # Create a mask for grid points inside the polygon (2D mask)
        mask_2d = np.array([
            polygon.contains(Point(lon, lat))
            for lon, lat in zip(clons.flatten(), clats.flatten())
        ]).reshape(clons.shape)  # Reshape mask to match grid dimensions
        
        # Extend the mask to 3D by repeating along the vertical dimension
        mask_3d = np.repeat(mask_2d[:, :, np.newaxis], ZH.shape[2], axis=2)
        
        # Extract values within the polygon for ZH, KDP, rad_shear
        ZH_in_polygon = ZH[mask_3d]
        KDP_in_polygon = KDP[mask_3d]
        rad_shear_in_polygon = rad_shear[mask_3d]
        
        # Calculate metrics for ZH
        if np.any(ZH_in_polygon > 0):  # Avoid division by zero
            com_ZH = center_of_mass(ZH * mask_3d)  # Center of mass height
            ZH_com_height.append(com_ZH[2])  # Use the vertical dimension (z-axis)
            ZH_percent_above_45.append(np.sum(ZH_in_polygon > 45) / np.size(ZH_in_polygon) * 100)
        else:
            ZH_com_height.append(np.nan)
            ZH_percent_above_45.append(0)
        
        # Calculate metrics for KDP
        if np.any(KDP_in_polygon > 0):  # Avoid division by zero
            com_KDP = center_of_mass(KDP * mask_3d)  # Center of mass height
            KDP_com_height.append(com_KDP[2])  # Use the vertical dimension (z-axis)
            KDP_percent_above_2.append(np.sum(KDP_in_polygon > 2) / np.size(KDP_in_polygon) * 100)
        else:
            KDP_com_height.append(np.nan)
            KDP_percent_above_2.append(0)
        
        # Calculate metrics for rad_shear
        if np.any(rad_shear_in_polygon > 0):  # Avoid division by zero
            rad_shear_max.append(np.nanmax(rad_shear_in_polygon))
            rad_shear_percent_above_2.append(np.sum(rad_shear_in_polygon > 2) / np.size(rad_shear_in_polygon) * 100)
        else:
            rad_shear_max.append(np.nan)
            rad_shear_percent_above_2.append(0)

    # Add results as new columns in filtered_df
    filtered_df['ZH_com_height'] = ZH_com_height
    filtered_df['ZH_percent_above_45'] = ZH_percent_above_45
    filtered_df['KDP_com_height'] = KDP_com_height
    filtered_df['KDP_percent_above_2'] = KDP_percent_above_2
    filtered_df['rad_shear_max'] = rad_shear_max
    filtered_df['rad_shear_percent_above_2'] = rad_shear_percent_above_2

    return filtered_df

# Example usage:
# Assuming clons, clats, ZH, rad_shear, KDP are already defined and filtered_df contains 'geometry' column.
filtered_df = calculate_metrics(filtered_df, clons, clats, ZH, rad_shear, KDP)

# Display updated DataFrame with new columns
print(filtered_df.head())


                timestamp                                           geometry  \
65824 2019-06-15 15:40:00  POLYGON ((8.4515 45.9795, 8.4514 45.9705, 8.43...   
65825 2019-06-15 15:40:00  POLYGON ((7.3669 46.6047, 7.3669 46.5777, 7.35...   
65826 2019-06-15 15:40:00  POLYGON ((7.6538 46.5596, 7.6277 46.5416, 7.60...   
65827 2019-06-15 15:40:00  POLYGON ((8.356 46.4391, 8.3559 46.4301, 8.342...   
65828 2019-06-15 15:40:00  POLYGON ((8.1463 46.3237, 8.1201 46.3059, 8.11...   

      CS Marker STA Marker ESWD Marker Gust_Flag             traj_ID  \
65824         0          0           0         -  2019061515350113.0   
65825         0          0           0         -  2019061514550083.0   
65826         0          0           0         -  2019061515300099.0   
65827         0          0           0         -  2019061515350107.0   
65828         0          0           0         -  2019061515200086.0   

               time     lon      lat  ... nrPOHthr070 nrPOHthr080 nrPOHthr090  \
65824

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['ZH_com_height'] = ZH_com_height
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['ZH_percent_above_45'] = ZH_percent_above_45
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['KDP_com_height'] = KDP_com_height
A value is trying to be set on a copy of a slice from a 

In [None]:
import numpy as np
from shapely.geometry import Point
from scipy.ndimage import center_of_mass

def calculate_metrics(filtered_df, clons, clats, ZH, rad_shear, KDP):
    # Initialize lists to store results
    ZH_com_height = []
    ZH_percent_above_45 = []
    KDP_com_height = []
    KDP_percent_above_2 = []
    rad_shear_max = []
    rad_shear_percent_above_2 = []

    # Iterate over rows in filtered_df
    for index, row in filtered_df.iterrows():
        polygon = row['geometry']  # Shapely Polygon object
        
        # Create a mask for grid points inside the polygon (2D mask)
        mask_2d = np.array([
            polygon.contains(Point(lon, lat))
            for lon, lat in zip(clons.flatten(), clats.flatten())
        ]).reshape(clons.shape)  # Reshape mask to match grid dimensions
        
        # Extend the mask to 3D by repeating along the vertical dimension
        mask_3d = np.repeat(mask_2d[:, :, np.newaxis], ZH.shape[2], axis=2)
        
        # Apply the mask directly to retain 3D structure
        ZH_masked = np.where(mask_3d, ZH, np.nan)  # Replace values outside the polygon with NaN
        KDP_masked = np.where(mask_3d, KDP, np.nan)  # Replace values outside the polygon with NaN
        rad_shear_masked = np.where(mask_3d, rad_shear, np.nan)  # Replace values outside the polygon with NaN
        
        # Replace NaN values with 0 for calculations
        ZH_masked[np.isnan(ZH_masked)] = 0
        KDP_masked[np.isnan(KDP_masked)] = 0
        rad_shear_masked[np.isnan(rad_shear_masked)] = 0
        
        # Calculate metrics for ZH
        if np.any(ZH_masked > 0):  # Check if there are valid values
            com_ZH = center_of_mass(ZH_masked)  # Center of mass height
            ZH_com_height.append(com_ZH[2])  # Use the vertical dimension (z-axis)
            ZH_percent_above_45.append(np.sum(ZH_masked > 45) / np.size(ZH_masked) * 100)
        else:
            ZH_com_height.append(np.nan)
            ZH_percent_above_45.append(0)
        
        # Calculate metrics for KDP
        if np.any(KDP_masked > 0):  # Check if there are valid values
            com_KDP = center_of_mass(KDP_masked)  # Center of mass height
            KDP_com_height.append(com_KDP[2])  # Use the vertical dimension (z-axis)
            KDP_percent_above_2.append(np.sum(KDP_masked > 2) / np.size(KDP_masked) * 100)
        else:
            KDP_com_height.append(np.nan)
            KDP_percent_above_2.append(0)
        
        # Calculate metrics for rad_shear
        if np.any(rad_shear_masked > 0):  # Check if there are valid values
            rad_shear_max.append(np.nanmax(rad_shear_masked))
            rad_shear_percent_above_2.append(np.sum(rad_shear_masked > 2) / np.size(rad_shear_masked) * 100)
        else:
            rad_shear_max.append(np.nan)
            rad_shear_percent_above_2.append(0)

    # Add results as new columns in filtered_df
    filtered_df['ZH_com_height'] = ZH_com_height
    filtered_df['ZH_percent_above_45'] = ZH_percent_above_45
    filtered_df['KDP_com_height'] = KDP_com_height
    filtered_df['KDP_percent_above_2'] = KDP_percent_above_2
    filtered_df['rad_shear_max'] = rad_shear_max
    filtered_df['rad_shear_percent_above_2'] = rad_shear_percent_above_2

    return filtered_df

# Example usage:
# Assuming clons, clats, ZH, rad_shear, KDP are already defined and filtered_df contains 'geometry' column.
filtered_df = calculate_metrics(filtered_df, clons, clats, ZH, rad_shear, KDP)

# Display updated DataFrame with new columns
print(filtered_df.head())


In [64]:
import numpy as np
from shapely.geometry import Point

def process_one_row(row, clons, clats, ZH, KDP):
    polygon = row['geometry']  # Shapely Polygon object
    
    # Step 1: Create a 2D mask for points inside the polygon
    mask_2d = np.array([
        polygon.contains(Point(lon, lat))
        for lon, lat in zip(clons.flatten(), clats.flatten())
    ]).reshape(clons.shape)  # Reshape mask to match grid dimensions
    
    print("2D Mask Shape:", mask_2d.shape)
    print("Number of points inside polygon:", np.sum(mask_2d))
    
    # Step 2: Extend the 2D mask to 3D
    mask_3d = np.repeat(mask_2d[:, :, np.newaxis], ZH.shape[2], axis=2)
    print("3D Mask Shape:", mask_3d.shape)
    
    # Step 3: Apply the mask directly to retain 3D structure
    ZH_masked = np.where(mask_3d, ZH, np.nan)  # Retain original shape with NaN outside the polygon
    KDP_masked = np.where(mask_3d, KDP, np.nan)  # Retain original shape with NaN outside the polygon
    
    print("ZH_masked shape:", ZH_masked.shape)
    print("KDP_masked shape:", KDP_masked.shape)
    
    # Step 4: Calculate center of mass for ZH (only within valid values)
    if np.any(~np.isnan(ZH_masked)):  # Check if there are any valid values
        total_mass_ZH = np.nansum(ZH_masked)
        if total_mass_ZH > 0:
            z_indices = np.arange(ZH.shape[2])  # Vertical indices (z-axis)
            com_ZH_height = np.nansum(np.nansum(ZH_masked, axis=(0, 1)) * z_indices) / total_mass_ZH
        else:
            com_ZH_height = None
    else:
        com_ZH_height = None
    
    print("ZH_com_height (manual):", com_ZH_height)
    
    # Step 5: Calculate center of mass for KDP (only within valid values)
    if np.any(~np.isnan(KDP_masked)):  # Check if there are any valid values
        total_mass_KDP = np.nansum(KDP_masked)
        if total_mass_KDP > 0:
            z_indices = np.arange(KDP.shape[2])  # Vertical indices (z-axis)
            com_KDP_height = np.nansum(np.nansum(KDP_masked, axis=(0, 1)) * z_indices) / total_mass_KDP
        else:
            com_KDP_height = None
    else:
        com_KDP_height = None
    
    print("KDP_com_height (manual):", com_KDP_height)

# Example usage:
# Assuming filtered_df contains 'geometry', clons, clats, ZH, and KDP are already defined.
row = filtered_df.iloc[0]  # Select one row for debugging
process_one_row(row, clons, clats, ZH, KDP)


2D Mask Shape: (640, 710)
Number of points inside polygon: 65
3D Mask Shape: (640, 710, 93)
ZH_masked shape: (640, 710, 93)
KDP_masked shape: (640, 710, 93)
ZH_com_height (manual): 18.861113032783734
KDP_com_height (manual): 10.932824382256758


In [70]:
import numpy as np
from shapely.geometry import Point
from scipy.ndimage import center_of_mass

def process_one_row(row, clons, clats, ZH, KDP):
    polygon = row['geometry']  # Shapely Polygon object
    
    # Step 1: Create a 2D mask for points inside the polygon
    mask_2d = np.array([
        polygon.contains(Point(lon, lat))
        for lon, lat in zip(clons.flatten(), clats.flatten())
    ]).reshape(clons.shape)  # Reshape mask to match grid dimensions
    
    print("2D Mask Shape:", mask_2d.shape)
    print("Number of points inside polygon:", np.sum(mask_2d))
    
    # Step 2: Extend the 2D mask to 3D
    mask_3d = np.repeat(mask_2d[:, :, np.newaxis], ZH.shape[2], axis=2)
    print("3D Mask Shape:", mask_3d.shape)
    
    # Step 3: Apply the mask directly to retain 3D structure
    ZH_masked = np.where(mask_3d, ZH, np.nan)  # Replace values outside the polygon with NaN
    KDP_masked = np.where(mask_3d, KDP, np.nan)  # Replace values outside the polygon with NaN
    
    print("ZH_masked shape:", ZH_masked.shape)
    print("KDP_masked shape:", KDP_masked.shape)
    
    # Debugging: Count NaN and non-NaN values in ZH_masked and KDP_masked
    nan_count_ZH = np.count_nonzero(np.isnan(ZH_masked))
    non_nan_count_ZH = np.count_nonzero(~np.isnan(ZH_masked))
    
    nan_count_KDP = np.count_nonzero(np.isnan(KDP_masked))
    non_nan_count_KDP = np.count_nonzero(~np.isnan(KDP_masked))
    
    print(f"ZH_masked - NaN count: {nan_count_ZH}, Non-NaN count: {non_nan_count_ZH}")
    print(f"KDP_masked - NaN count: {nan_count_KDP}, Non-NaN count: {non_nan_count_KDP}")
    
    # Step 4: Replace NaN values with 0
    ZH_masked[np.isnan(ZH_masked)] = 0
    KDP_masked[np.isnan(KDP_masked)] = 0
    
    print("After replacing NaNs:")
    nan_count_ZH_after = np.count_nonzero(np.isnan(ZH_masked))
    non_nan_count_ZH_after = np.count_nonzero(~np.isnan(ZH_masked))
    
    nan_count_KDP_after = np.count_nonzero(np.isnan(KDP_masked))
    non_nan_count_KDP_after = np.count_nonzero(~np.isnan(KDP_masked))
    
    print(f"ZH_masked - NaN count after: {nan_count_ZH_after}, Non-NaN count after: {non_nan_count_ZH_after}")
    print(f"KDP_masked - NaN count after: {nan_count_KDP_after}, Non-NaN count after: {non_nan_count_KDP_after}")
    
    # Step 5: Calculate center of mass using scipy.ndimage.center_of_mass
    try:
        com_ZH = center_of_mass(ZH_masked)
        ZH_com_height = com_ZH[2] if not np.isnan(com_ZH[2]) else None
    except Exception as e:
        print("Error calculating ZH center of mass:", e)
        ZH_com_height = None
    
    print("ZH_com_height (center_of_mass):", ZH_com_height)
    
    try:
        com_KDP = center_of_mass(KDP_masked)
        KDP_com_height = com_KDP[2] if not np.isnan(com_KDP[2]) else None
    except Exception as e:
        print("Error calculating KDP center of mass:", e)
        KDP_com_height = None
    
    print("KDP_com_height (center_of_mass):", KDP_com_height)

# Example usage:
# Assuming filtered_df contains 'geometry', clons, clats, ZH, and KDP are already defined.
row = filtered_df.iloc[0]  # Select one row for debugging
process_one_row(row, clons, clats, ZH, KDP)


2D Mask Shape: (640, 710)
Number of points inside polygon: 65
3D Mask Shape: (640, 710, 93)
ZH_masked shape: (640, 710, 93)
KDP_masked shape: (640, 710, 93)
ZH_masked - NaN count: 42257254, Non-NaN count: 1946
KDP_masked - NaN count: 42257052, Non-NaN count: 2148
After replacing NaNs:
ZH_masked - NaN count after: 0, Non-NaN count after: 42259200
KDP_masked - NaN count after: 0, Non-NaN count after: 42259200
ZH_com_height (center_of_mass): 18.861113032783734
KDP_com_height (center_of_mass): 10.93282438225676
