In [1]:
import pandas as pd
import numpy as np
import xarray as xr
import os

#  Validate HW definition (daily_grid_hw.nc)

In [2]:
# Open the daily gridded HW summary file (daily_grid_hw.nc) for this case run.
ds_daily_hw = xr.open_dataset(
    '/Trex/case_results/i.e215.I2000Clm50SpGs.hw_production.02/research_results/summary/daily_grid_hw.nc'
)

In [3]:
# Print a summary of the dataset's dimensions, variables and attributes.
ds_daily_hw.info()

In [4]:
# Open the companion file that holds the `location_ID` variable queried below.
location_ID_ds = xr.open_dataset(
    '/Trex/case_results/i.e215.I2000Clm50SpGs.hw_production.02/research_results/summary/location_IDs.nc'
)

In [5]:
# Print a summary of the location-ID dataset's dimensions, variables and attributes.
location_ID_ds.info()

In [6]:
# Boolean mask over the grid: True wherever location_ID equals the target ID (35793)
location_mask = location_ID_ds['location_ID'] == 35793
# Report when no grid cell carries that ID
if not location_mask.any():
    print("Location ID not found.")

In [7]:
# Exactly one matching grid cell is required before coordinates are extracted:
#   * with no match, the `[0]` indexing below would die with a bare IndexError;
#   * with several matches, `where(..., drop=True)` keeps the bounding box of
#     all matches, so lat[0]/lon[0] need not be a matching cell at all.
n_matches = int(location_mask.sum())
if n_matches == 0:
    raise ValueError("Location ID not found.")
if n_matches > 1:
    raise ValueError(
        f"Location ID is not unique: {n_matches} grid cells match; "
        "cannot pick a single lat/lon."
    )

# Reduce the mask to the single matching cell
location = location_mask.where(location_mask, drop=True)

# Use the extracted coordinates to select data
lat_coord = location.lat.values[0]
lon_coord = location.lon.values[0]

# Extract the HW timeseries data for the identified lat and lon.
# method="nearest" tolerates small coordinate differences between the two files.
hw_timeseries = ds_daily_hw.HW.sel(lat=lat_coord, lon=lon_coord, method="nearest")

In [8]:
# Keep only the time steps at which HW has a value (NaN entries are dropped along time)
hw_timeseries = hw_timeseries.dropna('time')
print(hw_timeseries)

In [9]:
# Print only the non-null dates
print("Dates with non-null HW values:")
# One date per line, in the order stored on the time coordinate
for hw_date in hw_timeseries['time'].values:
    print(hw_date)

#  Validate HW data in Zarr files

In [82]:
# Open the Zarr store lazily; chunks='auto' lets dask choose the chunk sizes.
ds_zarr = xr.open_zarr(
    '/Trex/case_results/i.e215.I2000Clm50SpGs.hw_production.02/research_results/zarr/HW',
    chunks='auto',
)

In [70]:
# HW series at the grid cell nearest to the reference coordinates,
# restricted to the time steps that actually hold a value
hw_timeseries_zarr = (
    ds_zarr['HW']
    .sel(lat=lat_coord, lon=lon_coord, method="nearest")
    .dropna(dim='time')
)
print(hw_timeseries_zarr)
# List each remaining date on its own line
print("Dates with non-null HW values:")
for zarr_date in hw_timeseries_zarr['time'].values:
    print(zarr_date)

In [76]:
# Load TSA_U and TSA_R for every time step in June 1987 into memory
ds_month = ds_zarr[['TSA_U', 'TSA_R']].sel(time='1987-06').compute()
print(ds_month)

In [72]:

# Flatten the month into a DataFrame indexed by (time, lat, lon); NaN rows are kept here
df_month = ds_month.to_dataframe(dim_order=['time', 'lat', 'lon'])

In [73]:
# Display the rows without any NaN. The result is not assigned, so df_month itself is unchanged.
df_month.dropna()

In [51]:
# Move the index levels into ordinary columns so they can be filtered by value.
# Guarded so that re-running this cell is a no-op: an unconditional second
# reset_index() would insert a spurious "index" column.
if any(level_name is not None for level_name in df_month.index.names):
    df_month = df_month.reset_index()
df_month

In [42]:

# Rows belonging to the reference grid cell. Coordinates are floats that come
# from a different file than this frame, so compare with a tolerance rather
# than exact equality (which silently returns an empty frame on any
# float32/float64 rounding difference).
df_month[
    np.isclose(df_month['lat'], lat_coord)
    & np.isclose(df_month['lon'], lon_coord)
]

In [83]:
# Load the dataset (xarray/numpy are already imported in the first cell)
ds_zarr = xr.open_zarr('/Trex/case_results/i.e215.I2000Clm50SpGs.hw_production.02/research_results/zarr/HW', chunks='auto')

# Select HW and TSA_U at the nearest coordinates
hw_timeseries_zarr = ds_zarr.HW.sel(lat=lat_coord, lon=lon_coord, method="nearest")
tsa_u_timeseries_zarr = ds_zarr.TSA_U.sel(lat=lat_coord, lon=lon_coord, method="nearest")

# Analysis window (both ends inclusive).
# NOTE: the `_may87` suffix on the names below is historical; they hold
# whatever window start_date/end_date describe, not May 1987.
start_date = '1985-06-01'
end_date = '1987-06-30'

# Materialise each windowed series once, so nothing below re-reads the store
hw_timeseries_may87 = hw_timeseries_zarr.sel(time=slice(start_date, end_date)).compute()
tsa_u_timeseries_may87 = tsa_u_timeseries_zarr.sel(time=slice(start_date, end_date)).compute()

# Drop NaN values from both series and keep their time stamps
hw_valid_may87 = hw_timeseries_may87.dropna(dim='time')
tsa_u_valid_may87 = tsa_u_timeseries_may87.dropna(dim='time')
hw_valid_times_may87 = hw_valid_may87.time.values
tsa_u_valid_times_may87 = tsa_u_valid_may87.time.values

# Time stamps at which both HW and TSA_U have a value (sorted, unique)
valid_times_may87 = np.intersect1d(hw_valid_times_may87, tsa_u_valid_times_may87)

# Pull all matching values in one selection per variable instead of one
# .sel(...).values round trip per time stamp
hw_values = hw_valid_may87.sel(time=valid_times_may87).values
tsa_u_values = tsa_u_valid_may87.sel(time=valid_times_may87).values

# The header is derived from the window so it cannot drift from the dates again
print(f"Dates and values for non-null HW and TSA_U from {start_date} to {end_date}:")
for timestamp, hw_value, tsa_u_value in zip(valid_times_may87, hw_values, tsa_u_values):
    print(f"Date: {timestamp}, HW: {hw_value}, TSA_U: {tsa_u_value}")


In [79]:
# Quick visual sanity check: map of TSA_U at the first time step in the store
ds_zarr['TSA_U'].isel(time=0).plot()

#  Validate HW data in Parquet files

In [27]:
# Read only the columns needed for the HW check from the 1985 Parquet file
df_parquet = pd.read_parquet(
    '/Trex/case_results/i.e215.I2000Clm50SpGs.hw_production.02/research_results/parquet/ALL_HW_1985.parquet',
    columns=['lat', 'lon', 'time', 'HW'],
)

In [22]:
# Convert the frame to an xarray Dataset so it can be queried like the other sources.
# NOTE(review): from_dataframe builds dimensions from the frame's index, so the
# later .sel(lat=..., lon=...) presumably relies on lat/lon/time being index
# levels at this point (i.e. before any reset_index()) — confirm.
ds_parquet = xr.Dataset.from_dataframe(df_parquet)

In [23]:
# HW series at the grid cell nearest to the reference coordinates,
# with the time steps lacking a value removed
hw_timeseries_parquet = (
    ds_parquet['HW']
    .sel(lat=lat_coord, lon=lon_coord, method="nearest")
    .dropna(dim='time')
)
print(hw_timeseries_parquet)
# List each remaining date on its own line
print("Dates with non-null HW values:")
for parquet_date in hw_timeseries_parquet['time'].values:
    print(parquet_date)

In [28]:
# Move the index levels into ordinary columns so they can be filtered by value.
# Guarded so that re-running this cell is a no-op: an unconditional second
# reset_index() would insert a spurious "index" column.
if any(level_name is not None for level_name in df_parquet.index.names):
    df_parquet = df_parquet.reset_index()
df_parquet

In [29]:

# Rows belonging to the reference grid cell. Coordinates are floats that come
# from a different file than this frame, so compare with a tolerance rather
# than exact equality (which silently returns an empty frame on any
# float32/float64 rounding difference).
filtered_df = df_parquet[
    np.isclose(df_parquet['lat'], lat_coord)
    & np.isclose(df_parquet['lon'], lon_coord)
]
filtered_df