In [1]:
#IMPORTING LIBRARIES
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr
import os; import time; from datetime import timedelta
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
import pickle
import h5py
from tqdm import tqdm

In [2]:
#MAIN DIRECTORIES
# Project root on the shared filesystem (the trailing slash is part of the value).
mainDirectory = '/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
# Per-user scratch space; GetDataDirectories reads simulations 2 and 3 from here.
scratchDirectory = '/home/air673/koa_scratch/'
# Folder holding this notebook, expressed relative to the project root so the
# long prefix is written only once.
codeDirectory = os.path.join(mainDirectory, 'Variable_Calculation/TimeSplitModelVariables')

In [3]:
#OUTPUT DIRECTORIES
# Root folder for everything this notebook writes: <codeDirectory>/OUTPUT.
# exist_ok=True keeps re-running this cell harmless once the folder exists.
outputDirectory=os.path.join(codeDirectory,'OUTPUT')
os.makedirs(outputDirectory, exist_ok=True)

#Data Output Directories
def MakeDataDirectories(outputDirectory,res,t_res,Nz_str):
    """
    Create (if needed) the two output folders for one simulation configuration.

    The folders are
        <outputDirectory>/<res>_<t_res>_<Nz_str>nz/ModelData
        <outputDirectory>/<res>_<t_res>_<Nz_str>nz/ParcelData

    Parameters
    ----------
    outputDirectory : str
        Parent folder under which the per-simulation folder is placed.
    res, t_res, Nz_str : str
        Tags interpolated into the per-simulation folder name.

    Returns
    -------
    tuple of str
        (outputDataDirectory, outputParcelDirectory)
    """
    runFolder = os.path.join(outputDirectory, f"{res}_{t_res}_{Nz_str}nz")

    createdFolders = []
    for leafName in ('ModelData', 'ParcelData'):
        folder = os.path.join(runFolder, leafName)
        # exist_ok=True: calling this again for the same simulation is a no-op.
        os.makedirs(folder, exist_ok=True)
        createdFolders.append(folder)

    outputDataDirectory, outputParcelDirectory = createdFolders
    return outputDataDirectory, outputParcelDirectory

In [4]:
########################################
#FUNCTIONS
########################################

In [5]:
#LOADING DATA
def GetDataDirectories(simulationNumber):
    """
    Return the input file paths and naming tags for one simulation.

    Parameters
    ----------
    simulationNumber : int
        Which simulation to use:
        1 -> 1km / 5min / 1e6 parcels / 34 levels, read from the model run
             folder under ``mainDirectory``;
        2 -> 1km / 1min / 50e6 parcels / 95 levels, read from ``scratchDirectory``;
        3 -> 250m / 1min / 50e6 parcels / 95 levels, read from ``scratchDirectory``.

    Returns
    -------
    tuple
        (dataDirectory, parcelDirectory, res, t_res, Np_str, Nz_str), where the
        first two entries are full paths to the model-output and parcel NetCDF
        files and the rest are the string tags used to build those file names.

    Raises
    ------
    ValueError
        If ``simulationNumber`` is not 1, 2, or 3. Without this check an
        unsupported number would fall through every branch and fail later with
        an UnboundLocalError on ``Directory``.
    """
    if simulationNumber == 1:
        Directory = os.path.join(mainDirectory, 'Model/cm1r20.3/run')
        res, t_res, Np_str, Nz_str = '1km', '5min', '1e6', '34'
    elif simulationNumber == 2:
        Directory = scratchDirectory
        res, t_res, Np_str, Nz_str = '1km', '1min', '50e6', '95'
    elif simulationNumber == 3:
        Directory = scratchDirectory
        res, t_res, Np_str, Nz_str = '250m', '1min', '50e6', '95'
    else:
        raise ValueError(
            f"Unknown simulationNumber {simulationNumber!r}; expected 1, 2, or 3."
        )

    dataDirectory = os.path.join(Directory, f"cm1out_{res}_{t_res}_{Nz_str}nz.nc")
    parcelDirectory = os.path.join(Directory, f"cm1out_pdata_{res}_{t_res}_{Np_str}np.nc")
    return dataDirectory, parcelDirectory, res, t_res, Np_str, Nz_str
    
def GetData(dataDirectory, parcelDirectory):
    """
    Open the model-output file and the parcel file with xarray.

    Both files are opened with ``decode_timedelta=True``; the model file is
    opened first, then the parcel file.

    Returns
    -------
    tuple
        (dataNC, parcelNC), in that order.
    """
    modelDataset, parcelDataset = (
        xr.open_dataset(ncPath, decode_timedelta=True)
        for ncPath in (dataDirectory, parcelDirectory)
    )
    return modelDataset, parcelDataset

def SubsetDataVars(dataNC):
    """
    Return ``dataNC`` restricted to the fixed list of variables kept for export.

    The selection is done with ``dataNC[<list of names>]``, so every listed
    name must be present in ``dataNC``.
    """
    variableGroups = (
        # surface, sounding-derived, thermodynamic, moisture and wind fields
        ("thflux", "qvflux", "tsk", "cape",
         "cin", "lcl", "lfc", "th",
         "prs", "rho", "qv", "qc",
         "qr", "qi", "qs", "qg",
         "buoyancy", "uinterp", "vinterp", "winterp"),
        # ptb_* terms (presumably the potential-temperature budget -- confirm against CM1 docs)
        ("ptb_hadv", "ptb_vadv", "ptb_hidiff", "ptb_vidiff",
         "ptb_hturb", "ptb_vturb", "ptb_mp", "ptb_rdamp",
         "ptb_rad", "ptb_div", "ptb_diss"),
        # qvb_* terms (presumably the water-vapor budget -- confirm against CM1 docs)
        ("qvb_hadv", "qvb_vadv", "qvb_hidiff", "qvb_vidiff",
         "qvb_hturb", "qvb_vturb", "qvb_mp"),
        # wb_* terms (presumably the vertical-velocity budget -- confirm against CM1 docs)
        ("wb_hadv", "wb_vadv", "wb_hidiff", "wb_vidiff",
         "wb_hturb", "wb_vturb", "wb_pgrad", "wb_rdamp", "wb_buoy"),
    )

    # Flatten to a plain list, preserving order, before indexing the dataset.
    selectedNames = [name for group in variableGroups for name in group]
    return dataNC[selectedNames]

In [6]:
def GetTimeStrings(times):
    """
    Convert an array of times into ``str(timedelta)`` labels such as "0:05:00".

    ``times`` is divided by 1e9 and each result is passed to
    ``timedelta(seconds=...)``, i.e. the input is treated as nanoseconds
    (assumed to be the decoded ``time`` coordinate -- confirm against caller).
    ``times`` must support array division, so a plain list will not work.

    Returns
    -------
    list of str
        One label per element of ``times``, in order.
    """
    scaledTimes = times / 1e9

    timeStrings = []
    for value in scaledTimes:
        elapsed = timedelta(seconds=float(value))
        timeStrings.append(str(elapsed))
    return timeStrings

In [7]:
# def WriteTimesteps_NetCDF(dataNC, parcelNC, timeStrings, res,t_res,Np_str,Nz_str, outputDataDirectory,outputParcelDirectory):
        
#     for count,time in tqdm(enumerate(timeStrings), total=len(timeStrings), desc="Writing timesteps"):
#         timeClean = time.replace(":","-")
#         dataT = dataNC.isel(time=count)
#         parcelT = parcelNC.isel(time=count)
    
#         #making filenames
#         outputDataFile = os.path.join(outputDataDirectory, f"cm1out_{res}_{t_res}_{Nz_str}nz_{timeClean}.nc")
#         outputParcelFile = os.path.join(outputParcelDirectory, f"cm1out_pdata_{res}_{t_res}_{Np_str}np_{timeClean}.nc")
        
#         dataT.to_netcdf(outputDataFile, engine="h5netcdf")
#         parcelT.to_netcdf(outputParcelFile, engine="h5netcdf")

# #EXAMPLE
# WriteTimesteps_NetCDF(dataNC, parcelNC, timeStrings, res, t_res, Np_str, Nz_str,outputDataDirectory,outputParcelDirectory)

In [8]:
def _WriteVariables_H5(datasetT, outputFile):
    """Write every data variable of one single-timestep dataset to ``outputFile``."""
    with h5py.File(outputFile, "w") as h5File:
        for varName, da in datasetT.data_vars.items():
            values = da.values
            # h5py rejects chunk/filter options (gzip included) on scalar
            # datasets, so compression is only requested for real arrays.
            filterOptions = {"compression": "gzip"} if np.ndim(values) > 0 else {}
            dset = h5File.create_dataset(
                varName,
                data=values,
                dtype="float32",
                **filterOptions
            )
            # Carry the variable's metadata over as HDF5 attributes.
            for attr, val in da.attrs.items():
                dset.attrs[attr] = val


def WriteTimesteps_H5(dataNC, parcelNC, timeStrings, res, t_res, Np_str, Nz_str,
                      outputDataDirectory, outputParcelDirectory):
    """
    Write each timestep of dataNC and parcelNC to its own pair of .h5 files.

    Mirrors the (commented-out) WriteTimesteps_NetCDF, but writes with h5py
    instead of xarray.to_netcdf. For timestep index ``i`` two files are made:

        <outputDataDirectory>/cm1out_<res>_<t_res>_<Nz_str>nz_<time>.h5
        <outputParcelDirectory>/cm1out_pdata_<res>_<t_res>_<Np_str>np_<time>.h5

    where <time> is ``timeStrings[i]`` with ":" replaced by "-". Existing files
    with the same name are overwritten (mode "w").

    Parameters
    ----------
    dataNC, parcelNC : datasets supporting ``isel(time=i)`` and ``data_vars``
        Both are indexed with the same ``i``, so parcelNC needs at least
        ``len(timeStrings)`` entries along ``time``.
    timeStrings : list of str
        One label per timestep; its length sets how many timesteps are written.
    res, t_res, Np_str, Nz_str : str
        Tags interpolated into the output file names.
    outputDataDirectory, outputParcelDirectory : str
        Existing folders that receive the model and parcel files respectively.

    Notes
    -----
    Only ``data_vars`` are written (coordinates are not), every variable is
    stored as float32, and array variables are gzip-compressed. Variables that
    are scalar after the time selection are stored uncompressed because HDF5
    scalar datasets cannot carry filters.
    """
    # The loop variable is deliberately not called `time`, to avoid shadowing
    # the imported `time` module.
    for count, timeString in tqdm(enumerate(timeStrings), total=len(timeStrings), desc="Writing timesteps"):
        timeClean = timeString.replace(":", "-")

        # Extract single timestep
        dataT = dataNC.isel(time=count)
        parcelT = parcelNC.isel(time=count)

        # Build file names (same stems as the NetCDF version)
        outputDataFile = os.path.join(
            outputDataDirectory,
            f"cm1out_{res}_{t_res}_{Nz_str}nz_{timeClean}.h5"
        )
        outputParcelFile = os.path.join(
            outputParcelDirectory,
            f"cm1out_pdata_{res}_{t_res}_{Np_str}np_{timeClean}.h5"
        )

        _WriteVariables_H5(dataT, outputDataFile)
        _WriteVariables_H5(parcelT, outputParcelFile)

In [9]:
# Resolve the input files and the output folders for the chosen simulation.
dataDirectory, parcelDirectory, res, t_res, Np_str, Nz_str = GetDataDirectories(simulationNumber=1)
outputDataDirectory, outputParcelDirectory = MakeDataDirectories(outputDirectory, res, t_res, Nz_str)

# Open both datasets, then keep only the model variables selected for export.
dataNC, parcelNC = GetData(dataDirectory, parcelDirectory)
dataNC = SubsetDataVars(dataNC)

# One label per model timestep; these labels end up in the output file names.
timeStrings = GetTimeStrings(times=dataNC['time'].values)

# Write one model file and one parcel file per timestep.
WriteTimesteps_H5(
    dataNC, parcelNC, timeStrings,
    res, t_res, Np_str, Nz_str,
    outputDataDirectory, outputParcelDirectory,
)

In [None]:
#TESTING
##################

In [None]:
# #TESTING EQUALITY
# testDir="/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/TimeSplitting/OUTPUT/1km_5min_34nz/ModelData/cm1out_1km_5min_34nz_0-25-00.h5"
# t=5
# with h5py.File(testDir, "r") as f:
#     for key in f.keys():
#         # Convert both to numpy arrays
#         varH5 = np.array(f[key][:])
#         varCDF = np.array(dataNC[key].isel(time=t))

#         # Check equality (element-wise, then reduce to True/False)
#         is_equal = np.all(varH5 == varCDF)

#         print(f"{key}: {'MATCH' if is_equal else 'DIFFERENT'}  "
#               f"(shape H5={varH5.shape}, CDF={varCDF.shape})")