In [1]:
####################################
#ENVIRONMENT SETUP

In [2]:
#Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr

import sys; import os; import time; from datetime import timedelta
import pickle
import h5py

In [3]:
#MAIN DIRECTORIES
def GetDirectories():
    """Return the fixed directory layout used by this notebook.

    Returns:
        tuple: (project root, shared code directory under the project root,
        scratch directory, current working directory at call time).
    """
    projectRoot = '/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
    return (
        projectRoot,
        os.path.join(projectRoot, "Code/CodeFiles/"),
        '/mnt/lustre/koa/scratch/air673/',
        os.getcwd(),
    )

# Bind the four directory paths as notebook-level names used by later cells.
mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory = GetDirectories()

In [4]:
#IMPORT CLASSES (from the Variable_Calculation folder under mainCodeDirectory)
sys.path.append(os.path.join(mainCodeDirectory,"Variable_Calculation"))
from CLASSES_Variable_Calculation import ModelData_Class

In [5]:
####################################
#LOADING CLASSES

In [53]:
#instantiate the data-loading class for simulation 1
ModelData = ModelData_Class(
    mainDirectory,
    scratchDirectory,
    simulationNumber=1,
)

=== CM1 Data Summary ===
 Simulation #:   1
 Resolution:     1km
 Time step:      5min
 Vertical levels:34
 Parcels:        1e6
 Data file:      /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_1km_5min_34nz.nc
 Parcel file:    /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_pdata_1km_5min_1e6np.nc
 Time steps:     133



In [7]:
####################################
#MAKING OUTPUT DIRECTORY

In [8]:
#OUTPUT DIRECTORIES
# Root folder for the per-timestep files; created if it does not exist yet.
outputDirectory = os.path.join(
    mainDirectory,
    'Code',
    'OUTPUT',
    'Variable_Calculation',
    'TimeSplitModelData',
)
os.makedirs(outputDirectory, exist_ok=True)

#Data Output Directories
def MakeDataDirectories(outputDirectory,res,t_res,Nz_str):
    """Create and return the ModelData / ParcelData output folders for one run.

    Both folders live under ``<outputDirectory>/<res>_<t_res>_<Nz_str>nz`` and
    are created if missing (existing folders are left alone).

    Returns:
        tuple: (model-data directory, parcel-data directory).
    """
    runFolder = os.path.join(outputDirectory, f"{res}_{t_res}_{Nz_str}nz")
    createdDirectories = []
    for subFolder in ('ModelData', 'ParcelData'):
        target = os.path.join(runFolder, subFolder)
        os.makedirs(target, exist_ok=True)
        createdDirectories.append(target)

    return tuple(createdDirectories)

In [9]:
####################################
#FUNCTIONS

In [10]:
#LOADING DATA
def GetDataDirectories(simulationNumber):
    """Return the CM1 output file paths and naming tokens for a simulation.

    Reads the notebook-level ``mainDirectory`` and ``scratchDirectory`` names.

    Args:
        simulationNumber: 1, 2 or 3, selecting one of the configured runs.

    Returns:
        tuple: (dataDirectory, parcelDirectory, res, t_res, Np_str, Nz_str).
        Despite their names, the first two entries are full paths to the
        model-output and parcel-output ``.nc`` files, not folders.

    Raises:
        ValueError: if ``simulationNumber`` is not one of the configured runs.
    """
    if simulationNumber == 1:
        Directory=os.path.join(mainDirectory,'Model/cm1r20.3/run')
        res='1km'; t_res='5min'; Np_str='1e6'; Nz_str='34'
    elif simulationNumber == 2:
        Directory=scratchDirectory
        res='1km'; t_res='1min'; Np_str='50e6'; Nz_str='95'
    elif simulationNumber == 3:
        Directory=scratchDirectory
        res='250m'; t_res='1min'; Np_str='50e6'; Nz_str='95'
    else:
        # Without this branch the f-strings below would fail with an
        # UnboundLocalError that hides the real cause.
        raise ValueError(
            f"Unknown simulationNumber {simulationNumber!r}; expected 1, 2 or 3"
        )

    dataDirectory = os.path.join(Directory, f"cm1out_{res}_{t_res}_{Nz_str}nz.nc")
    parcelDirectory = os.path.join(Directory,f"cm1out_pdata_{res}_{t_res}_{Np_str}np.nc")
    return dataDirectory, parcelDirectory, res,t_res,Np_str,Nz_str

In [11]:
# def WriteTimesteps_NetCDF(dataNC, parcelNC, timeStrings, res,t_res,Np_str,Nz_str, outputDataDirectory,outputParcelDirectory):
        
#     for count,time in tqdm(enumerate(timeStrings), total=len(timeStrings), desc="Writing timesteps"):
#         dataT = dataNC.isel(time=count)
#         parcelT = parcelNC.isel(time=count)
    
#         #making filenames
#         outputDataFile = os.path.join(outputDataDirectory, f"cm1out_{res}_{t_res}_{Nz_str}nz_{time}.nc")
#         outputParcelFile = os.path.join(outputParcelDirectory, f"cm1out_pdata_{res}_{t_res}_{Np_str}np_{time}.nc")
        
#         dataT.to_netcdf(outputDataFile, engine="h5netcdf")
#         parcelT.to_netcdf(outputParcelFile, engine="h5netcdf")

# #EXAMPLE
# WriteTimesteps_NetCDF(dataNC, parcelNC, timeStrings, res, t_res, Np_str, Nz_str,outputDataDirectory,outputParcelDirectory)

In [49]:
def _WriteDataset_H5(dataset, outputFile):
    """Write every data variable of one dataset to a new HDF5 file.

    Only ``dataset.data_vars`` are written (coordinates are not). Values are
    stored as float32; each variable's ``attrs`` are copied onto its HDF5
    dataset. An existing file at ``outputFile`` is overwritten.
    """
    with h5py.File(outputFile, "w") as h5File:
        for var_name, da in dataset.data_vars.items():
            values = da.values
            # h5py rejects chunk/filter options (gzip included) on scalar
            # datasets, which is what a time-only variable becomes after a
            # single timestep is selected; compress only non-scalar data.
            storageOptions = {"compression": "gzip"} if np.ndim(values) > 0 else {}
            h5Dataset = h5File.create_dataset(
                var_name,
                data=values,
                dtype="float32",
                **storageOptions
            )
            # save the variable's metadata alongside its values
            for attr, val in da.attrs.items():
                h5Dataset.attrs[attr] = val


def WriteTimesteps_H5(dataNC, parcelNC, timeStrings, res, t_res, Np_str, Nz_str,
                      outputDataDirectory, outputParcelDirectory):
    """
    Writes each timestep of dataNC and parcelNC to separate .h5 files using h5py.

    For every entry of ``timeStrings`` (position ``i``), ``isel(time=i)`` is
    taken from both datasets and written to:
      - ``<outputDataDirectory>/cm1out_<res>_<t_res>_<Nz_str>nz_<time>.h5``
      - ``<outputParcelDirectory>/cm1out_pdata_<res>_<t_res>_<Np_str>np_<time>.h5``

    Existing files with those names are overwritten. Nothing is returned.
    """
    from tqdm import tqdm

    # The loop variable is deliberately not called `time`, so it cannot be
    # confused with the `time` module imported at the top of the notebook.
    for count, timeString in tqdm(enumerate(timeStrings), total=len(timeStrings), desc="Writing timesteps"):
        # Extract single timestep
        dataT = dataNC.isel(time=count)
        parcelT = parcelNC.isel(time=count)

        # Build file names
        outputDataFile = os.path.join(
            outputDataDirectory,
            f"cm1out_{res}_{t_res}_{Nz_str}nz_{timeString}.h5"
        )
        outputParcelFile = os.path.join(
            outputParcelDirectory,
            f"cm1out_pdata_{res}_{t_res}_{Np_str}np_{timeString}.h5"
        )

        # --- Write data timestep ---
        _WriteDataset_H5(dataT, outputDataFile)

        # --- Write parcel timestep ---
        _WriteDataset_H5(parcelT, outputParcelFile)

In [50]:
####################################
#RUNNING

In [51]:
#getting data
# Open both datasets first, then reduce the model data to the variables of interest.
dataNC = ModelData.OpenData()
parcelNC = ModelData.OpenParcel()
dataNC = ModelData.SubsetDataVars(dataNC)

#getting output directories
outputDataDirectory, outputParcelDirectory = MakeDataDirectories(
    outputDirectory,
    ModelData.res,
    ModelData.t_res,
    ModelData.Nz_str,
)

#running
WriteTimesteps_H5(
    dataNC=dataNC,
    parcelNC=parcelNC,
    timeStrings=ModelData.timeStrings,
    res=ModelData.res,
    t_res=ModelData.t_res,
    Np_str=ModelData.Np_str,
    Nz_str=ModelData.Nz_str,
    outputDataDirectory=outputDataDirectory,
    outputParcelDirectory=outputParcelDirectory,
)

Opened dataset: /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_1km_5min_34nz.nc
Opened dataset: /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_pdata_1km_5min_1e6np.nc


Writing timesteps:   2%|▏         | 2/133 [00:41<45:01, 20.62s/it]


In [None]:
####################################

In [None]:
# #TESTING EQUALITY
# testDir="/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/TimeSplitting/OUTPUT/1km_5min_34nz/ModelData/cm1out_1km_5min_34nz_0-25-00.h5"
# t=5
# with h5py.File(testDir, "r") as f:
#     for key in f.keys():
#         # Convert both to numpy arrays
#         varH5 = np.array(f[key][:])
#         varCDF = np.array(dataNC[key].isel(time=t))

#         # Check equality (element-wise, then reduce to True/False)
#         is_equal = np.all(varH5 == varCDF)

#         print(f"{key}: {'MATCH' if is_equal else 'DIFFERENT'}  "
#               f"(shape H5={varH5.shape}, CDF={varCDF.shape})")