In [None]:
####################################
#ENVIRONMENT SETUP

In [None]:
#Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr

import sys; import os; import time; from datetime import timedelta
import pickle
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
import h5py
from tqdm import tqdm

In [None]:
#MAIN DIRECTORIES
def GetDirectories():
    """
    Return the directory paths this notebook works from.

    Returns
    -------
    tuple of str
        (mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory):
        the project root, the "Code/CodeFiles/" folder beneath it, the
        scratch directory, and the current working directory at call time.
    """
    projectRoot = '/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
    scratchRoot = '/mnt/lustre/koa/scratch/air673/'
    return (
        projectRoot,
        os.path.join(projectRoot, "Code/CodeFiles/"),
        scratchRoot,
        os.getcwd(),
    )

mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory = GetDirectories()

In [None]:
#IMPORT CLASSES (from the shared 2_Variable_Calculation code directory)
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
from CLASSES_Variable_Calculation import ModelData_Class, SlurmJobArray_Class

In [107]:
####################################
#LOADING CLASSES

In [108]:
#data loading class
# Selects simulation 2; the summary shown in this cell's output reports it as
# the 1km / 1min / 95-level run with 661 time steps.
# NOTE(review): the summary appears to be printed by the constructor itself — confirm in ModelData_Class.
ModelData = ModelData_Class(mainDirectory, scratchDirectory, simulationNumber=2)

=== CM1 Data Summary ===
 Simulation #:   2
 Resolution:     1km
 Time step:      1min
 Vertical levels:95
 Parcels:        50e6
 Data file:      /mnt/lustre/koa/scratch/air673/cm1out_1km_1min_95nz.nc
 Parcel file:    /mnt/lustre/koa/scratch/air673/cm1out_pdata_1km_1min_50e6np.nc
 Time steps:     661



In [118]:
#JOB ARRAY SETUP
# Flag forwarded to SlurmJobArray_Class below.
# NOTE(review): presumably True makes the class pick this job's slice of
# timesteps from the Slurm array task and False runs everything — confirm in the class.
UsingJobArray=True

def GetNumJobs(res,t_res):
    """
    Look up how many job-array tasks the timesteps are split across.

    Parameters
    ----------
    res : str
        Horizontal resolution label, e.g. '1km' or '250m'.
    t_res : str
        Output time resolution label, e.g. '5min' or '1min'.

    Returns
    -------
    int
        Number of jobs configured for this (res, t_res) combination.

    Raises
    ------
    ValueError
        If the combination has no configured job count. Previously this
        case fell through every branch and failed with an opaque
        UnboundLocalError on the return statement.
    """
    jobs_by_config = {
        ('1km', '5min'): 20,
        ('1km', '1min'): 100,
        ('250m', '1min'): 500,
    }
    try:
        return jobs_by_config[(res, t_res)]
    except KeyError:
        supported = ", ".join(f"{r}/{t}" for r, t in jobs_by_config)
        raise ValueError(
            f"No job count configured for res={res!r}, t_res={t_res!r}; "
            f"supported combinations: {supported}"
        ) from None
# Size the job array for this simulation, then read off this job's slice bounds.
num_jobs = GetNumJobs(ModelData.res, ModelData.t_res)
SlurmJobArray = SlurmJobArray_Class(
    total_elements=ModelData.Ntime,
    num_jobs=num_jobs,
    UsingJobArray=UsingJobArray,
)
start_job = SlurmJobArray.start_job
end_job = SlurmJobArray.end_job

def GetNumElements():
    """
    Return the timestep indices handled by this job.

    Builds the index array 0..ModelData.Ntime-1 and slices it with the
    notebook-level start_job:end_job bounds.
    """
    all_timesteps = np.arange(ModelData.Ntime)
    return all_timesteps[start_job:end_job]

loop_elements = GetNumElements()

Running timesteps from 0:6 



In [None]:
####################################
#MAKING OUTPUT DIRECTORY

In [None]:
#OUTPUT DIRECTORIES
# Root folder for the per-timestep output; created here if it does not exist yet.
outputDirectory = os.path.join(
    mainDirectory,
    'Code', 'OUTPUT', 'Variable_Calculation', 'TimeSplitModelData',
)
os.makedirs(outputDirectory, exist_ok=True)

#Data Output Directories
def MakeDataDirectories(outputDirectory,res,t_res,Nz_str):
    """
    Create and return the 'ModelData' and 'ParcelData' output folders.

    Both folders live under `<outputDirectory>/<res>_<t_res>_<Nz_str>nz/`.
    Folders that already exist are left as they are.

    Returns
    -------
    tuple of str
        (outputDataDirectory, outputParcelDirectory)
    """
    simulationFolder = os.path.join(outputDirectory, f"{res}_{t_res}_{Nz_str}nz")

    createdDirectories = []
    for leafName in ('ModelData', 'ParcelData'):
        leafDirectory = os.path.join(simulationFolder, leafName)
        os.makedirs(leafDirectory, exist_ok=True)
        createdDirectories.append(leafDirectory)

    return tuple(createdDirectories)

In [None]:
####################################
#FUNCTIONS

In [None]:
#LOADING DATA
def GetDataDirectories(simulationNumber):
    """
    Build the model-output and parcel-output file paths for a simulation.

    Parameters
    ----------
    simulationNumber : int
        1, 2 or 3. Simulation 1 is read from 'Model/cm1r20.3/run' under the
        notebook-level mainDirectory; simulations 2 and 3 are read from the
        notebook-level scratchDirectory.

    Returns
    -------
    tuple
        (dataDirectory, parcelDirectory, res, t_res, Np_str, Nz_str), where
        the first two are full file paths and the rest are the label strings
        used to build those file names.

    Raises
    ------
    ValueError
        If simulationNumber is not 1, 2 or 3. Previously an unknown number
        left the local settings unassigned and failed with UnboundLocalError.
    """
    if simulationNumber == 1:
        Directory=os.path.join(mainDirectory,'Model/cm1r20.3/run')
        res='1km'; t_res='5min'; Np_str='1e6'; Nz_str='34'
    elif simulationNumber == 2:
        Directory=scratchDirectory
        res='1km'; t_res='1min'; Np_str='50e6'; Nz_str='95'
    elif simulationNumber == 3:
        Directory=scratchDirectory
        res='250m'; t_res='1min'; Np_str='50e6'; Nz_str='95'
    else:
        raise ValueError(
            f"Unknown simulationNumber {simulationNumber!r}; expected 1, 2 or 3"
        )

    dataDirectory = os.path.join(Directory, f"cm1out_{res}_{t_res}_{Nz_str}nz.nc")
    parcelDirectory = os.path.join(Directory,f"cm1out_pdata_{res}_{t_res}_{Np_str}np.nc")
    return dataDirectory, parcelDirectory, res,t_res,Np_str,Nz_str

In [None]:
def GetTimeString(timeStrings, t):
    """Return the entry of `timeStrings` at index `t`."""
    return timeStrings[t]

In [None]:
# def WriteTimesteps_H5_V1(dataNC, parcelNC, timeStrings, res, t_res, Np_str, Nz_str,
#                       outputDataDirectory, outputParcelDirectory):
#     """
#     Writes each timestep of dataNC and parcelNC to separate .h5 files,
#     mirroring WriteTimestepsNetCDF but using h5py instead of xarray.to_netcdf.
#     """
#     from tqdm import tqdm

#     for count, time in tqdm(enumerate(timeStrings), total=len(timeStrings), desc="Writing timesteps"):
#         # Extract single timestep
#         dataT = dataNC.isel(time=count)
#         parcelT = parcelNC.isel(time=count)

#         # Build file names (same as NetCDF version)
#         outputDataFile = os.path.join(
#             outputDataDirectory,
#             f"cm1out_{res}_{t_res}_{Nz_str}nz_{time}.h5"
#         )
#         outputParcelFile = os.path.join(
#             outputParcelDirectory,
#             f"cm1out_pdata_{res}_{t_res}_{Np_str}np_{time}.h5"
#         )

#         # --- Write data timestep ---
#         with h5py.File(outputDataFile, "w") as f_data:
#             for var_name, da in dataT.data_vars.items():
#                 f_data.create_dataset(
#                     var_name,
#                     data=da.values,
#                     dtype="float32",
#                     compression="gzip"
#                 )
#                 # optionally save metadata
#                 for attr, val in da.attrs.items():
#                     f_data[var_name].attrs[attr] = val
                    
#         # --- Write parcel timestep ---
#         with h5py.File(outputParcelFile, "w") as f_parcel:
#             for var_name, da in parcelT.data_vars.items():
#                 f_parcel.create_dataset(
#                     var_name,
#                     data=da.values,
#                     dtype="float32",
#                     compression="gzip"
#                 )
#                 for attr, val in da.attrs.items():
#                     f_parcel[var_name].attrs[attr] = val

In [None]:
def WriteTimesteps_H5_V2(dataNC, parcelNC, loop_elements,timeStrings, res, t_res, Np_str, Nz_str,
                         outputDataDirectory, outputParcelDirectory):
    """
    Write each selected timestep of dataNC and parcelNC to its own .h5 file.

    Parameters
    ----------
    dataNC, parcelNC
        Datasets that support `.isel(time=...)` and expose `.data_vars`
        (the model output and the parcel output respectively).
    loop_elements
        Iterable of integer time indices to write.
    timeStrings
        Sequence indexed by time index; the entry is embedded in the file names.
    res, t_res, Np_str, Nz_str : str
        Labels embedded in the output file names.
    outputDataDirectory, outputParcelDirectory : str
        Destination folders for the model and parcel files.

    Notes
    -----
    Existing files with the same name are overwritten (mode "w").
    """

    for loop_element in tqdm(loop_elements,total=len(loop_elements),desc="Writing timesteps"):
        # Extract single timestep
        dataT = dataNC.isel(time=loop_element)
        parcelT = parcelNC.isel(time=loop_element)

        # Time label used in both file names
        timeString = GetTimeString(timeStrings, loop_element)

        outputDataFile = os.path.join(
            outputDataDirectory,
            f"cm1out_{res}_{t_res}_{Nz_str}nz_{timeString}.h5"
        )
        outputParcelFile = os.path.join(
            outputParcelDirectory,
            f"cm1out_pdata_{res}_{t_res}_{Np_str}np_{timeString}.h5"
        )
        print(f"outputting to {outputDataFile}")

        # Same write routine for both outputs, so the two file layouts cannot drift apart.
        _WriteDatasetToH5(dataT, outputDataFile)
        _WriteDatasetToH5(parcelT, outputParcelFile)


def _WriteDatasetToH5(dataset, outputFile):
    """Write every data variable of `dataset`, with its attributes, to a new HDF5 file at `outputFile`."""
    with h5py.File(outputFile, "w", libver="latest") as h5File:
        for var_name, da in dataset.data_vars.items():
            dset = h5File.create_dataset(
                var_name,
                data=da.data,        # pass the underlying array directly (the author's noted speedup)
                dtype="float32",     # every variable is stored as float32
                compression=None     # written uncompressed
            )
            # Carry the variable's metadata over as HDF5 attributes.
            for attr, val in da.attrs.items():
                dset.attrs[attr] = val


In [None]:
####################################
#RUNNING

In [None]:
#getting data
# Open the model and parcel datasets, then reduce the model data to the variables of interest.
dataNC = ModelData.OpenData()
parcelNC = ModelData.OpenParcel()
dataNC = ModelData.SubsetDataVars(dataNC)

#getting output directories
outputDataDirectory, outputParcelDirectory = MakeDataDirectories(
    outputDirectory, ModelData.res, ModelData.t_res, ModelData.Nz_str
)

#running
WriteTimesteps_H5_V2(
    dataNC, parcelNC, loop_elements, ModelData.timeStrings,
    ModelData.res, ModelData.t_res, ModelData.Np_str, ModelData.Nz_str,
    outputDataDirectory, outputParcelDirectory,
)