In [1]:
####################################
#ENVIRONMENT SETUP

In [2]:
#Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr

import sys; import os; import time; from datetime import timedelta
import pickle
import h5py

In [3]:
#MAIN DIRECTORIES
def GetDirectories():
    mainDirectory='/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
    mainCodeDirectory=os.path.join(mainDirectory,"Code/CodeFiles/")
    scratchDirectory='/mnt/lustre/koa/scratch/air673/'
    codeDirectory=os.getcwd()
    return mainDirectory,mainCodeDirectory,scratchDirectory,codeDirectory

[mainDirectory,mainCodeDirectory,scratchDirectory,codeDirectory] = GetDirectories()

In [4]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
from CLASSES_Variable_Calculation import ModelData_Class, SlurmJobArray_Class, DataManager_Class

In [5]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"3_Project_Algorithms","1_Domain_Profiles"))
from CLASSES_DomainProfiles import DomainProfiles_Class

In [6]:
#IMPORT FUNCTIONS
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
import FUNCTIONS_Variable_Calculation
from FUNCTIONS_Variable_Calculation import *

In [27]:
#data loading class
ModelData = ModelData_Class(mainDirectory, scratchDirectory, simulationNumber=1)
#data manager class
DataManager = DataManager_Class(mainDirectory, scratchDirectory, ModelData.res, ModelData.t_res, ModelData.Nz_str,
                                ModelData.Np_str, dataType="Domain_Profiles", dataName="Domain_Profiles",
                                dtype='float32',codeSection = "Project_Algorithms")

=== CM1 Data Summary ===
 Simulation #:   1
 Resolution:     1km
 Time step:      5min
 Vertical levels:34
 Parcels:        1e6
 Data file:      /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_1km_5min_34nz.nc
 Parcel file:    /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_pdata_1km_5min_1e6np.nc
 Time steps:     133

=== DataManager Summary ===
 inputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData
 outputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Domain_Profiles
 inputDataDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData/1km_5min_34nz/ModelData
 inputParcelDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-P

In [28]:
#JOB ARRAY SETUP
UsingJobArray=True

def GetNumJobs(res):
    if res=='1km':
        num_jobs=20
    elif res=='250m': 
        num_jobs=100
    return num_jobs
num_jobs = GetNumJobs(ModelData.res)
SlurmJobArray = SlurmJobArray_Class(total_elements=ModelData.Ntime, num_jobs=num_jobs, UsingJobArray=UsingJobArray)
start_job = SlurmJobArray.start_job; end_job = SlurmJobArray.end_job

def GetNumElements():
    num_elements = np.arange(ModelData.Ntime)[start_job:end_job]
    return num_elements
num_elements = GetNumElements()

Running timesteps from 0:7 



In [29]:
##############################################
#DATA LOADING FUNCTIONS

In [30]:
#DOMAIN SUBSETTING 
#finding time subset
def GetTimeSubset(noSubset=False):
    if noSubset == True:
        t_start=0; t_end=ModelData.Ntime+1
        print(f't in {t_start}:{t_end}')
        tSubset = np.arange(t_start,t_end+1)
    else:
        dt=ModelData.time[1].item()/1e9 #seconds per timestep
        dhours=(dt/60**2) #hours per timestep
    
        #Finding Left Boundary
        start_hour=4 #10:00 am
        t_start=int(start_hour/dhours)
        
        #Finding Right Boundary
        end_hour=11 #5pm
        t_end=int(end_hour/dhours)+1
        
        #printing
        print(f't in {t_start}:{t_end}')
        tSubset = np.arange(t_start,t_end+1)
    return tSubset

# def GetZSubset(noSubset=True): #(not in use)
#     if noSubset == True:
#         zh_start=0; zh_end=ModelData.Nzh
#         zf_start=0; zf_end=ModelData.Nzf
#     else:
#         #Finding Boundarys
#         zhs=ModelData.zh
#         zh_start=0; zh_end=int(np.where(zhs>=19)[0][0])
#         zfs=ModelData.zf
#         zf_start=0; zf_end=int(np.where(zfs>=20)[0][0])

#     print(f'zh in {zh_start}:{zh_end}'+f', zf in {zf_start}:{zf_end}')
#     zhSubset = np.arange(zh_start,zh_end+1)
#     zfSubset = np.arange(zf_start,zf_end+1)
#     return zhSubset, zfSubset

# def GetYSubset(noSubset=True): #(not in use)

def GetXSubset(noSubset=False):

    xh = ModelData.xh - ModelData.xh[0] 
    xf = ModelData.xf - ModelData.xf[0]
    xh_max = xh[-1] #total physical length
    xf_max = xf[-1]

    if noSubset == True:
        xh_start=0; xh_end = xh_max+1
        xf_start=0; xf_end = xf_max+1
    else:
        #Finding Left Boundary
        ocean_percent=0.25
        left_to_coast=ModelData.xh[0]+(ModelData.xh[-1]-ModelData.xh[0])*ocean_percent
        xh_start=np.where(ModelData.xh>=left_to_coast)[0][0]
        xf_start=np.where(ModelData.xf>=left_to_coast)[0][0]
        
        #Finding Right Boundary
        right_fraction=80/100
        
        # Find index where physical location exceeds 80% of domain
        xh_end = np.where(xh > right_fraction * xh_max)[0][0]+1
        xf_end = np.where(xf > right_fraction * xf_max)[0][0]+1
           
    print(f'x in {xh_start}:{xh_end} (from coast to 80% of domain, ocean excluded)')
    xhSubset = slice(xh_start,xh_end+1)
    xfSubset = slice(xf_start,xf_end+1)
    return xhSubset, xfSubset

tSubset = GetTimeSubset(noSubset=False)
xSubset = GetXSubset(noSubset=False)

t in 48:133
x in 128:410 (from coast to 80% of domain, ocean excluded)


In [31]:
def CallVariables(ModelData, DataManager, timeString, varNames,zInterpolate):
    varDictionary = {}
    for varName in varNames:
        varDictionary[varName] = CallVariable(ModelData, DataManager, timeString, 
                                              variableName=varName, zInterpolate=zInterpolate)
    return varDictionary

def GetBinaryArrays(DataManager, timeString, zInterpolate):
    varNames = ['A_g', 'A_c']
    VARs = CallVariables(ModelData, DataManager, timeString, varNames, zInterpolate)
    return (VARs[k] for k in varNames)

In [32]:
####################################
#RUN SETUP

In [33]:
########################################
#getting system arguments
def GetArg_dataName(default="Variables"):
    """
    Safely retrieve dataName from sys.argv.
    #Run One: python Eulerian_Profiles.py Variables
    #Run Two: python Eulerian_Profiles.py Entrainment
    #Run Three: python Eulerian_Profiles.py W_Budget
    #Run Four: python Eulerian_Profiles.py QV_Budget
    #Run Five: python Eulerian_Profiles.py TH_Budget
    """
    # If run inside Jupyter, sys.argv will include ipykernel arguments
    if any("ipykernel_launcher" in arg for arg in sys.argv):
        print(f"Using default dataName: {default}")
        return default

    # If a user-specified argument exists, use it
    if len(sys.argv) > 1:
        out=sys.argv[1]
        print(f"Using argument dataName: {out}")
        return out

    return default

dataName = GetArg_dataName()

Using default dataName: Variables


In [34]:
#data variable list
def GetVarNames(dataName): 
    if dataName=="Variables":
        zInterpolate = None
        varNames = ['winterp', 'qv', 'qc', 'qi', 'qcqi', 'RH_vapor', 'theta_v', 'theta_e', 'MSE', 'HMC','VMF_g','VMF_c']
    if dataName == "Entrainment":
        zInterpolate = None
        varNames = ['Entrainment_g','Entrainment_c',
                    'TransferEntrainment_g',
                    'TransferEntrainment_c']
        varNames += ['Detrainment_g','Detrainment_c',
                     'TransferDetrainment_g',
                     'TransferDetrainment_c']
    elif dataName=="W_Budgets":
        zInterpolate = True
        varNames = ["wb_hadv", "wb_vadv", "wb_hidiff", "wb_vidiff", 
                    "wb_hturb", "wb_vturb", "wb_pgrad", "wb_buoy"]
    elif dataName=="QV_Budgets":
        zInterpolate = None
        varNames = ["qvb_hadv", "qvb_vadv", "qvb_hidiff", "qvb_vidiff", 
                    "qvb_hturb", "qvb_vturb", "qvb_mp"]
    elif dataName=="TH_Budgets":
        zInterpolate = None
        varNames = ["ptb_hadv", "ptb_vadv", "ptb_hidiff", "ptb_vidiff", 
                 "ptb_hturb", "ptb_vturb", "ptb_mp", "ptb_rad", "ptb_div", "ptb_diss"]
    return varNames, zInterpolate

In [35]:
########################################
#RUNNING FUNCTIONS

In [36]:
def SaveProfile(Dictionary, dataName, datatype, timeString, masked): 
    if masked == True:
        profileType = "DomainProfiles"
    elif masked == False:
        profileType = "UpdraftProfiles"
    
    fileName = f"{dataName}_{profileType}_{datatype}_{ModelData.res}_{ModelData.t_res}_{ModelData.Nz_str}nz_{timeString}.h5"
    filePath = os.path.join(DataManager.outputDataDirectory,fileName)
    

    with h5py.File(filePath, 'w') as f:
        for varName in Dictionary:
            varProfile = Dictionary[varName]
            f.create_dataset(f"{varName}_{datatype}_{timeString}", data=varProfile, compression="gzip")

    print(f"Saved output to {filePath}","\n")

In [37]:
########################################
#RUNNING

In [39]:
for t in num_elements:
    print(f"Processing timestep {t}/{ModelData.Ntime}")
    if t not in tSubset:
        print(f"Skipping time {t}. Outside of tSubset.")
        continue
    
    timeString = ModelData.timeStrings[t]
    
    # Get variable names and vertical interpolation info
    varNames, zInterpolate = GetVarNames(dataName)
    A_g, A_c = GetBinaryArrays(DataManager, timeString, zInterpolate)

    # Get variables
    VARs = CallVariables(ModelData, DataManager, timeString, varNames, zInterpolate=zInterpolate)

    # Domain Subsetting
    if xSubset is not None:
        print("Subsetting variables in x","\n")
        A_g = A_g[:, :, xSubset[0]]
        A_c = A_c[:, :, xSubset[0]]
        for var in VARs:
            VARs[var] = VARs[var][:, :, xSubset[0]]
    
    # Loop through masked types (general & cloudy)
    for datatype in ['general', 'cloudy']:
        
        # --- Case 1: masked = True (each data_type separately)
        Dictionary = DomainProfiles_Class.DomainProfile(VARs, datatype, A_g, A_c, ModelData, masked=True)
        SaveProfile(Dictionary, dataName, datatype=datatype, timeString=timeString, masked=True)

    # --- Case 2: masked = False (only run once, same for all)
    Dictionary = DomainProfiles_Class.DomainProfile(VARs, 'general', A_g, A_c, ModelData, masked=False)
    SaveProfile(Dictionary, dataName, datatype='all', timeString=timeString, masked=True)

Processing timestep 0/133
Skipping time 0. Outside of tSubset.


FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = '/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/CalculateMoreVariables/1km_5min_34nz/Eulerian_Binary_Array/Eulerian_Binary_Array_1km_5min_34nz_0-00-00.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
########################################
#COMBINING ALL AVERAGES

In [None]:
# ######################################### #*#*
# #RECOMBINE SEPERATE JOB_ARRAYsS AFTER
# recombine=False #KEEP FALSE WHEN JOB ARRAY IS RUNNING
# # recombine=True

In [None]:
# def Recombine(num_jobs,budget_type):
#     dir2=dir+'Project_Algorithms/Domain_Profiles/'
#     print(f"Currently Working on {budget_type} Budgets")
    
#     data_types=['general','cloudy']
#     for data_type in data_types:
#         #MAKING OUTPUT FILE PATH
#         if data_type == "general":
#             output_file = dir2+f'OUTPUT/{budget_type}_BUDGET_general_eulerian_profiles_{res}_{t_res}_{Np_str}.h5'
#         elif data_type == "cloudy":
#             output_file = dir2+f'OUTPUT/{budget_type}_BUDGET_cloudy_eulerian_profiles_{res}_{t_res}_{Np_str}.h5'

#         def GetInputFile(data_type,job_id):
#             if data_type == "general":
#                 input_file = dir2+f'SBATCH/job_out/{budget_type}_BUDGET_general_eulerian_profiles_{res}_{t_res}_{Np_str}_{job_id}.h5'
#             elif data_type == "cloudy":
#                 input_file = dir2+f'SBATCH/job_out/{budget_type}_BUDGET_cloudy_eulerian_profiles_{res}_{t_res}_{Np_str}_{job_id}.h5'
#             return input_file
        
#         #MAKING PROFILES DICTIONARY
#         zhs = data1['zh'].values
#         profiles = {}  #  Store profiles for all variables
#         input_file=GetInputFile(data_type,job_id=1)
#         with h5py.File(input_file, 'r') as f:
#             var_names=list(f.keys())
#         for var in var_names:
#             profiles[var] = np.zeros((len(zhs), 3))  # column 1: var, column 2: counter, column 3: list of zhs
#             profiles[var][:, 2] = zhs 

#         for job_id in np.arange(1,num_jobs+1):
#             if np.mod(job_id,10)==0: print(f"job_id = {job_id}")
#             #CALLING IN DATA
#             input_file=GetInputFile(data_type,job_id)
    
#             #COMPILING PROFILES
#             with h5py.File(input_file, 'r') as f:
#                 for var in var_names:  
#                     profiles[var][:,0:1+1]+=f[f'{var}'][:,0:1+1]
        
#         #SAVING INTO FINAL FORM
#         with h5py.File(output_file, 'w') as f:
#             for var in profiles:
#                 profile_var = profiles[var]
#                 f.create_dataset(f'{var}', data=profile_var, compression="gzip")

In [None]:
# if recombine==True:
#     # Recombine(num_jobs=num_jobs,budget_type='WQVTH')
#     Recombine(num_jobs=num_jobs,budget_type='W')
#     Recombine(num_jobs=num_jobs,budget_type='QV')
#     Recombine(num_jobs=num_jobs,budget_type='TH')

In [None]:
####################################
#TESTING

In [None]:
#TESTING mass continuity using rho*w

In [None]:
# a=VARs["winterp"]
# b=VARs["rho"]

# c=a*b
# plt.plot(np.mean(c,axis=(1,2)),ModelData.zh)
# plt.ylabel("z (km)");plt.xlabel("rho*w (kg/m^3 * m/s)")
# plt.title("Average Vertical Profile")

In [None]:
# d=c[0:1000]
# plt.scatter(d.flatten(),np.arange(len(d.flatten())))
# plt.ylabel('count');plt.xlabel("rho*w (kg/m^3 * m/s)")
# plt.title("Histogram of 1000 Values")

In [None]:
# import numpy as np
# import matplotlib.pyplot as plt

# # Load or compute your variables
# a = VARs["winterp"]    # shape (34, 200, 512)
# b = VARs["rho"]        # shape (34, 200, 512)

# # Compute rho * w
# c = a * b              # shape (34, 200, 512)
# z = ModelData.zh       # vertical levels (length 34)

# # Flatten arrays
# c_flat = c.flatten()
# z_index = np.repeat(z, c.shape[1] * c.shape[2])

# # Sample 1000 random points to reduce density
# sample_size = 1000
# sample_idx = np.random.choice(len(c_flat), size=sample_size, replace=False)

# c_sample = c_flat[sample_idx]
# z_sample = z_index[sample_idx]

# # Plot 2D histogram (rho*w vs. height)
# plt.figure(figsize=(7, 5))
# plt.hist2d(
#     c_sample, z_sample,
#     bins=(20, len(z)),
#     range=[[-0.01, 0.01], [z.min(), z.max()]],
#     cmap='viridis'
# )

# plt.colorbar(label="Counts")
# plt.xlabel("ρw (kg m⁻² s⁻¹)")
# plt.ylabel("Height (m)")
# plt.title("2D Histogram of ρw vs. Height (sampled 1000 points)")
# plt.show()
