In [113]:
# Name of the profile product computed by this notebook; it is passed to
# DataManager_Class and to SaveProfile/LoadProfile in the cells below.
dataName = "UpdraftArea"

In [114]:
####################################
#ENVIRONMENT SETUP

In [115]:
#Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr

import sys; import os; import time; from datetime import timedelta
import pickle
import h5py

In [116]:
#MAIN DIRECTORIES
def GetDirectories():
    """Return the directory paths used throughout the notebook.

    Returns
    -------
    tuple of str
        ``(mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory)``
        where ``mainCodeDirectory`` is ``Code/CodeFiles/`` under the main
        directory and ``codeDirectory`` is the current working directory.
    """
    project_root = '/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
    return (
        project_root,
        os.path.join(project_root, "Code/CodeFiles/"),
        '/mnt/lustre/koa/scratch/air673/',
        os.getcwd(),
    )

# Unpack the four paths into module-level names; mainCodeDirectory is used for
# sys.path below, mainDirectory/scratchDirectory are passed to the data classes.
[mainDirectory,mainCodeDirectory,scratchDirectory,codeDirectory] = GetDirectories()

In [117]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
from CLASSES_Variable_Calculation import ModelData_Class, SlurmJobArray_Class, DataManager_Class

In [118]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"3_Project_Algorithms","1_Domain_Profiles"))
from CLASSES_DomainProfiles import DomainProfiles_Class, DomainProfiles_DataLoading_Class

In [119]:
#IMPORT FUNCTIONS
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
import FUNCTIONS_Variable_Calculation
from FUNCTIONS_Variable_Calculation import *

In [None]:
# ============================================================
# SlurmJobArray_Class
# ============================================================

import os
import numpy as np

class SlurmJobArray_Class:
    """Split ``total_elements`` work items into contiguous chunks, one per job.

    Parameters
    ----------
    total_elements : int
        Number of items (here: timesteps) to distribute.
    num_jobs : int
        Number of jobs in the array.
    UsingJobArray : bool
        If truthy, this instance is assigned only the chunk belonging to its
        job id. If falsy, it is assigned the full range ``0:total_elements``.
    custom_job_id : int, optional
        1-based job id to use instead of the ``SLURM_ARRAY_TASK_ID``
        environment variable.

    Attributes
    ----------
    job_id : int
        1-based id of this job.
    job_range : int
        Base chunk size (``total_elements // num_jobs``).
    remaining : int
        Leftover items (``total_elements % num_jobs``); the first
        ``remaining`` jobs each take one extra item.
    start_job, end_job : int
        Half-open index range ``[start_job, end_job)`` assigned to this job.
    """

    def __init__(self, total_elements, 
                 num_jobs, UsingJobArray,
                 custom_job_id=None):
        self.total_elements = total_elements
        self.num_jobs = num_jobs
        self.UsingJobArray = UsingJobArray

        if custom_job_id is None:
            # Outside Slurm the variable is unset; an unset or zero task id
            # both fall back to job 1.
            self.job_id = int(os.environ.get('SLURM_ARRAY_TASK_ID', 0)) or 1
        else:
            self.job_id = custom_job_id

        # Precompute chunking information
        self.job_range = total_elements // num_jobs
        self.remaining = total_elements % num_jobs

        # Compute the index range for this job
        self.start_job, self.end_job = self._get_job_range(self.job_id)

        # Print summary
        self.Summary()

    # ------------------------------------------------------------
    def _get_job_range(self, job_id):
        """Return ``(start, end)`` (end exclusive) for the 1-based ``job_id``.

        When ``UsingJobArray`` is falsy the full range is returned regardless
        of ``job_id``. Any falsy value (including ``None``) selects this
        branch, so the method can no longer fall through both branches and
        fail with an unbound local variable.
        """
        if not self.UsingJobArray:
            return 0, self.total_elements

        index = job_id - 1  # convert to 0-based
        # The first `remaining` jobs each absorb one leftover element.
        start_job = index * self.job_range + min(index, self.remaining)
        end_job = start_job + self.job_range + (1 if index < self.remaining else 0)
        if index == self.num_jobs - 1:
            end_job = self.total_elements
        return start_job, end_job

    # ------------------------------------------------------------
    def TESTING(self):
        """Print start/end for all jobs to verify chunking logic."""
        start, end = [], []
        for job_id in range(1, self.num_jobs + 1):
            s, e = self._get_job_range(job_id)
            print(f"Job {job_id}: {s} → {e}")
            start.append(s)
            end.append(e)
        print("Unique starts:", len(np.unique(start)) == len(start))
        print("Unique ends:", len(np.unique(end)) == len(end))
        print("No zero-length ranges:", np.all(np.array(start) != np.array(end)))

    def Summary(self):
        """Print the inclusive range of indices assigned to this job."""
        print(f"Running timesteps from {self.start_job}:{self.end_job-1}","\n")

In [None]:
#data loading class
# Model data object for simulation 1; later cells read its res, t_res, Nz_str,
# Np_str, Ntime, time, timeStrings, xh/xf, zh, Nzh, dx and dy attributes.
ModelData = ModelData_Class(mainDirectory, scratchDirectory, simulationNumber=1)
# Data manager configured for the "Domain_Profiles" output of `dataName`
# within the "Project_Algorithms" code section.
DataManager = DataManager_Class(mainDirectory, scratchDirectory, ModelData.res, ModelData.t_res, ModelData.Nz_str,
                                ModelData.Np_str, dataType="Domain_Profiles", dataName=dataName,
                                dtype='float32',codeSection = "Project_Algorithms")

In [None]:
#JOB ARRAY SETUP
# True: SlurmJobArray_Class assigns this task only its own chunk of timesteps.
# False: the full range 0:Ntime is assigned.
UsingJobArray=True

def GetNumJobs(res):
    """Return the number of Slurm array jobs to use for a model resolution.

    Parameters
    ----------
    res : str
        Resolution label; ``'1km'`` and ``'250m'`` are supported.

    Returns
    -------
    int
        20 for ``'1km'``, 100 for ``'250m'``.

    Raises
    ------
    ValueError
        If ``res`` is not a supported resolution (previously this fell through
        both branches and failed with an unbound local variable).
    """
    if res=='1km':
        num_jobs=20
    elif res=='250m': 
        num_jobs=100
    else:
        raise ValueError(f"Unknown resolution '{res}'; must be '1km' or '250m'.")
    return num_jobs
# Number of array jobs depends on the model resolution.
num_jobs = GetNumJobs(ModelData.res)
# Distribute the Ntime timesteps across the jobs; this instance holds the chunk for the current job.
SlurmJobArray = SlurmJobArray_Class(total_elements=ModelData.Ntime, num_jobs=num_jobs, UsingJobArray=UsingJobArray)
# Half-open timestep range [start_job, end_job) handled by this job.
start_job = SlurmJobArray.start_job; end_job = SlurmJobArray.end_job

def GetNumElements():
    """Return the timestep indices assigned to this job.

    Reads the module-level ``ModelData.Ntime``, ``start_job`` and ``end_job``
    and returns the ``[start_job:end_job]`` slice of ``0..Ntime-1``.
    """
    all_timesteps = np.arange(ModelData.Ntime)
    return all_timesteps[start_job:end_job]
# Despite the name, this is an array of timestep indices (not a count); the main loop iterates over it.
num_elements = GetNumElements()

In [None]:
##############################################
#DATA LOADING FUNCTIONS

In [None]:
#DOMAIN SUBSETTING 
#finding time subset
def GetTimeSubset(noSubset=False):
    """Return the array of timestep indices to include in the time average.

    Parameters
    ----------
    noSubset : bool
        If ``True``, the range starts at 0 and extends to ``Ntime+1``
        (inclusive). Otherwise the range spans hours 4 through 11, converted
        to timestep indices.

    Returns
    -------
    numpy.ndarray
        ``np.arange(t_start, t_end + 1)``. The chosen bounds are also printed.
    """
    if noSubset == True:
        # NOTE(review): the resulting array runs past the last valid timestep
        # index (Ntime-1); harmless for membership tests, confirm if indexed.
        t_start, t_end = 0, ModelData.Ntime+1
    else:
        # ModelData.time[1] divided by 1e9 gives seconds per timestep
        # (presumably time is stored in nanoseconds from 0 -- TODO confirm).
        seconds_per_step = ModelData.time[1].item()/1e9
        hours_per_step = seconds_per_step/60**2

        # Window bounds in hours; the original author labels hour 4 as
        # "10:00 am" and hour 11 as "5pm".
        start_hour = 4
        end_hour = 11
        t_start = int(start_hour/hours_per_step)
        t_end = int(end_hour/hours_per_step)+1

    print(f't in {t_start}:{t_end}')
    return np.arange(t_start,t_end+1)

# def GetZSubset(noSubset=True): #(not in use)
#     if noSubset == True:
#         zh_start=0; zh_end=ModelData.Nzh
#         zf_start=0; zf_end=ModelData.Nzf
#     else:
#         #Finding Boundarys
#         zhs=ModelData.zh
#         zh_start=0; zh_end=int(np.where(zhs>=19)[0][0])
#         zfs=ModelData.zf
#         zf_start=0; zf_end=int(np.where(zfs>=20)[0][0])

#     print(f'zh in {zh_start}:{zh_end}'+f', zf in {zf_start}:{zf_end}')
#     zhSubset = np.arange(zh_start,zh_end+1)
#     zfSubset = np.arange(zf_start,zf_end+1)
#     return zhSubset, zfSubset

# def GetYSubset(noSubset=True): #(not in use)

def GetXSubset(noSubset=False):
    """Return index slices selecting the x-range to analyse.

    Parameters
    ----------
    noSubset : bool
        If ``True``, the slices cover every index of ``ModelData.xh`` /
        ``ModelData.xf``. Otherwise the slices start at the first grid point
        at or beyond 25% of the domain width and end shortly after the first
        grid point beyond 80% of the domain width.

    Returns
    -------
    (slice, slice)
        ``(xhSubset, xfSubset)`` for the ``xh`` and ``xf`` coordinate arrays.

    Notes
    -----
    The ``noSubset=True`` branch previously used the physical domain length
    (``xh[-1] + 1``) as a slice bound instead of an array index; it now uses
    the last valid index of each coordinate array.
    """
    # Coordinates shifted so the domain starts at zero.
    xh = ModelData.xh - ModelData.xh[0] 
    xf = ModelData.xf - ModelData.xf[0]
    xh_max = xh[-1] #total physical length
    xf_max = xf[-1]

    if noSubset == True:
        # Last valid index, so that the `+1` applied below covers the full array.
        xh_start=0; xh_end = len(xh)-1
        xf_start=0; xf_end = len(xf)-1
        print(f'x in {xh_start}:{xh_end} (full domain, no subsetting)')
    else:
        #Finding Left Boundary
        ocean_percent=0.25
        left_to_coast=ModelData.xh[0]+(ModelData.xh[-1]-ModelData.xh[0])*ocean_percent
        xh_start=np.where(ModelData.xh>=left_to_coast)[0][0]
        xf_start=np.where(ModelData.xf>=left_to_coast)[0][0]
        
        #Finding Right Boundary
        right_fraction=80/100
        
        # Find index where physical location exceeds 80% of domain
        # NOTE(review): `+1` is applied here and again when the slice is built,
        # so the slice stops two indices past the first point beyond 80% --
        # confirm this is intended.
        xh_end = np.where(xh > right_fraction * xh_max)[0][0]+1
        xf_end = np.where(xf > right_fraction * xf_max)[0][0]+1
        print(f'x in {xh_start}:{xh_end} (from coast to 80% of domain, ocean excluded)')

    xhSubset = slice(xh_start,xh_end+1)
    xfSubset = slice(xf_start,xf_end+1)
    return xhSubset, xfSubset

# tSubset = GetTimeSubset(noSubset=False) #used in recombining code at the bottom
# xSubset is the tuple (xhSubset, xfSubset) of slices; the processing loop below
# uses element 0 (the xh slice) to subset the arrays in x.
xSubset = GetXSubset(noSubset=False)

In [None]:
def CallVariables(ModelData, DataManager, timeString, varNames,zInterpolate=None):
    """Load several variables for one timestep.

    Calls ``CallVariable`` once per entry of ``varNames`` (in order) and
    returns a dictionary mapping each variable name to the loaded value.
    ``zInterpolate`` is forwarded unchanged to every call.
    """
    return {
        name: CallVariable(ModelData, DataManager, timeString,
                           variableName=name, zInterpolate=zInterpolate)
        for name in varNames
    }

def GetBinaryArrays(DataManager, timeString):
    """Load the 'A_g' and 'A_c' arrays for one timestep.

    Uses the module-level ``ModelData`` together with the given
    ``DataManager`` and returns the pair ``(A_g, A_c)``.
    """
    loaded = CallVariables(ModelData, DataManager, timeString, ['A_g', 'A_c'])
    return loaded['A_g'], loaded['A_c']

In [None]:
def callSlice(z,y,x, A):
    """Index ``A`` with ``(z, y, x)``, taking a full slice on any axis given as None.

    Only ``None`` triggers the full slice; an index of 0 is used as-is.
    """
    whole_axis = slice(None)
    selector = tuple(whole_axis if axis is None else axis for axis in (z, y, x))
    return A[selector]

def CalculateAreaProfile(z_ind,y_ind,x_ind, A,dx,dy, profile,profile_squares):
    """Accumulate an area estimate per vertical level for the given grid points.

    For every point ``(z, y, x)`` the extent of the run of non-zero cells
    containing it is measured along x and along y, by locating the nearest
    zero-valued cell on each side. The area is approximated as an ellipse,
    ``pi/4 * x_length * y_length``, and added to the running sums at level ``z``.

    Parameters
    ----------
    z_ind, y_ind, x_ind : sequences of int
        Matching index arrays of the points to process.
    A : 3-D array indexed as ``A[z, y, x]``
        Mask compared against 0 and 1.
    dx, dy : float
        Grid spacing along x and y.
    profile, profile_squares : 2-D arrays
        Updated in place: column 0 accumulates the area (or area squared),
        column 1 the number of contributing points.

    Returns
    -------
    (profile, profile_squares)
        The same arrays that were passed in.

    Notes
    -----
    Points whose run reaches the edge of the array along x or y (no zero cell
    on one side) are skipped. Progress is printed roughly every quarter.
    """
    # Loop-invariant values, computed once instead of once per grid point.
    n_points = len(z_ind)
    progress_step = max(n_points // 4, 1)

    for count, (z, y, x) in enumerate(zip(z_ind, y_ind, x_ind)):
        if (count % progress_step == 0) or (count == n_points - 1):
            print(f"Currently {count * 100 / n_points:.2f}% finished")

        #FINDING XLENGTH
        ########################################################
        x_slice = A[z, y, :]

        # Skip when everything to the right or to the left of x is 1
        # (an empty side also counts, i.e. x on the first/last column).
        if np.all(x_slice[x+1:]==1) or np.all(x_slice[:x]==1): #*AVOID BOUNDARY CASES*
            continue
        x_left=np.where(x_slice[:x+1]==0)[0][-1]   # nearest zero at or left of x
        x_right=np.where(x_slice[x:]==0)[0][0]+x   # nearest zero at or right of x
        x_length=(x_right-x_left)*dx

        #FINDING YLENGTH
        ########################################################
        y_slice = A[z, :, x]

        if np.all(y_slice[y+1:]==1) or np.all(y_slice[:y]==1): #*TO AVOID BOUNDARY CASES*
            continue
        y_left=np.where(y_slice[:y+1]==0)[0][-1]
        y_right=np.where(y_slice[y:]==0)[0][0]+y
        y_length=(y_right-y_left)*dy

        #CALCULATING AREA
        ########################################################
        AREA=x_length*y_length #square area approximation 
        AREA*=np.pi/4 #include for oval area approximation 

        #ADDING TO PROFILE
        ########################################################
        profile[z,0]+=AREA;profile[z,1]+=1
        profile_squares[z,0]+=AREA**2;profile_squares[z,1]+=1

    return profile,profile_squares

def UpdraftAreaProfile(datatype, A_g, A_c, ModelData):
    """Build the updraft-area profile dictionary for one timestep.

    Parameters
    ----------
    datatype : str
        ``'general'`` (uses ``A_g``, variable ``UpdraftArea_g``) or
        ``'cloudy'`` (uses ``A_c``, variable ``UpdraftArea_c``).
    A_g, A_c : arrays
        Masks for the general and cloudy cases.
    ModelData : object
        Supplies ``dx``/``dy`` and is forwarded to ``InitializeProfiles``.

    Returns
    -------
    dict
        Profiles from ``DomainProfiles_Class.InitializeProfiles`` with the
        ``<varName>`` and ``<varName>_squares`` entries filled in.

    Raises
    ------
    ValueError
        If ``datatype`` is neither ``'general'`` nor ``'cloudy'``.
    """
    print(f"Currently working on {datatype} UpdraftArea")

    # Reject unsupported types up front, then pick the variable name and mask.
    if datatype not in ('general', 'cloudy'):
        raise ValueError(f"Unknown datatype '{datatype}'; must be 'general' or 'cloudy'.")
    use_general = datatype == 'general'
    varName = "UpdraftArea_g" if use_general else "UpdraftArea_c"
    A = (A_g if use_general else A_c).copy()
    squaresName = f"{varName}_squares"

    # Empty accumulators for this variable (second return value is unused here).
    profiles, _ = DomainProfiles_Class.InitializeProfiles({varName: varName}, ModelData)

    # Indices of the grid points belonging to the selected mask.
    updraft_mask = DomainProfiles_Class.GetUpdraftMask(datatype, A_g, A_c)
    z_ind, y_ind, x_ind = DomainProfiles_Class.GetIndexes(updraft_mask)

    # Accumulate areas and store the results back under the same keys.
    profiles[varName], profiles[squaresName] = CalculateAreaProfile(
        z_ind, y_ind, x_ind, A, ModelData.dx, ModelData.dy,
        profiles[varName], profiles[squaresName],
    )

    print(f"Finished","\n")
    return profiles

In [None]:
########################################
#RUNNING

In [None]:
# Process every timestep index assigned to this job and save one profile file
# per timestep and per datatype.
for t in num_elements:
    # Guard that only applies when dataName is 'Entrainment': its last timestep is skipped.
    if dataName == 'Entrainment' and t == ModelData.Ntime-1:
        continue
        
    print(f"#############################")
    print(f"Processing timestep {t}/{ModelData.Ntime}")
    timeString = ModelData.timeStrings[t]
    
    # Load the 'A_g' and 'A_c' arrays for this timestep
    [A_g, A_c] = GetBinaryArrays(DataManager, timeString)

    # Domain Subsetting
    # xSubset is (xhSubset, xfSubset); element 0 (the xh slice) is applied to the last axis.
    if xSubset is not None:
        print("Subsetting variables in x","\n")
        A_g = A_g[:, :, xSubset[0]]
        A_c = A_c[:, :, xSubset[0]]

    # Loop through masked types (general & cloudy)
    for datatype in ['general', 'cloudy']:
        Dictionary = UpdraftAreaProfile(datatype, A_g, A_c, ModelData)
        # Persist this timestep's profiles; Recombine later loads them back per timestep.
        DomainProfiles_DataLoading_Class.SaveProfile(
            ModelData, DataManager, Dictionary,
            dataName, datatype=datatype,
            timeString=timeString, masked=True
        )
        

In [None]:
######################
##################
#COMBINING ALL AVERAGES FOR EACH TIMESTEP

In [123]:
import re
def Recombine(dataName):
    """Sum the per-timestep profiles into one time-aggregated profile per datatype.

    For each datatype (``'general'`` and ``'cloudy'``, both masked) the saved
    profile dictionary of every timestep inside ``GetTimeSubset(noSubset=False)``
    is loaded, and columns 0 and 1 (sum and count) are added into an
    accumulator of shape ``(ModelData.Nzh, 3)`` whose third column holds
    ``ModelData.zh``. The result is saved with ``timeString="timeaverage"``.

    Uses the module-level ``ModelData`` and ``DataManager``.

    Parameters
    ----------
    dataName : str
        Name of the profile product to recombine. For ``"Entrainment"`` the
        last timestep is skipped.

    Raises
    ------
    ValueError
        If no timestep contributed any profile for a datatype.

    Notes
    -----
    Accumulators are created from the first dictionary actually loaded, rather
    than from a fixed timestep index, so the function also works when fewer
    than 101 timesteps exist and avoids one extra file load.
    """
    print(f"Currently working on {dataName}","\n")

    #helper function
    def CleanVarName(varName):
        # Strip an optional "_all/_general/_cloudy" tag and the trailing
        # "_H-MM-SS" style time suffix from a stored variable name.
        varName = re.sub(r'_(all|general|cloudy)(?=_\d{1,2}-\d{2}-\d{2}$)', '', varName)
        varName = re.sub(r'_\d{1,2}-\d{2}-\d{2}$', '', varName)
        return varName

    def NewProfile():
        # column 1: var, column 2: counter, column 3: list of zhs
        profile = np.zeros((ModelData.Nzh, 3))
        profile[:, 2] = ModelData.zh
        return profile
    
    datatypes=['general','cloudy']
    maskeds = [True, True]
    
    for datatype, masked in zip(datatypes, maskeds):
        print(f"Currently working on {datatype}")

        profiles={}

        tSubset = GetTimeSubset(noSubset=False) #used in recombining code at the bottom
        for t in range(ModelData.Ntime):
            if t not in tSubset: #skipping certain timesteps
                continue
            elif dataName == "Entrainment" and t==ModelData.Ntime-1:
                print("skipping last time for Entrainment variables")
                continue
            else:
                if t % 10 == 0: print(f"Working on time {t}")
                    
            timeString = ModelData.timeStrings[t]
            
            Dictionary = DomainProfiles_DataLoading_Class.LoadProfile(ModelData,DataManager, dataName, datatype, timeString,masked,printstatement=False)
    
            for varName, data in Dictionary.items():
                cleanVarName = CleanVarName(varName)
                # Create the accumulator the first time a variable is seen.
                if cleanVarName not in profiles:
                    profiles[cleanVarName] = NewProfile()
                profiles[cleanVarName][:,0:2] += data[:, 0:2]

        if not profiles:
            raise ValueError(f"No timesteps were recombined for {dataName} ({datatype}); "
                             f"check GetTimeSubset against ModelData.Ntime.")

        print(profiles.keys())
        DomainProfiles_DataLoading_Class.SaveProfile(ModelData,DataManager, Dictionary=profiles, dataName=dataName, datatype=datatype, timeString="timeaverage", masked=masked)

In [124]:
def GetCorrespondingClass(dataName):
    """Return a DataManager_Class configured for the Domain_Profiles output of ``dataName``.

    Directory paths and resolution strings are taken from the module-level
    ``mainDirectory``, ``scratchDirectory`` and ``ModelData``.
    """
    return DataManager_Class(
        mainDirectory, scratchDirectory,
        ModelData.res, ModelData.t_res, ModelData.Nz_str, ModelData.Np_str,
        dataType="Domain_Profiles", dataName=dataName,
        dtype='float32', codeSection="Project_Algorithms",
    )

In [125]:
recombine=False #KEEP FALSE WHEN JOB ARRAY IS RUNNING
# NOTE: the next line overrides the flag above, so recombination is enabled as written.
recombine=True

if recombine==True:
    for dataName in ["UpdraftArea"]:
        # Rebinds the module-level DataManager, which Recombine reads as a global.
        DataManager = GetCorrespondingClass(dataName)
        Recombine(dataName=dataName)

=== DataManager Summary ===
 inputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData
 outputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Domain_Profiles
 inputDataDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData/1km_5min_34nz/ModelData
 inputParcelDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData/1km_5min_34nz/ParcelData
 outputDataDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Domain_Profiles/1km_5min_34nz/UpdraftArea

Currently working on UpdraftArea 

Currently working on general
t in 48:133
Working on time 50
Working on time 60
Working on time 70
Working on time 80
Working on ti