In [None]:
# Name of the diagnostic processed by this notebook. It selects the variable list in
# GetVarNames and is passed to SaveProfile / LoadProfile further down.
dataName = "UpdraftArea"

In [None]:
####################################
#ENVIRONMENT SETUP

In [None]:
#Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr

import sys; import os; import time; from datetime import timedelta
import pickle
import h5py
from tqdm import tqdm

In [None]:
#MAIN DIRECTORIES
def GetDirectories():
    """
    Return the directories used by this notebook.

    Returns
    -------
    tuple
        (mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory), where
        mainCodeDirectory is "Code/CodeFiles/" under mainDirectory and
        codeDirectory is the current working directory.
    """
    projectRoot = '/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
    scratchRoot = '/mnt/lustre/koa/scratch/air673/'
    return (
        projectRoot,
        os.path.join(projectRoot, "Code/CodeFiles/"),
        scratchRoot,
        os.getcwd(),
    )

mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory = GetDirectories()

In [None]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
from CLASSES_Variable_Calculation import ModelData_Class, SlurmJobArray_Class, DataManager_Class

In [None]:
#IMPORT FUNCTIONS
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
import FUNCTIONS_Variable_Calculation
from FUNCTIONS_Variable_Calculation import *

In [None]:
#data loading class
# ModelData supplies the simulation metadata read throughout this notebook
# (res, t_res, Nz_str, Np_str, Ntime, timeStrings, zh, dx, dy).
ModelData = ModelData_Class(mainDirectory, scratchDirectory, simulationNumber=1)
#data manager class
# This manager is the one handed to the Lagrangian-array / variable readers and to the
# tracked-parcel loader below.
DataManager = DataManager_Class(mainDirectory, scratchDirectory, ModelData.res, ModelData.t_res, ModelData.Nz_str,
                                ModelData.Np_str, dataType="Tracking_Algorithms", dataName="Lagrangian_UpdraftTracking",
                                dtype='float32',codeSection = "Project_Algorithms")

In [None]:
#data manager class (for saving data)
# Separate manager passed to SaveProfile for the per-timestep profiles and to
# RecombineProfiles / SaveProfile for the combined result.
DataManager_TrackedProfiles = DataManager_Class(mainDirectory, scratchDirectory, ModelData.res, ModelData.t_res, ModelData.Nz_str,
                                ModelData.Np_str, dataType="Tracked_Profiles", dataName="Tracked_Profiles",
                                dtype='float32',codeSection = "Project_Algorithms")

In [None]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"3_Project_Algorithms","2_Tracking_Algorithms"))
from CLASSES_TrackingAlgorithms import TrackingAlgorithms_DataLoading_Class, Results_InputOutput_Class, TrackedParcel_Loading_Class

In [None]:
# IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"3_Project_Algorithms","3_Tracked_Profiles"))
from CLASSES_TrackedProfiles import TrackedProfiles_DataLoading_CLASS

In [None]:
##############################################
#JOB ARRAY

In [None]:
#JOB ARRAY SETUP
# Flag forwarded to SlurmJobArray_Class below.
UsingJobArray=True

def GetNumJobs(res):
    """
    Return the number of job-array tasks to use for a given model resolution.

    Parameters
    ----------
    res : str
        Resolution label; '1km' and '250m' are the supported values.

    Returns
    -------
    int
        20 for '1km', 100 for '250m'.

    Raises
    ------
    ValueError
        If res is not one of the supported labels. (Previously this case fell
        through and failed with an UnboundLocalError on the return statement.)
    """
    if res == '1km':
        return 20
    if res == '250m':
        return 100
    raise ValueError(f"Unsupported resolution {res!r}; expected '1km' or '250m'")
# Build the job-array helper over the ModelData.Ntime timesteps. start_job / end_job are
# the slice bounds GetNumElements applies to the timestep indices
# (presumably this task's share of the timesteps — confirm in SlurmJobArray_Class).
num_jobs = GetNumJobs(ModelData.res)
SlurmJobArray = SlurmJobArray_Class(total_elements=ModelData.Ntime, num_jobs=num_jobs, UsingJobArray=UsingJobArray)
start_job = SlurmJobArray.start_job; end_job = SlurmJobArray.end_job

def GetNumElements():
    """
    Return the timestep indices in [start_job, end_job) out of range(ModelData.Ntime).

    Reads the module-level ModelData, start_job and end_job.
    """
    allTimesteps = np.arange(ModelData.Ntime)
    return allTimesteps[start_job:end_job]
# Timestep indices iterated by the main processing loop further down.
num_elements = GetNumElements()

In [None]:
##############################################
#DATA LOADING FUNCTIONS

In [None]:
def MakeDataDictionary(variableNames,t,printstatement=False):
    """
    Load several Lagrangian arrays for one timestep.

    Parameters
    ----------
    variableNames : iterable of str
        Names forwarded one by one to CallLagrangianArray.
    t : int
        Index into ModelData.timeStrings.
    printstatement : bool, optional
        Forwarded unchanged to CallLagrangianArray.

    Returns
    -------
    dict
        Maps each variable name to whatever CallLagrangianArray returned for it.
    """
    timeString = ModelData.timeStrings[t]

    dataDictionary = {}
    for variableName in variableNames:
        dataDictionary[variableName] = CallLagrangianArray(
            ModelData, DataManager, timeString,
            variableName=variableName, printstatement=printstatement,
        )
    return dataDictionary
    
def GetSpatialData(t):
    """Return the 'Z' Lagrangian array for timestep index t (via MakeDataDictionary)."""
    return MakeDataDictionary(['Z'], t)['Z']

In [None]:
####################################
#RUN SETUP

In [None]:
#data variable list
def GetVarNames(dataName):
    """
    Return the list of profile variable names for a given dataset name.

    Parameters
    ----------
    dataName : str
        Dataset identifier; only "UpdraftArea" is supported.

    Returns
    -------
    list of str
        A fresh list: ['UpdraftArea_g', 'UpdraftArea_c'].

    Raises
    ------
    ValueError
        If dataName is not supported. (Previously this case fell through and
        failed with an UnboundLocalError on the return statement.)
    """
    if dataName == "UpdraftArea":
        return ['UpdraftArea_g', 'UpdraftArea_c']
    raise ValueError(f"Unsupported dataName {dataName!r}; expected 'UpdraftArea'")

In [None]:
########################################
#RUNNING FUNCTIONS

In [None]:
#Functions for Initializing Profile Arrays
def CopyStructure(dictionary, placeholder=None):
    """
    Rebuild the nested-dict skeleton of `dictionary`, with every non-dict leaf
    replaced by `placeholder`. A non-dict input yields `placeholder` itself.
    """
    if not isinstance(dictionary, dict):
        return placeholder

    skeleton = {}
    for key, child in dictionary.items():
        skeleton[key] = CopyStructure(child, placeholder)
    return skeleton

def InitializeProfileArrays(trackedArrays, varNames, zhs=None):
    """
    Build zero-initialised profile accumulators mirroring the two-level key
    structure of trackedArrays.

    For every category -> depth_type -> varName, six independent arrays are created:
        - 'profile_array' / 'profile_array_squares'
        - 'profile_array_left' / 'profile_array_left_squares'
        - 'profile_array_right' / 'profile_array_right_squares'
    Each array has shape (len(zhs), 3): columns 0 and 1 start at zero and the
    last column holds zhs.

    Parameters
    ----------
    trackedArrays : dict of dict
        Only the keys are used (e.g. 'CL'/'SBF' -> 'ALL'/'SHALLOW'/'DEEP').
    varNames : iterable of str
        Variable names to create accumulators for.
    zhs : array-like, optional
        Values written to the last column. Defaults to ModelData.zh, looked up
        at call time so the function does not capture a stale snapshot of the
        global at definition time.

    Returns
    -------
    dict
        profileArraysDictionary[category][depth_type][varName][arrayName] -> ndarray
    """
    if zhs is None:
        zhs = ModelData.zh

    arrayNames = (
        "profile_array", "profile_array_squares",                # all parcels
        "profile_array_left", "profile_array_left_squares",      # left subset (-1)
        "profile_array_right", "profile_array_right_squares",    # right subset (+1)
    )

    # The template is identical for every entry; build it once and copy it.
    base_profile = np.zeros((len(zhs), 3))
    base_profile[:, 2] = zhs

    profileArraysDictionary = {}
    for category, depth_dict in trackedArrays.items():  # e.g. 'CL', 'SBF'
        profileArraysDictionary[category] = {}
        for depth_type in depth_dict.keys():  # e.g. 'ALL', 'SHALLOW', 'DEEP'
            profileArraysDictionary[category][depth_type] = {
                varName: {arrayName: base_profile.copy() for arrayName in arrayNames}
                for varName in varNames
            }
    return profileArraysDictionary

In [None]:
def GetParcelNumbers(trackedArray, t):
    """
    Select the rows of trackedArray whose time window contains t.

    A row is active when column 1 <= t <= min(column 2 + column 3, ModelData.Ntime).
    Vectorized, no row-by-row loops.

    Returns
    -------
    selectedRows : ndarray
        Indices of the active rows.
    selectedPs : ndarray
        Column 0 of the active rows (used by the caller as parcel numbers).
    leftRightIndexes : ndarray
        Column 4 of the active rows (the caller compares it with -1 / +1).
    """
    windowStart = trackedArray[:, 1]
    windowEnd = np.minimum(trackedArray[:, 2] + trackedArray[:, 3], ModelData.Ntime)

    # Rows whose [start, end] window (inclusive on both sides) contains t
    isActive = (t >= windowStart) & (t <= windowEnd)
    selectedRows = np.where(isActive)[0]

    return selectedRows, trackedArray[selectedRows, 0], trackedArray[selectedRows, 4]

In [None]:
#Updraft Area Functions
def GetSpatialData_AllDimensions(t):
    """Return the 'Z', 'Y' and 'X' Lagrangian arrays for timestep index t, in that order."""
    fields = MakeDataDictionary(['Z','Y','X'], t)
    return fields['Z'], fields['Y'], fields['X']
    
def CallVariables(ModelData, DataManager, timeString, varNames,zInterpolate=None):
    """
    Load several variables for one time string.

    Each name in varNames is passed to CallVariable together with zInterpolate;
    the results are returned in a dict keyed by variable name.
    """
    return {
        varName: CallVariable(ModelData, DataManager, timeString,
                              variableName=varName, zInterpolate=zInterpolate)
        for varName in varNames
    }

def GetBinaryArrays(DataManager, timeString):
    """
    Load the 'A_g' and 'A_c' variables for one time string.

    Uses the module-level ModelData together with the DataManager passed in.
    Returns the pair (A_g, A_c).
    """
    binaryFields = CallVariables(ModelData, DataManager, timeString, ['A_g', 'A_c'])
    return binaryFields['A_g'], binaryFields['A_c']


def callSlice(z,y,x, A):
    """
    Index A with (z, y, x), where any index given as None selects the whole axis.
    """
    index = tuple(slice(None) if axisIndex is None else axisIndex for axisIndex in (z, y, x))
    return A[index]

def _UpdraftExtentAlongLine(line, idx):
    """
    Grid-cell span between the nearest zero at or below idx and the nearest
    zero at or above idx along `line`; None when either side of idx (excluding
    idx itself) consists only of ones, which includes an empty side.
    """
    if np.all(line[idx+1:] == 1) or np.all(line[:idx] == 1):
        return None

    lowerZero = np.where(line[:idx+1] == 0)[0][-1]
    upperZero = np.where(line[idx:] == 0)[0][0] + idx
    # NOTE(review): the span is measured between the two bounding zero cells, so a
    # run of k ones yields k+1 grid spacings, while idx sitting on a zero yields 0 —
    # confirm this is the intended width definition.
    return upperZero - lowerZero


def ComputeAreaAtIndex(z, y, x, A, dx, dy):
    """
    Compute the horizontal updraft area (oval approximation) for a given (z, y, x).

    The x-extent is measured along A[z, y, :] and the y-extent along A[z, :, x];
    the area is x_length * y_length * pi / 4. Returns None when either extent
    cannot be bounded by zeros on both sides.
    """
    # === X-LENGTH ===
    x_cells = _UpdraftExtentAlongLine(callSlice(z, y, None, A), x)
    if x_cells is None:
        return None
    x_length = x_cells * dx

    # === Y-LENGTH ===
    y_cells = _UpdraftExtentAlongLine(callSlice(z, None, x, A), y)
    if y_cells is None:
        return None
    y_length = y_cells * dy

    # === AREA (oval correction) ===
    return x_length * y_length * (np.pi / 4)


def CalculateAreaProfile(z_ind, y_ind, x_ind, A, dx, dy):
    """
    Compute updraft areas for each (z, y, x) index triple.

    Returns:
        AREAs      : float array, one entry per input triple in input order;
                     NaN where ComputeAreaAtIndex returned None
        valid_mask : boolean array, True where an area was computed
    """
    n = len(z_ind)
    AREAs = np.full(n, np.nan)
    valid_mask = np.zeros(n, dtype=bool)

    # Report progress roughly every quarter of the way, plus on the final element
    progress_step = max(n // 4, 1)
    last = n - 1

    for count, indexTriple in enumerate(zip(z_ind, y_ind, x_ind)):
        if (count % progress_step == 0) or (count == last):
            print(f"Currently {count * 100 / n:.2f}% finished")

        AREA = ComputeAreaAtIndex(*indexTriple, A, dx, dy)
        if AREA is not None:
            AREAs[count] = AREA
            valid_mask[count] = True

    return AREAs, valid_mask


def _AccumulateProfile(varProfiles, arrayName, zIndices, values):
    """Add values (and values**2) plus unit counts into one sum/squares profile pair."""
    sums = varProfiles[arrayName]
    squares = varProfiles[arrayName + "_squares"]
    # np.add.at is unbuffered, so repeated z indices accumulate correctly.
    np.add.at(sums[:, 0], zIndices, values)
    np.add.at(sums[:, 1], zIndices, 1)
    np.add.at(squares[:, 0], zIndices, values**2)
    np.add.at(squares[:, 1], zIndices, 1)


def MakeTrackedProfiles_UpdraftArea(trackedArrays,profileArraysDictionary,varNames,Z,Y,X,t,
                                    filtering=False):
    """
    Update profileArraysDictionary with updraft-area data for parcels active at time t.

    For every category/depth entry of trackedArrays, the active parcels are
    looked up with GetParcelNumbers, their (Z, Y, X) entries are used as grid
    indices into the binary array for each variable, and the resulting areas
    are accumulated (sum in column 0, count in column 1) into 'profile_array'
    and its left/right subsets, with squared values going to the matching
    '*_squares' arrays.

    Parameters
    ----------
    trackedArrays : dict of dict of ndarray
        e.g. trackedArrays['CL']['ALL']; each array is passed to GetParcelNumbers.
    profileArraysDictionary : dict
        Accumulators laid out as [key1][key2][varName][arrayName]; modified in place.
    varNames : iterable of str
        "UpdraftArea_g" uses A_g; any other name uses A_c.
    Z, Y, X : ndarray
        Indexed by parcel number; the values are used as integer grid indices.
    t : int
        Timestep index.
    filtering : bool, optional
        If True, only parcels located where the binary array equals 1 are used.
        (NOT RECOMMENDED, distills results.)

    Returns
    -------
    dict
        The same profileArraysDictionary object that was passed in.
    """
    [A_g, A_c] = GetBinaryArrays(DataManager, ModelData.timeStrings[t])

    for key1, subdict in trackedArrays.items():         # e.g. 'CL', 'SBF'
        print("\t",f'working on {key1}')
        for key2, trackedArray in subdict.items():           # e.g. 'ALL', 'DEEP'
            print("\t\t",f'working on {key2}')
            profileArray = profileArraysDictionary[key1][key2]

            # parcels that are counted at time t
            _, selectedPs, leftRightIndexes = GetParcelNumbers(trackedArray, t)

            zLevels = Z[selectedPs]
            yLevels = Y[selectedPs]
            xLevels = X[selectedPs]

            mask_left = leftRightIndexes == -1
            mask_right = leftRightIndexes == 1

            for varName in varNames:
                # A is only read downstream, so no copy of the 3-D field is needed.
                A = A_g if varName == "UpdraftArea_g" else A_c

                if filtering:
                    inUpdraft = A[zLevels, yLevels, xLevels] == 1
                    if not np.any(inUpdraft):
                        continue
                    zSel = zLevels[inUpdraft]
                    ySel = yLevels[inUpdraft]
                    xSel = xLevels[inUpdraft]
                    # The left/right masks must be reduced with the same filter;
                    # otherwise they no longer line up with the area results.
                    leftSel = mask_left[inUpdraft]
                    rightSel = mask_right[inUpdraft]
                else:
                    zSel, ySel, xSel = zLevels, yLevels, xLevels
                    leftSel, rightSel = mask_left, mask_right

                results, valid_mask = CalculateAreaProfile(zSel, ySel, xSel, A, ModelData.dx, ModelData.dy)

                # Keep only entries for which an area was computed, and keep
                # the z indices and left/right masks aligned with them.
                zSel = zSel[valid_mask]
                results = results[valid_mask]
                leftSel = leftSel[valid_mask]
                rightSel = rightSel[valid_mask]

                # --- MAIN (all parcels) ---
                _AccumulateProfile(profileArray[varName], "profile_array", zSel, results)

                # --- LEFT subset (-1 only) ---
                if np.any(leftSel):
                    _AccumulateProfile(profileArray[varName], "profile_array_left",
                                       zSel[leftSel], results[leftSel])

                # --- RIGHT subset (+1 only) ---
                if np.any(rightSel):
                    _AccumulateProfile(profileArray[varName], "profile_array_right",
                                       zSel[rightSel], results[rightSel])

    return profileArraysDictionary

In [None]:
########################################
#RUNNING

In [None]:
#Loading in Tracked Parcels Info
trackedArrays,LevelsDictionary = TrackedParcel_Loading_Class.LoadingSubsetParcelData(ModelData,DataManager,
                                                         Results_InputOutput_Class)

# Removing the after-ascent count (column 3) for SHALLOW parcels.
# Reason: there are a lot of shallow parcels that hit their peak below 4 km,
# but stay in the cloud in turbulent eddies and later exit at much higher levels.
# GetParcelNumbers adds column 3 to column 2 to get the end of a parcel's active window,
# so zeroing it ends the window at column 2. This modifies the loaded arrays in place and
# requires every category to have a 'SHALLOW' entry.
for key1, subdict in trackedArrays.items():
    subdict['SHALLOW'][:,3]=0

# Get Variable Names
varNames = GetVarNames(dataName)

In [None]:
# Main processing loop: for each timestep assigned to this job, build fresh (zeroed)
# profile accumulators, fill them from the parcels active at that timestep, and save
# the result as that timestep's profile.
for t in num_elements:
    print("#" * 40,"\n",f"Processing timestep {t}/{num_elements[-1]}")
    # NOTE(review): timeString is not used again inside this loop.
    timeString = ModelData.timeStrings[t]

    #Forming Dictionary for Profile Arrays for current timestep
    # CopyStructure keeps only the key skeleton of trackedArrays (leaves become None);
    # InitializeProfileArrays only needs those keys.
    trackedProfileArrays = CopyStructure(trackedArrays)
    profileArraysDictionary = InitializeProfileArrays(trackedProfileArrays,varNames)
    
    #getting variable data
    Z,Y,X = GetSpatialData_AllDimensions(t)

    #making tracked profiles
    profileArraysDictionary = MakeTrackedProfiles_UpdraftArea(trackedArrays,profileArraysDictionary,
                                                                  varNames,
                                                                  Z,Y,X,t)

    #saving tracked profiles for current timestep
    TrackedProfiles_DataLoading_CLASS.SaveProfile(ModelData,DataManager_TrackedProfiles, profileArraysDictionary, dataName, t)

In [None]:
#########################################
#RECOMBINE SEPARATE JOB_ARRAYS AFTER
# NOTE(review): the second assignment below overrides the first, so recombine is True
# whenever this cell runs — including while UsingJobArray is True, which contradicts the
# inline instruction on the first line. Confirm which value is intended before submitting.
recombine=False #KEEP FALSE WHEN JOBARRAY IS RUNNING
recombine=True

In [None]:
import copy
def RecombineProfiles(ModelData, DataManager):
    """
    Sum the per-timestep tracked profiles over all ModelData.Ntime timesteps.

    The profile loaded for t == 0 is deep-copied and used as the accumulator;
    for every later timestep, columns 0 and 1 of each of the six profile arrays
    are added onto it. Column 2 is left as loaded from t == 0.

    Reads the module-level `dataName`: for 'Entrainment' and
    'PROCESSED_Entrainment' the final timestep is skipped.

    Returns the accumulated dictionary (None if no timestep was loaded).
    """
    print(f"Recombining {ModelData.Ntime} TrackedProfiles files...\n")

    arrayNames = ("profile_array", "profile_array_squares",
                  "profile_array_left", "profile_array_left_squares",
                  "profile_array_right", "profile_array_right_squares")
    skipsLastTimestep = ['Entrainment','PROCESSED_Entrainment']
    lastTimestep = ModelData.Ntime-1

    trackedProfileArrays = None

    for t in tqdm(range(ModelData.Ntime), desc="Combining Profiles", unit="timestep"):
        if dataName in skipsLastTimestep and t == lastTimestep:
            continue
        loaded = TrackedProfiles_DataLoading_CLASS.LoadProfile(ModelData, DataManager, dataName, t)

        if t == 0:
            # Deep copy so the accumulator does not alias the first timestep's data;
            # nothing is added for t == 0 itself.
            trackedProfileArrays = copy.deepcopy(loaded)
            continue

        # Add every later timestep onto the accumulator
        for key1, depthDict in loaded.items():
            for key2, varDict in depthDict.items():
                for varName, arrays in varDict.items():
                    accumulators = trackedProfileArrays[key1][key2][varName]
                    for arrayName in arrayNames:
                        accumulators[arrayName][:, 0:2] += arrays[arrayName][:, 0:2]
    return trackedProfileArrays


In [None]:
# When enabled, sum the per-timestep profiles and save the total under t='combined'.
# The loop variable rebinds the module-level dataName, which RecombineProfiles reads
# as a global.
if recombine==True:
    for dataName in ["UpdraftArea"]:
        print(f"Working on {dataName}")
        trackedProfileArrays = RecombineProfiles(ModelData, DataManager_TrackedProfiles)
        TrackedProfiles_DataLoading_CLASS.SaveProfile(ModelData,DataManager_TrackedProfiles, trackedProfileArrays, dataName, t='combined')

In [None]:
##############################################
#LOADING BACK IN

In [None]:
# profileArraysDictionary=TrackedProfiles_DataLoading_CLASS.LoadProfile(ModelData,DataManager_TrackedProfiles, dataName, t='combined')