In [41]:
####################################
#ENVIRONMENT SETUP

In [42]:
#Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr

import sys; import os; import time; from datetime import timedelta
import pickle
import h5py
from tqdm import tqdm

In [43]:
#MAIN DIRECTORIES
def GetDirectories():
    mainDirectory='/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
    mainCodeDirectory=os.path.join(mainDirectory,"Code/CodeFiles/")
    scratchDirectory='/mnt/lustre/koa/scratch/air673/'
    codeDirectory=os.getcwd()
    return mainDirectory,mainCodeDirectory,scratchDirectory,codeDirectory

[mainDirectory,mainCodeDirectory,scratchDirectory,codeDirectory] = GetDirectories()

In [44]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
from CLASSES_Variable_Calculation import ModelData_Class, SlurmJobArray_Class, DataManager_Class

In [45]:
#IMPORT FUNCTIONS
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
import FUNCTIONS_Variable_Calculation
from FUNCTIONS_Variable_Calculation import *

In [46]:
#data loading class
ModelData = ModelData_Class(mainDirectory, scratchDirectory, simulationNumber=1)
#data manager class
DataManager = DataManager_Class(mainDirectory, scratchDirectory, ModelData.res, ModelData.t_res, ModelData.Nz_str,
                                ModelData.Np_str, dataType="Tracking_Algorithms", dataName="Lagrangian_UpdraftTracking",
                                dtype='float32',codeSection = "Project_Algorithms")

=== CM1 Data Summary ===
 Simulation #:   1
 Resolution:     1km
 Time step:      5min
 Vertical levels:34
 Parcels:        1e6
 Data file:      /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_1km_5min_34nz.nc
 Parcel file:    /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_pdata_1km_5min_1e6np.nc
 Time steps:     133

=== DataManager Summary ===
 inputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData
 outputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Tracking_Algorithms
 inputDataDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData/1km_5min_34nz/ModelData
 inputParcelDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/D

In [47]:
#data manager class (for saving data)
DataManager_TrackedProfiles = DataManager_Class(mainDirectory, scratchDirectory, ModelData.res, ModelData.t_res, ModelData.Nz_str,
                                ModelData.Np_str, dataType="Tracked_Profiles", dataName="Tracked_Profiles",
                                dtype='float32',codeSection = "Project_Algorithms")

=== DataManager Summary ===
 inputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData
 outputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Tracked_Profiles
 inputDataDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData/1km_5min_34nz/ModelData
 inputParcelDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData/1km_5min_34nz/ParcelData
 outputDataDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Tracked_Profiles/1km_5min_34nz/Tracked_Profiles



In [48]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"3_Project_Algorithms","2_Tracking_Algorithms"))
from CLASSES_TrackingAlgorithms import TrackingAlgorithms_DataLoading_Class, Results_InputOutput_Class, TrackedParcel_Loading_Class

In [49]:
# IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"3_Project_Algorithms","3_Tracked_Profiles"))
from CLASSES_TrackedProfiles import TrackedProfiles_DataLoading_CLASS

In [50]:
##############################################
#JOB ARRAY

In [51]:
#JOB ARRAY SETUP
UsingJobArray=True

def GetNumJobs(res):
    if res=='1km':
        num_jobs=20
    elif res=='250m': 
        num_jobs=100
    return num_jobs
num_jobs = GetNumJobs(ModelData.res)
SlurmJobArray = SlurmJobArray_Class(total_elements=ModelData.Ntime, num_jobs=num_jobs, UsingJobArray=UsingJobArray)
start_job = SlurmJobArray.start_job; end_job = SlurmJobArray.end_job

def GetNumElements():
    num_elements = np.arange(ModelData.Ntime)[start_job:end_job]
    return num_elements
num_elements = GetNumElements()

Running timesteps from 0:6 



In [52]:
##############################################
#DATA LOADING FUNCTIONS

In [145]:
def MakeDataDictionary(variableNames,t,printstatement=False):
    timeString = ModelData.timeStrings[t]
    # print(f"Getting data from {timeString}","\n")
    
    dataDictionary = {variableName: CallLagrangianArray(ModelData, DataManager, timeString, variableName=variableName, printstatement=printstatement) 
                      for variableName in variableNames}      
    return dataDictionary
    
def GetSpatialData(t):    
    variableNames = ['Z','Y','X']
    dataDictionary = MakeDataDictionary(variableNames,t)
    [Z,Y,X] = (dataDictionary[k] for k in variableNames)
    return Z,Y,X

def GetLangrangianBinaryArray(t):
    variableNames=['PROCESSED_A_g','PROCESSED_A_c']
    binaryDictionary = MakeDataDictionary(variableNames,t)
    
    A_g = binaryDictionary['PROCESSED_A_g']
    A_c = binaryDictionary['PROCESSED_A_c']

    return A_g,A_c

In [146]:
########################################
#RUNNING FUNCTIONS

In [147]:
#Functions for Initializing Profile Arrays
def CopyStructure(dictionary, placeholder=None):
    """Deep-copy dictionary structure, replacing leaves with a given placeholder."""
    if isinstance(dictionary, dict):
        return {k: CopyStructure(v, placeholder) for k, v in dictionary.items()}
    else:
        return placeholder

def InitializeProfileArrays(trackedArrays, varNames, zhs=ModelData.zh):
    """
    Create a new dictionary with the same nested structure as trackedArrays,
    and for each variable name, create:
        - 'profile_array' / 'profile_array_squares'
        - 'profile_array_left' / 'profile_array_left_squares'
        - 'profile_array_right' / 'profile_array_right_squares'
    Each array has shape (len(zhs), 3) and zhs in the last column.
    """
    profileArraysDictionary = {}

    for category, depth_dict in trackedArrays.items():  # e.g. 'CL', 'SBF'
        profileArraysDictionary[category] = {}

        for depth_type in depth_dict.keys():  # e.g. 'ALL', 'SHALLOW', 'DEEP'
            profileArraysDictionary[category][depth_type] = {}

            for varName in varNames:
                # Create base profile array
                base_profile = np.zeros((len(zhs), 3))
                base_profile[:, 2] = zhs

                profileArraysDictionary[category][depth_type][varName] = {
                    # Main / all parcels
                    "profile_array": base_profile.copy(),
                    "profile_array_squares": base_profile.copy(),

                    # Left subset (-1)
                    "profile_array_left": base_profile.copy(),
                    "profile_array_left_squares": base_profile.copy(),

                    # Right subset (+1)
                    "profile_array_right": base_profile.copy(),
                    "profile_array_right_squares": base_profile.copy(),
                }
    return profileArraysDictionary

In [148]:
def GetParcelNumbers(trackedArray, t):
    """
    Return all parcel indices (p) and their corresponding row indices
    for parcels that are active at time t.
    Vectorized, no row-by-row loops.
    """
    t_start = trackedArray[:, 1]
    t_end   = np.minimum(trackedArray[:, 2] + trackedArray[:, 3], ModelData.Ntime)

    # Boolean mask for rows active at time t
    mask = (t >= t_start) & (t <= t_end)

    # Extract parcel numbers and their corresponding row indices
    selectedRows = np.where(mask)[0]
    selectedPs = trackedArray[selectedRows, 0]
    leftRightIndexes = trackedArray[selectedRows, 4]

    return selectedRows, selectedPs, leftRightIndexes

In [235]:
def MakeTrackedProfiles(trackedArrays,profileArraysDictionary,varNames,VARs,Z,Y,X,t, A_g,A_c,A_g_Prior,A_c_Prior):
    """
    Update profileArraysDictionary with variable data for parcels active at time t.
    Accumulates sums and counts in both profile_array and profile_array_squares.
    """
    #CALCULATING
    for key1, subdict in trackedArrays.items():         # e.g. 'CL', 'SBF'
        print("\t",f'working on {key1}')
        for key2, trackedArray in subdict.items():           # e.g. 'ALL', 'DEEP'
            print("\t\t",f'working on {key2}')
            #loading the profile array to fill
            profileArray = profileArraysDictionary[key1][key2] 
    
            #getting parcels in trackedArray to run through
            _, selectedPs, leftRightIndexes = GetParcelNumbers(trackedArray, t) #get parcels that are counted at time t
            
            #getting Z data
            zLevels = Z[selectedPs]
            yLevels = Y[selectedPs]
            xLevels = X[selectedPs]

            #find which other parcels exist in each grid box
            # Step 1: compute spatial matches once
            gridboxMatches = [
                np.where((Z == zLevel) & (Y == yLevel) & (X == xLevel))[0]
                for zLevel, yLevel, xLevel in zip(zLevels, yLevels, xLevels)
            ]

            #find which of those parcels were entrained into a general/cloudy updraft
            
            # Step 2: compute entrainment masks
            mask_c = A_c & (~A_c_Prior)
            
            # Step 3: apply masks to find all parcels
            whereOtherEntrainedParcels_c = [
                idx[mask_c[idx]]
                for idx in gridboxMatches
            ]

            # Step 4: collapsing list of all found parcels
            collapsed = np.concatenate(whereOtherEntrainedParcels_c)

            # Step 5: track parcels back (last 30 minutes) and read properties
            # property_bins = Dictionary of bins for each variable (predetermined#*
            # time_bins = np.arange of number of timesteps from t to t_30minsago, #*

            ##########
            for varName, arr in VARs.items():

    
            ##########
            
            trackTimes = np.arange(t,(t-30*timestep_per_minute)+1,-1)
            for t_back in trackTimes:

                if t_back!=t:
                    # VARs = MakeDataDictionary(varNames, t_back) ==> W,QV,QCQI,TH #*

                #GETTING LAST TIME PARCEL IN CLOUD #*#*
                # np.where(QCQI[collapsed] >= 1e-6)
                # ==> those that are, 2d histogram of that time (or NaN) and the count and the value of QV
                Dictionary[key1][key2][f"detrainment_hist_2d"] += hist2d
                    
                ##########
                for varName, arr in VARs.items():

                    # property values at this time for these entrained parcels
                    properties = arr[collapsed]

                    #GETTING PROPERTY HISTOGRAMSS
                    
            
                    # time arrays for histogram2d
                    times = np.full(properties.shape, t_back)

                    # 2D histograms (time × property)
                    hist2d, _, _ = np.histogram2d(
                        times,
                        properties,
                        bins=(time_bins, prop_bins)
                    )
                    
                    # ==> Make sure the dictionary has the varName keys in it #*
                    Dictionary[key1][key2][f"{varName}_hist2d"] += hist2d
                ##########

            
            return []


In [200]:
########################################
#RUNNING

In [219]:
#Loading in Tracked Parcels Info
trackedArrays,LevelsDictionary = TrackedParcel_Loading_Class.LoadingSubsetParcelData(ModelData,DataManager,
                                                         Results_InputOutput_Class)

varNames = ["W",'"QV","QCQI","THETA_v"]

CL: ALL=12239, SHALLOW=8447, DEEP=369
nonCL: ALL=10043, SHALLOW=7614, DEEP=181
SBF: ALL=1905, SHALLOW=943, DEEP=139
ColdPool: ALL=10334, SHALLOW=7504, DEEP=230
Mean Cloudbase is: 1.45 km

Min Cloudbase is: 1.25 km

Mean LFC is: 1.93 km

Mean LCL is: 1.79 km

Min LFC is: 1.45 km

Min LCL is: 1.37 km



In [220]:
for t in tqdm(num_elements, desc="Processing"):
    t=100 #*
    print("#" * 40,"\n",f"Processing timestep {t}/{num_elements[-1]}")
    timeString = ModelData.timeStrings[t]

    #Forming Dictionary for Profile Arrays for current timestep
    trackedProfileArrays = CopyStructure(trackedArrays)
    profileArraysDictionary = InitializeProfileArrays(trackedProfileArrays,varNames)
    
    #getting variable data
    VARs = MakeDataDictionary(varNames, t)
    Z,Y,X = GetSpatialData(t)
    A_g,A_c = GetLangrangianBinaryArray(t)
    A_g_Prior,A_c_Prior = GetLangrangianBinaryArray(t-1)
    
    #making tracked profiles
    profileArraysDictionary = MakeTrackedProfiles(trackedArrays,profileArraysDictionary,varNames,VARs,Z,Y,X,t, A_g,A_c,A_g_Prior,A_c_Prior)
    break #*
    
    # #saving tracked profiles for current timestep
    # TrackedProfiles_DataLoading_CLASS.SaveProfile(ModelData,DataManager_TrackedProfiles, profileArraysDictionary, dataName, t)

Processing:   0%|          | 0/7 [00:00<?, ?it/s]

######################################## 
 Processing timestep 100/6
	 working on CL
		 working on ALL


Processing:   0%|          | 0/7 [00:00<?, ?it/s]


In [None]:
#########################################
#RECOMBINE SEPERATE JOB_ARRAYS AFTER
recombine=False #KEEP FALSE WHEN JOBARRAY IS RUNNING
# recombine=True

In [None]:
import copy
def RecombineProfiles(ModelData, DataManager):
    """
    Combine tracked profiles across all timesteps using the first as a template.
    """
    print(f"Recombining {ModelData.Ntime} TrackedProfiles files...\n")

    trackedProfileArrays = None

    for t in tqdm(range(ModelData.Ntime), desc="Combining Profiles", unit="timestep"):
        if dataName in ['Entrainment','PROCESSED_Entrainment'] and t == ModelData.Ntime-1:
            continue
        profileArraysDictionary = TrackedProfiles_DataLoading_CLASS.LoadProfile(ModelData, DataManager, dataName, t)

        if t == 0:
            # Deep copy structure so we don’t overwrite the first timestep’s data
            trackedProfileArrays = copy.deepcopy(profileArraysDictionary)
            continue  # move to next time step — skip accumulation for t=0

        # Add all later times to the initial one
        for key1 in profileArraysDictionary:
            for key2 in profileArraysDictionary[key1]:
                for varName in profileArraysDictionary[key1][key2]:
                    for arrayName in ["profile_array", "profile_array_squares",
                                      "profile_array_left", "profile_array_left_squares",
                                      "profile_array_right", "profile_array_right_squares"]:
                        trackedProfileArrays[key1][key2][varName][arrayName][:, 0:2] += (
                            profileArraysDictionary[key1][key2][varName][arrayName][:, 0:2]
                        )
    return trackedProfileArrays


In [None]:
if recombine==True:
    for dataName in ["Variables",
                     "Entrainment","PROCESSED_Entrainment",
                     "W_Budgets","QV_Budgets","TH_Budgets"]:
        print(f"Working on {dataName}")
        trackedProfileArrays = RecombineProfiles(ModelData, DataManager_TrackedProfiles)
        TrackedProfiles_DataLoading_CLASS.SaveProfile(ModelData,DataManager_TrackedProfiles, trackedProfileArrays, dataName, t='combined')

In [None]:
##############################################
#LOADING BACK IN

In [None]:
# profileArraysDictionary=TrackedProfiles_DataLoading_CLASS.LoadProfile(ModelData,DataManager_TrackedProfiles, dataName, t='combined')

In [None]:
##############################################
#TESTING

In [None]:
# #testing: comparing above vectorized version to multi-loop version below

# #pure calculation method (slow)
# t=80

# a = trackedArrays['CL']['DEEP']
# ps = a[:,0]
# t1s = a[:,1]
# t2s = a[:,2]+a[:,3]

# profile=np.zeros((ModelData.Nzh,3))
# profile[:,2] = ModelData.zh

# plen = len(ps)
# for count, (p,t1,t2) in enumerate(zip(ps,t1s,t2s)):
#     if count % 100 == 0: print(f"{count}/{plen}")
#     ts = np.arange(t1, t2+1)
    
#     for t_ in ts:
#         if t_ != t:
#             continue
#         Z = GetSpatialData(t)[p]
#         var = MakeDataDictionary(["W"], t=t)["W"][p]
#         profile[Z,0]+=var
#         profile[Z,1]+=1


# #plotting

# def Plot(a,color):
#     b= a[:,0]/a[:,1]
#     # b*=1000
#     plt.plot(b,a[:,2],color=color)
# one = profileArraysDictionary['CL']['DEEP']['W']['profile_array'] #created using MakeTrackedProfiles function
# two = profile.copy()
# Plot(one,color='black')
# Plot(two,color='blue') #plots are the same
# np.all(one[:,0]==two[:,0]) #=True