In [80]:
####################################
#ENVIRONMENT SETUP

In [81]:
#Importing Libraries
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr

import sys; import os; import time; from datetime import timedelta
import pickle
import h5py

from tqdm import tqdm

from glob import glob

In [82]:
#MAIN DIRECTORIES
def GetDirectories():
    mainDirectory='/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
    mainCodeDirectory=os.path.join(mainDirectory,"Code/CodeFiles/")
    scratchDirectory='/mnt/lustre/koa/scratch/air673/'
    codeDirectory=os.getcwd()
    return mainDirectory,mainCodeDirectory,scratchDirectory,codeDirectory

[mainDirectory,mainCodeDirectory,scratchDirectory,codeDirectory] = GetDirectories()

In [83]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
from CLASSES_Variable_Calculation import ModelData_Class, SlurmJobArray_Class, DataManager_Class

In [84]:
#data loading class
ModelData = ModelData_Class(mainDirectory, scratchDirectory, simulationNumber=1)
#data manager class
DataManager = DataManager_Class(mainDirectory, scratchDirectory, ModelData.res, ModelData.t_res, ModelData.Nz_str,
                                ModelData.Np_str, dataType="Tracking_Algorithms", dataName="Lagrangian_UpdraftTracking",
                                dtype='float32',codeSection = "Project_Algorithms")

=== CM1 Data Summary ===
 Simulation #:   1
 Resolution:     1km
 Time step:      5min
 Vertical levels:34
 Parcels:        1e6
 Data file:      /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_1km_5min_34nz.nc
 Parcel file:    /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_pdata_1km_5min_1e6np.nc
 Time steps:     133

=== DataManager Summary ===
 inputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData
 outputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Tracking_Algorithms
 inputDataDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData/1km_5min_34nz/ModelData
 inputParcelDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/D

In [85]:
#IMPORT FUNCTIONS
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
import FUNCTIONS_Variable_Calculation
from FUNCTIONS_Variable_Calculation import *

In [86]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"3_Project_Algorithms","2_Tracking_Algorithms"))
from CLASSES_TrackingAlgorithms import TrackingAlgorithms_DataLoading_Class, SlurmJobArray_Class, Results_InputOutput_Class, TrackedParcel_Loading_Class

In [87]:
##############################################
#SETUP

In [88]:
################################
#JOB ARRAY SETUP
################################
# how many total jobs are being run? i.e. array=1-100 ==> num_jobs=100
if '1e6' in ModelData.Np_str:
    num_jobs=60 #1M parcels
    num_slurm_jobs=10
if '50e6' in ModelData.Np_str:
    num_jobs=200 #50M parcels
    num_slurm_jobs=60
##############################

In [89]:
##############################################
#MODEL AND ALGORITHM NUMERICAL PARAMETERS

In [90]:
times=ModelData.time/(1e9 * 60); times=times.astype(float);
minutes=1/times[1] #1 / minutes per timestep = timesteps per minute
kms=np.argmax(ModelData.xh-ModelData.xh[0] >= 1)

In [91]:
##############################################
#DATA LOADING

In [14]:
directory = f"/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/LagrangianArrays/{ModelData.res}_{ModelData.t_res}_{ModelData.Nz_str}nz/Lagrangian_Binary_Array/"
Lagrangian_Binary_Array_Data,files = OpenMultipleSingleTimes_LagrangianArray(directory, ModelData)

In [15]:
directory = f"/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/LagrangianArrays/{ModelData.res}_{ModelData.t_res}_{ModelData.Nz_str}nz/LFC/"

LFC_LCL_Data,files = OpenMultipleSingleTimes_LagrangianArray(directory, ModelData,pattern="LFC_*.h5")

In [92]:
##############################################
#DATA LOADING FUNCTIONS

In [93]:
def GetSpatialData(Lagrangian_Binary_Array_Data, start_job,end_job):
    W = Lagrangian_Binary_Array_Data['W'].isel(p=slice(start_job,end_job)).data.compute()
    QCQI = Lagrangian_Binary_Array_Data['QCQI'].isel(p=slice(start_job,end_job)).data.compute()
    Z = Lagrangian_Binary_Array_Data['Z'].isel(p=slice(start_job,end_job)).data.compute()
    Y = Lagrangian_Binary_Array_Data['Y'].isel(p=slice(start_job,end_job)).data.compute()
    X = Lagrangian_Binary_Array_Data['X'].isel(p=slice(start_job,end_job)).data.compute()

    parcel_z = Lagrangian_Binary_Array_Data['z'].isel(p=slice(start_job,end_job)).data.compute()

    return W,QCQI,Z,Y,X,parcel_z

In [94]:
def GetLFCData(LFC_LCL_Data, start_job,end_job):
    LFC = LFC_LCL_Data['LFC'].isel(p=slice(start_job,end_job)).data.compute()
    LCL = LFC_LCL_Data['LCL'].isel(p=slice(start_job,end_job)).data.compute()
    return LFC,LCL

In [95]:
#################################
#MORE FUNCTIONS

In [96]:
#DOMAIN SUBSETTING 
#finding time subset
def GetTimeSubset(noSubset=False):
    if noSubset == True:
        t_start=0; t_end=ModelData.Ntime+1
        # print(f't in {t_start}:{t_end}')
        tSubset = np.arange(t_start,t_end+1)
    else:
        dt=ModelData.time[1].item()/1e9 #seconds per timestep
        dhours=(dt/60**2) #hours per timestep
    
        #Finding Left Boundary
        start_hour=4 #10:00 am
        t_start=int(start_hour/dhours)
        
        #Finding Right Boundary
        end_hour=11 #5pm
        t_end=int(end_hour/dhours)+1
        
        #printing
        # print(f't in {t_start}:{t_end}')
        tSubset = np.arange(t_start,t_end+1)
    return tSubset

# def GetZSubset(noSubset=True): #(not in use)
#     if noSubset == True:
#         zh_start=0; zh_end=ModelData.Nzh
#         zf_start=0; zf_end=ModelData.Nzf
#     else:
#         #Finding Boundarys
#         zhs=ModelData.zh
#         zh_start=0; zh_end=int(np.where(zhs>=19)[0][0])
#         zfs=ModelData.zf
#         zf_start=0; zf_end=int(np.where(zfs>=20)[0][0])

#     print(f'zh in {zh_start}:{zh_end}'+f', zf in {zf_start}:{zf_end}')
#     zhSubset = np.arange(zh_start,zh_end+1)
#     zfSubset = np.arange(zf_start,zf_end+1)
#     return zhSubset, zfSubset

# def GetYSubset(noSubset=True): #(not in use)

def GetXSubset(noSubset=False):

    xh = ModelData.xh - ModelData.xh[0] 
    xf = ModelData.xf - ModelData.xf[0]
    xh_max = xh[-1] #total physical length
    xf_max = xf[-1]

    if noSubset == True:
        xh_start=0; xh_end = xh_max+1
        xf_start=0; xf_end = xf_max+1
    else:
        #Finding Left Boundary
        ocean_percent=0.25
        left_to_coast=ModelData.xh[0]+(ModelData.xh[-1]-ModelData.xh[0])*ocean_percent
        xh_start=np.where(ModelData.xh>=left_to_coast)[0][0]
        xf_start=np.where(ModelData.xf>=left_to_coast)[0][0]
        
        #Finding Right Boundary
        right_fraction=80/100
        
        # Find index where physical location exceeds 80% of domain
        xh_end = np.where(xh > right_fraction * xh_max)[0][0]+1
        xf_end = np.where(xf > right_fraction * xf_max)[0][0]+1
           
    # print(f'x in {xh_start}:{xh_end} (from coast to 80% of domain, ocean excluded)')
    xhSubset = (xh_start,xh_end+1)
    xfSubset = (xf_start,xf_end+1)
    return xhSubset, xfSubset

In [97]:
#DOMAIN SUBSETTING
def DOMAIN_SUBSET(out_arr,index_adjust, X):
    print(f'length before: {len(out_arr)}')

    tSubset = GetTimeSubset(noSubset=False) #used in recombining code at the bottom
    xSubset = GetXSubset(noSubset=False)
    #SUBSETTING CODE
    
    ################
    ts,ps=out_arr[:,1],out_arr[:,0]

    #GETTING X VALUES OF EACH PARCEL 
    xs=X[ts,ps-index_adjust]

    #GETTING SUBSET CONDITIONS
    cond1=(xs>=xSubset[0][0])&(xs<=xSubset[0][1])
    cond2=(out_arr[:,1]>=tSubset[0])&(out_arr[:,1]<=tSubset[-1])
    combined_conds=cond1&cond2

    #SUBSETTING
    where=np.where(combined_conds)
    out_arr=out_arr[where]

    print(f'==> length after: {len(out_arr)}'+'\n')
    return out_arr

In [98]:
def load_file():
    Dictionary = Results_InputOutput_Class.LoadOutFile(ModelData, DataManager, job_id="combined")
    out_arr = Dictionary['out_arr']
    save_arr = Dictionary['save_arr']
    save2_arr = Dictionary['save2_arr']
    return out_arr,save_arr,save2_arr

def GetALLArrays_CL(start_job,end_job,index_adjust, X, subset=True):
    #loading back in
    [out_arr,save_arr,save2_arr]=load_file()
    
    # print(np.where(np.all(out_arr==0,axis=1))) #testing
    # print(f'there are a total of {len(out_arr)} CL parcels and {len(save_arr)} nonCL parcels'+'\n')
    
    #applying job_array to parcel number
    ####################################
    print('Applying Job Array')
    out_arr=SlurmJobArray_Class.job_filter(out_arr, start_job,end_job)
    save_arr=SlurmJobArray_Class.job_filter(save_arr, start_job,end_job)

    # print(np.where(np.all(out_arr==0,axis=1))) #testing
    # print(f'there are a total of {len(out_arr)} CL parcels and {len(save_arr)} nonCL parcels'+'\n')
    
    #CHOOSING UNIQUE INDEXES (just in case)
    ###############################################################################
    def remove_duplicates(arr):
        lst = []
        unique_values, counts = np.unique(arr[:, 0], return_counts=True)
        duplicates = unique_values[counts > 1]
        for elem in duplicates:
            idx = np.where(arr[:, 0] == elem)[0]
            extras = idx[np.where(arr[idx, 1] != np.min(arr[idx, 1]))]
            lst.extend(extras)
        mask = np.ones(len(arr), dtype=bool)
        mask[lst] = False
        return arr[mask]
    out_arr=remove_duplicates(out_arr)
    save_arr=remove_duplicates(save_arr)
    ###############################################################################
    # print(np.where(np.all(out_arr==0,axis=1))) #TESTING
    
    ############################################################ 
    #SUBSETTING
    if subset==True:
        out_arr_subset=DOMAIN_SUBSET(out_arr,index_adjust, X)
        save_arr_subset=DOMAIN_SUBSET(save_arr,index_adjust, X)
    ############################################################
    # print(np.where(np.all(out_arr==0,axis=1))) #TESTING
    
    ALL_out_arr=out_arr_subset.copy(); ALL_save_arr=save_arr_subset.copy()
    return ALL_out_arr,ALL_save_arr

In [99]:
def ddt(f,dt=1):
    ddx = (
            f[1:  ]
            -
            f[0:-1]
        ) / (
        2 * dt
    )
    return ddx

#search for deep convective parcels within lagrangian tracking output     
##############################################################
def ThresholdFilter(out_arr,parcel_z, zthresh,index_adjust,mode="SHALLOW"):
    
    out_ind=[];
    for ind in range(len(out_arr)): 
        # if np.mod(ind,5000)==0: print(f'{ind}/{len(out_arr)}')
        #CHECK TO SSEE IF NEXT MOST LOCAL TIME MAX GOES ABOVE ZTHRESHS 

        #Get Ascending Range Past LFC For Maximum 120 Minutes Simulation Time
        nummins=120; numsteps=int(nummins/times[1])
        aboverange=np.arange(out_arr[ind,2],out_arr[ind,2]+numsteps,1) #range of times between current time and numsteps later
        aboverange=aboverange[aboverange<ModelData.Ntime] #caps out at max time
        above=parcel_z[aboverange,out_arr[ind,0]-index_adjust]/1000

        #CALCULUS LOCAL MAX ALGORITHM
        #Takes The time derivative 
        ddx=ddt(above)

        #Checks whether the Local Time Max Is Located Above zthresh
        signs = np.sign(ddx)
        signs_diff=np.diff(signs)
        local_maxes=np.where((signs_diff != 0) & (signs_diff < 0))[0]+1 #make sure +1 is here
        if len(local_maxes)==0:
            local_maxes=[0]
        max_z = above[local_maxes[0]] #maximum z of parcel

        #Apply threshold condition based on mode (in SHALLOW or DEEP)
        if mode.lower() == "deep":
            condition = np.any(max_z >= zthresh)
        elif mode.lower() == "shallow":
            condition = np.any(max_z <= zthresh)
        else:
            raise ValueError(f"Invalid mode '{mode}'. Use 'deep' or 'shallow'.")

        if condition:
            out_ind.append(ind)

    #SUBSET OUT FOR FINAL RESULT
    out_arr=out_arr[out_ind,:]
    # print(f'> {zthresh} km. {len(out_arr)} leftover parcels')
    return out_arr

In [100]:
#CL and NONCL

#SHALLOW
def GetSHALLOWArrays_CL(ALL_out_arr,ALL_save_arr,parcel_z,index_adjust):
    convectivelevel=4 #4km
    SHALLOW_out_arr=ThresholdFilter(ALL_out_arr,parcel_z,convectivelevel,index_adjust,mode='SHALLOW') #nonCL
    SHALLOW_save_arr=ThresholdFilter(ALL_save_arr,parcel_z,convectivelevel,index_adjust,mode='SHALLOW') #nonCL
    return SHALLOW_out_arr,SHALLOW_save_arr

#DEEP
def GetDEEPArrays_CL(ALL_out_arr,ALL_save_arr,parcel_z,index_adjust):    
    convectivelevel=6 #6km
    DEEP_out_arr=ThresholdFilter(ALL_out_arr,parcel_z,convectivelevel,index_adjust,mode='DEEP') #CL
    DEEP_save_arr=ThresholdFilter(ALL_save_arr,parcel_z,convectivelevel,index_adjust,mode='DEEP') #nonCL
    return DEEP_out_arr,DEEP_save_arr

In [101]:
# SBF and nonSBF

#getting convergence xmax
def Get_AvgConvergence(t):

    timeString = ModelData.timeStrings[t]
    outputDataDirectory=os.path.normpath(os.path.join(DataManager.outputDataDirectory,"..","Eulerian_CLTracking"))
    Dictionary = TrackingAlgorithms_DataLoading_Class.LoadData(ModelData, DataManager, timeString,
                     dataName="Eulerian_CLTracking",outputDataDirectory=outputDataDirectory,printstatement=False)
    avgConvergence = Dictionary["avgConvergence"]
    return avgConvergence
    
def find_SBF_xmaxs():
    xmaxs=[]
    for t in range(ModelData.Ntime)[1:]:
        if t == 0:
            avgConvergence_max=np.nan
        else:
            avgConvergence = Get_AvgConvergence(t)
            avgConvergence_max=np.max(avgConvergence)
            xmax = np.where(avgConvergence==avgConvergence_max)[0][0]
            xmaxs.append(xmax)
    return xmaxs
xmaxs=find_SBF_xmaxs()

#subsetting SBF parcels
def subset_SBF(out_arr, X, index_adjust):
    SBF_subset=[]
    
    for ind in np.arange(out_arr.shape[0]):
        
        row=out_arr[ind]
        p=row[0]
        t=row[1]

        #checked if parcel is initially within 10 km of the SBF
        if X[t,p-index_adjust] in np.arange( (xmaxs[t]-10*kms),(xmaxs[t]+10*kms) +1): 
            SBF_subset.append(ind)
    
    SBF_out_arr=out_arr[SBF_subset]
    print(f'there are a total of {len(SBF_out_arr)} ALL SBF CL parcels')

    valid_range=np.arange(out_arr.shape[0])
    nonSBF_out_arr=out_arr[list(set(valid_range) - set(SBF_subset))]
    print(f'there are a total of {len(nonSBF_out_arr)} ALL nonSBF CL parcels')
    return SBF_out_arr,nonSBF_out_arr
        
def GetArrays_SBF(ALL_out_arr,parcel_z, 
                  X, index_adjust):

    #SUBSETTING OUT SHALLOW AND DEEP FROM SBF AND NONSBF
    [ALL_SBF_out_arr,ALL_nonSBF_out_arr]=subset_SBF(ALL_out_arr, X, index_adjust)
    SHALLOW_SBF_out_arr=ThresholdFilter(ALL_SBF_out_arr,parcel_z,4,index_adjust,mode='SHALLOW')
    SHALLOW_nonSBF_out_arr=ThresholdFilter(ALL_nonSBF_out_arr,parcel_z,4,index_adjust,mode='SHALLOW')
    DEEP_SBF_out_arr=ThresholdFilter(ALL_SBF_out_arr,parcel_z,6,index_adjust,mode='DEEP')
    DEEP_nonSBF_out_arr=ThresholdFilter(ALL_nonSBF_out_arr,parcel_z,6,index_adjust,mode='DEEP')

    return ALL_SBF_out_arr,ALL_nonSBF_out_arr,SHALLOW_SBF_out_arr,SHALLOW_nonSBF_out_arr,DEEP_SBF_out_arr,DEEP_nonSBF_out_arr

In [102]:
def get_ColdPool(out_arr1,out_arr2):
    arr1 = out_arr1[:,0] #CL
    arr2 = out_arr2[:,0] #nonSBF
    common_values = np.intersect1d(arr1, arr2)
    indices_arr1 = np.where(np.isin(arr1, common_values))[0]  # Indices in arr1
    ColdPool_out_arr=out_arr1[indices_arr1]
    return ColdPool_out_arr

In [103]:
counts=[[],[],[],[],[],[]]
def AddCounts(counts,ALL_out_arr,SHALLOW_out_arr,DEEP_out_arr,ALL_save_arr,SHALLOW_save_arr,DEEP_save_arr, job_id):
    counts[0].append(ALL_out_arr.shape[0])
    counts[1].append(SHALLOW_out_arr.shape[0])
    counts[2].append(DEEP_out_arr.shape[0])
    
    counts[3].append(ALL_save_arr.shape[0])
    counts[4].append(SHALLOW_save_arr.shape[0])
    counts[5].append(DEEP_save_arr.shape[0])
    return counts

In [104]:
#Additional Columns Functions

#after ascent arrays
def compute_after_arrays(data_dict, W, QCQI, index_adjust):
    def find_after_time(out_arr):
        wthresh = 0.5
        qcqithresh = 1e-6
        after_array = np.zeros(len(out_arr), dtype=int)
        for count, out_row in enumerate(out_arr):
            p = out_row[0]
            t2 = out_row[2]
            #find where parcel exits cloudy updraft
            after = np.where((W[t2:, p - index_adjust] < wthresh) |
                             (QCQI[t2:, p - index_adjust] < qcqithresh))
            if len(after[0]) != 0:
                after_array[count] = after[0][0]
        return after_array

    # apply to each dataset
    after_dict = {}
    for key, arr in data_dict.items():
        after_dict[key.replace('_arr', '_after_array')] = find_after_time(arr)
        ## if "SHALLOW" in key: #old method giving no after_time to shallow parcels (not recommended)
        ##     after_dict[key.replace('_arr', '_after_array')] = np.zeros(len(arr), dtype=int) 
        ## else:
        ##     after_dict[key.replace('_arr', '_after_array')] = find_after_time(arr)
    return after_dict

def AddColumn_AfterAscent(data_dict,after_dict):
    for (key1,key2) in zip(data_dict,after_dict):
        data_dict[key1][:, 3] = after_dict[key2]
    return data_dict

#SBF left of right flag arrays
def compute_SBFLeftRight(data_dict, X, xmaxs, index_adjust):
    def find_SBFLeftRight(out_arr):
        SBFLeftRight_array = np.zeros(len(out_arr), dtype=int)
        for count, out_row in enumerate(out_arr):
            #getting indexes
            p = out_row[0]
            t1 = out_row[1]
    
            #get X data
            xPosition = X[t1,p - index_adjust]
            xmax_T = xmaxs[t1]
    
            # SBFLeftRight_array[count] = -1 if xPosition < xmax_T else 1
            SBFLeftRight_array[count] = -1 if xPosition < xmax_T else (1 if xPosition > xmax_T else 0)
        return SBFLeftRight_array
    
    # apply to each dataset
    SBFLeftRight_dict = {}
    for key, arr in data_dict.items():
        SBFLeftRight_dict[key.replace('_arr', '_SBFLeftRight_array')] = find_SBFLeftRight(arr)
    return SBFLeftRight_dict

def AddColumn_SBFLeftRight(data_dict,SBFLeftRight_dict):
    for (key1, key2) in zip(data_dict, SBFLeftRight_dict):
        data_dict[key1][:, 4] = SBFLeftRight_dict[key2]
    return data_dict

In [105]:
def get_mean_cloud_base(out_arr, Z, Y, X, W, QCQI, index_adjust):
    #FINDING MEAN CLOUD BASE 
    zhs=ModelData.zh
    w_thresh2=0.5
    qcqi_thresh=1e-6
    type='all'
    
    profile_array =np.zeros((len(zhs), 2)) #column 1: var, column 2: counter, column 3: list of zhs
    profile_array[:,1]=zhs;
    
    # cloudbase_lst=[]
    after=4 #20 minutes
    for row in range(out_arr.shape[0]):
        if np.mod(row,3000)==0: print(f'{row}/{out_arr.shape[0]}')
        p=out_arr[row,0]
        
        # ts=np.arange(out_arr[row,4],out_arr[row,5]+1 + after)
        ts_end = min(out_arr[row, 2] + 1 + after, ModelData.Ntime) #this takes care of exceeding buffers
        ts = np.arange(out_arr[row, 1], ts_end)
        
        zs=Z[ts,p-index_adjust] #JOBARRAY INDEX_ADJUST
        ys=Y[ts,p-index_adjust] #JOBARRAY INDEX_ADJUST
        xs=X[ts,p-index_adjust] #JOBARRAY INDEX_ADJUST
    
        ws=W[ts,p-index_adjust] #JOBARRAY INDEX_ADJUST
        qcqis=QCQI[ts,p-index_adjust] #JOBARRAY INDEX_ADJUST
        where=np.where((ws>=w_thresh2) & (qcqis>=qcqi_thresh))
        profile_array[zs[where],0]+=1
    
    # all_cloudbase=zhs[np.where(profile_array[:,0]!=0)[0][0]]
    nonzero_indices = np.where(profile_array[:, 0] != 0)[0]
    if len(nonzero_indices) > 0:
        all_cloudbase = zhs[nonzero_indices[0]]
    else:
        all_cloudbase = np.nan
    return all_cloudbase

In [106]:
def get_mean_LFC(out_arr, Z, Y, X, LFC, index_adjust):
    #FINDING MEAN CLOUD BASE 
    zhs=ModelData.zh
    w_thresh2=0.5
    qcqi_thresh=1e-6
    type='all'
    
    lfc_array =np.zeros((1, 2)) #column 1: var, column 2: counter, column 3: list of zhs
    Mean_LFC_array = []
    
    # cloudbase_lst=[]
    after=4 #20 minutes
    for row in range(out_arr.shape[0]):
        if np.mod(row,3000)==0: print(f'{row}/{out_arr.shape[0]}')
        p=out_arr[row,0]
        
        # ts=np.arange(out_arr[row,4],out_arr[row,5]+1 + after)
        ts_end = min(out_arr[row, 2] + 1 + after, ModelData.Ntime) #this takes care of exceeding buffers
        ts = np.arange(out_arr[row, 1], ts_end)
        
        zs=Z[ts,p-index_adjust] #JOBARRAY INDEX_ADJUST
        ys=Y[ts,p-index_adjust] #JOBARRAY INDEX_ADJUST
        xs=X[ts,p-index_adjust] #JOBARRAY INDEX_ADJUST
    
        lfcs=LFC[ts,p-index_adjust] #JOBARRAY INDEX_ADJUST #*******
        lfcs=lfcs[lfcs>0]
        lfc_array[0,0]+=np.sum(lfcs);lfc_array[0,1]+=len(lfcs)

        Mean_LFC = lfc_array[0,0]/ lfc_array[0,1]
        Mean_LFC_array.append(Mean_LFC)
    return Mean_LFC_array

In [107]:
################################################
#RUNNING

In [108]:
def RunCode(job_id_list):
    count_dict = {key: [] for key in [
        'CL_ALL_out_arr', 'CL_SHALLOW_out_arr', 'CL_DEEP_out_arr',
        'nonCL_ALL_out_arr', 'nonCL_SHALLOW_out_arr', 'nonCL_DEEP_out_arr',
    
        'SBF_ALL_out_arr', 'nonSBF_ALL_out_arr',
        'SBF_SHALLOW_out_arr', 'nonSBF_SHALLOW_out_arr',
        'SBF_DEEP_out_arr', 'nonSBF_DEEP_out_arr',
    
        'ColdPool_ALL_out_arr', 'ColdPool_SHALLOW_out_arr',
        'ColdPool_DEEP_out_arr'
    ]}
    
    all_cloudbase=[]
    for job_id in job_id_list:
        if job_id % 10: print(f"current job_id: {job_id}")

        #starting jobarray
        [start_job, end_job, index_adjust] = SlurmJobArray_Class.StartJobArray(ModelData,job_id,num_jobs)

        print("getting variables")
        [W, QCQI, Z, Y, X, parcel_z] = GetSpatialData(Lagrangian_Binary_Array_Data, start_job,end_job)
        LFC,LCL = GetLFCData(LFC_LCL_Data, start_job,end_job)
    
        #CL and nonCL
        print("subsetting CL and nonCL")
        [CL_ALL_out_arr, nonCL_ALL_out_arr] = GetALLArrays_CL(start_job,end_job,index_adjust, X)
        [CL_SHALLOW_out_arr, nonCL_SHALLOW_out_arr] = GetSHALLOWArrays_CL(CL_ALL_out_arr, nonCL_ALL_out_arr,parcel_z,index_adjust)
        [CL_DEEP_out_arr, nonCL_DEEP_out_arr] = GetDEEPArrays_CL(CL_ALL_out_arr, nonCL_ALL_out_arr,parcel_z,index_adjust)
    
        #SBF and nonSBF 
        print("subsetting SBF and nonSBF")
        [SBF_ALL_out_arr, nonSBF_ALL_out_arr,
         SBF_SHALLOW_out_arr, nonSBF_SHALLOW_out_arr,
         SBF_DEEP_out_arr, nonSBF_DEEP_out_arr] = GetArrays_SBF(CL_ALL_out_arr,parcel_z, 
                                                                X, index_adjust)
        
        # ColdPool
        print("subsetting ColdPool")
        ColdPool_ALL_out_arr = get_ColdPool(CL_ALL_out_arr, nonSBF_ALL_out_arr)
        ColdPool_SHALLOW_out_arr = get_ColdPool(CL_SHALLOW_out_arr, nonSBF_SHALLOW_out_arr)
        ColdPool_DEEP_out_arr = get_ColdPool(CL_DEEP_out_arr, nonSBF_DEEP_out_arr)
    
        # Create a dictionary of arrays to save (including SBF arrays)
        data_dict = {
            'CL_ALL_out_arr': CL_ALL_out_arr,
            'CL_SHALLOW_out_arr': CL_SHALLOW_out_arr,
            'CL_DEEP_out_arr': CL_DEEP_out_arr,
            'nonCL_ALL_out_arr': nonCL_ALL_out_arr,
            'nonCL_SHALLOW_out_arr': nonCL_SHALLOW_out_arr,
            'nonCL_DEEP_out_arr': nonCL_DEEP_out_arr,
        
            'SBF_ALL_out_arr': SBF_ALL_out_arr, 
            'nonSBF_ALL_out_arr': nonSBF_ALL_out_arr,
            'SBF_SHALLOW_out_arr': SBF_SHALLOW_out_arr,
            'nonSBF_SHALLOW_out_arr': nonSBF_SHALLOW_out_arr,
            'SBF_DEEP_out_arr': SBF_DEEP_out_arr,
            'nonSBF_DEEP_out_arr': nonSBF_DEEP_out_arr,
        
            'ColdPool_ALL_out_arr': ColdPool_ALL_out_arr,
            'ColdPool_SHALLOW_out_arr': ColdPool_SHALLOW_out_arr,
            'ColdPool_DEEP_out_arr': ColdPool_DEEP_out_arr
        }

        print('storing after ascent arrays')
        #ADDING ANOTHER COLUMN TO STORE THE AFTER ARRAYS and SBF_left_right
        for key, arr in data_dict.items():
            new_column = np.zeros((arr.shape[0], 2), dtype=int) #adds two columns
            data_dict[key] = np.hstack((arr, new_column))
    
        # Compute after-arrays
        after_dict = compute_after_arrays(data_dict, W, QCQI, index_adjust)
        # Adding to Fourth Column
        data_dict=AddColumn_AfterAscent(data_dict,after_dict)

        # Compute SBF_leftright flags
        SBFLeftRight_dict = compute_SBFLeftRight(data_dict, X, xmaxs, index_adjust)
        data_dict = AddColumn_SBFLeftRight(data_dict,SBFLeftRight_dict)

        #GETTING THE COUNT
        print('computing count')
        for key in count_dict:
            count_dict[key].append(data_dict[key].shape[0])
    
        #GETTING CLOUDBASE ZLEVEL
        print('computing cloudbase')
        cloudbase=get_mean_cloud_base(CL_ALL_out_arr, Z, Y, X, W, QCQI, index_adjust)
        all_cloudbase.append(cloudbase)
        print(all_cloudbase) #*#*
    
        #GETTING LFC PROFILE
        print('computing LFC and LCL')
        LFC_profile=get_mean_LFC(CL_ALL_out_arr, Z, Y, X, LFC, index_adjust)
        LCL_profile=get_mean_LFC(CL_ALL_out_arr, Z, Y, X, LCL, index_adjust)
        
        # Call SaveData with the dictionary
        print('saving')
        Results_InputOutput_Class.SaveAllCloudBase_Job(ModelData,DataManager, all_cloudbase,job_id)
        Results_InputOutput_Class.SaveLFC_Profile_Job(ModelData,DataManager, LFC_profile,job_id, Ltype="LFC")
        Results_InputOutput_Class.SaveLFC_Profile_Job(ModelData,DataManager, LCL_profile,job_id, Ltype="LCL")
        Dictionary = {**data_dict, **after_dict}
        Results_InputOutput_Class.SaveOutFile(ModelData,DataManager, Dictionary,f"{job_id}_SUBSET")

        # if job_id == job_id_list[0]: break #testing
        # if job_id == job_id_list[1]: break #testing
    combined_counts={key: sum(counts) for key, counts in count_dict.items()}
    print(f"combined_counts = {combined_counts}")

    return Dictionary
    # return Dictionary, X, index_adjust #TESTING

In [60]:
#starting job arrays
[start_slurm_job,end_slurm_job]=SlurmJobArray_Class.StartSlurmJobArray(num_jobs=num_jobs,num_slurm_jobs=num_slurm_jobs,ISRUN=True) #if ISRUN is False, then will not run using slurm_job_array
print(f"Running on Slurm_Jobs for Slurm_Job_Ids: {(start_slurm_job,end_slurm_job-1)}")
job_id_list=np.arange(start_slurm_job,end_slurm_job)

#running algorithm
StartTime = time.time()
Dictionary = RunCode(job_id_list)
EndTime = time.time(); ElapsedTime = EndTime - StartTime; print(f"Total Elapsed Time: {ElapsedTime} seconds") 

Running on Slurm_Jobs for Slurm_Job_Ids: (1, 5)
current job_id: 1
getting variables
subsetting CL and nonCL
Applying Job Array
length before: 223
==> length after: 189

length before: 247
==> length after: 157

subsetting SBF and nonSBF
there are a total of 25 ALL SBF CL parcels
there are a total of 164 ALL nonSBF CL parcels
subsetting ColdPool
storing after ascent arrays
computing count
computing cloudbase
0/189
[np.float32(1.2463868)]
computing LFC and LCL
0/189
0/189
saving
Saved output to /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Tracking_Algorithms/1km_5min_34nz/Lagrangian_UpdraftTracking/Lagrangian_UpdraftTracking_1km_5min_34nz_job1_all_cloudbase.h5 

Saved output to /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Tracking_Algorithms/1km_5min_34nz/Lagrangian_UpdraftTracking/Lagrangian_UpdraftTracking_1km_5min_34nz_job1_LFC_profile.h5 

Saved output to /mnt/lustre/ko

In [109]:
###################################### 
#RECOMBINING
recombine=False #KEEP FALSE WHEN RUNNING
recombine=True

In [110]:
def ReadData(varName,job_id):
    out = Results_InputOutput_Class.LoadOutFile(ModelData,DataManager,f"{job_id}_SUBSET",varName = varName)
    return out
# ReadData(var_name,job_id)
def SaveFinalData(ModelData,DataManager, Dictionary):    
    Results_InputOutput_Class.SaveOutFile(ModelData,DataManager, Dictionary,job_id="combined_SUBSET")

In [111]:
def MakeCount(count_dict):
    # print('Getting Tracked Parcel Count')
    for job_id in np.arange(1,num_jobs+1):
        if job_id % 10==0: print(f"current job_id: {job_id}")
        
        for key in count_dict:
            data_dict_key=ReadData(key,job_id)
            count_dict[key].append(data_dict_key.shape[0])
    combined_counts={key: sum(counts) for key, counts in count_dict.items()}
    return combined_counts

def GetCombinedCounts():
    #GETTING COUNTS FOR MAKING INITIAL RECOMBINED ARRAYS LATER
    count_dict = {key: [] for key in [
        'CL_ALL_out_arr', 'CL_SHALLOW_out_arr', 'CL_DEEP_out_arr',
        'nonCL_ALL_out_arr', 'nonCL_SHALLOW_out_arr', 'nonCL_DEEP_out_arr',
    
        'SBF_ALL_out_arr', 'nonSBF_ALL_out_arr',
        'SBF_SHALLOW_out_arr', 'nonSBF_SHALLOW_out_arr',
        'SBF_DEEP_out_arr', 'nonSBF_DEEP_out_arr',
    
        'ColdPool_ALL_out_arr', 'ColdPool_SHALLOW_out_arr',
        'ColdPool_DEEP_out_arr'
    ]}

    combined_counts=MakeCount(count_dict)
    print(combined_counts)
    return combined_counts

In [115]:
def MakeEmpty(counts_dict):
    empty_dict = {}
    for key, count in counts_dict.items():
        empty_dict[key] = np.zeros((count, 5), dtype=int)
    return empty_dict
    
def MakeOutputDictionary():
    
    Dictionary=MakeEmpty(combined_counts)
    for job_id in np.arange(1,num_jobs+1):
        if job_id % 10==0: print(f"current job_id: {job_id}")
            
        for key in Dictionary:
            var=ReadData(key,job_id)
            if var.size!=0:
                a=np.where(np.all(Dictionary[key] == 0, axis=1))[0][0]
                b=a+var.shape[0]
                # print(key,a,b) #TESTING
                Dictionary[key][a:b]=var
    return Dictionary

In [116]:
def CombineCloudBase():

    #initializing
    all_cloudbase=[]

    #running
    for job_id in np.arange(1,num_jobs+1):
        all_cloudbase_job = Results_InputOutput_Class.LoadAllCloudBase_Job(ModelData,DataManager,
                     job_id)['all_cloudbase']
        all_cloudbase+=list(all_cloudbase_job)
    all_cloudbase=np.array(all_cloudbase)

    # saving
    Results_InputOutput_Class.SaveAllCloudBase_Combined(ModelData,DataManager,
                              all_cloudbase)
    return all_cloudbase
    
def CombineLFC_LCL():

        #initializing
        MeanLFC=[]
        MeanLCL=[]

        #running
        for job_id in np.arange(1,num_jobs+1):
            LoadedLFC = Results_InputOutput_Class.LoadLFC_Profile_Job(ModelData,DataManager,
                         job_id, Ltype="LFC")["LFC_profile"]
            LoadedLCL = Results_InputOutput_Class.LoadLFC_Profile_Job(ModelData,DataManager,
                         job_id, Ltype="LCL")["LCL_profile"]
            MeanLFC+=list(LoadedLFC)
            MeanLCL+=list(LoadedLCL)

        MeanLFC = np.array(MeanLFC)/1e3
        MeanLCL = np.array(MeanLCL)/1e3

        #saving
        Results_InputOutput_Class.SaveLFC_Profile_Combined(ModelData,DataManager,
                        MeanLFC, Ltype = "LFC")
        Results_InputOutput_Class.SaveLFC_Profile_Combined(ModelData,DataManager,
                        MeanLCL, Ltype = "LCL")
        return MeanLFC, MeanLCL


In [117]:
#Recombining Tracked Parcels
if recombine==True:
    combined_counts = GetCombinedCounts()
    Dictionary = MakeOutputDictionary()
    SaveFinalData(ModelData,DataManager, Dictionary)

#Recombining LFC and LCL
if recombine==True:
    all_cloudbase=CombineCloudBase()
    print(f"cloudbase_mean = {np.mean(all_cloudbase)}")
    
    MeanLFC_Array, MeanLCL_Array =CombineLFC_LCL()
    print(f"LFCMean = {np.mean(MeanLFC_Array)}")
    print(f"LCLMean = {np.mean(MeanLCL_Array)}")

current job_id: 10
current job_id: 20
current job_id: 30
current job_id: 40
current job_id: 50
current job_id: 60
{'CL_ALL_out_arr': 12239, 'CL_SHALLOW_out_arr': 8532, 'CL_DEEP_out_arr': 1349, 'nonCL_ALL_out_arr': 10043, 'nonCL_SHALLOW_out_arr': 7684, 'nonCL_DEEP_out_arr': 1113, 'SBF_ALL_out_arr': 1905, 'nonSBF_ALL_out_arr': 10334, 'SBF_SHALLOW_out_arr': 961, 'nonSBF_SHALLOW_out_arr': 7571, 'SBF_DEEP_out_arr': 489, 'nonSBF_DEEP_out_arr': 860, 'ColdPool_ALL_out_arr': 10334, 'ColdPool_SHALLOW_out_arr': 7571, 'ColdPool_DEEP_out_arr': 860}
current job_id: 10
current job_id: 20
current job_id: 30
current job_id: 40
current job_id: 50
current job_id: 60
Saved output to /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Tracking_Algorithms/1km_5min_34nz/Lagrangian_UpdraftTracking/Lagrangian_UpdraftTracking_1km_5min_34nz_jobcombined_SUBSET.h5 

Saved output to /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT

In [46]:
#############################################
#READING BACK IN SUBSETTED TRACKED PARCEL DATA

In [47]:
trackedArrays,LevelsDictionary = TrackedParcel_Loading_Class.LoadingSubsetParcelData(ModelData,DataManager,
                                                         Results_InputOutput_Class)

CL: ALL=12239, SHALLOW=8532, DEEP=1349
nonCL: ALL=10043, SHALLOW=7684, DEEP=1113
SBF: ALL=1905, SHALLOW=961, DEEP=489
ColdPool: ALL=10334, SHALLOW=7571, DEEP=860
Mean Cloudbase is: 1.45 km

Min Cloudbase is: 1.25 km

Mean LFC is: 1.93 km

Mean LCL is: 1.79 km

Min LFC is: 1.45 km

Min LCL is: 1.37 km



In [None]:
#############################################
#TESTING

In [121]:
# dir = "/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/CodeFiles/3_Project_Algorithms/2_Tracking_Algorithms/Old_Version/OUTPUT"
# fileName = "parcel_tracking_SUBSET_1km_5min_1e6.h5"
# filePath = os.path.join(dir,fileName)


# with h5py.File(filePath, 'r') as f:
#     print(f.keys())
#     out = f["CL_ALL_out_arr"][:]


# one = trackedArrays["CL"]["ALL"] #get trackedArrays from above "loading back in" code
# two = out

# col1 = one[:, 0]
# col2 = two[:, 0]

# set1 = set(col1)
# set2 = set(col2)

# # Intersection → values in both
# common = set1 & set2

# # Only in one or two
# only_in_one = set1 - set2
# only_in_two = set2 - set1

# print("Common elements:", len(common))
# print("➕ Only in one:", len(only_in_one))
# print("➖ Only in two:", len(only_in_two))

# # Optional: show some examples
# print("\nExamples only in one:", list(only_in_one)[:10])
# print("Examples only in two:", list(only_in_two)[:10])

In [None]:
# #TESTING SBF_LeftRight
# print(Dictionary.keys())
# arr = Dictionary["CL_DEEP_out_arr"]
# ps = arr[:, 0].astype(int) - index_adjust
# ts = arr[:, 1].astype(int)
# xmaxs_arr = np.array(xmaxs)[ts]

# # Compare all elements at once
# a = np.where(X[ts, ps] < xmaxs_arr, -1,
#              np.where(X[ts, ps] > xmaxs_arr, 1, 0))
# print(np.all(arr[:,4] == a))