In [1]:
####################################
#ENVIRONMENT SETUP

In [2]:
#Importing Libraries
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr

import sys; import os; import time; from datetime import timedelta
import pickle
import h5py

from tqdm import tqdm

from glob import glob

In [3]:
#MAIN DIRECTORIES
def GetDirectories():
    """Return the directories this notebook works with.

    Returns
    -------
    tuple of str
        (mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory),
        where codeDirectory is the current working directory at call time.
    """
    project_root = '/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
    scratch_root = '/mnt/lustre/koa/scratch/air673/'
    # Shared code files live under the project root.
    code_root = os.path.join(project_root, "Code/CodeFiles/")
    return project_root, code_root, scratch_root, os.getcwd()

mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory = GetDirectories()

In [4]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
from CLASSES_Variable_Calculation import ModelData_Class, DataManager_Class

In [5]:
# Data-loading object for simulation 1.
ModelData = ModelData_Class(
    mainDirectory,
    scratchDirectory,
    simulationNumber=1,
)

# Data manager configured for the Lagrangian updraft-tracking output of this run.
DataManager = DataManager_Class(
    mainDirectory,
    scratchDirectory,
    ModelData.res,
    ModelData.t_res,
    ModelData.Nz_str,
    ModelData.Np_str,
    dataType="Tracking_Algorithms",
    dataName="Lagrangian_UpdraftTracking",
    dtype='float32',
    codeSection="Project_Algorithms",
)

=== CM1 Data Summary ===
 Simulation #:   1
 Resolution:     1km
 Time step:      5min
 Vertical levels:34
 Parcels:        1e6
 Data file:      /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_1km_5min_34nz.nc
 Parcel file:    /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_pdata_1km_5min_1e6np.nc
 Time steps:     133

=== DataManager Summary ===
 inputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData
 outputDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Tracking_Algorithms
 inputDataDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Variable_Calculation/TimeSplitModelData/1km_5min_34nz/ModelData
 inputParcelDirectory #:   /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/D

In [6]:
#IMPORT FUNCTIONS
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
import FUNCTIONS_Variable_Calculation
from FUNCTIONS_Variable_Calculation import *

In [7]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"3_Project_Algorithms","2_Tracking_Algorithms"))
from CLASSES_TrackingAlgorithms import TrackingAlgorithms_DataLoading_Class, Results_InputOutput_Class, SlurmJobArray_Class

In [8]:
##############################
#PRINTING FUNCTIONS

In [9]:
import sys
import os

# Stream to put back when output is restored. BlockPrint() refreshes it with
# whatever sys.stdout is at the moment output gets suppressed.
_original_stdout = sys.stdout
# The os.devnull handle currently installed as sys.stdout (None when not suppressed).
_devnull_stdout = None

def BlockPrint():
    """Suppress all print() output by redirecting sys.stdout to os.devnull.

    Calling it again while output is already suppressed is a no-op, so no
    second devnull handle is opened and leaked.
    """
    global _original_stdout, _devnull_stdout
    if _devnull_stdout is not None and sys.stdout is _devnull_stdout:
        return
    _original_stdout = sys.stdout
    _devnull_stdout = open(os.devnull, 'w')
    sys.stdout = _devnull_stdout

def RestorePrint():
    """Restore print() output.

    Only the devnull handle opened by BlockPrint() is ever closed. Calling
    this when nothing is suppressed does nothing (the previous version closed
    the real sys.stdout in that case).
    """
    global _original_stdout, _devnull_stdout
    if _devnull_stdout is None:
        return
    if sys.stdout is _devnull_stdout:
        sys.stdout = _original_stdout
    _devnull_stdout.close()
    _devnull_stdout = None

# NO_PRINT=False
NO_PRINT=True

In [10]:
##############################
#JOBARRAY SETUP

In [11]:
################################
#JOB ARRAY SETUP
################################
# how many total jobs are being run? i.e. array=1-100 ==> num_jobs=100
# num_slurm_jobs is passed to SlurmJobArray_Class.StartSlurmJobArray together with num_jobs.
# '50e6' is tested first so the outcome matches the previous pair of independent
# if-statements (where the later match won) if both substrings were ever present.
if '50e6' in ModelData.Np_str:
    num_jobs=200 #50M parcels
    num_slurm_jobs=60
elif '1e6' in ModelData.Np_str:
    num_jobs=60 #1M parcels
    num_slurm_jobs=10
else:
    # Fail here with a clear message instead of a NameError further down the notebook.
    raise ValueError(f"No job-array configuration for Np_str={ModelData.Np_str!r}; "
                     "add num_jobs/num_slurm_jobs for this parcel count")
##############################

In [12]:
##############################################
#MODEL AND ALGORITHM NUMERICAL PARAMETERS

In [13]:
# Model output times as floats; the 1e9*60 divisor presumably converts nanoseconds to minutes (TODO confirm units of ModelData.time).
times = (ModelData.time / (1e9 * 60)).astype(float)

# 1 / minutes per timestep = timesteps per minute (uses times[1] as the timestep length).
minutes = 1 / times[1]

# Index of the first x grid point at least 1 coordinate unit beyond xh[0];
# used later as "grid points per km" (assumes xh is in km -- TODO confirm).
kms = np.argmax(ModelData.xh - ModelData.xh[0] >= 1)

In [14]:
##############################################
#DATA LOADING

In [15]:
# Open the parcel dataset for this simulation; RunAlgorithm slices it per job via GetData().
parcel1 = ModelData.OpenParcel()

Opened dataset: /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Model/cm1r20.3/run/cm1out_pdata_1km_5min_1e6np.nc


In [16]:
# Directory of the Lagrangian binary arrays for this run configuration. Built
# from mainDirectory (see GetDirectories) instead of repeating the absolute
# project root; the trailing "" keeps the final path separator of the old string.
directory = os.path.join(mainDirectory, "Code", "OUTPUT", "Variable_Calculation", "LagrangianArrays",
                         f"{ModelData.res}_{ModelData.t_res}_{ModelData.Nz_str}nz",
                         "Lagrangian_Binary_Array", "")
Lagrangian_Binary_Array_Data,files = OpenMultipleSingleTimes_LagrangianArray(directory, ModelData)

In [17]:
# Directory of the LFC/LCL Lagrangian arrays for this run configuration. Built
# from mainDirectory (see GetDirectories) instead of repeating the absolute
# project root; the trailing "" keeps the final path separator of the old string.
directory = os.path.join(mainDirectory, "Code", "OUTPUT", "Variable_Calculation", "LagrangianArrays",
                         f"{ModelData.res}_{ModelData.t_res}_{ModelData.Nz_str}nz",
                         "LFC", "")

# Only files matching LFC_*.h5 are opened.
LFC_LCL_Data,files = OpenMultipleSingleTimes_LagrangianArray(directory, ModelData,pattern="LFC_*.h5")

In [18]:
##############################################
#DATA LOADING FUNCTIONS

In [19]:
#SUBSETTING PARCEL DATA
def GetData(parcel1,start_job,end_job):
    """Return the part of the parcel dataset belonging to one job.

    The dataset is sliced along its ``xh`` dimension over
    [start_job, end_job) using ``isel``.
    """
    job_slice = slice(start_job, end_job)
    return parcel1.isel(xh=job_slice)

In [20]:
def GetSpatialData(Lagrangian_Binary_Array_Data, start_job,end_job):
    """Load this job's slice of every array the tracking algorithm needs.

    Each variable is sliced along ``p`` over [start_job, end_job) and
    materialised with ``.data.compute()``.

    Returns
    -------
    tuple
        (parcel_z, parcel_x, parcel_u, parcel_w, Z, Y, X, W) -- note that
        u comes before w in the returned order.
    """
    job_slice = slice(start_job, end_job)

    def _load(name):
        # Select the job's parcels, then pull the values into memory.
        return Lagrangian_Binary_Array_Data[name].isel(p=job_slice).data.compute()

    # Same load order as before: z, x, w, u, then Z, Y, X, W.
    loaded = {name: _load(name) for name in ('z', 'x', 'w', 'u', 'Z', 'Y', 'X', 'W')}

    return (loaded['z'], loaded['x'], loaded['u'], loaded['w'],
            loaded['Z'], loaded['Y'], loaded['X'], loaded['W'])

In [21]:
def GetLFCData(LFC_LCL_Data, start_job,end_job):
    """Load this job's slice of the 'LFC' and 'LCL' arrays.

    Both variables are sliced along ``p`` over [start_job, end_job) and
    materialised with ``.data.compute()``. Returns ``(LFC, LCL)``.
    """
    job_slice = slice(start_job, end_job)
    LFC, LCL = (LFC_LCL_Data[key].isel(p=job_slice).data.compute()
                for key in ('LFC', 'LCL'))
    return LFC, LCL

In [22]:
#LOADING CL MAXS FROM CL TRACKING ALGORITHM
# Per-timestep cache of the "maxConvergence_X" field from the Eulerian CL tracking
# output. ParcelTracking calls Get_Conv_X once per candidate parcel, so without
# the cache the same file is reloaded for every parcel sharing a timestep.
# Re-running this cell empties the cache (do so if the CL tracking output is regenerated).
_CONV_X_BY_TIME = {}

def Get_Conv_X(t,z,y):
    """Return the CL-tracking "maxConvergence_X" entry at time index t, indices [z, y].

    Parameters
    ----------
    t : int
        Time index into ModelData.timeStrings.
    z, y : int
        Indices applied to the loaded "maxConvergence_X" field as field[z, y].

    Returns
    -------
    Whatever ``field[z, y]`` yields; ParcelTracking passes it to np.isin as the
    x-indices of the convergence maxima.
    """
    key = int(t)
    if key not in _CONV_X_BY_TIME:
        timeString = ModelData.timeStrings[t]
        # The Eulerian CL tracking output sits next to this notebook's output directory.
        outputDataDirectory=os.path.normpath(os.path.join(DataManager.outputDataDirectory,"..","Eulerian_CLTracking"))
        Dictionary = TrackingAlgorithms_DataLoading_Class.LoadData(ModelData, DataManager, timeString,
                         dataName="Eulerian_CLTracking",outputDataDirectory=outputDataDirectory)
        _CONV_X_BY_TIME[key] = Dictionary["maxConvergence_X"]
    whereCL = _CONV_X_BY_TIME[key]
    Conv_X_Max=whereCL[z,y]
    return Conv_X_Max

In [23]:
##############################
#ALGORITHM FUNCTIONS

In [24]:
#Updated Lagrangian Tracking Algorithm

#Algorithm Steps:
#(1) Find the first time a parcel is above the LFC.
#(2) First check whether the parcel ascends (w>=0.1) for another 20 minutes.
#(3) If so, find the most recent earlier time at which the parcel was slow (w<0.1).
#(4) If the parcel is above 750m at that time, save it, "forget" it, and move on to the next parcel.
#(5) If the parcel is below 750m at that time, check whether it is within 2km of the CL_Max found by the CL Tracking Algorithm.
#(6) If the parcel is near the CL, store it; otherwise save it, "forget" it, and move on to the next parcel.
#(7) Continue to the next parcel.

#(Also, if during the traceback the parcel escapes the x or z boundary, "forget" the parcel and move on.)

In [25]:
#Numerical Settings
# Number of model output times.
Nt = ModelData.Ntime
# Length of one timestep: times[1] (minutes) * 60.
dt = times[1] * 60
# Height a parcel must be below to be counted.
CLmaxheight = 750  # 750m
# BL slow-down threshold on vertical velocity.
w_thresh = 0.1
def GetNp(parcel):
    """Return the number of parcels, taken as the length of ``parcel['xh']``."""
    return len(parcel['xh'])

In [26]:
# if ((x + dt*u)==0) or ((z + dt*w)==0)
# u=u[t,Z[t,p],Y[t,p],X[t,p]]; W=W[t,p]
# [u[t,Z[t,p],Y[t,p],X[t,p]] for t in time_arr] >np.max(data['xf'].values) or < np.min(data['xf'].values)
# similarly for w
################################################################################################################
#BOUNDARY-ESCAPE CONDITION
# Domain extent used by check_boundary: extremes of the xf / zf coordinates
# scaled by 1e3 (presumably km -> m; TODO confirm coordinate units).
xmin, xmax = np.min(ModelData.xf)*1e3, np.max(ModelData.xf)*1e3
zmin, zmax = np.min(ModelData.zf)*1e3, np.max(ModelData.zf)*1e3

def check_boundary(p, where_BL, above_LFC,
                   parcel_x, parcel_u, 
                   parcel_z, parcel_w,
                   Z, Y, X):
    """Flag whether parcel ``p`` would leave the domain during the traceback window.

    For every time index t in [where_BL, above_LFC) the parcel position is
    stepped forward once, ``position[t, p] + dt * velocity[t, p]``, and the
    result is compared with the module-level bounds xmin/xmax and zmin/zmax.

    Parameters
    ----------
    p : int
        Parcel (column) index into the (time, parcel) arrays.
    where_BL, above_LFC : int
        Start (inclusive) and end (exclusive) time indices of the window.
    parcel_x, parcel_u, parcel_z, parcel_w : array-like, indexed [time, parcel]
        Parcel positions and velocities.
    Z, Y, X :
        Unused; kept so existing callers do not have to change.

    Returns
    -------
    tuple of int
        ``(x_bound, z_bound)``: 1 if any stepped x (resp. z) position falls
        outside the domain, else 0.
    """
    steps = np.arange(where_BL, above_LFC)

    # Plain floats keep the arithmetic independent of whether dt and the bounds
    # are numpy scalars or 0-d array objects.
    step_seconds = float(dt)
    x_lo, x_hi = float(xmin), float(xmax)
    z_lo, z_hi = float(zmin), float(zmax)

    # One vectorised evaluation over the whole window, in float64.
    x_tend = (np.asarray(parcel_x[steps, p], dtype=np.float64)
              + step_seconds * np.asarray(parcel_u[steps, p], dtype=np.float64))
    z_tend = (np.asarray(parcel_z[steps, p], dtype=np.float64)
              + step_seconds * np.asarray(parcel_w[steps, p], dtype=np.float64))

    x_bound = int(bool(np.any((x_tend < x_lo) | (x_tend > x_hi))))
    z_bound = int(bool(np.any((z_tend < z_lo) | (z_tend > z_hi))))

    out=(x_bound,z_bound)
    # Only one message is printed; the x-boundary takes precedence.
    if out[0]==1:
        print(f'parcel {p} crossed x-boundary between t={where_BL} and t={above_LFC}')
    elif out[1]==1:
        print(f'parcel {p} crossed z-boundary between t={where_BL} and t={above_LFC}')
    return out
#############################################################################################################

In [27]:
#Initialize Output Storage Vector
def InitializeData(Np):
    """Create the three zero-filled (Np, 3) int32 result arrays.

    Returns
    -------
    out_arr : parcels that slow down below 750m within 2 km of the CL
    save_arr : continued-ascent, slow-below-750m parcels that are not within 2 km of the CL
    save2_arr : continued-ascent, slow-above-750m parcels
    """
    out_arr, save_arr, save2_arr = (np.zeros((Np, 3), dtype=np.int32) for _ in range(3))
    return out_arr, save_arr, save2_arr
# [out_arr,save_arr,save2_arr]=InitializeData(Np)

In [28]:
#############################################################################################################
#The Algorithm

In [29]:
def ParcelTracking(Np,W,LFC,
                   parcel_z,parcel_x,
                   parcel_w,parcel_u, 
                   Z,Y,X,
                   out_arr,save_arr,save2_arr,
                   ascent_condition=False):
    """Classify every parcel by where it was last slow before first rising above its LFC.

    For each parcel p (column index of the (time, parcel) inputs):
      1. ``above_LFC`` = first time index with parcel_z >= LFC; skip the parcel if none.
      2. If ``ascent_condition`` is True, require that W does not turn negative
         within 20*minutes timesteps after ``above_LFC``; otherwise no ascent check.
      3. ``where_BL`` = last time index before ``above_LFC`` with W < w_thresh;
         skip the parcel if none.
      4. Skip the parcel if check_boundary() reports an x- or z-boundary escape
         between ``where_BL`` and ``above_LFC``.
      5. If parcel_z at ``where_BL`` is not <= CLmaxheight, record in save2_arr.
      6. Otherwise look up the convergence maxima with Get_Conv_X and record in
         out_arr if one lies within +/- 2*kms x-indices of the parcel, else in save_arr.

    Each recorded row is ``(p, where_BL, above_LFC)`` written at row index p.
    The three result arrays are modified in place and also returned.

    Uses the module-level names minutes, w_thresh, CLmaxheight, kms,
    check_boundary and Get_Conv_X.

    NOTE: the proximity window used to be ``np.arange(x-2*kms, x+3*kms)``. That
    equals +/- 2 km only when kms == 1; for finer grids it reached further on
    the +x side than on the -x side. It is now ``x-2*kms .. x+2*kms`` inclusive.
    Results for kms == 1 are unchanged.
    """
    min_ascent_steps = 20*minutes   # 20 minutes expressed in timesteps
    cl_halfwidth = 2*kms            # 2 km in x grid points (kms is used as grid points per km)

    #1--------------Looping over each parcel
    for p in range(Np):
        if p % 50000 == 0: print(f'current parcel: {p}/{Np}')

        z_p = parcel_z[:,p]
        W_p = W[:,p]
        LFC_p = LFC[:,p]

        #----FIND FIRST TIME PARCEL IS AT OR ABOVE LFC----
        lfc_hits = np.where(z_p >= LFC_p)[0]
        if lfc_hits.size == 0:
            continue #the parcel is never above the LFC
        above_LFC = lfc_hits[0]

        #----OPTIONALLY CHECK THAT IT ASCENDS FOR >= 20 minutes AFTER LFC----
        if ascent_condition == True:
            # Offset (in timesteps after above_LFC) of the first negative W, if any.
            descending = np.where(W_p[above_LFC+1:] < 0)[0]
            ascend_stop = descending[0] if descending.size > 0 else 1e6
        else:
            # Later subsetting handles ascent, so by default every parcel passes this check.
            ascend_stop = 1e6
        if not (ascend_stop >= min_ascent_steps):
            continue

        #----FIND THE LAST PRIOR TIME W_p < w_thresh----
        slow_times = np.where(W_p[0:above_LFC] < w_thresh)[0]
        if slow_times.size == 0:
            continue #the parcel never slows down backwards in time (unlikely)
        where_BL = slow_times[-1]

        #----CHECK FOR BOUNDARY ESCAPES----
        future_location=check_boundary(p, where_BL, above_LFC,
                                       parcel_x, parcel_u, 
                                       parcel_z, parcel_w,
                                       Z, Y, X)
        if (future_location[0]+future_location[1]>=1): continue #parcel crosses a boundary

        row = (p, where_BL, above_LFC)

        #----CHECK IF PARCEL SLOWED DOWN LOW ENOUGH----
        if not (parcel_z[where_BL,p] <= CLmaxheight):
            #continued-ascent, slow-above-750m parcels
            save2_arr[p] = row
            continue

        #----CHECK IF CL IS WITHIN 2km----
        t=where_BL; z=Z[where_BL,p]; y=Y[where_BL,p]; x=X[where_BL,p]
        CONV_X=Get_Conv_X(t,z,y)
        nearby_x = np.arange(x-cl_halfwidth, x+cl_halfwidth+1) # symmetric, inclusive window
        within_CL=np.any(np.isin(CONV_X, nearby_x))

        if within_CL:
            print(f'Parcel {p} is success at time {where_BL}')
            out_arr[p] = row
        else:
            #continued-ascent, slow-below-750m parcels that are not within 2 km of CL
            save_arr[p] = row

    return out_arr,save_arr,save2_arr
# [out_arr,save_arr,save2_arr]=ParcelTracking(Np,W,LFC,parcel_z,Z,Y,X,out_arr,save_arr,save2_arr)

In [30]:
def CorrectParcelID(out_arr,save_arr,save2_arr,index_adjust):
    """Shift the parcel IDs (column 0) of every populated row by ``index_adjust``.

    A row counts as populated when any of its entries is non-zero. The arrays
    are modified in place and returned. Needed for the job array, where each
    job numbers its parcels from zero.
    """
    for result in (out_arr, save_arr, save2_arr):
        populated = (result != 0).any(axis=1)
        result[populated, 0] += index_adjust
    return out_arr,save_arr,save2_arr
# [out_arr,save_arr,save2_arr]=CorrectParcelID(out_arr,save_arr,save2_arr,index_adjust)

In [31]:
#REMOVING BLANK ROWS
def RemoveZeroRows(arr):
    """Return ``arr`` without the rows whose entries are all zero."""
    has_data = np.any(arr != 0, axis=1)
    return arr[has_data]
# out_arr=RemoveZeroRows(out_arr);save_arr=RemoveZeroRows(save_arr);save2_arr=RemoveZeroRows(save2_arr)

In [32]:
##############################################
#RUNNING

In [36]:
def RunAlgorithm(job_id_list):
    """Run the parcel-tracking algorithm for each job id and save one output file per job.

    For every job id: get the parcel range from SlurmJobArray_Class.StartJobArray,
    load that slice of the input arrays, run ParcelTracking, shift the parcel
    IDs by ``index_adjust``, drop the all-zero rows, and save the three result
    arrays with Results_InputOutput_Class.SaveOutFile.

    Parameters
    ----------
    job_id_list : iterable of int
        Job ids to process in order.

    Returns
    -------
    dict or None
        The result dictionary of the LAST processed job (keys "out_arr",
        "save_arr", "save2_arr"), or None if ``job_id_list`` is empty.
    """
    Dictionary = None  # an empty job list now returns None instead of raising UnboundLocalError
    for job_id in job_id_list:
        print(f'current job_id = {job_id}')
        [start_job,end_job,index_adjust]=SlurmJobArray_Class.StartJobArray(ModelData,job_id,num_jobs)
        
        #SLICING DATA
        print("SLICING DATA")
        parcel=GetData(parcel1,start_job,end_job)
    
        #GETTING REQUIRED DATA
        print("GETTING REQUIRED DATA")
        [parcel_z,parcel_x,parcel_u,parcel_w,Z,Y,X,W] = GetSpatialData(Lagrangian_Binary_Array_Data, start_job,end_job)
        LFC,LCL = GetLFCData(LFC_LCL_Data, start_job,end_job)
    
        #INITIALIZING DATA
        print("INITIALIZING DATA")
        Np=GetNp(parcel)
        [out_arr,save_arr,save2_arr]=InitializeData(Np)
    
        #RUNNING ALGORITHM
        print("RUNNING ALGORITHM")
        start_time = time.time()
        if NO_PRINT==True: BlockPrint()
        try:
            [out_arr,save_arr,save2_arr]=ParcelTracking(Np,W,LFC,
                                                        parcel_z,parcel_x,
                                                        parcel_w,parcel_u, 
                                                        Z,Y,X,
                                                        out_arr,save_arr,save2_arr,
                                                        ascent_condition=False)
        finally:
            # Always give stdout back, even if the algorithm raises; otherwise
            # every later print in the session would stay suppressed.
            if NO_PRINT==True: RestorePrint()
        end_time = time.time(); elapsed_time = end_time - start_time; print(f"Elapsed Time: {elapsed_time} seconds")  
    
        #CORRECTING PARCEL ID FOR JOBARRAY
        [out_arr,save_arr,save2_arr]=CorrectParcelID(out_arr,save_arr,save2_arr,index_adjust)
    
        #REMOVING BLANK ROWS FROM RESULTS
        out_arr=RemoveZeroRows(out_arr);save_arr=RemoveZeroRows(save_arr);save2_arr=RemoveZeroRows(save2_arr)
    
        #SAVING
        print("SAVING")
        Dictionary = {"out_arr": out_arr,
                      "save_arr": save_arr,
                      "save2_arr": save2_arr}
        
        Results_InputOutput_Class.SaveOutFile(ModelData,DataManager, Dictionary,job_id)
    return Dictionary

In [37]:
#starting job arrays
# if ISRUN is False, then will not run using slurm_job_array
start_slurm_job, end_slurm_job = SlurmJobArray_Class.StartSlurmJobArray(
    num_jobs=num_jobs,
    num_slurm_jobs=num_slurm_jobs,
    ISRUN=True,
)
print(f"Running on Slurm_Jobs for Slurm_Job_Ids: {(start_slurm_job,end_slurm_job-1)}")
# Job ids handled by this slurm task: start (inclusive) to end (exclusive).
job_id_list = np.arange(start_slurm_job, end_slurm_job)

#running algorithm
StartTime = time.time()
Dictionary = RunAlgorithm(job_id_list)
EndTime = time.time()
ElapsedTime = EndTime - StartTime
print(f"Total Elapsed Time: {ElapsedTime} seconds")

Running on Slurm_Jobs for Slurm_Job_Ids: (1, 5)
current job_id = 1
SLICING DATA
GETTING REQUIRED DATA
INITIALIZING DATA
RUNNING ALGORITHM
Elapsed Time: 3.1962413787841797 seconds
SAVING
Saved output to /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Tracking_Algorithms/1km_5min_34nz/Lagrangian_UpdraftTracking/Lagrangian_UpdraftTracking_1km_5min_34nz_job1.h5 

Total Elapsed Time: 17.27579689025879 seconds


In [42]:
############################################################
# Run after the job array has finished.
# Flag controlling whether the per-job output files are merged into the single
# "combined" file by Recombine(); the merge is only executed when this is True.
recombine=False # keep False while the job array is still running
# recombine=True 

In [43]:
def MakeUnique(arr):
    """Return the sorted unique rows of ``arr`` (duplicates along axis 0 removed)."""
    unique_rows = np.unique(arr, axis=0)
    return unique_rows

def get_total_count(ModelData, DataManager, var_name, num_jobs):
    """
    Count the rows (axis 0) of `var_name` summed over the output files of
    jobs 1..num_jobs, each read with LoadOutFile().

    A job whose file is missing, or whose file lacks `var_name`, adds nothing
    to the total and triggers a printed warning.
    """
    row_total = 0
    for job_id in range(1, num_jobs + 1):
        try:
            job_output = Results_InputOutput_Class.LoadOutFile(ModelData, DataManager, job_id)
            # None marks "variable absent" so the warning can be issued below.
            n_rows = job_output[var_name].shape[0] if var_name in job_output else None
        except FileNotFoundError:
            print(f"Warning: Missing file for job {job_id}")
            continue
        if n_rows is None:
            print(f"Warning: {var_name} not found in job {job_id}")
        else:
            row_total += n_rows
    return row_total

In [44]:
def Recombine(num_jobs): #*#*
    """Merge the per-job result arrays into one de-duplicated array per variable and save them.

    For each of 'out_arr', 'save_arr' and 'save2_arr', the arrays of jobs
    1..num_jobs are read with LoadOutFile(), stacked row-wise as int32 with
    three columns, reduced to their unique rows with MakeUnique(), and the
    resulting dictionary is written with SaveOutFile(job_id="combined").

    A missing job file is skipped with a warning. Previously the counting pass
    tolerated missing files but the fill pass raised FileNotFoundError on them,
    and every file was loaded twice.

    Parameters
    ----------
    num_jobs : int
        Number of job output files to look for (job ids 1..num_jobs).

    Returns
    -------
    dict
        Maps each variable name to its combined (n, 3) int32 array.
    """
    var_names = ['out_arr', 'save_arr', 'save2_arr']
    recombined_arrays = {}  # Store final arrays here

    for var_name in var_names:
        print(f"Combining data for {var_name}")
        pieces = []
        for job_id in range(1, num_jobs + 1):
            try:
                arr = Results_InputOutput_Class.LoadOutFile(ModelData, DataManager, job_id, varName=var_name)
            except FileNotFoundError:
                print(f"Warning: Missing file for job {job_id}")
                continue
            # reshape keeps empty results stackable even if they come back without the column axis
            pieces.append(np.asarray(arr, dtype=np.int32).reshape(-1, 3))

        if pieces:
            stacked = np.concatenate(pieces, axis=0)
        else:
            stacked = np.zeros((0, 3), dtype=np.int32)

        #Make Unique
        recombined_arrays[var_name] = MakeUnique(stacked)

    # Write to file
    Results_InputOutput_Class.SaveOutFile(ModelData,DataManager, recombined_arrays,job_id="combined")

    return recombined_arrays

In [45]:
# Merge the per-job output files into the single "combined" file; only runs when the recombine flag is True.
if recombine==True:
    recombined_arrays = Recombine(num_jobs)

Combining data for out_arr
Combining data for save_arr
Combining data for save2_arr
Saved output to /mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/OUTPUT/Project_Algorithms/Tracking_Algorithms/1km_5min_34nz/Lagrangian_UpdraftTracking/Lagrangian_UpdraftTracking_1km_5min_34nz_jobcombined.h5 



In [44]:
##############################################
#LOADING BACK IN

In [116]:
# Dictionary = Results_InputOutput_Class.LoadOutFile(ModelData, DataManager, job_id="combined")
# Dictionary

{'out_arr': array([[    18,     76,     80],
        [    74,     65,     70],
        [   126,     67,     73],
        ...,
        [999613,     77,     83],
        [999635,     53,     57],
        [999860,     87,     91]], shape=(14356, 3), dtype=int32),
 'save2_arr': array([[   248,     43,     44],
        [   483,     36,     37],
        [   535,     43,     44],
        ...,
        [999763,     77,     78],
        [999795,     43,     44],
        [999972,     38,     39]], shape=(16787, 3), dtype=int32),
 'save_arr': array([[    72,     65,     72],
        [   149,     52,     57],
        [   160,     54,     59],
        ...,
        [999952,     56,     61],
        [999960,     52,     59],
        [999967,     56,     60]], shape=(15312, 3), dtype=int32)}

In [46]:
##############################################
#TESTING

In [118]:
# #comparing to old_version combined output

# dir = "/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Code/CodeFiles/3_Project_Algorithms/2_Tracking_Algorithms/Old_Version/OUTPUT"
# fileName = "parcel_tracking_1km_5min_1e6.h5"
# filePath = os.path.join(dir,fileName)

# with h5py.File(filePath, 'r') as f:
#     out_arr_OG = f["out_arr"][:]
#     save_arr_OG = f["save_arr"][:]

# Dictionary = Results_InputOutput_Class.LoadOutFile(ModelData, DataManager, job_id="combined")

# out_arr = Dictionary['out_arr']
# save_arr = Dictionary['save_arr']

# a = out_arr_OG[:, 0]
# b = out_arr[:, 0]

# setA = set(a)
# setB = set(b)

# # Elements that differ (not shared between both)
# diffElements = setA.symmetric_difference(setB)

# # Count how many unique differing values there are
# numDifferences = len(diffElements)

# print("Number of differing values (order ignored):", numDifferences)