In [None]:
####################################
#ENVIRONMENT SETUP

In [None]:
#Importing Libraries
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr

import sys; import os; import time; from datetime import timedelta
import pickle
import h5py

from tqdm import tqdm

from glob import glob

In [None]:
#MAIN DIRECTORIES
def GetDirectories():
    """Return the directories this notebook works with.

    Returns
    -------
    tuple of str
        (mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory):
        the project root, "Code/CodeFiles/" below the project root, the
        scratch root, and the current working directory of the process.
    """
    projectRoot = '/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
    scratchRoot = '/mnt/lustre/koa/scratch/air673/'
    sharedCodeRoot = os.path.join(projectRoot, "Code/CodeFiles/")
    workingDirectory = os.getcwd()
    return projectRoot, sharedCodeRoot, scratchRoot, workingDirectory


# Notebook-level handles used by the cells below.
mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory = GetDirectories()

In [None]:
def GetPlottingDirectory(plotFileName, plotType):
    """Build the output path for a plot file and create its directory.

    The file is placed under
    <mainDirectory>/Code/PLOTTING/<plotType>/<res>_<t_res>_<Nz_str>nz/,
    where the last component is read from the notebook-level ModelData.

    Parameters
    ----------
    plotFileName : str
        File name (including extension) of the figure.
    plotType : str
        Sub-directory below the plotting root; may hold several path components.

    Returns
    -------
    str
        Full path of the figure file. The containing directory exists when
        this function returns.
    """
    # This line used to be a chained assignment that also bound a local
    # `mainCodeDirectory`. That local was never read and shadowed the
    # notebook-level variable of the same name, so it has been removed.
    plottingDirectory = os.path.join(mainDirectory, "Code", "PLOTTING")

    specificPlottingDirectory = os.path.join(
        plottingDirectory, plotType,
        f"{ModelData.res}_{ModelData.t_res}_{ModelData.Nz_str}nz")
    os.makedirs(specificPlottingDirectory, exist_ok=True)

    return os.path.join(specificPlottingDirectory, plotFileName)

def SaveFigure(fig,plotType, fileName):
    """Save `fig` as a 300-dpi JPEG in the plotting directory for `plotType`.

    The file name is `fileName` followed by the resolution, time resolution
    and Np_str tags read from the notebook-level ModelData.

    Parameters
    ----------
    fig : object with a `savefig` method (a matplotlib figure in this notebook)
    plotType : str
        Sub-directory passed on to GetPlottingDirectory.
    fileName : str
        Base name of the output file, without run tags or extension.
    """
    runTag = f"{ModelData.res}_{ModelData.t_res}_{ModelData.Np_str}"
    targetPath = GetPlottingDirectory(f"{fileName}_{runTag}.jpg", plotType)
    print(f"Saving figure to {targetPath}")
    fig.savefig(targetPath, dpi=300, bbox_inches='tight')

In [None]:
#IMPORT CLASSES
# Put the "2_Variable_Calculation" folder of the shared code directory on
# sys.path so that the module imported on the next line can be found.
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
from CLASSES_Variable_Calculation import ModelData_Class, SlurmJobArray_Class, DataManager_Class

In [None]:
# Data-loading handle for simulation number 1.
ModelData = ModelData_Class(
    mainDirectory,
    scratchDirectory,
    simulationNumber=1,
)

# Data manager pointed at the Lagrangian updraft-tracking output of the
# "Project_Algorithms" code section, using the run tags held by ModelData.
DataManager = DataManager_Class(
    mainDirectory,
    scratchDirectory,
    ModelData.res,
    ModelData.t_res,
    ModelData.Nz_str,
    ModelData.Np_str,
    dataType="Tracking_Algorithms",
    dataName="Lagrangian_UpdraftTracking",
    dtype='float32',
    codeSection="Project_Algorithms",
)

In [None]:
#IMPORT FUNCTIONS
# NOTE(review): the same "2_Variable_Calculation" path is already appended in
# the first "IMPORT CLASSES" cell, so this append adds a duplicate sys.path entry.
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
import FUNCTIONS_Variable_Calculation
# NOTE(review): the wildcard import is presumably what brings CallLagrangianArray
# (used further down) into scope - confirm, and consider importing it explicitly.
from FUNCTIONS_Variable_Calculation import *

In [None]:
#IMPORT CLASSES
# Make the tracking-algorithm folder importable before importing its classes.
sys.path.append(os.path.join(mainCodeDirectory,"3_Project_Algorithms","2_Tracking_Algorithms"))
# NOTE: this re-binds SlurmJobArray_Class, replacing the class of the same name
# imported earlier from CLASSES_Variable_Calculation.
from CLASSES_TrackingAlgorithms import TrackingAlgorithms_DataLoading_Class, SlurmJobArray_Class, Results_InputOutput_Class, TrackedParcel_Loading_Class

In [None]:
#############################################
#LOADING DATA

In [None]:
#READING BACK IN SUBSETTED TRACKED PARCEL DATA
# trackedArrays is used below as a nested mapping:
# trackedArrays[parcel type][depth type] -> tracked array.
trackedArrays, LevelsDictionary = TrackedParcel_Loading_Class.LoadingSubsetParcelData(
    ModelData,
    DataManager,
    Results_InputOutput_Class,
)

In [None]:
#GETTING AVERAGE CONVERGENCE MAX 
def Get_AvgConvergence(t):
    """Return the "avgConvergence" entry of the Eulerian CL-tracking output.

    Parameters
    ----------
    t : int
        Time index into ModelData.timeStrings.

    Returns
    -------
    The object stored under "avgConvergence" in the dictionary loaded for
    that time.
    """
    stamp = ModelData.timeStrings[t]

    # The Eulerian_CLTracking results sit one level up from this notebook's
    # own output directory.
    eulerianDirectory = os.path.normpath(
        os.path.join(DataManager.outputDataDirectory, "..", "Eulerian_CLTracking")
    )

    loaded = TrackingAlgorithms_DataLoading_Class.LoadData(
        ModelData, DataManager, stamp,
        dataName="Eulerian_CLTracking",
        outputDataDirectory=eulerianDirectory,
        printstatement=False,
    )
    return loaded["avgConvergence"]
    
def find_SBF_xmaxs():
    """Locate the sea-breeze front (SBF) for every time index.

    For each time t >= 1 the front is taken as the index, along the leading
    axis of Get_AvgConvergence(t), of the first occurrence of the maximum
    average convergence (presumably the x axis - confirm against the
    Eulerian_CLTracking output).

    Returns
    -------
    list
        One entry per time index, so ``xmaxs[t]`` belongs to time ``t``.
        Entry 0 is ``np.nan`` because no convergence field is read for
        t = 0; every other entry is an integer index.

    Notes
    -----
    The previous version skipped t = 0 without storing anything for it, so
    the list was one element short and ``xmaxs[t]`` returned the front of
    time ``t + 1`` (and raised IndexError for the last time index).
    """
    xmaxs = []
    for t in range(ModelData.Ntime):
        if t == 0:
            # No field is loaded for t = 0; keep a placeholder so that list
            # positions stay equal to time indices.
            xmaxs.append(np.nan)
            continue

        avgConvergence = Get_AvgConvergence(t)
        avgConvergence_max = np.max(avgConvergence)
        # first position at which the maximum occurs
        xmax = np.where(avgConvergence == avgConvergence_max)[0][0]
        xmaxs.append(xmax)
    return xmaxs
xmaxs=find_SBF_xmaxs()

In [None]:
#############################################
#RUNNING FUNCTIONS

In [None]:
#*#*#*# TESTING SBF
# Scratch/debug cell: runs the distance part of CollectData by hand for the
# SBF / DEEP tracked array. The unconditional `break` at the end of the loop
# body stops it after the first unique timestep.
# NOTE(review): this cell leaves xh, kms, ps, ts, X_List, Xdiff_List, ... as
# notebook-level variables; consider removing it once CollectData is trusted.

trackedArray = trackedArrays["SBF"]["DEEP"]
#numerical info
# x coordinate measured from its first value
xh = ModelData.xh-ModelData.xh[0]
# first index at which that relative coordinate reaches 1
# (presumably "grid cells per km", assuming xh is in km - TODO confirm)
kms=np.argmax(ModelData.xh-ModelData.xh[0] >= 1)

#getting parcel index and time
ps = trackedArray[:,0]
ts = trackedArray[:,1]

#initializing lists
X_List = []
Xdiff_List = []

#running through each unique timestep
for t in tqdm(np.unique(ts), desc="Processing timesteps"):

    #DISTANCE METRICS
    #load in X at time t
    timeString = ModelData.timeStrings[t]
    X_t = CallLagrangianArray(ModelData, DataManager, timeString, 'X')
    
    # NOTE(review): every parcel in `ps` is looked up at this time, not only
    # the rows whose time equals t (ps[ts == t]) - confirm this is intended.
    XParcel_t = X_t[ps].tolist()
    XParcel_t = xh[XParcel_t] #converting to kms

    #getting index distance from sea-breeze
    # NOTE(review): XParcel_t has already been mapped through xh, while
    # xmaxs[t] is an index produced by np.where in find_SBF_xmaxs; the
    # subtraction and the scaling by `kms` appear to mix units - confirm.
    Xdiff = XParcel_t - xmaxs[t]
    Xdiff *= kms #converting to kms

    #appending results to list
    X_List.extend(XParcel_t)
    Xdiff_List.extend(Xdiff.tolist())
    break


    #VARIABLES
    # QV_t = CallLagrangianArray(ModelData, DataManager, timeString, 'qv')
    # THETA_V_t = CallLagrangianArray(ModelData, DataManager, timeString, 'theta_v')

    # QVParcel_t = QV_t[ps]
    # THETA_VParcel_t = THETA_V_t[ps]
    
    # QV_List.extend(QV_t)
    # THETA_V_List.extend(THETA_VParcel_t)
    

In [None]:
def CollectData(trackedArray):
    """Collect position and thermodynamic samples for a set of tracked parcels.

    Parameters
    ----------
    trackedArray : 2-D array
        Column 0 holds parcel indices and column 1 holds time indices. Both
        are used for indexing, so they must be integer-valued.

    Returns
    -------
    tuple of list
        (X_List, Xdiff_List, QV_List, THETA_v_List):
        parcel x coordinate measured from the left edge of the domain, the
        same coordinate measured from the sea-breeze front, and the parcels'
        "QV" and "THETA_v" values. Distances are in the units of
        ModelData.xh (km according to the plot labels - TODO confirm).

    Notes
    -----
    The front position ``xmaxs[t]`` is a grid index, so it is converted
    through ``xh`` before being subtracted from the parcel coordinate. The
    previous version subtracted the raw index from the coordinate and then
    multiplied by the number of cells per km, which only gives a distance
    when the grid spacing is exactly 1.
    """
    # x coordinate measured from the first grid point
    xh = ModelData.xh - ModelData.xh[0]

    # parcel indices and time indices of the tracked rows
    ps = trackedArray[:,0]
    ts = trackedArray[:,1]

    X_List = []
    Xdiff_List = []

    QV_List = []
    THETA_v_List = []

    # NOTE(review): every parcel in `ps` is sampled at every unique time, not
    # only at the time stored in its own row (ps[ts == t]) - confirm that
    # this is the intended sampling.
    for t in tqdm(np.unique(ts), desc="Processing timesteps"):

        #DISTANCE METRICS
        timeString = ModelData.timeStrings[t]
        X_t = CallLagrangianArray(ModelData, DataManager, timeString, 'X')

        # parcel grid index -> coordinate from the left edge
        XParcel_t = xh[X_t[ps].tolist()]

        # Front position in the same coordinate. A NaN entry (no front
        # available for this time) propagates as NaN distances.
        xmax_t = xmaxs[t]
        xSBF_t = np.nan if np.isnan(xmax_t) else xh[int(xmax_t)]
        Xdiff = XParcel_t - xSBF_t

        X_List.extend(XParcel_t)
        Xdiff_List.extend(Xdiff.tolist())

        #VARIABLES
        VARS = Get_LagrangianArrays(t, varNames=["QV","THETA_v"])

        QV_List.extend(VARS["QV"][ps])
        THETA_v_List.extend(VARS["THETA_v"][ps])

    return X_List,Xdiff_List, QV_List,THETA_v_List

In [None]:
def RunAllParcelTypes():
    """Run CollectData for every tracked array in `trackedArrays`.

    Returns
    -------
    dict
        results[parcel type][depth type] -> dict with the keys "X_List",
        "Xdiff_List", "QV_List" and "THETA_V_List". Depth types whose
        tracked array is None or empty are reported and left out.
    """
    collected = {}

    for parcelType, arraysByDepth in trackedArrays.items():      # e.g. "CL"
        perDepth = {}
        collected[parcelType] = perDepth

        for depthType, tracked in arraysByDepth.items():         # e.g. "DEEP"
            print(f"\nRunning CollectData for {parcelType} - {depthType}")

            hasRows = tracked is not None and len(tracked) != 0
            if not hasRows:
                print(f"  Skipping {parcelType}-{depthType}: empty array")
                continue

            xValues, xDiffs, qvValues, thetaVValues = CollectData(tracked)
            perDepth[depthType] = {
                "X_List": xValues,
                "Xdiff_List": xDiffs,
                "QV_List": qvValues,
                "THETA_V_List": thetaVValues,
            }

    return collected

In [None]:
def LoadorRun():
    """Return the tracked-parcel results, using a pickle cache when present.

    If the cache file exists in `codeDirectory` it is loaded and returned.
    Otherwise RunAllParcelTypes() is executed and its output is cached.

    The cache is written to a temporary file first and then moved into place
    with os.replace. An interrupted or failed dump therefore cannot leave a
    truncated cache file that would make every later call fail while loading.

    Returns
    -------
    dict
        The nested results dictionary produced by RunAllParcelTypes().
    """
    fileName = f"Tracked_Histogram_Output_{ModelData.res}_{ModelData.t_res}_{ModelData.Nzh}nz.pkl"
    filePath = os.path.join(codeDirectory, fileName)

    if os.path.exists(filePath):
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # that this notebook wrote itself.
        with open(filePath, "rb") as f:
            results = pickle.load(f)
        print(f"Loaded results from {filePath}")
        return results

    print("No pickle file found, running RunAllParcelTypes()...")
    results = RunAllParcelTypes()

    tempPath = filePath + ".tmp"
    try:
        with open(tempPath, "wb") as f:
            pickle.dump(results, f)
        # Publish the finished file in one step.
        os.replace(tempPath, filePath)
    except BaseException:
        # Do not leave a partial temporary file behind.
        if os.path.exists(tempPath):
            os.remove(tempPath)
        raise
    print(f"Saved results to {filePath}")

    return results

In [None]:
#############################################
#RUNNING

In [None]:
# ============================================================
# Get_LagrangianArrays_Function
# ============================================================

def Get_LagrangianArrays(t, dataType="VARS", dataName="VARS", varNames=["W"]):
    """Read selected datasets from the Lagrangian-array HDF5 file of one time.

    Parameters
    ----------
    t : int
        Time index into ModelData.timeStrings.
    dataType : str
        Sub-directory of the run's "LagrangianArrays" folder.
    dataName : str
        Prefix of the HDF5 file name.
    varNames : iterable of str
        Names of the datasets to read from the file.

    Returns
    -------
    dict
        Maps each requested name to the full contents of that dataset.
    """
    runTag = f"{ModelData.res}_{ModelData.t_res}_{ModelData.Nz_str}nz"
    sourceDirectory = os.path.join(DataManager.inputDirectory,
                                   "..", "LagrangianArrays",
                                   runTag, dataType)
    stamp = ModelData.timeStrings[t]
    h5Path = os.path.join(sourceDirectory, f"{dataName}_{runTag}_{stamp}.h5")

    with h5py.File(h5Path, 'r') as h5File:
        # `[:]` copies each dataset into memory before the file is closed
        return {name: h5File[name][:] for name in varNames}

In [None]:
# Load the cached histogram inputs if the pickle file exists; otherwise
# compute them with RunAllParcelTypes() and write the cache.
results = LoadorRun()

In [None]:
#############################################
#PLOTTING FUNCTIONS

In [None]:
# Global matplotlib style applied to every figure created below.
plt.rcParams.update({
    "font.family": "sans-serif",
    "font.sans-serif": ["DejaVu Sans", "Helvetica", "Arial"],
    "axes.titlesize": 14,
    "axes.titleweight": "bold",
})

In [None]:
def PlotHistogram(axis, dataList, xlabel, bins=50, color='steelblue', title=None):
    """Draw a styled histogram of `dataList` on an existing axis.

    Parameters
    ----------
    axis : axes-like object
        Must provide hist, set_xlabel, set_ylabel, set_title and grid.
    dataList : sequence
        Values to bin; converted with np.array before plotting.
    xlabel : str
        Label of the x axis. The y axis is always labelled "Count".
    bins : int
        Passed to `axis.hist` as `bins`.
    color : str
        Fill colour of the bars.
    title : str or None
        Axis title; nothing is set when it is empty or None.
    """
    barStyle = dict(bins=bins, color=color, edgecolor='black', alpha=0.7)
    axis.hist(np.array(dataList), **barStyle)

    # axis annotations
    axis.set_xlabel(xlabel, fontsize=12)
    axis.set_ylabel("Count", fontsize=12)
    if title:
        axis.set_title(title, fontsize=14, pad=10)

    # light dashed grid behind the bars
    axis.grid(True, linestyle='--', alpha=0.4)


In [None]:
def PlotDistancesFunction(parcel_type):
    """Build a 2 x 3 figure of distance histograms for one parcel type.

    Columns are the depth types "ALL", "SHALLOW" and "DEEP". The top row
    shows "X_List" and the bottom row "Xdiff_List" from the notebook-level
    `results`. A panel is left empty when `results` has no entry for that
    parcel/depth combination.

    Parameters
    ----------
    parcel_type : str
        Outer key of `results` to plot.

    Returns
    -------
    The created matplotlib figure.
    """
    depth_types = ["ALL", "SHALLOW", "DEEP"]
    # (key in results, x-axis label) for the top and the bottom row
    rowSpecs = (
        ("X_List", "X distance from left side (km)"),
        ("Xdiff_List", "X distance from SBF (km)"),
    )

    fig = plt.figure(figsize=(12, 8))
    grid = gridspec.GridSpec(2, len(depth_types), figure=fig,
                             wspace=0.3, hspace=0.35)

    for column, depth in enumerate(depth_types):
        for row, (listKey, axisLabel) in enumerate(rowSpecs):
            # the axis is always created, even when there is nothing to draw
            panel = fig.add_subplot(grid[row, column])
            if parcel_type in results and depth in results[parcel_type]:
                PlotHistogram(panel, results[parcel_type][depth][listKey],
                              xlabel=axisLabel,
                              title=f"{parcel_type} – {depth}")

    fig.subplots_adjust(left=0.07, right=0.97,
                        bottom=0.08, top=0.90,
                        wspace=0.35, hspace=0.35)
    return fig

In [None]:
def PlotVariablesFunction(parcel_type):
    """Build a 2 x 3 figure of thermodynamic histograms for one parcel type.

    Columns are the depth types "ALL", "SHALLOW" and "DEEP". The top row
    shows "QV_List" multiplied by 1e3 (labelled g/kg) and the bottom row
    "THETA_V_List" from the notebook-level `results`. A panel is left empty
    when `results` has no entry for that parcel/depth combination.

    Parameters
    ----------
    parcel_type : str
        Outer key of `results` to plot.

    Returns
    -------
    The created matplotlib figure.
    """
    depth_types = ["ALL", "SHALLOW", "DEEP"]
    # (key in results, x-axis label, scale factor or None) per row
    rowSpecs = (
        ("QV_List", "qv (g/kg)", 1e3),
        ("THETA_V_List", "th_v (K)", None),
    )

    fig = plt.figure(figsize=(12, 8))
    grid = gridspec.GridSpec(2, len(depth_types), figure=fig,
                             wspace=0.3, hspace=0.35)

    for column, depth in enumerate(depth_types):
        for row, (listKey, axisLabel, scale) in enumerate(rowSpecs):
            # the axis is always created, even when there is nothing to draw
            panel = fig.add_subplot(grid[row, column])
            if parcel_type in results and depth in results[parcel_type]:
                values = results[parcel_type][depth][listKey]
                if scale is not None:
                    values = np.array(values)*scale
                PlotHistogram(panel, values,
                              xlabel=axisLabel,
                              title=f"{parcel_type} – {depth}")

    fig.subplots_adjust(left=0.07, right=0.97,
                        bottom=0.08, top=0.90,
                        wspace=0.35, hspace=0.35)
    return fig

In [None]:
#############################################
#PLOTTING

In [None]:
# One distance figure per parcel category, each written to disk.
parcel_types = ["CL", "nonCL", "SBF"]
for parcel_type in parcel_types:
    fileName = f"Tracked_Histograms_Distances_{parcel_type}"
    fig = PlotDistancesFunction(parcel_type)
    SaveFigure(
        fig,
        plotType="Project_Algorithms/Tracking_Algorithms/Tracked_Histograms",
        fileName=fileName,
    )

In [None]:
# One thermodynamic-variable figure per parcel category, each written to disk.
parcel_types = ["CL", "nonCL", "SBF"]
for parcel_type in parcel_types:
    fileName = f"Tracked_Histograms_Variables_{parcel_type}"
    fig = PlotVariablesFunction(parcel_type)
    SaveFigure(
        fig,
        plotType="Project_Algorithms/Tracking_Algorithms/Tracked_Histograms",
        fileName=fileName,
    )