In [None]:
####################################
#ENVIRONMENT SETUP

In [None]:
#Importing Libraries
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr

import sys; import os; import time; from datetime import timedelta
import pickle
import h5py

from tqdm import tqdm

from glob import glob

In [None]:
#MAIN DIRECTORIES
def GetDirectories():
    """
    Return the four directories used throughout this notebook.

    Returns
    -------
    tuple of str
        (mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory):
        the project root, the "Code/CodeFiles/" folder beneath it, the
        scratch area, and the current working directory of the kernel.
    """
    projectRoot = '/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
    return (
        projectRoot,
        os.path.join(projectRoot, "Code/CodeFiles/"),
        '/mnt/lustre/koa/scratch/air673/',
        os.getcwd(),  # wherever the notebook kernel was started
    )

# Unpack once at module level so every later cell can use these names
mainDirectory, mainCodeDirectory, scratchDirectory, codeDirectory = GetDirectories()

In [None]:
def GetPlottingDirectory(plotFileName, plotType):
    """
    Build the full output path for a plot file, creating its folder if needed.

    The target folder is
    <mainDirectory>/Code/PLOTTING/<plotType>/<res>_<t_res>_<Nz_str>nz,
    where res, t_res and Nz_str are read from the module-level ModelData.

    Parameters
    ----------
    plotFileName : str
        File name (with extension) of the plot.
    plotType : str
        Sub-folder (may contain several path components) under Code/PLOTTING.

    Returns
    -------
    str
        Path of the plot file inside the (now existing) target folder.
    """
    # NOTE: the original line was a chained assignment that also bound a local
    # `mainCodeDirectory` to the PLOTTING path, shadowing the module-level
    # variable of the same name; only the plotting directory is needed here.
    plottingDirectory = os.path.join(mainDirectory,"Code","PLOTTING")

    specificPlottingDirectory = os.path.join(plottingDirectory, plotType,
                                             f"{ModelData.res}_{ModelData.t_res}_{ModelData.Nz_str}nz")
    os.makedirs(specificPlottingDirectory, exist_ok=True)

    plottingFileName=os.path.join(specificPlottingDirectory, plotFileName)

    return plottingFileName

def SaveFigure(fig,plotType, fileName):
    """
    Save a figure as a 300-dpi JPEG under the plotting directory.

    The file name is "<fileName>_<res>_<t_res>_<Np_str>.jpg", with the three
    tags read from the module-level ModelData; the folder is resolved (and
    created) by GetPlottingDirectory for the given plotType.
    """
    runTag = f"{ModelData.res}_{ModelData.t_res}_{ModelData.Np_str}"
    destination = GetPlottingDirectory(f"{fileName}_{runTag}.jpg", plotType)
    print(f"Saving figure to {destination}")
    fig.savefig(destination, dpi=300, bbox_inches='tight')

In [None]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
from CLASSES_Variable_Calculation import ModelData_Class, SlurmJobArray_Class, DataManager_Class

In [None]:
# Data loading class: model metadata for simulation 1. Later cells read
# attributes such as res, t_res, Nz_str, Np_str, timeStrings, Ntime, xh and xf from it.
ModelData = ModelData_Class(mainDirectory, scratchDirectory, simulationNumber=1)
# Data manager class, configured for the "Lagrangian_UpdraftTracking" data set.
# Later cells read its inputDirectory and outputDataDirectory attributes.
# NOTE(review): the meaning of dtype/codeSection is defined in DataManager_Class,
# which is not visible in this notebook.
DataManager = DataManager_Class(mainDirectory, scratchDirectory, ModelData.res, ModelData.t_res, ModelData.Nz_str,
                                ModelData.Np_str, dataType="Tracking_Algorithms", dataName="Lagrangian_UpdraftTracking",
                                dtype='float32',codeSection = "Project_Algorithms")

In [None]:
#IMPORT FUNCTIONS
sys.path.append(os.path.join(mainCodeDirectory,"2_Variable_Calculation"))
import FUNCTIONS_Variable_Calculation
from FUNCTIONS_Variable_Calculation import *

In [None]:
#IMPORT CLASSES
sys.path.append(os.path.join(mainCodeDirectory,"3_Project_Algorithms","2_Tracking_Algorithms"))
from CLASSES_TrackingAlgorithms import TrackingAlgorithms_DataLoading_Class, SlurmJobArray_Class, Results_InputOutput_Class, TrackedParcel_Loading_Class

In [None]:
import sys
dir2='/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
path=os.path.join(dir2,'Code/CodeFiles/Functions')
sys.path.append(path)

import PlottingFunctions
from PlottingFunctions import * # import PlottingFunctions

# # Get all functions in NumericalFunctions
# import inspect
# functions = [f[0] for f in inspect.getmembers(NumericalFunctions, inspect.isfunction)]
# functions

In [None]:
#############################################
#LOADING DATA

In [None]:
# Read back the subsetted tracked-parcel data.
# trackedArrays is consumed below as a two-level dict (outer key -> inner key ->
# array); CollectData reads column 0 as the parcel index and column 1 as the
# time index. LevelsDictionary is not used in the cells shown here.
trackedArrays,LevelsDictionary = TrackedParcel_Loading_Class.LoadingSubsetParcelData(ModelData,DataManager,
                                                         Results_InputOutput_Class)

In [None]:
#GETTING AVERAGE CONVERGENCE MAX 
def Get_AvgConvergence(t):
    """
    Load the "avgConvergence" field of the Eulerian_CLTracking output for
    time index t.

    The Eulerian_CLTracking folder is located as a sibling of
    DataManager.outputDataDirectory.
    """
    # step up one level from this DataManager's output folder, then into the
    # Eulerian_CLTracking results
    eulerianDirectory = os.path.normpath(
        os.path.join(DataManager.outputDataDirectory, "..", "Eulerian_CLTracking")
    )

    trackingData = TrackingAlgorithms_DataLoading_Class.LoadData(
        ModelData, DataManager, ModelData.timeStrings[t],
        dataName="Eulerian_CLTracking",
        outputDataDirectory=eulerianDirectory,
        printstatement=False,
    )
    return trackingData["avgConvergence"]
    
def find_SBF_xmaxs():
    """
    Locate the index of maximum average convergence for every timestep
    from t = 1 to ModelData.Ntime - 1.

    Timestep 0 is never loaded, so the returned list has Ntime - 1 entries
    and is OFFSET BY ONE relative to the time index:
    entry i belongs to timestep i + 1.

    Returns
    -------
    list
        First-axis index of the (first) maximum of avgConvergence per timestep.
    """
    xmaxs=[]
    # The original loop carried an `if t == 0` branch that could never run
    # because iteration starts at 1; it has been removed.
    for t in range(1, ModelData.Ntime):
        avgConvergence = Get_AvgConvergence(t)
        avgConvergence_max = np.max(avgConvergence)
        # first index along axis 0 where the maximum is reached
        xmax = np.where(avgConvergence==avgConvergence_max)[0][0]
        xmaxs.append(xmax)
    return xmaxs
# Module-level list of convergence-maximum indices, read as a global by CollectData.
# Entry i corresponds to timestep i + 1 (find_SBF_xmaxs starts its loop at t = 1).
xmaxs=find_SBF_xmaxs()

In [None]:
#############################################
#RUNNING FUNCTIONS

In [None]:
def CollectData(trackedArray):
    """
    Collect position and thermodynamic values for every tracked
    (parcel, time) pair.

    Parameters
    ----------
    trackedArray : array
        2-D array; column 0 is the parcel index and column 1 the time index.

    Returns
    -------
    X_List : list
        Parcel x-position relative to the first grid point (xh - xh[0]).
    Xdiff_List : list
        Parcel x-position minus the x-position of the convergence maximum
        stored in the module-level `xmaxs`, in the same units as xh.
    QV_List, THETA_v_List : list
        Parcel values of the "QV" and "THETA_v" Lagrangian arrays.

    Notes
    -----
    * Both positions are looked up in the same coordinate array, so the
      difference is unit-consistent for any grid spacing. The previous code
      subtracted a grid INDEX from a position and rescaled by cells-per-km,
      which is only correct for a grid spacing of exactly one unit.
    * `xmaxs` has no entry for t = 0 (its entry i belongs to timestep i + 1),
      so timestep t reads entry t - 1. For t = 0 the t = 1 entry is used as
      the nearest available position so the output stays finite.
    * NOTE(review): assumes ModelData.xh is in km and that avgConvergence is
      indexed on the same x grid as xh -- confirm.
    """
    # x-coordinates relative to the first grid point
    xh = ModelData.xh-ModelData.xh[0]

    # parcel index and time index columns
    ps = trackedArray[:,0]
    ts = trackedArray[:,1]

    # sort by time so each timestep's arrays are loaded only once
    sort_idx = np.argsort(ts)
    ts_sorted = ts[sort_idx]
    ps_sorted = ps[sort_idx]

    X_List = []
    Xdiff_List = []
    QV_List = []
    THETA_v_List = []

    # time cache (to avoid reloading arrays for consecutive rows of the same timestep)
    previous_t = None

    for t, p in tqdm(
        zip(ts_sorted, ps_sorted),
        total=len(ts_sorted),
        desc="Processing timesteps"):

        # load the per-timestep arrays only when the timestep changes
        if t != previous_t:
            timeString = ModelData.timeStrings[t]
            X_t = CallLagrangianArray(ModelData, DataManager, timeString, 'X')
            VARS=Get_LagrangianArrays(t,varNames=["QV","THETA_v"])
            QV_t = VARS["QV"]
            THETA_v_t = VARS["THETA_v"]

            # xmaxs entry i belongs to timestep i + 1; clamp so t = 0 falls
            # back to the first available entry
            SBF_x = xh[xmaxs[max(t - 1, 0)]]
            previous_t = t

        # parcel grid index -> position
        XParcel_t = xh[X_t[p]]

        # signed distance from the convergence maximum (positive = larger x)
        Xdiff = XParcel_t - SBF_x

        X_List.append(XParcel_t)
        Xdiff_List.append(Xdiff)

        # thermodynamic variables of this parcel
        QV_List.append(QV_t[p])
        THETA_v_List.append(THETA_v_t[p])

    return X_List,Xdiff_List, QV_List,THETA_v_List

In [None]:
def RunAllParcelTypes():
    """
    Run CollectData on every non-empty array in the nested `trackedArrays`
    dictionary and return the outputs in a dictionary of the same nesting.

    Every outer key is present in the result; inner keys whose array is None
    or empty are skipped. Each stored entry holds the four lists returned by
    CollectData under "X_List", "Xdiff_List", "QV_List" and "THETA_V_List".
    """
    results = {}

    for groupName, subsets in trackedArrays.items():        # e.g. "CL"
        groupResults = {}
        results[groupName] = groupResults

        for subsetName, parcels in subsets.items():          # e.g. "DEEP"
            print(f"\nRunning CollectData for {groupName} - {subsetName}")

            hasParcels = parcels is not None and len(parcels) != 0
            if not hasParcels:
                print(f"  Skipping {groupName}-{subsetName}: empty array")
                continue

            xValues, xDiffs, qvValues, thetaVValues = CollectData(parcels)
            groupResults[subsetName] = dict(
                X_List=xValues,
                Xdiff_List=xDiffs,
                QV_List=qvValues,
                THETA_V_List=thetaVValues,
            )

    return results

In [None]:
def LoadorRun():
    """
    Return the tracked-parcel results, using a pickle file in codeDirectory
    as a cache.

    If the pickle exists it is loaded; otherwise RunAllParcelTypes() is
    executed and its output is written to that file before being returned.
    """
    cacheName = f"Tracked_Histogram_Output_{ModelData.res}_{ModelData.t_res}_{ModelData.Nzh}nz.pkl"
    cachePath = os.path.join(codeDirectory, cacheName)

    # cache miss: compute, store, and return
    if not os.path.exists(cachePath):
        print("No pickle file found, running RunAllParcelTypes()...")
        results = RunAllParcelTypes()

        with open(cachePath, "wb") as handle:
            pickle.dump(results, handle)
        print(f"Saved results to {cachePath}")
        return results

    # cache hit: read the stored results back
    with open(cachePath, "rb") as handle:
        results = pickle.load(handle)
    print(f"Loaded results from {cachePath}")
    return results

In [None]:
#############################################
#RUNNING

In [None]:
# ============================================================
# Get_LagrangianArrays_Function
# ============================================================

def Get_LagrangianArrays(t, dataType="VARS", dataName="VARS", varNames=["W"]):
    """
    Read the requested datasets from the Lagrangian-array HDF5 file of
    time index t.

    The file is
    <DataManager.inputDirectory>/../LagrangianArrays/<res>_<t_res>_<Nz_str>nz/<dataType>/
    <dataName>_<res>_<t_res>_<Nz_str>nz_<timeString>.h5

    Parameters
    ----------
    t : int
        Index into ModelData.timeStrings.
    dataType : str
        Sub-folder holding the file.
    dataName : str
        File-name prefix.
    varNames : iterable of str
        Dataset keys to read (each is read in full).

    Returns
    -------
    dict
        Maps every name in varNames to the data read from the file.
    """
    runTag = f"{ModelData.res}_{ModelData.t_res}_{ModelData.Nz_str}nz"
    h5Path = os.path.join(
        DataManager.inputDirectory, "..", "LagrangianArrays", runTag, dataType,
        f"{dataName}_{runTag}_{ModelData.timeStrings[t]}.h5",
    )

    with h5py.File(h5Path, 'r') as h5File:
        # [:] pulls the whole dataset into memory before the file closes
        return {name: h5File[name][:] for name in varNames}

In [None]:
# Nested results dict (outer key -> inner key -> lists), loaded from the pickle
# cache when present, otherwise computed by RunAllParcelTypes(). The plotting
# functions below read it.
results = LoadorRun()

In [None]:
#############################################
#PLOTTING FUNCTIONS

In [None]:
# Global matplotlib text styling for every figure created after this cell
plt.rcParams.update({
    # sans-serif text, trying the listed fonts in order
    "font.family": "sans-serif",
    "font.sans-serif": ["DejaVu Sans", "Helvetica", "Arial"],
    # axes titles: size 8, regular weight (no bold)
    "axes.titlesize": 8,
    "axes.titleweight": "normal",
    # default axis-label size (PlotHistogram passes its own fontsize)
    "axes.labelsize": 7,
    # tick labels on both axes
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
})

In [None]:
from scipy.stats import gaussian_kde

def PlotHistogram(axis, dataList, xlabel, bins=50, color='steelblue', title=None):
    """
    Plot a histogram with a continuous KDE overlay scaled to counts.

    The KDE line is labelled '_ignore_snap_' (the label the original code
    uses so that SnapLimitsToTicks() can skip it).

    The KDE overlay is drawn only when the data has at least two samples and
    a non-zero spread: gaussian_kde cannot be built from a single value or
    from identical values and would otherwise abort the whole figure.

    Parameters
    ----------
    axis : matplotlib axes
        Axes to draw on.
    dataList : sequence of numbers
        Values to histogram.
    xlabel : str
        x-axis label.
    bins : int or sequence
        Forwarded to axis.hist.
    color : str
        Bar colour.
    title : str or None
        Optional axes title.
    """

    # ensure NumPy array for consistency
    data = np.array(dataList)

    # plot histogram and capture bin edges
    counts, bin_edges, _ = axis.hist(
        data,
        bins=bins,
        color=color,
        edgecolor='black',
        alpha=0.7
    )

    ################################################################
    # KDE overlay (skipped for degenerate data, see docstring)
    if data.size >= 2 and np.ptp(data) > 0:
        kde = gaussian_kde(data)

        # fine x grid for a smooth curve across the histogram domain
        x_vals = np.linspace(bin_edges[0], bin_edges[-1], 400)

        # KDE is a probability density (area = 1); multiply by N * bin width
        # to put it on the same scale as the histogram counts
        bin_width = bin_edges[1] - bin_edges[0]
        kde_scaled = kde(x_vals) * len(data) * bin_width

        axis.plot(
            x_vals, kde_scaled,
            color='blue',
            linewidth=1.8,
            zorder=10,
            label="_ignore_snap_"  # marker label for SnapLimitsToTicks()
        )
    ################################################################

    # labels and title
    axis.set_xlabel(xlabel, fontsize=10)
    axis.set_ylabel("Count", fontsize=10)
    if title:
        axis.set_title(title, fontsize=10, pad=10)

    # grid
    axis.grid(True, linestyle='--', alpha=0.4)


In [None]:
#############################################
#SINGLE PLOTTING FUNCTIONS

In [None]:
# def PlotDistancesFunction(parcel_type):
#     # choose which outer key to plot
#     ptype = parcel_type
#     depth_types = ["ALL", "SHALLOW", "DEEP"]
    
#     # set up figure (2 rows × 3 columns)
#     fig = plt.figure(figsize=(12, 8))
#     gs  = gridspec.GridSpec(2, len(depth_types), figure=fig,
#                             wspace=0.3, hspace=0.35)
    
#     # loop through depth types
#     for j, depth in enumerate(depth_types):
#         # first row: X_List
#         ax_top = fig.add_subplot(gs[0, j])
#         if ptype in results and depth in results[ptype]:
#             data_x = results[ptype][depth]["X_List"]
#             data_x_mean = np.mean(data_x)
#             PlotHistogram(ax_top, data_x,
#                           xlabel="X distance from left side (km)",
#                           title=f"{ptype} – {depth}\n" 
#                           + r"$\mu$ = %.2f km" % data_x_mean)
    
#         # second row: Xdiff_List
#         ax_bottom = fig.add_subplot(gs[1, j])
#         if ptype in results and depth in results[ptype]:
#             data_xdiff = results[ptype][depth]["Xdiff_List"]
#             data_xdiff_mean = np.mean(data_xdiff)
#             PlotHistogram(ax_bottom, data_xdiff,
#                           xlabel="X distance from SBF (km)",
#                           title=f"{ptype} – {depth}\n"
#                                 + r"$\mu$ = %.2f km" % data_xdiff_mean)
#         else:
#             continue
 
#     fig.subplots_adjust(left=0.07, right=0.97,   
#                         bottom=0.08, top=0.90,
#                         wspace=0.35, hspace=0.35)
#     return fig

In [None]:
def PlotVariablesFunction(parcel_type):
    """
    Build a 2 x 3 figure of histograms for one parcel type.

    Columns are the depth subsets ALL / SHALLOW / DEEP; the top row shows
    QV_List multiplied by 1e3 (labelled g/kg) and the bottom row shows
    THETA_V_List (labelled K). Data are read from the module-level `results`;
    panels without data are left empty.

    Returns
    -------
    matplotlib figure
    """
    depth_types = ["ALL", "SHALLOW", "DEEP"]

    # 2 rows x 3 columns
    fig = plt.figure(figsize=(12, 8))
    grid = gridspec.GridSpec(2, len(depth_types), figure=fig,
                             wspace=0.3, hspace=0.35)

    for column, depth in enumerate(depth_types):
        # create the panels top-first so the figure's axes order is unchanged
        qv_axis = fig.add_subplot(grid[0, column])
        thv_axis = fig.add_subplot(grid[1, column])

        # nothing to draw for this subset: leave both panels empty
        if parcel_type not in results or depth not in results[parcel_type]:
            continue

        subset = results[parcel_type][depth]
        header = f"{parcel_type} – {depth}\n"

        # top row: water-vapour values scaled by 1e3
        qv_values = np.array(subset["QV_List"])*1e3
        PlotHistogram(qv_axis, qv_values,
                      xlabel="qv (g/kg)",
                      title=header + r"$\mu$ = %.2f g/kg" % np.mean(qv_values))

        # bottom row: virtual potential temperature values
        thv_values = subset["THETA_V_List"]
        PlotHistogram(thv_axis, thv_values,
                      xlabel="th_v (K)",
                      title=header + r"$\mu$ = %.2f K" % np.mean(thv_values))

    fig.subplots_adjust(left=0.07, right=0.97,
                        bottom=0.08, top=0.90,
                        wspace=0.35, hspace=0.35)
    return fig

In [None]:
#############################################
#PLOTTING

In [None]:
# parcel_types = ["CL", "nonCL", "SBF", "nonSBF"]
# for parcel_type in parcel_types:
#     fig = PlotDistancesFunction(parcel_type)

#     #saving
#     fileName=f"Tracked_Histograms_Distances_{parcel_type}" 
#     SaveFigure(fig,plotType="Project_Algorithms/Tracking_Algorithms/Tracked_Histograms",fileName=fileName)

In [None]:
# parcel_types = ["CL", "nonCL", "SBF"]
# for parcel_type in parcel_types:
#     fig = PlotVariablesFunction(parcel_type)

#     #saving
#     fileName=f"Tracked_Histograms_Variables_{parcel_type}" 
#     SaveFigure(fig,plotType="Project_Algorithms/Tracking_Algorithms/Tracked_Histograms",fileName=fileName)

In [None]:
#############################################
#COMBINED PLOTTING FUNCTIONS

In [None]:
import matplotlib.pyplot as plt
from matplotlib import gridspec
import numpy as np

def PlotAllHistograms_Distances(parcel_types, results):
    """
    Create one figure with a block of distance histograms per parcel type.

    Blocks are laid out two per row in the order given by `parcel_types`
    (["CL", "nonCL", "SBF", "nonSBF"] gives the 2x2 layout
    [CL, nonCL] / [SBF, nonSBF]). Each block is a 2x3 grid: columns are the
    depth subsets ALL / SHALLOW / DEEP, the top row shows X_List and the
    bottom row shows Xdiff_List. Panels without data are left empty.

    Parameters
    ----------
    parcel_types : sequence of str
        Outer keys of `results` to plot, in layout order. This argument was
        previously ignored in favour of a hard-coded list; None or an empty
        sequence still selects that list.
    results : dict
        Nested dict results[parcel type][depth] -> {"X_List", "Xdiff_List", ...}.

    Returns
    -------
    matplotlib figure
    """

    depth_types = ["ALL", "SHALLOW", "DEEP"]

    if parcel_types is None or len(parcel_types) == 0:
        parcel_types = ["CL", "nonCL", "SBF", "nonSBF"]

    # two blocks per row; ceiling division gives the number of rows
    n_cols = 2
    n_rows = -(-len(parcel_types) // n_cols)

    # domain length measured from the first xf point (same for every panel)
    # NOTE(review): assumes ModelData.xf is in km, matching the axis labels -- confirm
    domain_length = (ModelData.xf-ModelData.xf[0])[-1]
    halflength = domain_length/2

    # outer grid for parcel-type blocks
    fig = plt.figure(figsize=(14, 10))
    outer_gs = gridspec.GridSpec(n_rows, n_cols, figure=fig, wspace=0.25, hspace=0.45)

    for block_index, ptype in enumerate(parcel_types):
        r, c = divmod(block_index, n_cols)
        inner_gs = gridspec.GridSpecFromSubplotSpec(
            2, len(depth_types), subplot_spec=outer_gs[r, c],
            wspace=0.4, hspace=0.6  # extra breathing room between the two rows
        )

        for j, depth in enumerate(depth_types):
            # create top then bottom so the figure's axes order alternates rows
            ax_top = fig.add_subplot(inner_gs[0, j])
            ax_bottom = fig.add_subplot(inner_gs[1, j])

            if ptype not in results or depth not in results[ptype]:
                continue
            subset = results[ptype][depth]

            # --- TOP ROW: X_List ---
            data_x = subset["X_List"]
            data_x_mean = np.mean(data_x)
            PlotHistogram(
                ax_top, data_x,
                xlabel="X distance (km)",
                title=f"{ptype} – {depth}\n"
                      + r"$\mu$ = %.2f km" % data_x_mean
            )
            # reference line at one quarter of the domain length
            ax_top.axvline(domain_length*1/4,color='blue')
            ax_top.set_xlim(left=0, right=domain_length)

            # --- BOTTOM ROW: Xdiff_List ---
            data_xdiff = subset["Xdiff_List"]
            data_xdiff_mean = np.mean(data_xdiff)
            PlotHistogram(
                ax_bottom, data_xdiff,
                xlabel="X distance from SBF (km)",
                title=r"$\mu$ = %.2f km" % data_xdiff_mean
            )
            ax_bottom.axvline(0,color='black')
            # SBF panels use a fixed +/-10 window; all others span half the domain
            if ptype == "SBF":
                ax_bottom.set_xlim(left=-10.0, right=10.0)
            else:
                ax_bottom.set_xlim(left=-halflength, right=halflength)

    # Adjust overall layout to prevent overlap
    fig.subplots_adjust(left=0.06, right=0.97, bottom=0.06, top=0.94)
    return fig


In [None]:
import matplotlib.pyplot as plt
from matplotlib import gridspec
import numpy as np

def PlotAllHistograms_Variables(parcel_types, results):
    """
    Create one figure with a block of variable histograms per parcel type.

    Blocks are laid out two per row in the order given by `parcel_types`
    (["CL", "nonCL", "SBF", "nonSBF"] gives the 2x2 layout
    [CL, nonCL] / [SBF, nonSBF]). Each block is a 2x3 grid: columns are the
    depth subsets ALL / SHALLOW / DEEP, the top row shows QV_List multiplied
    by 1e3 (labelled g/kg) and the bottom row shows THETA_V_List (labelled K).
    Panels without data are left empty.

    Parameters
    ----------
    parcel_types : sequence of str
        Outer keys of `results` to plot, in layout order. This argument was
        previously ignored in favour of a hard-coded list; None or an empty
        sequence still selects that list.
    results : dict
        Nested dict results[parcel type][depth] -> {"QV_List", "THETA_V_List", ...}.

    Returns
    -------
    matplotlib figure
    """

    depth_types = ["ALL", "SHALLOW", "DEEP"]

    if parcel_types is None or len(parcel_types) == 0:
        parcel_types = ["CL", "nonCL", "SBF", "nonSBF"]

    # two blocks per row; ceiling division gives the number of rows
    n_cols = 2
    n_rows = -(-len(parcel_types) // n_cols)

    # outer grid for parcel-type blocks
    fig = plt.figure(figsize=(14, 10))
    outer_gs = gridspec.GridSpec(n_rows, n_cols, figure=fig, wspace=0.25, hspace=0.45)

    for block_index, ptype in enumerate(parcel_types):
        r, c = divmod(block_index, n_cols)
        inner_gs = gridspec.GridSpecFromSubplotSpec(
            2, len(depth_types), subplot_spec=outer_gs[r, c],
            wspace=0.4, hspace=0.55
        )

        for j, depth in enumerate(depth_types):
            # create top then bottom so the figure's axes order alternates
            # rows (callers slice fig.axes[::2] / fig.axes[1::2] by row)
            ax_top = fig.add_subplot(inner_gs[0, j])
            ax_bottom = fig.add_subplot(inner_gs[1, j])

            if ptype not in results or depth not in results[ptype]:
                continue
            subset = results[ptype][depth]

            # --- TOP ROW: QV (g/kg) ---
            data_qv = np.array(subset["QV_List"]) * 1e3
            data_qv_mean = np.mean(data_qv)
            PlotHistogram(ax_top, data_qv,
                          xlabel=r"$q_v$ (g/kg)",
                          title=f"{ptype} – {depth}\n"
                          + r"$\mu$ = %.2f g kg$^{-1}$" % data_qv_mean)

            # --- BOTTOM ROW: THv (K) ---
            data_th = np.array(subset["THETA_V_List"])
            data_th_mean = np.mean(data_th)
            PlotHistogram(ax_bottom, data_th,
                          xlabel=r"$\theta_v$ (K)",
                          title=r"$\mu$ = %.2f K" % data_th_mean)

    # Global layout
    fig.subplots_adjust(left=0.06, right=0.97, bottom=0.06, top=0.94)
    return fig

In [None]:
#############################################
#PLOTTING

In [None]:
# Build the combined distance-histogram figure for the four parcel types
parcel_types = ["CL", "nonCL", "SBF", "nonSBF"]
fig = PlotAllHistograms_Distances(parcel_types, results)

# Post-process the ticks/limits of every axes in the figure.
# NOTE(review): EvenTicksToLimits and SnapLimitsToTicks are not defined in this
# notebook; presumably they come from the PlottingFunctions wildcard import -- confirm.
axes = fig.get_axes()
EvenTicksToLimits(axes, dim='x', n_ticks=4)
SnapLimitsToTicks(axes, dim='y')

# Write the figure as a JPEG under the Tracked_Histograms plotting folder
SaveFigure(
    fig,
    plotType="Project_Algorithms/Tracking_Algorithms/Tracked_Histograms",
    fileName="Tracked_Histograms_Distances"
)

In [None]:
# Build the combined variable-histogram figure for the four parcel types
parcel_types = ["CL", "nonCL", "SBF", "nonSBF"]
fig = PlotAllHistograms_Variables(parcel_types, results)

# PlotAllHistograms_Variables adds a top-row axes and then a bottom-row axes for
# each panel column, so even positions in fig.axes are the q_v panels and odd
# positions the theta_v panels; each group is passed to MatchAxisLimits separately.
# NOTE(review): MatchAxisLimits, EvenTicksToLimits and SnapLimitsToTicks are not
# defined in this notebook; presumably they come from the PlottingFunctions
# wildcard import -- confirm.
axes = fig.get_axes()
MatchAxisLimits(fig.axes[::2], dim='x')
MatchAxisLimits(fig.axes[1::2], dim='x')
EvenTicksToLimits(axes, dim='x', n_ticks=4)
SnapLimitsToTicks(axes, dim='y')

# Write the figure as a JPEG under the Tracked_Histograms plotting folder
SaveFigure(
    fig,
    plotType="Project_Algorithms/Tracking_Algorithms/Tracked_Histograms",
    fileName="Tracked_Histograms_Variables"
)