In [None]:
#Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
from matplotlib.gridspec import GridSpec
from matplotlib.lines import Line2D
import xarray as xr
import os; import time
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
import pickle
import h5py

In [None]:
#MAIN DIRECTORIES
# Project root; GetDataDirectories builds the simulation-1 model output path from it.
mainDirectory='/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
# Scratch area; GetDataDirectories reads the simulation-2 and simulation-3 files from here.
scratchDirectory='/home/air673/koa_scratch/'
# Domain_Profiles folder inside the project tree (not referenced by the visible cells).
codeDirectory='/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/Project_Algorithms/Domain_Profiles'

In [None]:
#LOADING DATA
def GetDataDirectories(simulationNumber):
    """Build the CM1 output file paths and resolution tags for one simulation.

    Parameters
    ----------
    simulationNumber : int
        Simulation identifier; 1, 2 and 3 are supported.

    Returns
    -------
    tuple
        ``(dataDirectory, parcelDirectory, res, t_res, Np_str, Nz_str)``.
        Despite their names, the first two entries are full paths to the
        model-output and parcel-data netCDF *files*; the remaining entries are
        the strings used to build those file names.

    Raises
    ------
    ValueError
        If ``simulationNumber`` is not one of the supported simulations.
    """
    if simulationNumber == 1:
        Directory=os.path.join(mainDirectory,'Model/cm1r20.3/run')
        res='1km'; t_res='5min'; Np_str='1e6'; Nz_str='34'
    elif simulationNumber == 2:
        Directory=scratchDirectory
        res='1km'; t_res='1min'; Np_str='50e6'; Nz_str='95'
    elif simulationNumber == 3:
        Directory=scratchDirectory
        res='250m'; t_res='1min'; Np_str='50e6'; Nz_str='95'
    else:
        # Without this branch the function failed later with an opaque
        # UnboundLocalError on `Directory`.
        raise ValueError(
            f"simulationNumber must be 1, 2 or 3 (got {simulationNumber!r})"
        )

    dataDirectory = os.path.join(Directory, f"cm1out_{res}_{t_res}_{Nz_str}nz.nc")
    parcelDirectory = os.path.join(Directory,f"cm1out_pdata_{res}_{t_res}_{Np_str}np.nc")
    return dataDirectory, parcelDirectory, res,t_res,Np_str,Nz_str
    
def GetData(dataDirectory, parcelDirectory):
    """Open the model-output and parcel netCDF files with xarray.

    Both files are opened with ``decode_timedelta=True``.

    Returns
    -------
    tuple
        ``(dataNC, parcelNC)`` in that order.
    """
    return tuple(
        xr.open_dataset(nc_path, decode_timedelta=True)
        for nc_path in (dataDirectory, parcelDirectory)
    )

def SubsetDataVars(dataNC):
    """Index ``dataNC`` with a fixed list of variable names and return the result.

    The list is assembled from four groups: a set of base fields followed by
    the ``ptb_*``, ``qvb_*`` and ``wb_*`` variables (presumably the CM1 budget
    terms for potential temperature, water vapor and vertical velocity;
    confirm against the model output).
    """
    base_fields = [
        "thflux", "qvflux", "tsk", "cape",
        "cin", "lcl", "lfc", "th",
        "prs", "rho", "qv", "qc",
        "qr", "qi", "qs", "qg",
        "buoyancy", "uinterp", "vinterp", "winterp",
    ]
    ptb_terms = [
        "ptb_hadv", "ptb_vadv", "ptb_hidiff", "ptb_vidiff",
        "ptb_hturb", "ptb_vturb", "ptb_mp", "ptb_rdamp",
        "ptb_rad", "ptb_div", "ptb_diss",
    ]
    qvb_terms = [
        "qvb_hadv", "qvb_vadv", "qvb_hidiff", "qvb_vidiff",
        "qvb_hturb", "qvb_vturb", "qvb_mp",
    ]
    wb_terms = [
        "wb_hadv", "wb_vadv", "wb_hidiff", "wb_vidiff",
        "wb_hturb", "wb_vturb", "wb_pgrad", "wb_rdamp", "wb_buoy",
    ]

    # Must stay a list: the container is indexed with the whole list at once.
    selected = [*base_fields, *ptb_terms, *qvb_terms, *wb_terms]
    return dataNC[selected]

# Resolve the file paths/tags for simulation 1 and open both datasets.
# res, t_res and Np_str are module-level names that later cells use to build
# their input and output file names.
[dataDirectory,parcelDirectory, res,t_res,Np_str,Nz_str] = GetDataDirectories(simulationNumber=1)
[data1,parcel1] = GetData(dataDirectory, parcelDirectory)

In [None]:
# Project root used by the loader functions in later cells to build input paths.
# NOTE(review): this name shadows the builtin dir(); renaming it requires updating
# every cell that reads it.
dir='/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'

In [None]:
#########################################

In [None]:
import sys

# Make the project's helper modules importable. dir2 is also read later by the
# plotting cell (it is passed to SavePlot as the project root).
dir2='/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
path=dir2+'Functions/'
# Guarded so that re-running this cell does not keep appending the same entry
# (the original cell appended it twice per run).
if path not in sys.path:
    sys.path.append(path)

# Star imports are kept because later cells call the helpers by bare name
# (e.g. ProfileMean, ProfileStandardError, SnapLimitsToTicks, MatchAxisLimits,
# apply_scientific_notation). Import order is preserved: a later star import
# overrides same-named symbols from an earlier one.
import NumericalFunctions
from NumericalFunctions import *
import PlottingFunctions
from PlottingFunctions import *
import StatisticalFunctions
from StatisticalFunctions import *

# # Get all functions in NumericalFunctions
# import inspect
# functions = [f[0] for f in inspect.getmembers(NumericalFunctions, inspect.isfunction)]
# functions

In [None]:
##########################################################################################
#Plotting
# Plotting toggle. Only the last assignment takes effect, so as written
# `plotting` ends up True; comment out the second line to keep it False while
# a job array is running.
plotting=False #KEEP FALSE WHEN JOB ARRAY IS RUNNING
plotting=True

In [None]:
#READING BACK IN
# import pickle
# dir2 = dir + f'Project_Algorithms/Domain_Profiles/'
# input_file = dir2 + f'mean_lfc_{res}_{t_res}_{Np_str}.pkl'

# with open(input_file, 'rb') as f:
#     mean_LFC = pickle.load(f)
# print(mean_LFC)

def LoadMeanLFC():
    """Unpickle and return the stored mean LFC for the current simulation.

    The file is ``MeanLFC_{res}_{t_res}_{Np_str}.pkl`` inside
    ``Project_Algorithms/Tracking_Algorithms/OUTPUT/`` under the module-level
    ``dir``; ``res``, ``t_res`` and ``Np_str`` are read from module scope.
    """
    output_root = dir + 'Project_Algorithms/Tracking_Algorithms/OUTPUT/'
    pickle_path = f"{output_root}MeanLFC_{res}_{t_res}_{Np_str}.pkl"
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)
# Load the mean LFC once; `mean_LFC` is the alias read by the plotting code,
# which divides it by 1000 before drawing it on the 'z (km)' axis
# (so the stored value is presumably in metres; confirm).
MeanLFC=LoadMeanLFC()
mean_LFC=MeanLFC
print(f"Mean LFC is: {MeanLFC}\n")


def LoadAllCloudBase():
    """Unpickle and return the stored cloud-base data for the current simulation.

    The file is ``all_cloudbase_{res}_{t_res}_{Np_str}.pkl`` inside
    ``Project_Algorithms/Tracking_Algorithms/OUTPUT/`` under the module-level
    ``dir``; ``res``, ``t_res`` and ``Np_str`` are read from module scope.
    """
    output_root = dir + 'Project_Algorithms/Tracking_Algorithms/OUTPUT/'
    pickle_path = f"{output_root}all_cloudbase_{res}_{t_res}_{Np_str}.pkl"
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)
# NaN-ignoring minimum over all stored cloud-base values. `cloudbase` is drawn
# by the plotting code as a horizontal line on the 'z (km)' axis without any
# rescaling (so it is presumably already in km; confirm).
min_all_cloudbase=np.nanmin(LoadAllCloudBase())
cloudbase=min_all_cloudbase
print(f"Minimum Cloudbase is: {cloudbase}\n")

In [None]:
# Thresholds. None of these are referenced by the visible cells.
# NOTE(review): presumably two vertical-velocity thresholds and a
# cloud condensate (qc + qi) threshold; units are not stated here, confirm
# against the code that produced the profiles.
w_thresh1=0.1
w_thresh2=0.5
qcqi_thresh=1e-6

In [None]:
import numpy as np
import h5py
import os

def LoadProfiles(type, vars):
    """Load stored profiles from HDF5 into module-level globals.

    For every name in ``vars`` this sets two globals:

    * ``profile_{type}_{var}``: the ``profile_{var}`` dataset as a numpy array.
    * ``profile_{type}_{var}_SE``: ``ProfileStandardError(profile, squares)``
      when a ``profile_{var}_squares`` dataset exists; otherwise a placeholder
      built from the profile itself with column 0 zeroed and only the rows
      whose column 1 exceeds 1 kept.

    It also sets the global ``factor`` (1.96), the multiplier the plotting code
    applies to the SE column when shading the band around each profile.

    Parameters
    ----------
    type : str
        ``"general"`` or ``"cloudy"``; selects which HDF5 file is read.
    vars : sequence of str
        Variable names to load.

    Raises
    ------
    ValueError
        If ``type`` is neither ``"general"`` nor ``"cloudy"``.
    """
    global factor
    # Assign the multiplier up front. Previously it was only assigned after a
    # successful standard-error computation, so it stayed undefined (or stale)
    # when none of the requested variables had a '*_squares' dataset.
    # If you switch to ProfileStandardDeviation below, set factor = 1 here.
    factor = 1.96

    dir2 = os.path.join(dir, 'Project_Algorithms/Domain_Profiles/OUTPUT/')
    if type == "general":
        input_file = os.path.join(dir2, f'general_eulerian_profiles_{res}_{t_res}_{Np_str}.h5')
    elif type == "cloudy":
        input_file = os.path.join(dir2, f'cloudy_eulerian_profiles_{res}_{t_res}_{Np_str}.h5')
    else:
        raise ValueError("type must be either 'general' or 'cloudy'")

    with h5py.File(input_file, 'r') as f:
        for var in vars:
            profile_key = f"profile_{type}_{var}"
            globals()[profile_key] = np.array(f[f"profile_{var}"])
            try:
                # Uses the stored sums of squares when present.
                globals()[f"{profile_key}_SE"] = ProfileStandardError(
                    globals()[profile_key], np.array(f[f"profile_{var}_squares"])
                )
                # Alternative (standard deviation instead of standard error):
                # globals()[f"{profile_key}_SE"] = ProfileStandardDeviation(
                #     globals()[profile_key], np.array(f[f"profile_{var}_squares"]))
            except KeyError:
                # No '*_squares' dataset: create a zero-valued placeholder so
                # the plotting code can still draw a (zero-width) band.
                dummy = globals()[profile_key].copy()
                dummy[:, 0] = 0
                dummy = dummy[dummy[:, 1] > 1]
                globals()[f"{profile_key}_SE"] = dummy


In [None]:
def SavePlot(fig, res, t_res, Np_str, dir2, limit_y=False):
    """Save ``fig`` as a 300-dpi JPEG under the project's PLOTS tree.

    The figure goes to
    ``{dir2}/Project_Algorithms/Domain_Profiles/PLOTS/Updraft_Properties/{res}_{t_res}_{Np_str}/``.

    Parameters
    ----------
    fig : object with a ``savefig`` method (e.g. a matplotlib Figure)
    res, t_res, Np_str : str
        Tags used for the sub-directory and file name.
    dir2 : str
        Root directory under which the PLOTS tree is created.
    limit_y : bool, optional
        When True the file gets a ``_SLICE`` suffix and is written to a
        ``SLICE`` sub-folder.
    """
    # Define output directory
    subdir_name = f'{res}_{t_res}_{Np_str}'
    output_dir = os.path.join(dir2, 'Project_Algorithms', 'Domain_Profiles', 'PLOTS', 'Updraft_Properties', subdir_name)

    # Choose file name
    if not limit_y:
        filename = f'Property_Updraft_Profiles_{res}_{t_res}_{Np_str}.jpg'
    else:
        filename = f'SLICE/Property_Updraft_Profiles_{res}_{t_res}_{Np_str}_SLICE.jpg'

    save_path = os.path.join(output_dir, filename)

    # Create the directory that will actually hold the file. Creating only
    # output_dir left the 'SLICE' sub-folder missing when limit_y=True.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    # Save the figure
    fig.savefig(save_path, bbox_inches='tight', dpi=300)
    print(f"Saved figure to: {save_path}")

In [None]:
def PlotProfiles(vars, xlabels, title):
    """Plot the averaged "general" and "cloudy" profiles of each variable.

    One panel is drawn per variable on a near-square grid. For each profile
    type the stored profiles are loaded with ``LoadProfiles``, averaged with
    ``ProfileMean`` and drawn with a shaded band of ``factor`` times the SE
    column on either side. The figure is then saved with ``SavePlot``.

    Parameters
    ----------
    vars : list of str
        Variable names, one per panel. For the "cloudy" pass every ``'VMF_g'``
        entry is replaced by ``'VMF_c'``. The caller's list is not modified.
    xlabels : list of str
        x-axis labels, in the same order as ``vars``.
    title : str
        Figure title.

    Notes
    -----
    * Reads the module-level names ``factor``, ``cloudbase``, ``mean_LFC``,
      ``res``, ``t_res``, ``Np_str`` and ``dir2``.
    * Stores each averaged profile in the global ``out_{type}_{var}``.
    * The index-based styling (panels 3, 4 and 5) is written for the
      nine-panel layout; it is skipped where those panels do not exist.
    """
    n_vars = len(vars)

    # Determine grid size: try to make it as square as possible
    n_cols = int(np.ceil(np.sqrt(n_vars)))
    n_rows = int(np.ceil(n_vars / n_cols))

    fig, axs = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 4*n_rows))
    fig.suptitle(title)

    # Flatten axes array for easy indexing
    if n_vars == 1:
        axs = np.array([axs])  # Make it iterable even if only one axis
    else:
        axs = axs.flatten()
    n_axes = len(axs)

    # Panel that carries the legend (second panel when there is one).
    legend_axis = axs[1] if n_axes > 1 else axs[0]

    def plotting_func(out_var, axis, label, color):
        """Draw one profile (column 0 on x, column 1 on y) and enable the grid."""
        axis.plot(out_var[:, 0], out_var[:, 1], label=label, color=color)
        axis.grid(True)

    for profile_type in ["general", "cloudy"]:
        print(f'currently on type {profile_type}')

        # Per-type copy of the variable list. The original code rewrote the
        # caller's list in place ('VMF_g' -> 'VMF_c'), so a second call failed
        # with ValueError and the "general" pass would request 'VMF_c'.
        if profile_type == 'cloudy':
            type_vars = ['VMF_c' if name == 'VMF_g' else name for name in vars]
        else:
            type_vars = list(vars)

        #LOADING VARIABLES IN
        LoadProfiles(profile_type, type_vars)

        #TAKING THE AVERAGE OF THE STORED PROFILE
        for var in type_vars:
            globals()[f"out_{profile_type}_{var}"] = ProfileMean(globals()[f"profile_{profile_type}_{var}"])

        for var, axis, xlabel in zip(type_vars, axs, xlabels):
            color = 'black' if profile_type == 'general' else 'blue'

            # converts qv and qc from kg/kg=>g/kg
            if var in ['qv', 'qc_plus_qi', 'HMC']:
                globals()[f"out_{profile_type}_{var}"][:, 0] *= 1000
                globals()[f"profile_{profile_type}_{var}_SE"][:, 0] *= 1000

            if var in ['MSE']:
                cpd=1005.7
                #should divide by cpd + r_T*cl, but r_T is not available here
                #assume r_T is on average < 1e-3 g/kg
                #so cl term would only contribute < 4 to the divisor
                globals()[f"out_{profile_type}_{var}"][:, 0] /= cpd
                globals()[f"profile_{profile_type}_{var}_SE"][:, 0] /= cpd

            profile = globals()[f"out_{profile_type}_{var}"]
            profile_SE = globals()[f"profile_{profile_type}_{var}_SE"]
            plotting_func(profile, axis, label=profile_type+' ', color=color)
            axis.fill_betweenx(profile[:, 1],
                               profile[:, 0] - factor*profile_SE[:, 0],
                               profile[:, 0] + factor*profile_SE[:, 0],
                               color=color, alpha=0.1)
            axis.set_ylabel('z (km)')
            axis.set_xlabel(xlabel)

            if axis is legend_axis:
                axis.legend()

    #FIXING TICKS
    SnapLimitsToTicks(axs, dim='x')
    for axis in axs:
        axis.set_ylim(0,20)

    #FIXING XLIMS (panels 3 and 4 share x-limits starting at 0)
    if n_axes > 4:
        ax_lst=[axs[3],axs[4]]
        MatchAxisLimits(ax_lst,dim='x')
        for axis in ax_lst:
            axis.set_xlim(left=0)

    #SCIENTIFIC NOTATION XLABEL
    if n_axes > 5:
        apply_scientific_notation([axs[5]],decimals=2)
    apply_scientific_notation(axs[0:4+1],decimals=2,scientific=False)
    apply_scientific_notation(axs[6:],decimals=1,scientific=False)

    # # Hide any unused subplots
    # for ax in axs[n_vars:]:
    #     ax.set_visible(False)

    # fig.tight_layout() #incompatible with SnapLimitsToTIcks
    fig.subplots_adjust(top=0.92, wspace=0.4, hspace=0.4)

    # MEAN CLOUD BASE (purple) and MEAN LFC (green; divided by 1000 for the km axis)
    axline_lw=1.1
    for axis in axs:
        axis.axhline(cloudbase, color='purple', linestyle='dashed', lw=axline_lw)
        axis.axhline(mean_LFC/1000, color='green', linestyle='dashed', lw=axline_lw)

    #ADDING AXVLINES (slicing is safe on short arrays)
    for ax in axs[3:5+1]:
        ax.axvline(0,color='gray',linestyle='dashed',zorder=-10)


    #SAVING
    SavePlot(fig, res, t_res, Np_str, dir2)

In [None]:
# vars=['w','qv','qc_plus_qi','th','th_v','th_e','RH_vapor','RH_ice','buoyancy_cm1','buoyancy_full','buoyancy_full_each_t','HMC'] 
# xlabels=['w (m/s)',r'$q_v$ (g/kg)',r'$q_c + q_i$ (g/kg)',r'$\theta$ (K)',r'$\theta_v$ (K)',r'$\theta_e$ (K)',r'$RH_v$ (fraction)',r'$RH_i$ (fraction)',r'$B_{cm1}$ (m/s/s)',r'$B_{full}$ (m/s/s)', r'$B_{full\_each\_t}$ (m/s/s)', 'HMC (g/kg/s)'] 

# Variables to plot and their x-axis labels; the two lists must stay in the
# same order (they are zipped together panel by panel).
# - 'VMF_g' is the name used for the "general" profiles; the plotting code
#   substitutes 'VMF_c' for the "cloudy" profiles.
# - The plotting code styles some panels by index (e.g. 3, 4 and 5), so
#   reordering this list changes which panels receive that styling.
# NOTE(review): `vars` shadows the builtin vars().
vars=['qv','qc_plus_qi','RH_vapor',
      'w','VMF_g','HMC',
      'th_v','th_e','MSE'] 
xlabels = [
    r'$q_v$ $\mathrm{(g/kg)}$', 
    r'$q_c + q_i$ $\mathrm{(g/kg)}$', 
    r'$RH_v$ (fraction)',
    
    r'$w$ $\mathrm{(m/s)}$', 
    r'VMF $\mathrm{(kg/m^2/s)}$',
    r'HMC $\mathrm{(g/kg/s)}$', 
    
    r'$\theta_v$ $\mathrm{(K)}$', 
    r'$\theta_e$ $\mathrm{(K)}$', 
    r'$\mathrm{MSE}/c_{pd}$ $\mathrm{(K)}$', 
]

In [None]:
# Run the plotting function: draws the general-vs-cloudy profile grid for the
# variables listed in `vars` and saves the figure through SavePlot.
title=f"Average General vs Cloudy Updraft Property Profiles (Eulerian)"
PlotProfiles(vars,xlabels,title)

In [None]:
# import numpy as np
# import matplotlib.pyplot as plt

# def averaged_profiles(profile):
#     """Return average profile by normalizing by count and filtering"""
#     out_var = profile[(profile[:, 1] > 1)]  # remove rows with no data
#     out_var = np.array([out_var[:, 0] / out_var[:, 1], out_var[:, 2]]).T
#     return out_var

# def get_data(type_, var_list, dir, res, t_res, Np_str):
#     """Load profiles for given type and variables from HDF5 files"""
#     dir2 = dir + 'Project_Algorithms/Domain_Profiles/OUTPUT/'
#     input_file = dir2+f"{type_}_eulerian_profiles_{res}_{t_res}_{Np_str}.h5"
    
#     data_dict = {}
#     with h5py.File(input_file, 'r') as f:
#         for var in var_list:
#             data_dict[f"profile_{var}"] = np.array(f[f"profile_{var}"])
#     return data_dict

# def average_difference(array1, array2):
#     """Compute difference between averaged profiles where z matches"""
#     out_var_one = averaged_profiles(array1)
#     out_var_two = averaged_profiles(array2)

#     # mask to keep matching heights
#     mask_one = np.isin(out_var_one[:, 1], out_var_two[:, 1])
#     mask_two = np.isin(out_var_two[:, 1], out_var_one[:, 1])

#     out_var_one = out_var_one[mask_one]
#     out_var_two = out_var_two[mask_two]

#     diff = out_var_one[:, 0] - out_var_two[:, 0]
#     zs = out_var_one[:, 1]

#     out_profile = np.zeros((len(diff), 2))
#     out_profile[:, 0] = diff
#     out_profile[:, 1] = zs
#     return out_profile

# def diff_plotting(vars,xlabels,title):
#     """Return cloudy minus general updraft difference plots"""
#     # Setup figure and axes
#     n_vars = len(vars)
#     n_cols = int(np.ceil(np.sqrt(n_vars)))
#     n_rows = int(np.ceil(n_vars / n_cols))
#     fig, axs = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows))
#     fig.suptitle(title)

#     if n_vars == 1:
#         axs = np.array([axs])
#     else:
#         axs = axs.flatten()

#     # Load data once per type to avoid repeated I/O
#     data_cloudy = get_data("cloudy", vars, dir, res, t_res, Np_str)
#     data_general = get_data("general", vars, dir, res, t_res, Np_str)

#     for var, axis, xlabel in zip(vars, axs, xlabels):
#         print(var)
#         out_var_one = data_cloudy[f"profile_{var}"]
#         out_var_two = data_general[f"profile_{var}"]

#         # Cutoff indices where cloudy count > 10 (optional)
#         cutofflow_idx = np.where(out_var_one[:, 1] > 10)[0][0]
#         cutoffhigh_idx = np.where(out_var_one[:, 1] > 10)[0][-1]

#         # Compute average difference profile
#         out_var_diff = average_difference(out_var_one, out_var_two)

#         # Convert units for some variables (kg/kg to g/kg)
#         if var in ['qv', 'qc_plus_qi', 'qi', 'HMC']:
#             out_var_diff[:, 0] *= 1000
#         if var in ['MSE']:
#             cpd=1005.7
#             #should divide by cpd + r_T*cl, but r_T is not available here
#             #assume r_T is on average < 1e-3 g/kg 
#             #so cl term would only contribute < 4 to the divisor
#             out_var_diff[:, 0] /= cpd

#         axis.plot(out_var_diff[:, 0], out_var_diff[:, 1], color='black')
#         axis.axvline(0, color='black', linestyle='dashed')

#         axis.set_ylabel('z (km)')
#         axis.set_xlabel(xlabel)
#         axis.grid(True)

#         apply_scientific_notation([axis])

#     # Hide unused axes
#     for ax in axs[n_vars:]:
#         ax.set_visible(False)

#     # #FIXING BUOYANCY XLABELS
#     # ax_lst=[axs[8],axs[9],axs[10]]
#     # MatchAxisLimits(ax_lst,dim='x')

#     # MEAN CLOUD BASE
#     axline_lw=1.1
#     for axis in axs:
#         axis.axhline(cloudbase, color='purple', linestyle='dashed', lw=axline_lw)
#         axis.axhline(mean_LFC/1000, color='green', linestyle='dashed', lw=axline_lw)

#     plt.tight_layout()
#     # plt.savefig(dir + f'Project_Algorithms/plots/eulerian_updraft_profiles_diff_{res}_{t_res}_{Np_str}.jpg', bbox_inches='tight', dpi=300)

In [None]:
# title=f"Average Cloudy - General Updraft Property Difference Profiles (Eulerian)"
# diff_plotting(vars,xlabels,title)