In [None]:
#Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr
import os; import time
import pickle
import h5py

In [None]:
#MAIN DIRECTORIES
# Root of the project tree on the Lustre store (keeps its trailing slash).
mainDirectory = '/mnt/lustre/koa/koastore/torri_group/air_directory/Projects/DCI-Project/'
# Scratch area; GetDataDirectories uses it for simulations 2 and 3.
scratchDirectory = '/home/air673/koa_scratch/'
# Folder holding this calculation code, expressed relative to the project root.
codeDirectory = mainDirectory + 'Variable_Calculation/CalculateMoreVariables'

In [None]:
#LOADING DATA
def GetDataDirectories(simulationNumber):
    """Return the CM1 output paths and run descriptors for one simulation.

    Parameters
    ----------
    simulationNumber : int
        Which run to use: 1, 2 or 3. Run 1 lives under ``mainDirectory``;
        runs 2 and 3 live in ``scratchDirectory`` (both module-level names).

    Returns
    -------
    tuple
        ``(dataDirectory, parcelDirectory, res, t_res, Np_str, Nz_str)`` where
        the first two are full paths to the gridded and parcel netCDF files and
        the remaining four are the strings used to build those file names.

    Raises
    ------
    ValueError
        If ``simulationNumber`` is not one of the supported runs.
    """
    if simulationNumber == 1:
        Directory = os.path.join(mainDirectory, 'Model/cm1r20.3/run')
        res = '1km'; t_res = '5min'; Np_str = '1e6'; Nz_str = '34'
    elif simulationNumber == 2:
        Directory = scratchDirectory
        res = '1km'; t_res = '1min'; Np_str = '50e6'; Nz_str = '95'
    elif simulationNumber == 3:
        Directory = scratchDirectory
        res = '250m'; t_res = '1min'; Np_str = '50e6'; Nz_str = '95'
    else:
        # Without this branch an unknown number reaches the f-strings below
        # with nothing assigned and fails with an opaque UnboundLocalError.
        raise ValueError(
            f"Unknown simulationNumber {simulationNumber!r}; expected 1, 2 or 3"
        )

    dataDirectory = os.path.join(Directory, f"cm1out_{res}_{t_res}_{Nz_str}nz.nc")
    parcelDirectory = os.path.join(Directory, f"cm1out_pdata_{res}_{t_res}_{Np_str}np.nc")
    return dataDirectory, parcelDirectory, res, t_res, Np_str, Nz_str
    
def GetData(dataDirectory, parcelDirectory):
    """Open the gridded and parcel netCDF files with xarray.

    Both files are opened with ``decode_timedelta=True``. Returns the pair
    ``(gridded_dataset, parcel_dataset)`` in that order.
    """
    gridded, parcels = (
        xr.open_dataset(nc_path, decode_timedelta=True)
        for nc_path in (dataDirectory, parcelDirectory)
    )
    return gridded, parcels

def SubsetDataVars(dataNC):
    """Index ``dataNC`` with the fixed list of variable names used in this project.

    The names are kept in four groups: general fields, then the ``ptb_*``,
    ``qvb_*`` and ``wb_*`` families (presumably CM1 budget terms — confirm
    against the model's output documentation). The groups are flattened, in
    order, into one list that is used as the index.
    """
    name_groups = (
        [
            "thflux", "qvflux", "tsk", "cape",
            "cin", "lcl", "lfc", "th",
            "prs", "rho", "qv", "qc",
            "qr", "qi", "qs", "qg",
            "buoyancy", "uinterp", "vinterp", "winterp",
        ],
        [
            "ptb_hadv", "ptb_vadv", "ptb_hidiff", "ptb_vidiff",
            "ptb_hturb", "ptb_vturb", "ptb_mp", "ptb_rdamp",
            "ptb_rad", "ptb_div", "ptb_diss",
        ],
        [
            "qvb_hadv", "qvb_vadv", "qvb_hidiff", "qvb_vidiff",
            "qvb_hturb", "qvb_vturb", "qvb_mp",
        ],
        [
            "wb_hadv", "wb_vadv", "wb_hidiff", "wb_vidiff",
            "wb_hturb", "wb_vturb", "wb_pgrad", "wb_rdamp", "wb_buoy",
        ],
    )

    # Must stay a list: the original indexes with a list of names.
    selected = [name for group in name_groups for name in group]
    return dataNC[selected]

# Resolve the file paths and run descriptors for simulation 1, then open both datasets.
dataDirectory, parcelDirectory, res, t_res, Np_str, Nz_str = GetDataDirectories(simulationNumber=1)
data, parcel = GetData(dataDirectory, parcelDirectory)

In [None]:
# NOTE(review): this assignment shadows Python's built-in dir() for the rest of
# the session, and nothing else in the visible notebook reads it. The path also
# lacks the 'Projects/' component that mainDirectory has — confirm which is right.
dir='/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/'

In [None]:
import sys

# Make the shared helper modules (NumericalFunctions, PlottingFunctions,
# StatisticalFunctions) importable from the project's Functions folder.
dir2='/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/'
path=dir2+'../Functions/'
# Guarded so that re-running this cell does not keep appending the same entry.
if path not in sys.path:
    sys.path.append(path)

# Import order is kept as before so that any name defined by more than one
# module resolves to the same (last-imported) definition.
import NumericalFunctions
from NumericalFunctions import * # all public names from NumericalFunctions
import PlottingFunctions
from PlottingFunctions import * # all public names from PlottingFunctions
import StatisticalFunctions
from StatisticalFunctions import * # all public names from StatisticalFunctions

# # Get all functions in NumericalFunctions
# import inspect
# functions = [f[0] for f in inspect.getmembers(NumericalFunctions, inspect.isfunction)]
# functions

In [None]:
#LOAD VARIABLES
################################################################################
def LoadData(data_t,horiz_avg):
    """Split density at one timestep into a reference profile and a perturbation.

    Parameters
    ----------
    data_t : mapping
        Single-timestep data; ``data_t['rho'].data`` is read as the density
        array (assumed 3-D with the vertical axis first — TODO confirm).
    horiz_avg : bool
        If truthy, the reference is the mean of ``rho`` over axes 1 and 2 at
        this timestep. Otherwise the reference is the horizontal mean
        (over ``xh`` and ``yh``) of ``rho`` at time index 0, read from the
        module-level ``data`` dataset rather than from ``data_t``.

    Returns
    -------
    (rho_prime, rho_mean)
        ``rho_mean`` keeps singleton horizontal axes so it broadcasts against
        ``rho``; ``rho_prime`` is ``rho - rho_mean``.
    """
    rho = data_t['rho'].data

    # A single if/else guarantees rho_mean is always assigned; the earlier
    # `== True` / `== False` pair left it unbound for any other flag value.
    if horiz_avg:
        # Horizontal average at this timestep.
        rho_mean = np.mean(rho, axis=(1, 2), keepdims=True)
    else:
        # First-timestep horizontal average, with two trailing singleton axes
        # added for broadcasting. NOTE: depends on the global `data`.
        rho_mean = data['rho'].isel(time=0).mean(dim=('xh', 'yh')).data[:, np.newaxis, np.newaxis]

    rho_prime = rho - rho_mean

    return rho_prime,rho_mean

In [None]:
def GetTrueBuoyancy(data, data_t):
    """Collect the model buoyancy and two density-based buoyancy fields.

    ``data`` is accepted but not read in this function. ``data_t`` supplies
    ``data_t['buoyancy'].data`` (the model's own output) and is forwarded to
    ``LoadData`` to obtain the density perturbation and reference.

    Returns a dict with, in order:
      * ``'buoyancy_cm1'``         - the model output, unchanged;
      * ``'buoyancy_full'``        - ``-g * rho' / rho_mean`` with ``LoadData(..., False)``;
      * ``'buoyancy_full_each_t'`` - the same expression with ``LoadData(..., True)``.
    """
    g = 9.81

    fields = {'buoyancy_cm1': data_t['buoyancy'].data}

    # (output key, horiz_avg flag) pairs, evaluated in this order.
    for key, per_timestep_mean in (('buoyancy_full', False),
                                   ('buoyancy_full_each_t', True)):
        rho_prime, rho_mean = LoadData(data_t, per_timestep_mean)
        fields[key] = -g * rho_prime / rho_mean

    return fields

In [None]:
def GetOutputName():
    """Return the HDF5 output path for the current run.

    Reads the module-level ``res`` and ``t_res`` strings. The base directory
    depends on ``res``; the file is
    ``<base>Variable_Calculation/OUTPUT/Buoyancy_{res}_{t_res}.h5``.

    Raises
    ------
    ValueError
        If ``res`` is neither ``'1km'`` nor ``'250m'``.
    """
    if res=='1km':
        dir2='/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/'
    elif res=='250m':
        dir2='/mnt/lustre/koa/scratch/air673/'
    else:
        # Previously any other value left dir2 unassigned and the line below
        # failed with an UnboundLocalError.
        raise ValueError(f"Unsupported resolution {res!r}; expected '1km' or '250m'")
    out_file = dir2 + 'Variable_Calculation/OUTPUT/' + f'Buoyancy_{res}_{t_res}.h5'
    return out_file
    
def initiate_array(VarNames):
    """Create any missing output datasets in the HDF5 file from GetOutputName().

    Dataset dimensions come from the lengths of ``time``, ``zh``, ``yh`` and
    ``xh`` in the module-level ``data``. The file is opened in append mode and
    names that already exist in it are left untouched. New datasets are
    float64, extendable along the first axis, and chunked one first-axis slab
    at a time.
    """
    # Lengths of the four coordinates, in (time, z, y, x) order.
    n_t, n_z, n_y, n_x = (len(data[dim]) for dim in ('time', 'zh', 'yh', 'xh'))
    slab = (n_z, n_y, n_x)

    target = GetOutputName()

    with h5py.File(target, 'a') as h5:
        for name in VarNames:
            if name in h5:
                continue  # already present: keep whatever has been written
            h5.create_dataset(
                name,
                shape=(n_t,) + slab,
                maxshape=(None,) + slab,
                dtype='float64',
                chunks=(1,) + slab,
            )

def add_timestep_at_index(VARS, index):
    """Write one slab per variable into the HDF5 file from GetOutputName().

    ``VARS`` maps dataset names to the array for this step; each array is
    stored at position ``index`` along the dataset's first axis. The file is
    opened in append mode.

    Raises
    ------
    KeyError
        As soon as a name in ``VARS`` has no dataset in the file. Entries
        iterated before that one have already been written.
    """
    out_file = GetOutputName()

    with h5py.File(out_file, 'a') as h5:
        for var_name, slab in VARS.items():
            if var_name not in h5:
                raise KeyError(f"Dataset '{var_name}' does not exist in {out_file}")
            h5[var_name][index] = slab

In [None]:
#RUNNING

In [None]:
#MAKING ARRAY TO STORE BUOYANCY
# One output dataset per key returned by GetTrueBuoyancy.
VarNames = ['buoyancy_cm1', 'buoyancy_full', 'buoyancy_full_each_t']
initiate_array(VarNames)

#CALCULATING AND APPENDING TO DATA EACH TIMESTEP
n_timesteps = len(data['time'])
for t in range(n_timesteps):
    # Progress message on every step.
    print(f'Current time {t}')
    data_t = data.isel(time=t)

    VARS = GetTrueBuoyancy(data, data_t)
    add_timestep_at_index(VARS, t)



In [None]:
##########################################################################
# #READING BACK IN

In [None]:
# t=100
# approximation=True
# # approximation=False
# in_file=GetOutputName(approximation)
# #READING FINAL OUTPUT
# dir2='/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/'
# # dir2='/mnt/lustre/koa/scratch/air673/'
# with h5py.File(in_file, 'a') as f:
#     # Access the existing dataset 'MSE'
#     th_term = f['th_term'][t]
#     rv_term = f['rv_term'][t]
#     rl_term = f['rl_term'][t]

In [None]:
##########################################################################
# #TESTING

In [None]:
# def CalculateInitial(data, varname):
#     if varname == 'th_v':
#         data_t = data.isel(time=0)
#         var = MakeTheta_V(data_t)[varname]
#         # init_array = var[:, 0, 0]
#         init_array = np.mean(var, axis=(1, 2))
#     else:
#         # init_array = data[varname].isel(time=0, yh=0, xh=0).data
#         init_array = data[varname].isel(time=0).mean(dim=("yh", "xh")).data
#     return init_array
# def MakeTheta_V(data_t):
    
#     #LOAD VARIABLES
#     ################################################################################
#     def load_vars(data_t):
#         # print('MOISTURE VARIABLES'); ################################# MOISTURE VARIABLES
#         rv=data_t['qv'].data;rl=data_t['qc'].data+data_t['qr'].data;
#         ri=data_t['qi'].data
        
#         # print('THERMODYNAMICS'); ################################# THERMODYNAMICS
#         Rd=287.04
#         Rv=461.5
#         eps = Rd / Rv
        
#         # print('POTENTIAL TEMPERATURE'); ################################# POTENTIAL TEMPERATURE
#         th=data_t['th'].data
    
#         return th,rv,rl,ri,eps
    
#     def make_theta_v(th,rv,rl,ri,eps):
#         # print('VIRTUAL POTENTIAL TEMPERATURE'); ################################# VIRTUAL POTENTIAL TEMPERATURE
#         th_v = (th*(1+rv/eps))/(1+rv+rl+ri)
    
#         # print('TEMPERATURE'); ################################# TEMPERATURE
#         # T=theta*(P/p0)**(Rd/Cpd)
        
#         # print('VIRTUAL TEMPERATURE'); ################################# VIRTUAL TEMPERATURE
#         # eps=Rd/Rv
#         # rv=data['qv'].data
#         # scalar=(1+rv/eps)/(1+rv+rl+ri)
#         # Tv=T*scalar
#         VARs={'th_v': th_v}
#         return VARs
#     [th,rv,rl,ri,eps]=load_vars(data_t)
#     VARS=make_theta_v(th,rv,rl,ri,eps)
#     return VARS

# def PerturbationOverMean(varname, t, z):
#     # Select data at time t and vertical level z
#     data_tz = data.isel(time=t, zh=z)
    
#     if varname == 'th_v':
#         var = MakeTheta_V(data_tz)['th_v']  # Should return DataArray at t,z with spatial dims
#     else: 
#         var = data_tz[varname]  # Access from selected subset
    
#     # Calculate mean over horizontal dims at level z and time t
#     # Assuming CalculateInitial returns mean profile array indexed by z
#     var_mean = CalculateInitial(data, varname)[z]  
    
#     # Broadcast mean to var shape for subtraction
#     # If var is 2D (yh,xh), var_mean is scalar — this works automatically in xarray/numpy
    
#     var_prime = var - var_mean
    
#     # Return perturbation normalized by mean, same shape as var
#     return var_prime / var_mean


# def ModelBuoyancy(t, z):
#     # Return buoyancy at time t and level z
#     B = data['buoyancy'].isel(time=t, zh=z)
    
#     # Return as numpy array if you prefer
#     return B.data


In [None]:
# #CALCULATING PRIMEBARs
# t=100
# z=15
# rho_primebar=PerturbationOverMean('rho',t,z).data
# prs_primebar=PerturbationOverMean('prs',t,z).data
# th_v_primebar=PerturbationOverMean('th_v',t,z)

In [None]:
# #CALCULATING BUOYANCYs
# g=9.81;
# Rd=287.04;Cpd=1005.7; k=Rd/Cpd
# B_full=-g*rho_primebar
# B_thv=g*th_v_primebar

# B_prs=-g*(1-k)*prs_primebar
# B_prs_plus_thv=B_thv+B_prs

# B_model=ModelBuoyancy(t,z)

In [None]:
# import matplotlib.pyplot as plt
# import matplotlib.gridspec as gridspec

# fig = plt.figure(figsize=(8, 6))

# # Create a 2x2 GridSpec layout
# gs = gridspec.GridSpec(2, 2)

# # Create subplots using grid positions
# ax1 = fig.add_subplot(gs[0, 0])  # Row 0, Col 0
# ax2 = fig.add_subplot(gs[0, 1])  # Row 0, Col 1
# ax3 = fig.add_subplot(gs[1, 0])  # Row 1, Col 0
# ax4 = fig.add_subplot(gs[1, 1])  # Row 1, Col 1

# c1=ax1.contourf(B_full);cb1=fig.colorbar(c1, ax=ax1);ax1.set_title('Full Buoyancy')
# c2=ax2.contourf(B_thv);cb2=fig.colorbar(c2, ax=ax2);ax2.set_title('th_v Buoyancy')
# c3=ax3.contourf(B_prs_plus_thv);cb3=fig.colorbar(c3, ax=ax3);ax3.set_title('th_v+prs_pert Buoyancy')
# c4=ax4.contourf(B_model);cb4=fig.colorbar(c4, ax=ax4);ax4.set_title('Model Buoyancy')

# fig.tight_layout()

# # apply_scientific_notation_colorbar([cb3])

In [None]:
# import matplotlib.pyplot as plt
# import matplotlib.gridspec as gridspec

# fig = plt.figure(figsize=(10, 10))
# gs = gridspec.GridSpec(3, 2)

# # Define axes
# ax1 = fig.add_subplot(gs[0, 0])
# ax2 = fig.add_subplot(gs[0, 1])
# ax6 = fig.add_subplot(gs[1, 0])  # moved up to row 1
# ax3 = fig.add_subplot(gs[1, 1])
# ax4 = fig.add_subplot(gs[2, 0])
# ax5 = fig.add_subplot(gs[2, 1])

# # Now plot with contourf and colorbars
# c1 = ax1.contourf(B_full - B_model); cb1 = fig.colorbar(c1, ax=ax1); ax1.set_title('Full - Model Buoyancy')
# c2 = ax2.contourf(B_full - B_thv); cb2 = fig.colorbar(c2, ax=ax2); ax2.set_title('Full - TH_V Buoyancy')
# c6 = ax6.contourf(B_full - B_prs_plus_thv); cb6 = fig.colorbar(c6, ax=ax6); ax6.set_title('Full - TH_V+PRS Buoyancy')
# c3 = ax3.contourf(B_thv - B_model); cb3 = fig.colorbar(c3, ax=ax3); ax3.set_title('TH_V - Model Buoyancy')
# c4 = ax4.contourf(B_thv - B_prs_plus_thv); cb4 = fig.colorbar(c4, ax=ax4); ax4.set_title('TH_V - TH_V+PRS BUOYANCY')
# c5 = ax5.contourf(B_prs_plus_thv - B_model); cb5 = fig.colorbar(c5, ax=ax5); ax5.set_title('TH_V+PRS - MODEL BUOYANCY')

# fig.tight_layout()
# plt.show()


In [None]:
# import numpy as np

# # Assuming the following variables are already defined and loaded:
# # B_full, B_model: buoyancy arrays
# # rho_primebar, prs_primebar, th_v_primebar: perturbation/bar terms arrays
# # g: gravitational acceleration (≈9.81 m/s²)
# # k: kappa (Rd/cp, ~0.286)

# # Calculate buoyancy difference
# diff = (B_full - B_model)

# # Find indices where error is between 0.08 and 0.1
# cond1 = diff <= 0.1
# cond2 = diff >= 0.08
# where = np.where(cond1 & cond2)

# print('Looking at cases where buoyancy error is in [0.08, 0.1]:\n')

# print('Full - Model Buoyancy Error at selected points:')
# print(diff[where])
# print('\nPerturbation / base state terms at those points:')
# print('rho_primebar:', rho_primebar[where])
# print('prs_primebar:', prs_primebar[where])
# print('th_v_primebar:', th_v_primebar[where])

# print('\nSecond-order Taylor error terms:')

# err_p = g * (1 - k) * (prs_primebar[where])**2 / 2
# err_thv = g * (th_v_primebar[where])**2 / 2

# print('Pressure second-order error terms:')
# print(err_p)
# print('Virtual potential temperature second-order error terms:')
# print(err_thv)

# print('\nCombined second-order Taylor error approximation:')
# combined_error = err_thv - err_p
# print(combined_error)

# # print('\nResidual (Actual buoyancy difference - Taylor approximation):')
# # residual = diff[where] - combined_error
# # print(residual)
