In [None]:
#Loading in Packages and Data

#Importing Packages
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr
import os; import time
import pickle
import h5py
###############################################################
def coefs(coefficients,degree):
    """Format polynomial coefficients as a printable string.

    Terms run from x^degree down to x^0 and are joined with " + ". Each
    coefficient is printed in one-decimal scientific notation, e.g.
    "(1.0e+00)x^2 + (2.0e+00)x^1 + (3.0e+00)x^0".

    The coefficient paired with x^n is coefficients[len(coefficients)-(n+1)],
    so a sequence holding degree+1 entries is read highest power first.
    """
    terms=[
        f"({coefficients[len(coefficients)-(power+1)]:.1e})x^{power}"
        for power in range(degree, -1, -1)
    ]
    return " + ".join(terms)
###############################################################
start_time = time.time()

# Run configuration read by the cells below.
check = False
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# so that existing references to it stay valid.
dir = '/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/'
job_array = False   # overridden in the job-array setup cell further down
index_adjust = 0    # offset added to the time index when reading convergence data
ocean_fraction = 2/8

# Active case: dx = 1 km; Np = 1M; Nt = 5 min
data1 = xr.open_dataset(
    dir + '../cm1r20.3/run/cm1out_1km_5min.nc',
    decode_timedelta=True,
) #***
parcel1 = xr.open_dataset(
    dir + '../cm1r20.3/run/cm1out_pdata_1km_5min_1e6.nc',
    decode_timedelta=True,
) #***
res = '1km'
t_res = '5min'
Np_str = '1e6'

# # dx = 1km; Np = 50M
# #Importing Model Data
# dir2='/home/air673/koa_scratch/'
# data1=xr.open_dataset(dir2+'cm1out_1km_1min.nc', decode_timedelta=True) #***
# parcel1=xr.open_dataset(dir2+'cm1out_pdata_1km_1min_50M.nc', decode_timedelta=True) #***
# res='1km'; t_res='1min'; Np_str='50e6'

# # dx = 1km; Np = 50M; Nz = 95
# #Importing Model Data
# dir2='/home/air673/koa_scratch/'
# data1=xr.open_dataset(dir2+'cm1out_1km_1min_95nz.nc', decode_timedelta=True) #***
# parcel1=xr.open_dataset(dir2+'cm1out_pdata_1km_1min_95nz.nc', decode_timedelta=True) #***
# res='1km'; t_res='1min_95nz'; Np_str='50e6'

# # dx = 250m; Np = 50M
# #Importing Model Data
# dir2='/home/air673/koa_scratch/'
# data1=xr.open_dataset(dir2+'cm1out_250m_1min_50M.nc', decode_timedelta=True) #***
# parcel1=xr.open_dataset(dir2+'cm1out_pdata_250m_1min_50M.nc', decode_timedelta=True) #***
# res='250m'; t_res='1min'; Np_str='50e6'

In [None]:
############################################################################################
#MODEL AND ALGORITHM NUMERICAL PARAMETERS
# Model output times: raw values divided by 1e9*60 and cast to float
# (presumably nanoseconds -> minutes; TODO confirm the raw time unit).
times=(data1['time'].values/(1e9 * 60)).astype(float)
# 1 / minutes per timestep = timesteps per minute
# (uses times[1] as the timestep length, i.e. assumes times[0] == 0 -- TODO confirm)
minutes=1/times[1]
# Index of the first xh point lying at least 1 (in xh units) past xh[0],
# i.e. how many x grid cells span 1 km when xh is in km.
kms=np.argmax((data1['xh'].values-data1['xh'][0].values) >= 1)

In [None]:
#JOB ARRAY SETUP
def StartJobArray(num_jobs):
    """Return the block of timesteps assigned to the current SLURM array task.

    The len(data1['time']) timesteps (module-level `data1`) are split into
    `num_jobs` contiguous chunks. Every chunk has total // num_jobs elements
    and the first total % num_jobs chunks get one extra, so the chunks tile
    the whole time axis without gaps or overlap.

    The task is identified by the SLURM_ARRAY_TASK_ID environment variable,
    which is treated as 1-based. A missing variable or a value of 0 is mapped
    to job 1.

    Parameters
    ----------
    num_jobs : int
        Number of array tasks the time axis is divided into.

    Returns
    -------
    (start_job, end_job, index_adjust, job_id)
        start_job is the first timestep index of the chunk, end_job is one
        past the last (half-open range), index_adjust equals start_job, and
        job_id is the 1-based task id that was used.

    Raises
    ------
    ValueError
        If num_jobs is smaller than 1, if num_jobs is not smaller than the
        number of timesteps, or if the task id lies outside 1..num_jobs.
    """
    total_elements=len(data1['time']) #total number of timesteps

    # Guard against a zero/negative divisor before splitting the time axis.
    if num_jobs < 1:
        raise ValueError("Number of jobs must be at least 1.")
    if num_jobs >= total_elements:
        raise ValueError("Number of jobs cannot be greater than or equal to total elements.")

    # job_range: base chunk size; remaining: number of chunks with 1 extra element
    job_range, remaining = divmod(total_elements, num_jobs)

    job_id = int(os.environ.get('SLURM_ARRAY_TASK_ID', 0)) #this is the current SBATCH job id
    if job_id==0: job_id=1
    # A task id beyond num_jobs would address timesteps past the end of the data.
    if not 1 <= job_id <= num_jobs:
        raise ValueError(
            f"SLURM_ARRAY_TASK_ID={job_id} is outside the valid range 1..{num_jobs}."
        )

    chunk = job_id - 1  # zero-based chunk number
    # The first `remaining` chunks are one element longer, which shifts the
    # start of every later chunk by min(chunk, remaining).
    start_job = chunk * job_range + min(chunk, remaining)
    end_job = start_job + job_range + (1 if chunk < remaining else 0)
    # For the last chunk this formula already evaluates to total_elements.

    index_adjust=start_job
    return start_job,end_job,index_adjust,job_id

In [None]:
###################################################################################################################################
#GENERAL FUNCTIONS

In [None]:
#Get Data Functions
def get_2dtime_data(data,varname,tlev,zlev):
    """Return the raw values of `varname` at time index `tlev` and zh index `zlev`."""
    selection=dict(time=tlev,zh=zlev)
    return data[varname].isel(**selection).values
def get_3dtime_data(data,varname,tlev):
    """Return the raw values of `varname` at time index `tlev` (all other dimensions kept)."""
    at_time=data[varname].isel(time=tlev)
    return at_time.values

def _conv_file_path():
    """Return the path of the precomputed convergence HDF5 file for the active run.

    The base directory depends on the module-level `res`; the file name is
    built from `res` and `t_res`.

    Raises
    ------
    ValueError
        If `res` is neither '1km' nor '250m' (no base directory is known).
    """
    if res=='1km':
        dir2='/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/'
    elif res=='250m':
        dir2='/home/air673/koa_scratch/'
    else:
        raise ValueError(f"Unsupported resolution res={res!r}; expected '1km' or '250m'.")
    return dir2 + 'Variable_Calculation/OUTPUT/' + 'Convergence' + f'_{res}_{t_res}.h5'

def get_conv1(t,z):
    """Read convergence for one timestep and one vertical level.

    `t` is an index into the current (possibly job-sliced) time axis; the
    module-level `index_adjust` is added to address the full 'conv' dataset.
    Returns f['conv'][t+index_adjust, z].
    """
    file_path = _conv_file_path()
    with h5py.File(file_path, 'r') as f:
        Conv = f['conv'][t+index_adjust,z] #*#*#* For JobArray
    return Conv

def get_conv2(data,t):
    """Read convergence for one timestep over the first len(data['zh']) levels.

    `t` is an index into the current (possibly job-sliced) time axis; the
    module-level `index_adjust` is added to address the full 'conv' dataset.
    Returns f['conv'][t+index_adjust, 0:Nz].
    """
    Nz=len(data['zh'])
    file_path = _conv_file_path()
    with h5py.File(file_path, 'r') as f:
        Conv = f['conv'][t+index_adjust,0:Nz] #*#*#* For JobArray
    return Conv

In [None]:
###################################################################################################################################
#ALGORITHM FUNCTIONS

In [None]:
#Function for taking x and y derivatives (Gradient)
def cd2d(f,dx,dy):
    """Return the x and y gradients of a 3-D field from adjacent-point differences.

    Parameters
    ----------
    f : array indexed [:, y, x]
        Field to differentiate; the first axis is left untouched.
    dx, dy : float
        Grid spacing along the last and the middle axis respectively.

    Returns
    -------
    (ddx, ddy)
        ddx is (f[:, :, i+1] - f[:, :, i]) / dx and is one element shorter
        than f along x; ddy is (f[:, j+1, :] - f[:, j, :]) / dy and is one
        element shorter along y. Each value is the gradient at the midpoint
        between the two grid points it was computed from, so ddx and ddy do
        not share a grid with each other or with f.
    """
    # The spacing between adjacent points is dx (dy), not 2*dx (2*dy):
    # dividing by twice the spacing would halve the gradient.
    ddx = (f[:,:,1:] - f[:,:,:-1]) / dx
    ddy = (f[:,1:,:] - f[:,:-1,:]) / dy
    return ddx, ddy

In [None]:
# def find_SBZ_xmaxs():

    
#     # Define the vertical level you are interested in
#     if res=='1km':
#         zlev = 4 #534m
#     elif res=='250m':
#         zlev= 10 #525m
    
#     # Initialize a list to store the xmaxs for each time step
#     xmaxs_list = []

#     # Loop over each time step (axis=0 corresponds to time)
#     for t in range(len(data1['time'])):  # conv_dataset.shape[0] is the time dimension size
#         if t % 60 ==0: print(f"current time {t}")
#         # Read the relevant slice for this time step and vertical level
#         Conv_t_zlev = get_conv1(t, zlev)  # Shape should be (y_size, x_size)
        
#         # Calculate the mean across the y-axis
#         Conv_ymean = np.mean(Conv_t_zlev, axis=0)  # Mean across the y-axis
        
#         # Find the index of the maximum value along the x-axis
#         xmax = np.argmax(Conv_ymean)
        
#         # Append the result for this time step
#         xmaxs_list.append(xmax)
    
#     # Convert the list of xmaxs to a numpy array (optional)
#     xmaxs = np.array(xmaxs_list)

#     return xmaxs #returns SBZ x location for each timestep
# SBZ_MAXS=find_SBZ_xmaxs()
# print('done')
# print(SBZ_MAXS)

In [None]:
#Finds all local maximums (from Calculus) along each y level for a specific z level (~0.28km in this case)
def find_local_maxes(conv_z,t,yind,conv_thresh,ONLY_SBZ):
    """Find the x indices of local convergence maxima along one y row.

    Reads the module-level `data` (for the 'xf' coordinate) and `kms`.

    Parameters
    ----------
    conv_z : 2-D array indexed [y, x]
        Convergence on a single vertical level.
    t : int
        Unused; kept for interface compatibility.
    yind : int
        Row (y index) of conv_z to analyse.
    conv_thresh : float
        A maximum is kept only if its convergence exceeds this value.
    ONLY_SBZ : bool
        Unused; kept for interface compatibility.

    Returns
    -------
    (ddx, local_maxes)
        ddx is the adjacent-point x-derivative of the row (one element shorter
        than the row). local_maxes holds the integer x indices where the
        derivative sign drops (positive to zero/negative, or zero to negative),
        that exceed conv_thresh, and that lie strictly more than 50*kms grid
        points from index 0 and below len(xf) - 50*kms.
    """
    xf=data['xf'].values
    dx=np.round(data['xf'][1]-data['xf'][0],2).item() #grid spacing of xf, rounded to 2 decimals

    #convergence along x for the requested row
    yconv=conv_z[yind,:]

    #dconv/dx from adjacent points; the point spacing is dx (dividing by 2*dx would halve it)
    ddx=(yconv[1:]-yconv[:-1])/dx

    #a local max sits where the sign of dconv/dx decreases;
    #+1 moves from the derivative index to the index of the peak itself
    slope_change=np.diff(np.sign(ddx))
    local_maxes=np.where(slope_change < 0)[0]+1

    #keep only maxes whose convergence exceeds the threshold
    local_maxes=local_maxes[yconv[local_maxes]>conv_thresh]

    #drop maxes within 50*kms grid points of either end of the x axis
    margin=50*kms
    local_maxes=local_maxes[(local_maxes>margin)&(local_maxes<len(xf)-margin)]
    return ddx,local_maxes

In [None]:
###################################################################################################################################
#Calculation Run

In [None]:
#Find_Local_Maxes Function
#(1) At a single time and z level, runs through each y-level
#(2) At each y-level, takes the x-derivative
#(3) Take sign(x_derivative)
#(4) Take diff(sign(x_derivative))
#(5) Max is located one index to the right of where the derivative sign changes from positive to negative, i.e. where the diff is negative
#[(6) Optional: the algorithm can run a second time over the leftover maxes after removing previous maxes from temporary variable]

In [None]:
#SBZ Convergence Line Search Algorithm (levels are separate) (python version 3.10.9) (All Max Algorithm)
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr; import time as time

def layermax(data,t,ONLY_SBZ):
    """Mark the x positions of local convergence maxima for one timestep.

    For every vertical level and every y row, find_local_maxes is run on the
    convergence field returned by get_conv2(data, t).

    Parameters
    ----------
    data : dataset-like
        Provides the 'zh', 'yh' and 'xh' coordinates that size the output.
    t : int
        Timestep index passed to get_conv2 and find_local_maxes.
    ONLY_SBZ : bool
        Forwarded unchanged to find_local_maxes.

    Returns
    -------
    numpy.ndarray of int16, shape (Nz, Ny, Nx)
        -1 everywhere except at detected maxima, where the element holds its
        own x index.

    Raises
    ------
    ValueError
        If the module-level `res` has no configured convergence threshold.
    """
    #convergence threshold (in 1/s) depends only on the grid resolution
    thresholds={'1km':1.0/1000,'250m':1.5/1000}
    if res not in thresholds:
        raise ValueError(f"Unsupported resolution res={res!r}; expected '1km' or '250m'.")
    conv_thresh=thresholds[res]

    Nz=len(data['zh'])
    Ny=len(data['yh'])
    Nx=len(data['xh'])

    #output array, -1 means "no maximum here"
    maxconv_x=np.full((Nz,Ny,Nx), -1, dtype=np.int16)

    #convergence of the current timestep: read once, not once per level
    conv=get_conv2(data,t)

    for zlev in range(Nz):
        conv_z=conv[zlev,:,:] #current z level for convergence
        for yind in range(Ny):
            #finds all local maxes in this row (the returned derivative is not needed)
            _,local_maxes=find_local_maxes(conv_z,t,yind,conv_thresh,ONLY_SBZ)
            #storing data
            maxconv_x[zlev,yind,local_maxes] = local_maxes
    return maxconv_x

In [None]:
def GetOutputName(job_id):
    """Build the HDF5 output path for the results of one job.

    Uses the module-level `job_array`, `ONLY_SBZ`, `res` and `t_res`:
    job-array runs write below 'CL_Tracking_Out/', and both the subfolder and
    the file name carry 'ONLY_SBZS' or 'ALL_CLS' depending on ONLY_SBZ. The
    file name ends with '_<job_id>.h5'.
    """
    label = 'ONLY_SBZS' if ONLY_SBZ else 'ALL_CLS'

    root = '/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/Project_Algorithms/Tracking_Algorithms/'
    if job_array:
        root = root + 'CL_Tracking_Out/'

    return f'{root}{label}/whereCL_{res}_{t_res}_{label}_{job_id}.h5'


def initiate_array(VarNames, data, job_id):
    """Create (or reuse) the per-job HDF5 output file and its datasets.

    One (time, zh, yh, xh) float64 dataset is ensured for every name in
    VarNames, sized from the coordinates of `data`, chunked one timestep at a
    time and resizable along the time axis.

    The file is opened in append mode, so a dataset left over from an earlier
    run is reused. If its time length differs from the current one it is
    resized, so that no stale timesteps survive; a mismatch in the spatial
    dimensions is reported as an error.

    Parameters
    ----------
    VarNames : iterable of str
        Dataset names to create.
    data : dataset-like
        Provides the 'time', 'zh', 'yh' and 'xh' coordinates.
    job_id : int
        Passed to GetOutputName to build the file path.

    Returns
    -------
    str
        Path of the output file.

    Raises
    ------
    ValueError
        If an existing dataset has different (zh, yh, xh) dimensions.
    """
    t_size = len(data['time'])
    spatial_shape = (len(data['zh']), len(data['yh']), len(data['xh']))

    out_file = GetOutputName(job_id)
    # h5py cannot create the file when its folder is missing.
    os.makedirs(os.path.dirname(out_file), exist_ok=True)

    with h5py.File(out_file, 'a') as f:
        for var_name in VarNames:
            if var_name not in f:
                # NOTE(review): stored as float64 although layermax produces
                # int16 indices; kept because readers of the file may rely on it.
                f.create_dataset(
                    var_name,
                    shape=(t_size,) + spatial_shape,
                    maxshape=(None,) + spatial_shape,
                    dtype='float64',
                    chunks=(1,) + spatial_shape
                )
                continue

            existing = f[var_name]
            if existing.shape[1:] != spatial_shape:
                raise ValueError(
                    f"Dataset '{var_name}' in {out_file} has shape {existing.shape}, "
                    f"which is incompatible with the current grid {spatial_shape}."
                )
            if existing.shape[0] != t_size:
                # Leftover from a run with a different time chunk: match the current length.
                existing.resize(t_size, axis=0)
    return out_file

In [None]:
#RUNNING ALGORITHM
ONLY_SBZ=False
# ONLY_SBZ=True
def SubsetZ(data, z_top=0.775):
    """Keep only the lowest vertical levels, up to the last one with zh <= z_top.

    Parameters
    ----------
    data : dataset-like
        Must expose data['zh'].data and isel(zh=...).
    z_top : float, optional
        Height cutoff in the units of data['zh'] (presumably km -- TODO
        confirm). Defaults to 0.775.

    Returns
    -------
    The result of data.isel(zh=slice(0, n)), where n is one more than the
    last index whose zh is <= z_top.

    Raises
    ------
    ValueError
        If no level satisfies zh <= z_top.
    """
    below = np.where(data['zh'].data <= z_top)[0]
    if below.size == 0:
        raise ValueError(f"No vertical level with zh <= {z_top}.")
    # Last matching index + 1 gives the number of levels to keep.
    num_zlevs = below[-1] + 1
    # Select vertical levels from 0 up to num_zlevs (not including num_zlevs)
    return data.isel(zh=slice(0, num_zlevs))
def RunAlgorithm(data, job_id):
    """Run layermax for every timestep of `data` and store the results on disk.

    The output file is prepared by initiate_array, then each timestep's
    (z, y, x) result is written into row t of the 'maxconv_x' dataset.
    Progress is printed for every timestep. Returns the output file path.
    """
    target = initiate_array(['maxconv_x'], data, job_id)
    n_steps = len(data['time'])

    with h5py.File(target, 'a') as h5out:
        for step in range(n_steps):
            print(f'Processing timestep {step}/{n_steps}')
            # maxima for this timestep go straight into the file at index `step`
            step_result = layermax(data, step, ONLY_SBZ)
            SaveData(h5out, step_result, 'maxconv_x', step)

    print(f'Data saved to {target}')
    return target


#SAVING DATA
def SaveData(h5file, data_array, var_name, t):
    """Write `data_array` into index `t` along the first axis of the 4-D dataset `var_name`."""
    target = h5file[var_name]
    target[t, :, :, :] = data_array

In [None]:
#JOB_ARRAY SETUP
########################################
# job_array=False
job_array=True
# Number of SLURM array tasks the time axis is split into, per grid resolution.
# Fail here for an unknown resolution instead of leaving num_jobs undefined.
try:
    num_jobs={'1km':60,'250m':150}[res]
except KeyError:
    raise ValueError(f"No job count configured for res={res!r}; expected '1km' or '250m'.") from None
########################################

In [None]:
#############################################
#RUNNING

In [None]:
start_time=time.time()

# Job-array mode: process only the time chunk assigned to this SLURM task.
if job_array==True:
    start_job,end_job,index_adjust,job_id=StartJobArray(num_jobs=num_jobs)
    print(f"job_id = {job_id} ==> Running for t = [{start_job},{end_job}]")
    data=data1.isel(time=slice(start_job,end_job))
# Single-run mode: process the whole time axis as job 0.
if job_array==False:
    start_job=0
    end_job=len(data1['time'])
    index_adjust=0
    job_id=0
    data=data1.copy()

# Restrict to the lowest vertical levels, then run and time the search.
data=SubsetZ(data)
output=RunAlgorithm(data, job_id)
end_time=time.time()
elapsed_time=end_time-start_time
print(f"Total Elapsed Time: {elapsed_time} seconds")

In [None]:
#RECOMBINING
# Set to False to skip merging the per-job output files.
recombine=True

In [None]:
def RecombineDask(ONLY_SBZ, num_jobs):
    """Concatenate the per-job HDF5 outputs along time into one file.

    The files whereCL_<res>_<t_res>_<varname>_<job_id>.h5 for job_id in
    1..num_jobs are opened lazily, concatenated along their first dimension,
    given the dimension names (time, z, y, x) and written with the netcdf4
    engine to '<base_path>../whereCL_<res>_<t_res>_<varname>.h5'.
    Uses the module-level `res` and `t_res`.

    Parameters
    ----------
    ONLY_SBZ : bool
        Selects the 'ONLY_SBZS' (True) or 'ALL_CLS' (False) set of files.
    num_jobs : int
        Number of per-job files to combine.

    Raises
    ------
    FileNotFoundError
        If any of the expected per-job files does not exist.
    """
    from dask.diagnostics import ProgressBar

    base_path = '/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/Project_Algorithms/Tracking_Algorithms/CL_Tracking_Out/'
    base_path += 'ALL_CLS/' if not ONLY_SBZ else 'ONLY_SBZS/'
    varname = 'ALL_CLS' if not ONLY_SBZ else 'ONLY_SBZS'

    filepaths = [f'{base_path}whereCL_{res}_{t_res}_{varname}_{job_id}.h5'
                 for job_id in range(1, num_jobs + 1)]

    # Report every missing job output at once rather than failing on the first.
    missing = [path for path in filepaths if not os.path.exists(path)]
    if missing:
        raise FileNotFoundError(
            f"{len(missing)} of {num_jobs} job output files are missing:\n" + "\n".join(missing)
        )

    out_path = f'{base_path}../whereCL_{res}_{t_res}_{varname}.h5'

    # The context manager closes all source files once the result is written.
    with xr.open_mfdataset(
        filepaths,
        concat_dim='phony_dim_0',  # phony_dim_0 is first dimension = time
        combine='nested',
        parallel=True,
        engine='h5netcdf',
        phony_dims='access'
    ) as combined:
        # Rename phony dims to meaningful ones
        ds = combined.rename({'phony_dim_0': 'time', 'phony_dim_1': 'z', 'phony_dim_2': 'y', 'phony_dim_3': 'x'})

        print(f"Saving combined dataset to: {out_path}")

        with ProgressBar():
            ds.to_netcdf(out_path, engine='netcdf4', compute=True)




In [None]:
# NOTE(review): this cell runs in every execution of the notebook; if each
# SLURM array task executes the full notebook, the merge should only be
# triggered after all tasks have finished -- confirm the intended workflow.
if recombine == True:
    RecombineDask(ONLY_SBZ=ONLY_SBZ, num_jobs=num_jobs)

In [None]:
# #################################
# # READING BACK IN

# import xarray as xr

# # Define the path to your output file
# ONLY_SBZ = False  # or True
# varname = 'ONLY_SBZS' if ONLY_SBZ else 'ALL_CLS'

# # File path
# file_path = f'/mnt/lustre/koa/koastore/torri_group/air_directory/' \
#             f'DCI-Project/Project_Algorithms/Tracking_Algorithms/' \
#             f'CL_Tracking_Out/' \
#             f'whereCL_{res}_{t_res}_{varname}.h5'

# # Open dataset (as it's valid NetCDF)
# ds = xr.open_dataset(file_path, engine='netcdf4')['maxconv_x']

In [None]:
# #TESTING
#################################

In [None]:
# #SIMPLE PLOT

# out=ds.isel(time=100,z=3).data
# plt.contourf(out)

In [None]:
# #TESTING COMPARING TO VERSION2
# ######
# load_dir = '/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/Project_Algorithms/Tracking_Algorithms/CL_Tracking_Out/'
# open_name = load_dir+f'whereCL_{res}_{t_res}_ALL_CLS_50.nc'
# hey=xr.open_dataset(open_name)['maxconv_x']
# print(np.where(hey!=-1))
# hey.isel(time=0,z=7).plot()

# ######
# #TESTING COMPARING TO VERSION2
# load_dir = '/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/Project_Algorithms/Tracking_Algorithms/'
# open_name = load_dir+f'whereCL_{res}_{t_res}_ALL_CLS.nc'
# out=xr.open_dataset(open_name)['maxconv_x']
# print(np.all(hey==out))

In [None]:
# #TESTING COMPARING TO VERSION2 (FULL DATA)
# ######
# #TESTING COMPARING TO VERSION2
# load_dir = '/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/Project_Algorithms/Tracking_Algorithms/'
# open_name = load_dir+f'whereCL_{res}_{t_res}_ALL_CLS.nc'
# out1=xr.open_dataset(open_name)['maxconv_x']
# ######
# #TESTING COMPARING TO VERSION2
# load_dir = '/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/Project_Algorithms/Tracking_Algorithms/CL_Tracking_Out/'
# open_name = load_dir+f'whereCL_{res}_{t_res}_ALL_CLS.nc'
# out2=xr.open_dataset(open_name)['maxconv_x']

# #####
# t=10
# for t in np.arange(1,661,5):
#     print(np.all(out1[t]==out2[t]).data)