In [None]:
#Loading in Packages and Data

#Importing Packages
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib.colors import Normalize
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import ScalarFormatter
import matplotlib.gridspec as gridspec
import xarray as xr
import os; import time
import pickle
import h5py
###############################################################
def coefs(coefficients,degree):
    """Format polynomial coefficients as a readable string.

    One term ``(c)x^n`` is written for every power n from ``degree`` down to 0,
    with the coefficient shown in ``.1e`` notation and the terms joined by
    `` + ``. Coefficients are counted from the end of the sequence: the
    constant term is the last element, the x^1 term the one before it, etc.

    Parameters
    ----------
    coefficients : sequence of numbers
    degree : int
        Highest power to print; a negative degree yields an empty string.

    Returns
    -------
    str
    """
    count=len(coefficients)
    terms=[
        f"({coefficients[count-(power+1)]:.1e})" + f"x^{power}"
        for power in range(degree, -1, -1)
    ]
    return " + ".join(terms)
###############################################################

#Importing Model Data
# Exactly one run configuration should be active at a time; the alternatives
# are kept as commented-out blocks so they can be switched by hand.
check=False
# `dir` is the project root that later cells build their input/output paths
# from (note: this name shadows the `dir` builtin).
dir='/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/'

# dx = 1 km; Np = 1M; Nt = 5 min
# Disabled: this configuration used to be opened and then immediately replaced
# by the 50M configuration below, so both NetCDF files were opened for nothing.
# It also never defined `t_res`, which the file names built in later cells need.
# data=xr.open_dataset(dir+'../cm1r20.3/run/cm1out_1km_1e6.nc', decode_timedelta=True) #***
# parcel=xr.open_dataset(dir+'../cm1r20.3/run/cm1out_pdata_1km_1e6.nc', decode_timedelta=True) #***
# res='1km'
# Np_str='1e6'

# dx = 1km; Np = 50M
#Importing Model Data
dir2='/home/air673/koa_scratch/'
data=xr.open_dataset(dir2+'cm1out_1km_1min.nc', decode_timedelta=True) #***
parcel=xr.open_dataset(dir2+'cm1out_pdata_1km_1min_50M.nc', decode_timedelta=True) #***
# res / t_res / Np_str are the tags used in the names of the HDF5 files that
# later cells read and write.
res='1km'; t_res='1min'; Np_str='50e6'

# # dx = 1km; Np = 100M
# #Importing Model Data
# check=False
# dir2='/home/air673/koa_scratch/'
# data=xr.open_dataset(dir2+'cm1out_1km_1min.nc', decode_timedelta=True) #***
# parcel=xr.open_dataset(dir2+'cm1out_pdata_1km_1min_100M.nc', decode_timedelta=True) #***
# res='1km'; t_res='1min'; Np_str='100e6'


# dx = 250 m
# #Importing Model Data
# check=False
# dir2='/home/air673/koa_scratch/'
# data=xr.open_dataset(dir2+'cm1out_250m.nc', decode_timedelta=True) #***
# parcel=xr.open_dataset(dir2+'cm1out_pdata_250m.nc', decode_timedelta=True) #***

In [None]:
def check_memory(namespace=None):
    """Print the ten largest objects of a namespace and their combined size.

    Sizes come from ``sys.getsizeof``, i.e. they are shallow: containers are
    not traversed, and objects that only reference data held elsewhere report
    just their own footprint. The final total covers only the (at most ten)
    objects that are listed, not the whole process.

    Parameters
    ----------
    namespace : dict, optional
        Mapping of names to objects to inspect, e.g. ``globals()`` of the
        caller. Defaults to the globals of the module defining this function,
        which is what the previous zero-argument version always used.

    Names starting with ``_``, names that are also keys of ``sys.modules`` and
    a short list of IPython bookkeeping names are skipped.
    """
    import sys
    if namespace is None:
        namespace = globals()
    ipython_vars = ["In", "Out", "exit", "quit", "get_ipython", "ipython_vars"]
    print("Top 10 objects with highest memory usage")
    # Snapshot the items first so the namespace can change while we measure.
    sizes = [
        (name, sys.getsizeof(obj))
        for name, obj in list(namespace.items())
        if not name.startswith("_") and name not in sys.modules and name not in ipython_vars
    ]
    sizes.sort(key=lambda item: item[1], reverse=True)
    # Bytes -> MB, keeping only the ten largest entries.
    mem = {name: round(size/1e6,2) for name, size in sizes[:10]}
    print({key:f"{value} MB" for key,value in mem.items()})
    # Round after converting MB -> GB so the printed figure carries no
    # floating-point noise from the division.
    print(f"\n{round(sum(mem.values())/1000,5)} GB in use overall")

In [None]:
import sys
# Make the shared helper modules importable: they are looked up in the
# Functions/ directory that sits next to the DCI-Project directory.
dir2='/mnt/lustre/koa/koastore/torri_group/air_directory/DCI-Project/'
path=dir2+'../Functions/'
sys.path.append(path)

# NOTE(review): the star imports below copy every public name of both modules
# into this notebook's namespace, so a helper defined there can silently
# replace a name defined in an earlier cell (and vice versa).
import NumericalFunctions
from NumericalFunctions import * # also expose the helpers unqualified
import PlottingFunctions
from PlottingFunctions import * # also expose the helpers unqualified


# # Get all functions in NumericalFunctions
# import inspect
# functions = [f[0] for f in inspect.getmembers(NumericalFunctions, inspect.isfunction)]
# functions

In [None]:
#JOB ARRAY SETUP
# Splits the parcel 'xh' dimension into `num_jobs` contiguous chunks and picks
# the chunk that belongs to the current SLURM array task. The resulting
# start_job/end_job are used by later cells to slice the parcel data.
# NOTE(review): if job_array is set to False, start_job/end_job/job_id are
# never defined and the later cells that use them will fail.
job_array=True
if job_array==True:

    num_jobs=60 #how many total jobs are being run? i.e. array=1-100 ==> num_jobs=100 #***
    total_elements=len(parcel['xh']) #length of the parcel 'xh' dimension (presumably the number of parcels)

    if num_jobs >= total_elements:
        raise ValueError("Number of jobs cannot be greater than or equal to total elements.")
    
    job_range = total_elements // num_jobs  # Base size for each chunk
    remaining = total_elements % num_jobs   # Number of chunks with 1 extra 
    
    # Function to compute the start and end for each job_id
    def get_job_range(job_id, num_jobs):
        """Return (start_job, end_job), a half-open index range, for a 1-based job_id.

        Reads job_range, remaining and total_elements from the enclosing cell.
        The first `remaining` jobs receive one extra element each.
        NOTE(review): job_id is not checked against num_jobs; an id larger
        than num_jobs yields a range past total_elements.
        """
        job_id-=1  # switch to a 0-based job index
        # Add one extra element to the first 'remaining' chunks
        start_job = job_id * job_range + min(job_id, remaining)
        end_job = start_job + job_range + (1 if job_id < remaining else 0)
    
        # The last job always ends at the final element.
        if job_id == num_jobs - 1: 
            end_job = total_elements #- 1
        return start_job, end_job
    # def job_testing():
    #     #TESTING
    #     start=[];end=[]
    #     for job_id in range(1,num_jobs+1):
    #         start_job, end_job = get_job_range(job_id)
    #         print(start_job,end_job)
    #         start.append(start_job)
    #         end.append(end_job)
    #     print(np.all(start!=end))
    #     print(len(np.unique(start))==len(start))
    #     print(len(np.unique(end))==len(end))
    # job_testing()
    
    job_id = int(os.environ.get('SLURM_ARRAY_TASK_ID', 0)) #this is the current SBATCH job id
    # Outside SLURM (variable unset, or set to 0) behave like the first job.
    if job_id==0: job_id=1
    start_job, end_job = get_job_range(job_id, num_jobs)
    index_adjust=start_job  # offset of this chunk within the full parcel dimension
    print(f'start_job = {start_job}, end_job = {end_job}')

In [None]:
#Indexing Array with JobArray
# Keep only this job's parcels.
# NOTE(review): this rebinds `parcel`, so re-running the cell slices the
# already-sliced dataset again; run it once per kernel session.
parcel=parcel.isel(xh=slice(start_job,end_job))
#(for 150_000_000 parcels use 500-1000 jobs)

In [None]:
#RESIDENCE CODE
########################################################################################################################

In [None]:
# Reading Back Data Later
##############
def make_data_dict(in_file,var_names,read_type):
    """Load this job's columns of the requested variables from an HDF5 file.

    Every variable is indexed as ``[:, start_job:end_job]`` using the
    notebook-level start_job/end_job, so each dataset must be at least 2-D
    with the job-sliced dimension second.

    Parameters
    ----------
    in_file : str
        Path of the HDF5 file to read.
    var_names : iterable of str
        Names of the datasets to load.
    read_type : {'h5py', 'xarray'}
        Backend used for reading. 'xarray' opens the file through the
        h5netcdf engine with phony dimensions and computes each slice into
        memory before the file is closed.

    Returns
    -------
    dict
        Maps each name in ``var_names`` to the loaded array slice.

    Raises
    ------
    ValueError
        If ``read_type`` is not one of the supported backends (previously
        this surfaced as an UnboundLocalError on the return statement).
    """
    if read_type=='h5py':
        with h5py.File(in_file, 'r') as f:
            data_dict = {var_name: f[var_name][:,start_job:end_job] for var_name in var_names}

    elif read_type=='xarray':
        # The context manager closes the file once every slice is in memory.
        with xr.open_dataset(
            in_file,
            engine='h5netcdf',
            phony_dims='sort',
            chunks={'phony_dim_0': 100, 'phony_dim_1': 1_000_000}
        ) as in_data:
            data_dict = {k: in_data[k][:,start_job:end_job].compute().data for k in var_names}

    else:
        raise ValueError(f"read_type must be 'h5py' or 'xarray', got {read_type!r}")
    return data_dict

# read_type='xarray'
read_type='h5py'

In [None]:
import h5py
# Read this job's slice of the Lagrangian binary arrays. A_g / A_c are the
# flag arrays that residence_times selects for 'general' / 'cloudy'; Z, Y, X
# are indexed there as [time, parcel] to locate each event.
# NOTE(review): dir2 is rebound here and again in several later cells, so its
# value depends on which cell ran last.
dir2=dir+'Project_Algorithms/Lagrangian_Binary_Array/'
in_file=dir2+f'lagrangian_binary_array_{res}_{t_res}_{Np_str}.h5'

var_names = ['A_g', 'A_c', 'Z', 'Y', 'X']
data_dict = make_data_dict(in_file,var_names,read_type)
A_g, A_c, Z, Y, X = (data_dict[k] for k in var_names)

# #Making Time Matrix
# rows, cols = A.shape[0], A.shape[1]
# T = np.arange(rows).reshape(-1, 1) * np.ones((1, cols), dtype=int)
check_memory()

In [None]:
# #READING BACK IN
# dir2=dir+'Project_Algorithms/Entrainment/'
# in_file=dir2+f'processed_binary_arrays_{res}_{t_res}_{Np_str}.h5'

# var_names = ['A_g_Processed', 'A_c_Processed']
# data_dict = make_data_dict(in_file,var_names,read_type)
# A_g_Processed, A_c_Processed = (data_dict[k] for k in var_names)
# check_memory(globals())

In [None]:
#CALCULATING RESIDENCE TIMES
#############################

In [None]:
# NOTE(review): not used by residence_times below; dir2 is rebound again before
# the next cell that reads it.
dir2=dir+'Project_Algorithms/Lagrangian_Binary_Array/'
def residence_times(p,type,updraft_type):
    """Find the runs of consecutive timesteps that parcel ``p`` spends in one state.

    The parcel's column of a binary (time, parcel) flag array is scanned for
    runs of consecutive 1s. Each retained run is reported by the index of its
    first timestep, its length in timesteps, and the Z/Y/X values stored for
    the parcel at that first timestep.

    A run that is still ongoing at the final timestep is discarded because its
    true length is unknown. Runs that end earlier are all kept.

    Parameters
    ----------
    p : int
        Column (parcel) index into the notebook-level arrays A_g/A_c, Z, Y, X.
    type : {'e', 'd'}
        'e' analyses the runs where the flag is 1; 'd' analyses the runs of
        the complement ``1 - flag``.
    updraft_type : {'general', 'cloudy'}
        Flag array to read: A_g for 'general', A_c for 'cloudy'.

    Returns
    -------
    list
        ``[]`` when the parcel's flag is never set or no run is retained.
        Otherwise ``[lens, ts, zs, ys, xs, last_lens]``, six equally long
        arrays with one entry per retained run:
        lens      - run length in timesteps;
        ts        - index of the run's first timestep;
        zs,ys,xs  - ``Z[ts,p]``, ``Y[ts,p]``, ``X[ts,p]``;
        last_lens - timesteps between the final step of the previous run and
                    the start of this one; the first run has no predecessor
                    and gets the sentinel -1e5.

    Raises
    ------
    ValueError
        If ``type`` or ``updraft_type`` is not one of the values listed above.
    """
    if updraft_type=='general':
        A=A_g
    elif updraft_type=='cloudy':
        A=A_c
    else:
        raise ValueError(f"updraft_type must be 'general' or 'cloudy', got {updraft_type!r}")
    if type not in ('e','d'):
        raise ValueError(f"type must be 'e' or 'd', got {type!r}")

    B = A[:,p]

    # B=np.array([0,1,0,1,0,0,1,1,0,1,1]) #TESTING

    # A parcel whose flag is never set contributes nothing, for either type.
    if not np.any(B):
        return []

    C = B.copy() if type=='e' else 1-B

    # Pad with zeros so runs touching either end of the series still produce
    # a +1 (start) and a -1 (one past the end) in the difference.
    changes = np.diff(np.concatenate(([0], C, [0])))
    start_ind = np.where(changes == 1)[0]  # first index of each run
    end_ind = np.where(changes == -1)[0]   # one past the last index of each run

    # Drop the final run only when it actually reaches the end of the
    # simulation; previously the last run was removed unconditionally.
    if end_ind.size and end_ind[-1] == len(C):
        start_ind = start_ind[:-1]
        end_ind = end_ind[:-1]

    # Test for "no runs left" by size: a run starting at index 0 is valid, but
    # the old truthiness test np.any(ts) treated a lone ts == [0] as empty.
    if start_ind.size == 0:
        return []

    ts = start_ind
    lens = end_ind - start_ind

    # The previous run's final timestep is end_ind[k-1]-1, so the distance
    # from it to the start of run k is start_ind[k] - end_ind[k-1] + 1.
    gaps = start_ind[1:] - end_ind[:-1] + 1
    last_lens = np.insert(gaps, 0, -1e5)  # sentinel: the first run has no predecessor

    zs=Z[ts,p]
    ys=Y[ts,p]
    xs=X[ts,p]
    return [lens,ts,zs,ys,xs,last_lens]

# Spot check on one arbitrary parcel; the cell displays whatever
# residence_times returns for it.
p=1234; out=residence_times(p,type='e',updraft_type='cloudy')
out


In [None]:
#ENTRAINMENT/DETRAINMENT PROFILES
# For every retained entrainment event (type='e') of this job's parcels, add
# the run length (in timesteps) to the grid cell where the run started: once
# in the (y, x) plane and once in the (z, x) plane. The *_counter arrays count
# the events per cell so the per-job files can be summed and averaged in the
# compile step.
# Assumes Z, Y, X hold integer grid indices - TODO confirm.
Nx=len(data['xh']);Ny=len(data['yh']);Nz=len(data['zh'])
yx_array=np.zeros((Ny,Nx));yx_counter=np.zeros_like(yx_array)
zx_array=np.zeros((Nz,Nx));zx_counter=np.zeros_like(zx_array)

# Visit every parcel of this job's slice. The previous bound,
# len(parcel['xh'])-1, skipped the last parcel of each job.
Np=len(parcel['xh'])
for p in np.arange(Np):
    if np.mod(p,4000)==0: print(p)
    out=residence_times(p,type='e',updraft_type='cloudy')

    # out is [] when the parcel has no retained event, otherwise
    # [lens, ts, zs, ys, xs, last_lens].
    if len(out)>0:
        lens,zs,ys,xs=out[0],out[2],out[3],out[4]
        # np.add.at accumulates correctly when several events share a cell.
        np.add.at(yx_array, (ys, xs), lens)
        np.add.at(yx_counter, (ys, xs), 1)

        np.add.at(zx_array, (zs, xs), lens)  # Add residence times to (z, x) positions
        np.add.at(zx_counter, (zs, xs), 1)


# #Divide by Counts (MOVED TO JOB_ARRAY COMPILE STEP)
# mask = yx_counter != 0
# yx_array[mask]/=yx_counter[mask]
# mask = zx_counter != 0
# zx_array[mask]/=zx_counter[mask]

# #Convert to Minutes
# mins=((data['time'][1]-data['time'][0])/1e9/60).item()
# yx_array*=mins
# zx_array*=mins


#SAVING
# One file per job; the job id in the name keeps array tasks from overwriting
# each other.
dir2=dir+'Project_Algorithms/Entrainment/'
output_file = dir2+f'job_out/e_residence_time_arrays_{res}_{t_res}_{Np_str}_{job_id}.h5' 
with h5py.File(output_file, 'w') as f:
    f.create_dataset('yx_array', data=yx_array, compression="gzip")
    f.create_dataset('yx_counter', data=yx_counter, compression="gzip")
    f.create_dataset('zx_array', data=zx_array, compression="gzip")
    f.create_dataset('zx_counter', data=zx_counter, compression="gzip")

In [None]:
#ENTRAINMENT/DETRAINMENT PROFILES
# For every retained detrainment event (type='d') of this job's parcels, add
# the run length (in timesteps) to the grid cell where the run started: once
# in the (y, x) plane and once in the (z, x) plane. The *_counter arrays count
# the events per cell so the per-job files can be summed and averaged in the
# compile step.
# Assumes Z, Y, X hold integer grid indices - TODO confirm.
Nx=len(data['xh']);Ny=len(data['yh']);Nz=len(data['zh'])
yx_array=np.zeros((Ny,Nx));yx_counter=np.zeros_like(yx_array)
zx_array=np.zeros((Nz,Nx));zx_counter=np.zeros_like(zx_array)

# Visit every parcel of this job's slice. The previous bound,
# len(parcel['xh'])-1, skipped the last parcel of each job.
Np=len(parcel['xh'])
for p in np.arange(Np):
    if np.mod(p,4000)==0: print(p)
    out=residence_times(p,type='d',updraft_type='cloudy')

    # out is [] when the parcel has no retained event, otherwise
    # [lens, ts, zs, ys, xs, last_lens].
    if len(out)>0:
        lens,zs,ys,xs=out[0],out[2],out[3],out[4]
        # np.add.at accumulates correctly when several events share a cell.
        np.add.at(yx_array, (ys, xs), lens)
        np.add.at(yx_counter, (ys, xs), 1)

        np.add.at(zx_array, (zs, xs), lens)  # Add run lengths to (z, x) positions
        np.add.at(zx_counter, (zs, xs), 1)


# #Divide by Counts (MOVED TO JOB_ARRAY COMPILE STEP)
# mask = yx_counter != 0
# yx_array[mask]/=yx_counter[mask]
# mask = zx_counter != 0
# zx_array[mask]/=zx_counter[mask]

# #Convert to Minutes
# mins=((data['time'][1]-data['time'][0])/1e9/60).item()
# yx_array*=mins
# zx_array*=mins


#SAVING
# One file per job; the job id in the name keeps array tasks from overwriting
# each other.
dir2=dir+'Project_Algorithms/Entrainment/'
output_file = dir2+f'job_out/d_residence_time_arrays_{res}_{t_res}_{Np_str}_{job_id}.h5' 
with h5py.File(output_file, 'w') as f:
    f.create_dataset('yx_array', data=yx_array, compression="gzip")
    f.create_dataset('yx_counter', data=yx_counter, compression="gzip")
    f.create_dataset('zx_array', data=zx_array, compression="gzip")
    f.create_dataset('zx_counter', data=zx_counter, compression="gzip")

In [None]:
#########################################
#RECOMBINE SEPARATE JOB_ARRAYS AFTER

In [None]:
# Compile step for the entrainment ('e') results: sum the per-job sums and
# counters, turn them into a per-cell mean run length, convert to minutes and
# write one combined file. Run only after every array job has finished.
dir2=dir+'Project_Algorithms/Entrainment/'
output_file = dir2+f'job_out/e_residence_time_arrays_{res}_{t_res}_{Np_str}.h5' 

Nz=len(data['zh'])
Ny=len(data['yh'])
Nx=len(data['xh'])
yx_array=np.zeros((Ny,Nx))
yx_counter=yx_array.copy()                  
zx_array=np.zeros((Nz,Nx))
zx_counter=zx_array.copy()                  

# NOTE(review): must match the num_jobs used when the jobs were submitted; the
# loop below also rebinds the notebook-level job_id.
num_jobs=60
for job_id in np.arange(1,num_jobs+1):
    if np.mod(job_id,20)==0: print(f"{job_id}/{num_jobs}")
    input_file = dir2+f'job_out/e_residence_time_arrays_{res}_{t_res}_{Np_str}_{job_id}.h5' 
    with h5py.File(input_file,'r') as f:
        yx_array+=f['yx_array']
        yx_counter+=f['yx_counter']
        zx_array+=f['zx_array']
        zx_counter+=f['zx_counter']

######################################################
#Divide by Counts
# Only cells with at least one event are divided; the others stay 0.
print('dividing by counts')
mask = yx_counter != 0
yx_array[mask]/=yx_counter[mask]
mask = zx_counter != 0
zx_array[mask]/=zx_counter[mask]

#Convert to Minutes
# Output interval taken from the first two time values.
# NOTE(review): the /1e9/60 scaling presumes the difference is expressed in
# nanoseconds - confirm for the decoded time coordinate.
mins=((data['time'][1]-data['time'][0])/1e9/60).item()
yx_array*=mins
zx_array*=mins
######################################################

#SAVING INTO FINAL FORM
print('saving')
with h5py.File(output_file, 'w') as f:
    f.create_dataset('yx_array', data=yx_array, compression="gzip")
    f.create_dataset('yx_counter', data=yx_counter, compression="gzip")
    f.create_dataset('zx_array', data=zx_array, compression="gzip")
    f.create_dataset('zx_counter', data=zx_counter, compression="gzip") 

In [None]:
# Compile step for the detrainment ('d') results: sum the per-job sums and
# counters, turn them into a per-cell mean run length, convert to minutes and
# write one combined file. Run only after every array job has finished.
dir2=dir+'Project_Algorithms/Entrainment/'
output_file = dir2+f'job_out/d_residence_time_arrays_{res}_{t_res}_{Np_str}.h5' 

Nz=len(data['zh'])
Ny=len(data['yh'])
Nx=len(data['xh'])
yx_array=np.zeros((Ny,Nx))
yx_counter=yx_array.copy()                  
zx_array=np.zeros((Nz,Nx))
zx_counter=zx_array.copy()                  

# NOTE(review): must match the num_jobs used when the jobs were submitted; the
# loop below also rebinds the notebook-level job_id.
num_jobs=60
for job_id in np.arange(1,num_jobs+1):
    if np.mod(job_id,20)==0: print(f"{job_id}/{num_jobs}")
    input_file = dir2+f'job_out/d_residence_time_arrays_{res}_{t_res}_{Np_str}_{job_id}.h5' 
    with h5py.File(input_file,'r') as f:
        yx_array+=f['yx_array']
        yx_counter+=f['yx_counter']
        zx_array+=f['zx_array']
        zx_counter+=f['zx_counter']

######################################################
#Divide by Counts
# Only cells with at least one event are divided; the others stay 0.
print('dividing by counts')
mask = yx_counter != 0
yx_array[mask]/=yx_counter[mask]
mask = zx_counter != 0
zx_array[mask]/=zx_counter[mask]

#Convert to Minutes
# Output interval taken from the first two time values.
# NOTE(review): the /1e9/60 scaling presumes the difference is expressed in
# nanoseconds - confirm for the decoded time coordinate.
mins=((data['time'][1]-data['time'][0])/1e9/60).item()
yx_array*=mins
zx_array*=mins
######################################################

#SAVING INTO FINAL FORM
print('saving')
with h5py.File(output_file, 'w') as f:
    f.create_dataset('yx_array', data=yx_array, compression="gzip")
    f.create_dataset('yx_counter', data=yx_counter, compression="gzip")
    f.create_dataset('zx_array', data=zx_array, compression="gzip")
    f.create_dataset('zx_counter', data=zx_counter, compression="gzip") 

In [None]:
########################
#PLOTTING

In [None]:
# Read the combined entrainment arrays back for plotting.
# NOTE(review): relies on dir2 still pointing at the Entrainment directory,
# i.e. on one of the compile/saving cells having run last.
output_file = dir2+f'job_out/e_residence_time_arrays_{res}_{t_res}_{Np_str}.h5' 
with h5py.File(output_file, 'r') as f:
    yx_array = f['yx_array'][:]
    yx_counter = f['yx_counter'][:]
    zx_array = f['zx_array'][:]
    zx_counter = f['zx_counter'][:]

In [None]:
# Blank out cells equal to zero so contourf leaves them empty (presumably the
# cells where no event was recorded). This modifies the arrays in place.
yx_array[yx_array==0]=np.nan
zx_array[zx_array==0]=np.nan

# NOTE(review): 512 and 34 are hard-coded in the aspect ratio - presumably
# domain dimensions; confirm before reusing with another run.
fig = plt.figure(figsize=(8*(512/34)/5, 8))
gs = gridspec.GridSpec(2, 1)  # 2 rows, 1 column
cmap='seismic'
levels1=15;levels2=15

# First subplot (yx_array contour)
ax1 = fig.add_subplot(gs[0, 0])  # Top row
contour1 = ax1.contourf(yx_array,levels=levels1,cmap=cmap)
cbar1 = plt.colorbar(contour1, ax=ax1);cbar1.set_label("mins / parcel")
ax1.set_title("XY Entrainment Residence Time (Plotted by Entrainment Time)")

# Second subplot (zx_array contour)
ax2 = fig.add_subplot(gs[1, 0])  # Bottom row
contour2 = ax2.contourf(zx_array, levels=levels2, cmap=cmap)
cbar2 = plt.colorbar(contour2, ax=ax2);cbar2.set_label("mins / parcel")
ax2.set_title("XZ Entrainment Residence Time (Plotted by Entrainment Time)")


#COASTLINE
# Vertical line at a fixed fraction of the x extent on both panels.
ocean_fraction=2/8
ax1.axvline(yx_array.shape[1]*ocean_fraction,color='green',linewidth=3)
ax2.axvline(yx_array.shape[1]*ocean_fraction,color='green',linewidth=3)

#THICKEN COLOR LINES

for edge in cbar1.ax.collections:  # every collection drawn on the first colorbar's axes
    edge.set_linewidth(10)
for edge in cbar2.ax.collections:  # every collection drawn on the second colorbar's axes
    edge.set_linewidth(8)



In [None]:
# Currently I look for runs of being in a cloudy updraft that are at least 5 minutes (1 timestep) long. I add the total length of that run at the location of initial entrainment/detrainment, then I average to get the vertical profile.

# For entrainment, particles should stay in a cloud for 30/60 minutes.
# I’m very surprised by a) the extremely low values for entrainment time and b) the extremely high values for detrainment times.
# Another thing: this is only cloudy updrafts, not clouds as well. Is that 30/60 minutes true for the “cloudy updrafts”?
# For detrainment then, there are some parcels that don’t interact with any “cloudy updrafts” for a very long time.
# Also, if you look at the contour plot we do have higher values than in the vertical profile.

# Mean over x of the (z, x) map, ignoring NaN cells, plotted against height.
plt.plot(np.nanmean(zx_array[:,:],axis=(1)),data['zh'],label='everywhere')
plt.ylabel('z (km)');plt.xlabel('residence time (mins)')


# plt.plot(np.nanmean(zx_array[:,int(512/2):512],axis=(1)),data['zh'],label='over land')
# plt.ylabel('z (km)');plt.xlabel('preconditioning time (mins)')

# plt.plot(np.nanmean(zx_array[:,0:int(512/2)],axis=(1)),data['zh'],label='over ocean')
# plt.ylabel('z (km)');plt.xlabel('preconditioning time (mins)')

plt.ylim(top=20)
# NOTE(review): the title says 'preconditioning time' while the x axis is
# labelled 'residence time' - pick one.
plt.title('preconditioning time')
plt.legend()

In [None]:
# Read the combined detrainment arrays back for plotting.
# NOTE(review): relies on dir2 still pointing at the Entrainment directory,
# i.e. on one of the compile/saving cells having run last.
output_file = dir2+f'job_out/d_residence_time_arrays_{res}_{t_res}_{Np_str}.h5' 
with h5py.File(output_file, 'r') as f:
    yx_array = f['yx_array'][:]
    yx_counter = f['yx_counter'][:]
    zx_array = f['zx_array'][:]
    zx_counter = f['zx_counter'][:]

In [None]:
# Blank out cells equal to zero so contourf leaves them empty (presumably the
# cells where no event was recorded). This modifies the arrays in place.
yx_array[yx_array==0]=np.nan
zx_array[zx_array==0]=np.nan

# NOTE(review): 512 and 34 are hard-coded in the aspect ratio - presumably
# domain dimensions; confirm before reusing with another run.
fig = plt.figure(figsize=(8*(512/34)/5, 8))
gs = gridspec.GridSpec(2, 1)  # 2 rows, 1 column
cmap='seismic'
levels1=15;levels2=15

# First subplot (yx_array contour)
ax1 = fig.add_subplot(gs[0, 0])  # Top row
contour1 = ax1.contourf(yx_array,levels=levels1,cmap=cmap)
cbar1 = plt.colorbar(contour1, ax=ax1);cbar1.set_label("mins / parcel")
ax1.set_title("XY Detrainment Residence Time (Plotted by Detrainment Time)")

# Second subplot (zx_array contour)
ax2 = fig.add_subplot(gs[1, 0])  # Bottom row
contour2 = ax2.contourf(zx_array, levels=levels2, cmap=cmap)
cbar2 = plt.colorbar(contour2, ax=ax2);cbar2.set_label("mins / parcel")
ax2.set_title("XZ Detrainment Residence Time (Plotted by Detrainment Time)")


#COASTLINE
# Vertical line at a fixed fraction of the x extent on both panels.
ocean_fraction=2/8
ax1.axvline(yx_array.shape[1]*ocean_fraction,color='green',linewidth=3)
ax2.axvline(yx_array.shape[1]*ocean_fraction,color='green',linewidth=3)

#THICKEN COLOR LINES

for edge in cbar1.ax.collections:  # every collection drawn on the first colorbar's axes
    edge.set_linewidth(10)
for edge in cbar2.ax.collections:  # every collection drawn on the second colorbar's axes
    edge.set_linewidth(8)



In [None]:
# Currently I look for runs of being in a cloudy updraft that are at least 5 minutes (1 timestep) long. I add the total length of that run at the location of initial entrainment/detrainment, then I average to get the vertical profile.

# For entrainment, particles should stay in a cloud for 30/60 minutes.
# I’m very surprised by a) the extremely low values for entrainment time and b) the extremely high values for detrainment times.
# Another thing: this is only cloudy updrafts, not clouds as well. Is that 30/60 minutes true for the “cloudy updrafts”?
# For detrainment then, there are some parcels that don’t interact with any “cloudy updrafts” for a very long time.
# Also, if you look at the contour plot we do have higher values than in the vertical profile.

# Mean over x of the (z, x) map, ignoring NaN cells, plotted against height.
plt.plot(np.nanmean(zx_array[:,:],axis=(1)),data['zh'],label='everywhere')
plt.ylabel('z (km)');plt.xlabel('nonresidence time (mins)')


# plt.plot(np.nanmean(zx_array[:,int(512/2):512],axis=(1)),data['zh'],label='over land')
# plt.ylabel('z (km)');plt.xlabel('preconditioning time (mins)')

# plt.plot(np.nanmean(zx_array[:,0:int(512/2)],axis=(1)),data['zh'],label='over ocean')
# plt.ylabel('z (km)');plt.xlabel('preconditioning time (mins)')

plt.ylim(top=20)
plt.title('nonresidence time')
plt.legend()

In [None]:
#IMPORTANT FOR PLOTTING

# xticks/yticks
# xticks = plt.gca().get_xticks()
# new_labels = [str(int(tick * 5)) for tick in xticks]
# plt.gca().set_xticklabels(new_labels);

# cbar-ticks
# cbar_ticks = cbar.get_ticks()  # Get the current ticks
# new_ticks = [str(int(tick * 5)) for tick in cbar_ticks]  # Modify ticks (multiply by 5 and convert to string)
# cbar.set_ticks(cbar_ticks)  # Set the original ticks again to avoid resetting
# cbar.set_ticklabels(new_ticks)


#imshow
# plt.yticks(np.arange(Nz))
# new_ytick_labels = np.round(data['zf'].values[:Nz], 2)
# plt.gca().set_yticklabels(new_ytick_labels, fontsize=8, rotation=0)

In [None]:
###################################
#OTHER TESTING FOR POSSIBLE ANALYSES

In [None]:
#MAKING 2D HISTOGRAM OF RESIDENCE TIME VS Z (COLOR: VARIABLE)

In [None]:
def averaged_profiles(profile):
    """Turn (sum, count, coordinate) rows into (mean, coordinate) rows.

    Rows whose second column (the count) is zero are dropped. For the rest,
    the first column is divided by the second and paired with the third.

    Parameters
    ----------
    profile : 2-D array with at least three columns

    Returns
    -------
    2-D array with two columns: column 0 / column 1, and column 2.
    """
    has_data = profile[:, 1] != 0
    kept = profile[has_data]
    means = kept[:, 0] / kept[:, 1]
    coords = kept[:, 2]
    return np.array([means, coords]).T

In [None]:
# Collect the run lengths of every parcel and report the longest one (in
# timesteps). Uses whatever Np the last executed cell left behind.
# NOTE(review): max() raises ValueError if no parcel produced any event.
lst=[]
for p in np.arange(Np):
    out=residence_times(p,type='e',updraft_type='cloudy')
    if np.any(out)==True:
        lens=out[0]
        lst.append(lens)
print(f'max entrainment time: {max(arr.max() for arr in lst)}')

In [None]:
# 2-D histogram: number of entrainment events per (z index at run start,
# run length in timesteps), then normalised so each z row sums to 1.
Nz=len(data['zh'])
# NOTE(review): 18 columns means a run of 18 or more timesteps raises an
# IndexError below - confirm the longest run fits.
profile=np.zeros((Nz,18)); #residence time by Z levels
counter=np.zeros_like(profile)  # not used in this cell

# NOTE(review): hard-coded parcel count; it is unrelated to the size of the
# currently loaded job slice - confirm it does not exceed the array width.
Np=125000-1
for p in np.arange(Np):
    out=residence_times(p,type='e',updraft_type='cloudy')
    if np.any(out)==True:
        lens=out[0]
        # print(lens)
        
        ts=out[1]
        zs=out[2]
        ys=out[3]
        xs=out[4]

        for ind,(z,l) in enumerate(zip(zs,lens)):
            profile[z,l]+=1

# #NORMALIZATION
# Despite the name, row_averages holds the per-row sums.
row_averages = np.nansum(profile, axis=1)
mask = row_averages!=0
profile[mask] /= row_averages[mask,np.newaxis]


In [None]:
# Manual snapshots of `profile` for comparing two runs by hand.
# one=profile.copy()
two=profile.copy()

In [None]:
residence_profile=profile.copy() #save for comparing with TKE later


#Nan out zeros
cmap='plasma'
profile2=profile.copy()
profile2[profile2==0]=np.nan


#PLOTTING
# plt.imshow(profile.T);plt.gca().invert_yaxis()
plt.contourf(profile2*100,cmap=cmap,levels=50)

cbar=plt.colorbar(label='normalized count (%)')
plt.ylabel('z (km)');plt.xlabel('total entrainment residence time (mins)')
plt.title('Entrainment Count')

#FIXING TICKS

# Label every z index with the corresponding data['zf'] value.
plt.yticks(np.arange(Nz));
new_ytick_labels = np.round(data['zf'].values[:Nz], 2);
plt.gca().set_yticklabels(new_ytick_labels, fontsize=8, rotation=0);

# Relabel the x ticks from timesteps to minutes.
# NOTE(review): the factor 5 assumes 5-minute output; the active
# configuration sets t_res='1min'.
xticks = plt.gca().get_xticks()
new_labels = [str(int(tick * 5)) for tick in xticks]
plt.gca().set_xticklabels(new_labels);


In [None]:
# Debug: print the events of the last parcel processed by the loop above
# (ts, zs and lens are leftovers of that loop).
ts
for ind,(z,l) in enumerate(zip(zs,lens)):
    print(ind,z,l)

In [None]:
# # Loading Important Variables
# ##############
# if 'emptylike' not in globals():
#     print('loading neccessary variables')
#     variable='w'; w_data=data[variable] #get w data
#     w_data=w_data.interp(zf=data['zh']).data #interpolation w data z coordinate from zh to zf
#     variable='qv'; qv_data=data[variable].data # get qc data
#     variable='qc'; qc_data=data[variable].data # get qc data
#     variable='qi'; qi_data=data[variable].data # get qc data
#     qc_plus_qi=qc_data+qi_data
#     buoyancy_data=data['buoyancy'].data

#     import h5py
#     with h5py.File(dir + 'Variable_Calculation/' + 'theta_e'+f'_{res}_{t_res}'+'.h5', 'r') as f:
#         theta_e_data = f['theta_e'][:]
    
#     print('done')
#     empty_like=True

In [None]:
# def call_variables(t): 
#     if np.mod(t,25)==0: print(f'loading variables for time {t}')
#     variable='w'; w_data=data[variable].isel(time=t).interp(zf=data['zh']).data #get w_data and interpolation w data z coordinate from zh to zf
#     variable='qv'; qv_data=data[variable].isel(time=t).data # get qc data
#     variable='qc'; qc_data=data[variable].isel(time=t).data # get qc data
#     variable='qi'; qi_data=data[variable].isel(time=t).data # get qc data
#     qc_plus_qi=qc_data+qi_data
#     variable='th'; th_data=data[variable].isel(time=t).data # get qc data
#     variable='buoyancy'; buoyancy_data=data[variable].isel(time=t).data # get qc data
    
#     import h5py
#     with h5py.File(dir + 'Variable_Calculation/' + 'theta_e'+f'_{res}_{t_res}'+'.h5', 'r') as f:
#         theta_e_data = f['theta_e'][t]
        
#     if np.mod(t,25)==0:print(f'done loading')

#     return w_data,qv_data,qc_data,qi_data,qc_plus_qi,th_data,buoyancy_data,theta_e_data

In [None]:
# Mean w at the moment and location of entrainment, binned by (z index at run
# start, run length in timesteps).
# NOTE(review): w_data is only defined in a commented-out cell, so this cell
# fails on a fresh kernel unless that cell is restored.
Nz=len(data['zh'])
# NOTE(review): 18 columns means a run of 18 or more timesteps raises an
# IndexError below.
profile=np.zeros((Nz,18)); #residence time by Z levels
counter=np.zeros_like(profile)

# NOTE(review): hard-coded parcel count, unrelated to the loaded job slice.
Np=125000-1
for p in np.arange(Np):
    out=residence_times(p,type='e',updraft_type='cloudy')
    if np.any(out)==True:
        lens=out[0]
        # print(lens)
        
        ts=out[1]
        zs=out[2]
        ys=out[3]
        xs=out[4]

        
        for ind,(z,l) in enumerate(zip(zs,lens)): 
            profile[z,l]+=w_data[ts[ind],zs[ind],ys[ind],xs[ind]] #USE IF LOADING IN FULL VARIABLE
            # t=ts[ind];variable='w'; w_data=data[variable].isel(time=t).interp(zf=data['zh']).data #get w_data and interpolation w data z coordinate from zh to zf #THIS IS WAY TOO SLOW
            # profile[z,l]+=w_data[zs[ind],ys[ind],xs[ind]]
            counter[z,l]+=1

#averaging by number of parcel
# Bins whose sum is exactly 0 are left untouched (their mean is 0 either way).
mask=profile!=0
profile[mask]/=counter[mask]

In [None]:
cmap='plasma'
# Blank out zero bins so contourf leaves them empty; modifies profile in place.
profile[profile==0]=np.nan
plt.contourf(profile,cmap=cmap,levels=50)
plt.colorbar(label='w (m/s) per parcel')
plt.xlabel('total entrainment residence time (mins)');plt.ylabel('z (km)')
plt.title('Entrained W Profile')

# Relabel the x ticks from timesteps to minutes.
# NOTE(review): the factor 5 assumes 5-minute output; the active
# configuration sets t_res='1min'.
xticks = plt.gca().get_xticks()
new_labels = [str(int(tick * 5)) for tick in xticks]
plt.gca().set_xticklabels(new_labels);


# Label every z index with the corresponding data['zf'] value.
plt.yticks(np.arange(Nz));
new_ytick_labels = np.round(data['zf'].values[:Nz], 2);
plt.gca().set_yticklabels(new_ytick_labels, fontsize=8, rotation=0);

In [None]:
# Mean theta_e at the moment and location of entrainment, binned by (z index
# at run start, run length in timesteps).
# NOTE(review): theta_e_data is only defined in a commented-out cell, so this
# cell fails on a fresh kernel unless that cell is restored.
Nz=len(data['zh'])
# NOTE(review): 18 columns means a run of 18 or more timesteps raises an
# IndexError below.
profile=np.zeros((Nz,18)); #residence time by Z levels
counter=np.zeros_like(profile)

# NOTE(review): hard-coded parcel count, unrelated to the loaded job slice.
Np=125000-1
for p in np.arange(Np):
    out=residence_times(p,type='e',updraft_type='cloudy')
    if np.any(out)==True:
        lens=out[0]
        # print(lens)
        
        ts=out[1]
        zs=out[2]
        ys=out[3]
        xs=out[4]

        
        for ind,(z,l) in enumerate(zip(zs,lens)):
            profile[z,l]+=theta_e_data[ts[ind],zs[ind],ys[ind],xs[ind]] #USE IF LOADING IN FULL VARIABLE
            # t=ts[ind];import h5py #     with h5py.File(dir + 'Variable_Calculation/' + 'theta_e'+f'_{res}_{t_res}'+'.h5', 'r') as f: #         theta_e_data = f['theta_e'][t] #THIS IS WAY TOO SLOW
            # profile[z,l]+=theta_e_data[zs[ind],ys[ind],xs[ind]]
            counter[z,l]+=1

#averaging by number of parcel
# Bins whose sum is exactly 0 are left untouched (their mean is 0 either way).
mask=profile!=0
profile[mask]/=counter[mask]

In [None]:
cmap='viridis'
# Blank out zero bins so contourf leaves them empty; modifies profile in place.
profile[profile==0]=np.nan

plt.contourf(profile,cmap=cmap,levels=50)#,vmin=200)
plt.colorbar(label='theta_e (K) per parcel')
plt.xlabel('total entrainment residence time (mins)');plt.ylabel('z (km)')
plt.title(r'Entrained $\theta_e$ Profile')

# Relabel the x ticks from timesteps to minutes.
# NOTE(review): the factor 5 assumes 5-minute output; the active
# configuration sets t_res='1min'.
xticks = plt.gca().get_xticks()
new_labels = [str(int(tick * 5)) for tick in xticks]
plt.gca().set_xticklabels(new_labels);

# Label every z index with the corresponding data['zf'] value.
plt.yticks(np.arange(Nz));
new_ytick_labels = np.round(data['zf'].values[:Nz], 2);
plt.gca().set_yticklabels(new_ytick_labels, fontsize=8, rotation=0);


In [None]:
#Loading Some Data
# Interpolate TKE along its 'zf' coordinate onto the data['zh'] levels and
# pull the whole field into memory as a plain array.
# NOTE(review): this loads every timestep at once - check memory first.
tke_data=data['tke'].interp(zf=data['zh']).data

In [None]:
# Mean TKE at the moment and location of entrainment, binned by (z index at
# run start, run length in timesteps).
Nz=len(data['zh'])
# NOTE(review): 18 columns means a run of 18 or more timesteps raises an
# IndexError below.
profile=np.zeros((Nz,18)); #residence time by Z levels
counter=np.zeros_like(profile)

# NOTE(review): hard-coded parcel count, unrelated to the loaded job slice.
Np=125000-1
for p in np.arange(Np):
    out=residence_times(p,type='e',updraft_type='cloudy')
    if np.any(out)==True:
        lens=out[0]
        # print(lens)
        
        ts=out[1]
        zs=out[2]
        ys=out[3]
        xs=out[4]

        
        for ind,(z,l) in enumerate(zip(zs,lens)):
            profile[z,l]+=tke_data[ts[ind],zs[ind],ys[ind],xs[ind]] #USE IF LOADING IN FULL VARIABLE
            # t=ts[ind];tke_data=data['tke'].isel(time=t).interp(zf=data['zh']).data
            # profile[z,l]+=tke_data[zs[ind],ys[ind],xs[ind]]
            counter[z,l]+=1

#averaging by number of parcel
# Bins whose sum is exactly 0 are left untouched (their mean is 0 either way).
mask=profile!=0
profile[mask]/=counter[mask]

In [None]:
# Keep a NaN-free copy for the comparison cell before blanking zeros below.
TKE_profile=profile.copy()

cmap='plasma'
profile[profile==0]=np.nan

plt.contourf(profile,cmap=cmap, levels=50)
plt.colorbar(label=r'TKE ($m^2/s^2$) per parcel')
plt.xlabel('total entrainment residence time (mins)');plt.ylabel('z (km)')
plt.title('Entrained TKE Profile')

# Relabel the x ticks from timesteps to minutes.
# NOTE(review): the factor 5 assumes 5-minute output; the active
# configuration sets t_res='1min'.
xticks = plt.gca().get_xticks()
new_labels = [str(int(tick * 5)) for tick in xticks]
plt.gca().set_xticklabels(new_labels);

# Label every z index with the corresponding data['zf'] value.
plt.yticks(np.arange(Nz));
new_ytick_labels = np.round(data['zf'].values[:Nz], 2);
plt.gca().set_yticklabels(new_ytick_labels, fontsize=8, rotation=0);

In [None]:
#TESTING TESTING TESTING
# Min-max normalise both profiles to [0, 1] and plot their difference.
# Requires TKE_profile and residence_profile from the earlier plotting cells.


TKE_norm = (TKE_profile - np.min(TKE_profile)) / (np.max(TKE_profile) - np.min(TKE_profile))
residence_norm = (residence_profile - np.min(residence_profile)) / (np.max(residence_profile) - np.min(residence_profile))

# Algebraically this equals residence_norm - TKE_norm.
compare_array = (1-TKE_norm) - (1-residence_norm)



plt.contourf(compare_array)
plt.colorbar(label='Difference of Norms')
plt.xlabel('total entrainment residence time (mins)');plt.ylabel('z (km)')
plt.title('Entrainment Compared with Entrained TKE Profile')

####

# Relabel the x ticks from timesteps to minutes.
# NOTE(review): the factor 5 assumes 5-minute output; the active
# configuration sets t_res='1min'.
xticks = plt.gca().get_xticks()
new_labels = [str(int(tick * 5)) for tick in xticks]
plt.gca().set_xticklabels(new_labels);

# Label every z index with the corresponding data['zf'] value.
plt.yticks(np.arange(Nz));
new_ytick_labels = np.round(data['zf'].values[:Nz], 2);
plt.gca().set_yticklabels(new_ytick_labels, fontsize=8, rotation=0);