In [None]:
import time
import glob
import pandas as pd
import xarray as xr
import gc
import os

In [2]:
# Record the start time of the script
start_time = time.time()

# Define the path to the NetCDF files
path = sorted(glob.glob("/glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-*"))

# Sample time_stamps
time_stamps = pd.date_range(start="2020-03-13 00:00:00", periods=len(path), freq="5T")  # Adjusted for the length of 'path'

# Initialize an empty list to store processed datasets
dataset = []

# Loop through all files in the list
for i, file_path in enumerate(path):
    start_time1 = time.time()
    print(f"Processing file {i + 1} of {len(path)}: {file_path}")

    # Open the NetCDF file using xarray
    data = xr.open_dataset(file_path)

    # Calculate the mean of QGRAUP, QRAIN, and QSNOW along the "bottom_top" dimension
    data['QHYDROMETEORS'] = (data['QGRAUP'] + data['QRAIN'] + data['QSNOW']).mean("bottom_top")

    # Drop unnecessary variables
    data1 = data.drop_vars(['QGRAUP', 'QRAIN', 'QSNOW', 'HGT'])

    # Convert timestamp to a native Python datetime object
    native_time = time_stamps[i].to_pydatetime()

    # Create a DataArray for time with the current timestamp
    time_data_array = xr.DataArray([native_time], dims=['time'], coords={'time': [native_time]})

    # Assign the time coordinate to the dataset
    data1 = data1.assign_coords(time=time_data_array)

    # Append the processed dataset to the list
    dataset.append(data1)

    # Calculate the time taken to process the current file
    end_time1 = time.time()
    elapsed_time = end_time1 - start_time1
    # Convert seconds to hours, minutes, seconds
    hours, remainder = divmod(elapsed_time, 3600)
    minutes, seconds = divmod(remainder, 60)
    print(f"Processed time for {i + 1} of {len(path)}: {file_path} --- {hours} hours, {minutes} minutes, {seconds} seconds")
    print("===================================================================================================================== ")

    # Concatenate and save the processed data to "q-hydrometeors.nc" after processing each file
    if i == 0:
        combined_data = xr.concat(dataset, dim='time')
    else:
        combined_data = xr.concat([combined_data, data1], dim='time')

    combined_data.to_netcdf("/glade/scratch/noteng/q-hydrometeors.nc")

    # Clear memory by deleting variables
    del data, data1, time_data_array
    print("Memory cleared.")

    # Collect garbage to release memory
    gc.collect()
    print("Garbage collected.")

# Print a completion message
print("Processing completed.")

# Calculate the total execution time of the script
end_time = time.time()
elapsed_time = end_time - start_time
# Convert seconds to hours, minutes, seconds
hours, remainder = divmod(elapsed_time, 3600)
minutes, seconds = divmod(remainder, 60)
print("Execution time: ", hours, "hours,", minutes, "minutes,", seconds, "seconds")


Processing file 1 of 4: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_15_00
Memory cleared for file 1: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_15_00
Processed file 1 of 4: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_15_00
Processed time for 1 of 4: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_15_00 --- 0.0 hours, 1.0 minutes, 4.150990962982178 seconds


PermissionError: [Errno 13] Permission denied: b'/glade/scratch/noteng/q-hydrometeors.nc'

In [2]:
processed_time_stamps

[datetime.datetime(2020, 3, 13, 0, 15)]

In [3]:
# import time
# import glob
# import pandas as pd
# import xarray as xr
# import gc
# import os

# start_time = time.time()

# # Define the path to the NetCDF files
# path = sorted(glob.glob("/glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-*"))


# # Sample time_stamps
# time_stamps = pd.date_range(start="2020-03-13 00:00:00", periods=len(path), freq="5T")  # Adjusted for the length of 'path'

# # Initialize an empty list to store processed datasets
# dataset = []

# # Loop through all files in the list
# for i, file_path in enumerate(path):
#     start_time1 = time.time()
#     print(f"Processing file {i + 1} of {len(path)}: {file_path}")

#     # Open the NetCDF file using xarray
#     data = xr.open_dataset(file_path)

#     # Calculate the mean of QGRAUP, QRAIN, and QSNOW along the "bottom_top" dimension
#     data['QHYDROMETEORS'] = (data['QGRAUP'] + data['QRAIN'] + data['QSNOW']).mean("bottom_top")

#     # Drop unnecessary variables
#     data1 = data.drop_vars(['QGRAUP', 'QRAIN', 'QSNOW', 'HGT'])

#     # Convert timestamp to a native Python datetime object
#     native_time = time_stamps[i].to_pydatetime()

#     # Create a DataArray for time with the current timestamp
#     time_data_array = xr.DataArray([native_time], dims=['time'], coords={'time': [native_time]})

#     # Assign the time coordinate to the dataset
#     data1 = data1.assign_coords(time=time_data_array)

#     # Append the processed dataset to the list
#     dataset.append(data1)

    

#     # Print a message indicating that memory is cleared for the current file
#     print(f"Memory cleared for file {i + 1}: {file_path}")

#     # Print a message indicating the completion of processing for the current file
#     print(f"Processed file {i + 1} of {len(path)}: {file_path}")

#     # Calculate the time taken to process the current file
#     end_time1 = time.time()
#     elapsed_time = end_time1 - start_time1
#     # Convert seconds to hours, minutes, seconds
#     hours, remainder = divmod(elapsed_time, 3600)
#     minutes, seconds = divmod(remainder, 60)
#     print(f"Processed time for {i + 1} of {len(path)}: {file_path} --- {hours} hours, {minutes} minutes, {seconds} seconds")
#     print("===================================================================================================================== ")

    
#     # Concatenate and save the processed data to "q-hydrometeors.nc" after processing each file
#     if i == 0:
#         combined_data = xr.concat(dataset, dim='time')
#     else:
#         combined_data = xr.concat([combined_data, data1], dim='time')

#     combined_data.to_netcdf("/glade/scratch/noteng/q-hydrometeors.nc")
    
    
    
#     # Clear memory by deleting variables
#     del data, data1, time_data_array

#     # Collect garbage to release memory
#     gc.collect()

# # Print a completion message
# print("Processing completed.")

# # Calculate the total execution time of the script
# end_time = time.time()
# elapsed_time = end_time - start_time
# # Convert seconds to hours, minutes, seconds
# hours, remainder = divmod(elapsed_time, 3600)
# minutes, seconds = divmod(remainder, 60)
# print("Execution time: ", hours, "hours,", minutes, "minutes,", seconds, "seconds")

In [2]:
# import time
# import glob
# import pandas as pd
# import xarray as xr
# import gc
# import os

# # Define the path to the NetCDF files
# path = sorted(glob.glob("/glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-*"))[:3]

# # Sample time_stamps
# time_stamps = pd.date_range(start="2020-03-13 00:00:00", periods=len(path), freq="5T")  # Adjusted for the length of 'path'

# # Initialize an empty list to store processed datasets
# dataset = []

# # Loop through all files in the list
# for i, file_path in enumerate(path):
#     start_time1 = time.time()
#     print(f"Processing file {i + 1} of {len(path)}: {file_path}")

#     # Open the NetCDF file using xarray
#     data = xr.open_dataset(file_path)

#     # Calculate the mean of QGRAUP, QRAIN, and QSNOW along the "bottom_top" dimension
#     data['QHYDROMETEORS'] = (data['QGRAUP'] + data['QRAIN'] + data['QSNOW']).mean("bottom_top")

#     # Drop unnecessary variables
#     data1 = data.drop_vars(['QGRAUP', 'QRAIN', 'QSNOW', 'HGT'])

#     # Convert timestamp to a native Python datetime object
#     native_time = time_stamps[i].to_pydatetime()

#     # Create a DataArray for time with the current timestamp
#     time_data_array = xr.DataArray([native_time], dims=['time'], coords={'time': [native_time]})

#     # Assign the time coordinate to the dataset
#     data1 = data1.assign_coords(time=time_data_array)

#     # Append the processed dataset to the list
#     dataset.append(data1)

#     # Print a message indicating that memory is cleared for the current file
#     print(f"Memory cleared for file {i + 1}: {file_path}")

#     # Print a message indicating the completion of processing for the current file
#     print(f"Processed file {i + 1} of {len(path)}: {file_path}")

#     # Calculate the time taken to process the current file
#     end_time1 = time.time()
#     elapsed_time = end_time1 - start_time1
#     # Convert seconds to hours, minutes, seconds
#     hours, remainder = divmod(elapsed_time, 3600)
#     minutes, seconds = divmod(remainder, 60)
#     print(f"Processed time for {i + 1} of {len(path)}: {file_path} --- {hours} hours, {minutes} minutes, {seconds} seconds")
#     print("===================================================================================================================== ")

#     # Concatenate the datasets
#     if i == 0:
#         combined_data = xr.concat(dataset, dim='time')
#     else:
#         combined_data = xr.concat([combined_data, data1], dim='time')

#     # Define the path to the output file
#     output_file = "/glade/scratch/noteng/q-hydrometeors.nc"

#     # Save the combined data to the output file and set permissions to allow read, write, and execute for everyone
#     combined_data.to_netcdf(output_file, mode='w', format='NETCDF4')
#     os.chmod(output_file, 0o777)  # Change permissions to allow read, write, and execute for everyone

#     # Clear memory by deleting variables
#     del data, data1, time_data_array

#     # Collect garbage to release memory
#     gc.collect()

# # Print a completion message
# print("Processing completed.")

# # Calculate the total execution time of the script
# end_time = time.time()
# elapsed_time = end_time - start_time
# # Convert seconds to hours, minutes, seconds
# hours, remainder = divmod(elapsed_time, 3600)
# minutes, seconds = divmod(remainder, 60)
# print("Execution time: ", hours, "hours,", minutes, "minutes,", seconds, "seconds")


In [1]:
# import time
# import glob
# import pandas as pd
# import xarray as xr
# import gc
# import os

# # Define the path to the NetCDF files
# path = sorted(glob.glob("/glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-*"))[:3]

# # Sample time_stamps
# time_stamps = pd.date_range(start="2020-03-13 00:00:00", periods=len(path), freq="5T")  # Adjusted for the length of 'path'

# # Initialize an empty list to store processed datasets
# dataset = []

# # Start time for the total execution time
# start_time = time.time()

# # Loop through all files in the list
# for i, file_path in enumerate(path):
#     start_time1 = time.time()
#     print(f"Processing file {i + 1} of {len(path)}: {file_path}")

#     # Open the NetCDF file using xarray
#     data = xr.open_dataset(file_path)

#     # Calculate the mean of QGRAUP, QRAIN, and QSNOW along the "bottom_top" dimension
#     data['QHYDROMETEORS'] = (data['QGRAUP'] + data['QRAIN'] + data['QSNOW']).mean("bottom_top")

#     # Drop unnecessary variables
#     data1 = data.drop_vars(['QGRAUP', 'QRAIN', 'QSNOW', 'HGT'])

#     # Convert timestamp to a native Python datetime object
#     native_time = time_stamps[i].to_pydatetime()

#     # Create a DataArray for time with the current timestamp
#     time_data_array = xr.DataArray([native_time], dims=['time'], coords={'time': [native_time]})

#     # Assign the time coordinate to the dataset
#     data1 = data1.assign_coords(time=time_data_array)

#     # Append the processed dataset to the list
#     dataset.append(data1)

#     # Print a message indicating that memory is cleared for the current file
#     print(f"Memory cleared for file {i + 1}: {file_path}")

#     # Print a message indicating the completion of processing for the current file
#     print(f"Processed file {i + 1} of {len(path)}: {file_path}")

#     # Calculate the time taken to process the current file
#     end_time1 = time.time()
#     elapsed_time = end_time1 - start_time1
#     # Convert seconds to hours, minutes, seconds
#     hours, remainder = divmod(elapsed_time, 3600)
#     minutes, seconds = divmod(remainder, 60)
#     print(f"Processed time for {i + 1} of {len(path)}: {file_path} --- {hours} hours, {minutes} minutes, {seconds} seconds")
#     print("===================================================================================================================== ")

#     # Concatenate and save the processed data to "q-hydrometeors.nc" after processing each file
#     combined_data = xr.concat(dataset, dim='time')
#     output_file = "/glade/scratch/noteng/q-hydrometeors.nc"
#     combined_data.to_netcdf(output_file)

#     # Explicitly delete the combined_data variable to release memory
#     del combined_data

#     # Clear memory by deleting variables
#     del data, data1, time_data_array

#     # Collect garbage to release memory
#     gc.collect()

# # Print a completion message
# print("Processing completed.")

# # Calculate the total execution time of the script
# end_time = time.time()
# elapsed_time = end_time - start_time
# # Convert seconds to hours, minutes, seconds
# hours, remainder = divmod(elapsed_time, 3600)
# minutes, seconds = divmod(remainder, 60)
# print("Execution time: ", hours, "hours,", minutes, "minutes,", seconds, "seconds")

Processing file 1 of 3: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_00_00
Memory cleared for file 1: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_00_00
Processed file 1 of 3: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_00_00
Processed time for 1 of 3: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_00_00 --- 0.0 hours, 1.0 minutes, 1.1815471649169922 seconds
Processing file 2 of 3: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_05_00
Memory cleared for file 2: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_05_00
Processed file 2 of 3: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_05_00
Processed time for 2 of 3: /glade/u/home/noteng/noteng/LES-output/wrfout_cloud_d02_2020-03-13_00_05_00 --- 0.0 hours, 1.0 minutes, 0.7326364517211914 seconds


PermissionError: [Errno 13] Permission denied: b'/glade/scratch/noteng/q-hydrometeors.nc'

In [4]:
!ls -l q-hydrometeors.nc

ls: cannot access q-hydrometeors.nc: No such file or directory


In [10]:
# xr.open_dataset(saved_data_path)['QHYDROMETEORS'][1]