In [None]:
# Import relevant packages 
import xarray as xr
import pandas as pd
import numpy as np
import os
import glob
import matplotlib.pyplot as plt

In [None]:
# Define the input and output directories. Converted files are written to a
# "converted" sub-directory of the input directory instead of overwriting the
# originals, so nothing is lost if the processing goes wrong. The output
# directory is created if it does not already exist.
# Remember to replace "/home/u/jamie.towner" with your own file path; the output
# directory is derived from input_dir, so this is the only line to edit here.
input_dir = "/home/u/jamie.towner/jordan_training/data/forecast_data"
output_dir = os.path.join(input_dir, "converted")
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Names of the precipitation variable before and after the conversion.
original_var_name = "tprate"   # name in the original files
var_name = original_var_name   # same variable; used for the existence check
new_var_name = "tp"            # name given to the converted variable

# Unit-conversion factors.
seconds_per_day = 24 * 60 * 60  # 86400
mm_per_m = 1000                 # metres -> millimetres

# Number of days in each of the six lead-time months (or use 30.4375 for all
# months).
# NOTE(review): despite the "avg" in the name these are not averages; they are
# the calendar lengths of January-June in a non-leap year. Confirm they match
# the lead months of the forecasts being converted.
avg_days_in_month = np.array([31, 28, 31, 30, 31, 30])

# Seconds in each lead-time month, shape (6,).
seconds_in_month = seconds_per_day * avg_days_in_month

In [None]:
# Convert every NetCDF file in input_dir, regardless of which centre it
# originates from, from a precipitation rate (m/s) to a monthly total
# (mm/month). Each result is written to output_dir under the same file name,
# with the variable renamed from original_var_name to new_var_name.
#
# NOTE(review): seconds_in_month is applied by position along forecastMonth, so
# every file is scaled with the same month lengths whatever its start month.
# Confirm this matches the start dates of the files being converted.

# sorted() gives a reproducible processing order (glob order is arbitrary).
nc_files = sorted(glob.glob(os.path.join(input_dir, "*.nc")))
if not nc_files:
    print(f"No NetCDF files found in {input_dir}")

# The conversion needs exactly one month length per lead time.
expected_leads = len(seconds_in_month)

for file in nc_files:
    # The context manager closes the file on every path, including the early
    # `continue`s below, so no handles are leaked.
    with xr.open_dataset(file) as ds:
        if original_var_name not in ds:
            print(f"Variable '{original_var_name}' not found in {file}")
            continue

        # .sizes is the name -> length mapping; .get() lets a file without a
        # forecastMonth dimension be skipped instead of raising KeyError.
        if ds.sizes.get("forecastMonth") != expected_leads:
            print(f"Unexpected forecastMonth dimension size in {file}")
            continue

        # Seconds per lead month, labelled with the file's own forecastMonth
        # coordinate so the multiplication below aligns on that dimension.
        seconds_da = xr.DataArray(
            seconds_in_month,
            dims=["forecastMonth"],
            coords={"forecastMonth": ds["forecastMonth"]},
        )

        # Convert m/s to mm/month.
        tp_mm_month = ds[original_var_name] * seconds_da * mm_per_m
        tp_mm_month.attrs["units"] = "mm/month"
        tp_mm_month.name = new_var_name

        # Replace the original variable with the converted, renamed one.
        converted = ds.drop_vars(original_var_name)
        converted[new_var_name] = tp_mm_month

        # Write while the source file is still open: open_dataset loads values
        # lazily, so they are only read from disk at this point.
        output_file = os.path.join(output_dir, os.path.basename(file))
        converted.to_netcdf(output_file)

    print(f"Saved: {output_file}")

In [None]:
# Open one converted file to check the result of the unit conversion. The file
# is looked up inside output_dir, so the directory only has to be edited where
# output_dir is defined; change the file name below to inspect another file.
file_path = os.path.join(output_dir, "jordan_precip_ncep_2025_01.nc")

# Open the dataset. It is deliberately left open: the plotting code further
# down reads from `ds`.
ds = xr.open_dataset(file_path)

# Print the entire dataset summary
print(ds)

# List just the dimensions and their lengths. `sizes` is used because the
# name -> length mapping behaviour of `Dataset.dims` is being phased out.
print("\nDimensions:")
print(dict(ds.sizes))

# List each data variable with its units attribute (if it has one)
print("\nVariables and units:")
for name in ds.data_vars:
    units = ds[name].attrs.get("units", "no units")
    print(f"{name}: {units}")


In [None]:
# Name of the variable to plot.
var = "tp"

# Zero-based position along forecastMonth: 0 is the first lead month, 1 is the
# second, and so on (Python starts indexing from 0).
lead_index = 1

# Pick out the chosen lead time, then average over the 'number' dimension to
# get the ensemble mean.
selected = ds[var].isel(forecastMonth=lead_index)
ensemble_mean = selected.mean(dim="number")

# forecastMonth value at the chosen position, used in the title.
month_label = int(ds["forecastMonth"].values[lead_index])

# Draw the map on an explicit Axes; xarray adds the colorbar itself.
fig, ax = plt.subplots(figsize=(8, 6))
ensemble_mean.plot(ax=ax, cmap="viridis")
ax.set_title(f"Ensemble Mean for Forecast Month {month_label}")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()