In [None]:
import xarray as xr
import numpy as np
import os
import iris
from matplotlib import pyplot as plt, animation
import datetime
import warnings
from dask.distributed import Client
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and data-quality warnings from
# iris/xarray; consider narrowing the filter while debugging.
warnings.filterwarnings('ignore')
# Export UMDIR for this process and any child processes.
# NOTE(review): presumably required by the UM file readers to locate their
# STASH metadata -- confirm which library actually reads this variable.
os.environ['UMDIR']='/g/data/access/projects/access/umdir/'

### Constants

In [None]:
# Leading part of every UM output file name; the time stamp is appended to it.
file_prefix = "umnsa_cldrad_"

# Time stamps of the first file and of the first directory to read.
file_d = datetime.datetime(2022, 2, 22, 0, 0, 0)
dir_d = datetime.datetime(2022, 2, 22, 0, 0, 0)
# The extraction loop keeps going while the directory time stamp is earlier than this.
endtime = datetime.datetime(2022, 3, 7, 19, 0, 0)
# Spacing between consecutive files and between consecutive directories.
file_td = datetime.timedelta(hours=1)
dir_td = datetime.timedelta(hours=6)

# Time stamps rendered the way they appear in directory and file names
# (the minutes field is always written as "00").
iso_dir_d = f"{dir_d:%Y%m%dT%H00}"
iso_file_d = f"{file_d:%Y%m%dT%H00}"

### Function to constrain by cell method if necessary

In [None]:
def has_cell_method(x):
    """Return True when the first cell method of `x` matches that of the reference cube.

    The reference cube is NOT a parameter: it is read from the module-level
    name `c`, which the extraction loop binds as its loop variable. `c` must
    therefore be bound to a cube with at least one cell method before this
    function is called with a cube that has cell methods.

    Parameters
    ----------
    x : object exposing a `cell_methods` sequence (an iris cube in this notebook)

    Returns
    -------
    bool
        False if `x` has no cell methods; otherwise whether
        `x.cell_methods[0].method` equals `c.cell_methods[0].method`.
    """
    candidate_methods = x.cell_methods
    if candidate_methods:
        # Only the first cell method of each cube is compared.
        return candidate_methods[0].method == c.cell_methods[0].method
    return False
    

### Setup dask cluster

In [None]:
# Create the dask.distributed client, requesting one thread per worker.
client=Client(threads_per_worker=1)
# Bare expression: the notebook displays the client summary as the cell output.
client

### Loop over all cubes from one file
Note that this needs to be adapted to the output file requirements. The code below writes one netCDF file per hourly UM output file; this may need to be changed to daily files, or to a single file covering the whole duration of the experiment. The `all_cubes` variable is a remnant of such a workflow. If each single-variable file should cover a period longer than the UM output period, gather the data loaded into `con_cubes` at each step (converted to `xarray` objects) into the `all_cubes` list, then run `xr.concat` on the list and save the result, with appropriate date stamps in the name of the final netCDF file.

In [None]:
# Split every field found in the first UM output file into its own series of
# netCDF files, one file per field per hourly time stamp.
#
# Relies on `file_prefix` and `endtime` from the constants cell and on
# has_cell_method() from the cell above.

# The first file is only used to enumerate the fields. Its path is built from a
# fixed start time rather than from iso_dir_d/iso_file_d, because the loop below
# advances those names; this keeps the cell re-runnable on its own.
start_d=datetime.datetime.fromisoformat('2022-02-22T00:00:00')
iso_start_d=start_d.strftime("%Y%m%dT%H00")
file_td=datetime.timedelta(hours=1)
dir_td=datetime.timedelta(hours=6)
cubes=iris.load(f'/g/data/hh5/tmp/WACI-Hackathon-2023/AUS2200/day1/{iso_start_d}Z/aus2200/d0198/RA3/um/{file_prefix}{iso_start_d}')
### Need to constrain by both name and cell_method, but cell_method sometimes doesn't exist.
duplicate_stash=( "m01s03i236" ,'m01s03i460', 'm01s03i461', 'm01s03i234', 'm01s03i217','m01s03i223' )
# STASH codes that are written with a 4-element chunksizes list.
four_dim_stash=("m01s08i225","m01s08i223")

# The loop variable MUST stay named `c`: has_cell_method() reads it as a global
# while iris evaluates the cell-method constraint during iris.load().
for c in cubes:
    stash_code=str(c.attributes['STASH'])
    field_con=iris.AttributeConstraint(STASH=c.attributes['STASH'])
    if ( stash_code in duplicate_stash ) and c.cell_methods:
        cell_method_con=iris.Constraint(cube_func=has_cell_method)
        netcdf_name=f"/scratch/v45/dr4292/single_field_netcdf/{c.name()}_{c.cell_methods[0].method}"
        # A tuple/list of constraints makes iris.load return the matches of
        # EACH constraint in turn (a union). Both conditions must hold for the
        # same cube, so they are combined with `&`.
        load_con=field_con & cell_method_con
    else:
        cell_method_con=None
        netcdf_name=f"/scratch/v45/dr4292/single_field_netcdf/{c.name()}"
        # NOTE(review): for a duplicated STASH code whose cube has no cell
        # method, this still selects the first cube with that STASH code,
        # which may carry a cell method -- confirm whether such cubes exist.
        load_con=field_con

    #if c.name() == "land_binary_mask": continue

    # The encoding depends only on the field, so build it once per field.
    # NOTE(review): the chunk sizes are hard-coded; presumably they match the
    # dimensions of the array after expand_dims('time') -- confirm.
    if stash_code in four_dim_stash:
        chunksizes=[14, 1, 212, 260 ]
    else:
        chunksizes=[14, 212, 260 ]
    encoding={
        c.name(): {
            'dtype':'float32',
            #'zlib':True,
            #'shuffle':True,
            #'complevel':5,
            'chunksizes': chunksizes
        }
    }

    # Restart the time stepping from the beginning of the experiment for every field.
    file_d=start_d
    dir_d=start_d
    iso_dir_d=dir_d.strftime("%Y%m%dT%H00")
    iso_file_d=file_d.strftime("%Y%m%dT%H00")
    # `counter` starts at 24 so that, after the increment below, counter%6 and
    # counter%24 reach zero after every 6th and every 24th file respectively.
    counter=24
    dayno=1
    # Unused by the hourly workflow; kept as the collection point for the
    # longer-period workflow described in the markdown above this cell.
    all_cubes=[]
    con_cubes=[]
    while dir_d < endtime:
        iso_dir_d=dir_d.strftime("%Y%m%dT%H00")
        iso_file_d=file_d.strftime("%Y%m%dT%H00")
        input_path=f'/g/data/hh5/tmp/WACI-Hackathon-2023/AUS2200/day{dayno}/{iso_dir_d}Z/aus2200/d0198/RA3/um/{file_prefix}{iso_file_d}'
        output_path=netcdf_name + "_" + iso_file_d + ".nc"

        # Skip time steps that were already written so an interrupted run can
        # be resumed. The check is on the exact file that is written below.
        # NOTE: a truncated file left by a crashed run is also skipped; delete
        # it by hand before re-running.
        if not os.path.isfile(output_path):
            con_cubes=iris.load(input_path,load_con)

            con_cubes[0].remove_coord('forecast_period')
            con_cubes[0].remove_coord('forecast_reference_time')

            ### .expand_dims('time') necessary for UM output with 1 timestep per file.
            # Name the array explicitly so the variable written to the file
            # always matches the key used in `encoding`.
            test=xr.DataArray.from_iris(con_cubes[0]).expand_dims('time').rename(c.name())

            test.to_netcdf(output_path,encoding=encoding)

        # Advance to the next file whether or not this one was written:
        # a new directory every 6 files and a new dayN folder every 24 files.
        counter=counter+1
        file_d=file_d+file_td
        if counter > 24 and counter%6 == 0:
            dir_d=dir_d+dir_td
        if counter > 24 and counter%24 == 0:
            dayno=dayno+1