In [45]:
#!/usr/bin/python3
# Example: time python3 artist_aggregator.py --src /g/data/jk72/MIZ/Bremen/netcdf/ --dest /g/data/jk72/MIZ/processed/
# qsub -I -q normal -P jk72 -l walltime=05:00:00,ncpus=48,mem=128gb,storage=gdata/v10+gdata/jk72,wd

import xarray as xr
import argparse
from pathlib import Path
import glob
import sys
import os
import pandas as pd
import numpy as np
import odc.geo.xr
# import rioxarray
import pdb
from tqdm.auto import tqdm
from cftime import date2num
from datetime import datetime
import cftime

def paths_to_datetimeindex(paths, string_slice=(0, 10), form='%Y%m'):
    """
    Parse the date stamp embedded in each file name into pandas datetimes.

    Parameters
    ----------
    paths : list of strings
        File paths whose base names contain a date stamp.
    string_slice : tuple
        Arguments for ``slice`` (typically start and stop) locating the
        date stamp. They index into the base name (i.e. file name) of
        each path, not the path itself. Defaults to (0, 10).
    form : str
        ``strftime``-style format handed to ``pd.to_datetime`` to parse
        the extracted text. Defaults to '%Y%m'.

    Returns
    -------
    The result of ``pd.to_datetime`` applied to the extracted strings,
    one entry per path, in the order the paths were given.
    """
    stamps = []
    for path in paths:
        file_name = os.path.basename(path)
        # The slice is applied to the file name only, so directory names
        # never shift the position of the date stamp.
        stamps.append(file_name[slice(*string_slice)])
    return pd.to_datetime(stamps, format=form)

In [10]:
def paths_to_datetime(paths, string_slice=(0, 10), units="days since 1970-01-01 00:00"):
    """
    Extract the date embedded in each file name as a pandas Timestamp.

    Parameters
    ----------
    paths : list of strings
        A list of file path strings that will be used to extract times
    string_slice : tuple
        An optional tuple giving the start and stop position that
        contains the time information in the provided paths. These are
        applied to the basename (i.e. file name) in each path, not the
        path itself. Defaults to (0, 10).
    units : str
        Kept so existing calls remain valid; it does not influence the
        returned values. The function previously converted the dates to
        numeric times with these units but discarded the result.

    Returns
    -------
    list of pandas.Timestamp
        One Timestamp per entry of `paths`, in the same order.
    """
    date_strings = [os.path.basename(i)[slice(*string_slice)]
                    for i in paths]

    # pd.Timestamp parses each extracted string on its own, so no explicit
    # format string is needed here.
    return [pd.Timestamp(i) for i in date_strings]

## Loading data

In [42]:
# Root directory that is searched recursively for input files.
path = "/g/data/jk72/MIZ/Bremen/netcdf/"

# Collect every regular file whose name contains "2021". Directories that
# match the pattern are skipped: the sort key below parses an integer out
# of the file name and would raise on a directory name.
print("Finding files")
file_list = [file for file in Path(path).rglob('*2021*') if file.is_file()]

# Order the files by the integer found at characters 16-23 of the base name
# (the same slice that is later parsed as a '%Y%m%d' date for the time axis).
file_list = sorted(file_list, key=lambda i: int(os.path.splitext(os.path.basename(i)[16:24])[0]))
print(f"Processing {len(file_list)} files")

Finding files
Processing 375 files


## Time and concatenation

In [4]:
# Sanity check: express 2021-02-18 as a number of days since 1970-01-01,
# the numeric-time form that cftime.date2num produces for the given units.
import cftime
cftime.date2num(pd.Timestamp('20210218'), "days since 1970-01-01 00:00")

18676

In [59]:
# Inverse sanity check: convert the day count back into a calendar date.
# `time_units` is assigned here because no visible cell defines it, so the
# call would otherwise raise NameError on a fresh kernel. The value matches
# the units used in the date2num check.
time_units = "days since 1970-01-01 00:00"
cftime.num2date(18676, time_units)

cftime.DatetimeGregorian(2021, 2, 18, 0, 0, 0, 0, has_year_zero=False)

In [43]:
# Create the variable used for the time axis, one entry per file in file_list.
# Format codes used below:
#   %m = zero-padded decimal for month
#   %d = zero-padded decimal for day
# Disabled alternative that builds the axis with paths_to_datetime instead:
# time_var = xr.Variable('time', paths_to_datetime(file_list, string_slice=(16, 24), units=units))
# units = "days since 2021-06-01 00:00"
time_var = xr.Variable('time', paths_to_datetimeindex(file_list, string_slice=(16, 24), form='%Y%m%d'))

In [44]:
# Open every file in file_list (with a tqdm progress bar) and concatenate
# the resulting datasets along the new 'time' dimension defined by time_var.
# NOTE(review): an earlier comment here referred to `decode_coords`, but no
# such argument is passed to open_dataset — confirm whether it is needed.
ds = xr.concat([xr.open_dataset(i) for i in tqdm(file_list)],
                        dim=time_var)

  0%|          | 0/375 [00:00<?, ?it/s]

In [46]:
# Keep a second name for the full concatenated dataset so later cells can
# rebuild `ds` from it. This binds another reference; it does not copy.
ds_ = ds

In [62]:
# Attach the "EPSG:3031" coordinate reference system through the odc.geo
# xarray accessor, starting from the saved full dataset `ds_`.
ds = ds_.odc.assign_crs("EPSG:3031")

In [27]:
# ds.time.encoding['units'] = units
# ds.time.attrs['units'] = units

In [65]:
# Restrict the data to 2021-01-01 through 2021-04-30 along the time axis.
# NOTE(review): this selects from `ds_`, not `ds`, so it replaces the `ds`
# produced by the assign_crs cell. If the CRS is wanted downstream, select
# from `ds` instead — confirm the intent.
ds = ds_.sel(time=slice("2021-01-01", "2021-04-30"))

In [66]:
# Display the time-subset dataset for inspection.
ds

## Writing data

In [67]:
### GEOTIFF (disabled)

# Convert our xarray.DataArray into a xarray.Dataset
# ds = ds.to_dataset('band')
# ds = ds.rename({1: 'conc'})

### NETCDF

# Disabled clean-up steps kept for reference:
# ds = ds.drop('polar_stereographic')
# del ds['conc'].attrs['grid_mapping']

# Give the data variable a more useful name, then describe it and the
# time coordinate through their attributes.
ds = ds.rename({'z': 'conc'})
ds['conc'].attrs.update({'units': '%', 'standard_name': 'sea_ice_area_fraction'})
ds.time.attrs['long_name'] = "time"

# Average over each "M" (monthly) resampling bin, keeping the attributes.
print("Monthly resampling")
monthly_resample = ds.resample(time="M").mean(keep_attrs=True)
monthly_resample.time.attrs['long_name'] = "time"

# Write the monthly means to disk; the daily export stays disabled.
print("Writing data")
# ds.to_netcdf(path='/g/data/jk72/sc0554/'+'asi2_daily_test.nc')
monthly_resample.to_netcdf(path='/g/data/jk72/MIZ/processed/'+'asi2_2021_jan_april_monthly3.nc')

Monthly resampling
Writing data


In [59]:
# Label the time coordinate of the monthly means.
# NOTE(review): the main write cell already sets this same attribute right
# after resampling — this cell looks like a leftover; confirm before removing.
monthly_resample.time.attrs['long_name']="time"


In [61]:
# Write the monthly means to a second file ("...monthly2.nc").
# NOTE(review): the main write cell saves the same `monthly_resample` object
# as "...monthly3.nc"; confirm whether both outputs are needed.
monthly_resample.to_netcdf(path='/g/data/jk72/MIZ/processed/'+'asi2_2021_jan_april_monthly2.nc')