In [None]:
import os

# Work from the parent of the directory the notebook was started in
# (presumably the project root, so that `libs` and the relative `_data/...`
# paths used below resolve -- TODO confirm the layout).
#
# The starting directory is remembered the first time this cell runs so that
# re-executing the cell in the same kernel does not climb one further level
# on every run; on a fresh kernel the result is the same single step up.
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()

os.chdir(os.path.dirname(NOTEBOOK_START_DIR))

In [None]:
from pathlib import Path
from dask.diagnostics import ProgressBar
from pathlib import Path
import cdsapi
import libs.utils
import libs.vars
import urllib
import xarray

In [None]:
# --- Target grid --------------------------------------------------------------
# Every observational product is re-gridded onto the grid of one UKESM1-0-LL
# `siconc` file, so make sure that file is on disk and open it as the template.
base_path = '_data/cmip6'
ukesm_file = '/UKESM1-0-LL/siconc/siconc_SImon_UKESM1-0-LL_ssp585_r2i1p1f2_gn_201501-204912.nc'
ukesm_path = base_path + ukesm_file

if not Path(ukesm_path).exists():
    libs.utils.download_variable(
        experiment_id='ssp585',
        frequency='mon',
        variable_id='siconc',
        save_to_local=True,
        source_id='UKESM1-0-LL',
        table_id='SImon',
        variant_label='r2i1p1f2',
    )

ukesm_grid = xarray.open_mfdataset(paths=ukesm_path, combine='by_coords', use_cftime=True)

# --- Re-grid presets ------------------------------------------------------------
# Keyword arguments later unpacked into `libs.utils.regrid`.
# Nearest-neighbour (source to destination) only.
regrid_s2d = dict(
    grid=ukesm_grid,
    method='nearest_s2d',
    copy_dims=['i', 'j', 'longitude', 'latitude', 'vertices'],
)
# Bilinear interpolation, with nearest-neighbour extrapolation.
regrid_bil_s2d = dict(
    grid=ukesm_grid,
    method='bilinear',
    extrap_method='nearest_s2d',
    copy_dims=['i', 'j', 'longitude', 'latitude', 'vertices'],
)

# --- Observational products -----------------------------------------------------
obs_base_path = '_data/_cache/_obs'
time_slice = slice('1980-01-01', '2021-01-01')

# HadISST sea-ice concentration products; later cells append further entries
# to `variables_obs`.
hadisst_urls = [
    'https://www.metoffice.gov.uk/hadobs/hadisst/data/HadISST_ice.nc',  # '.gz'
    'https://www.metoffice.gov.uk/hadobs/hadisst2/data/HadISST.2.2.0.0_sea_ice_concentration.nc',
]
variables_obs = [
    {'regrid_kwargs': regrid_s2d, 'url': hadisst_url, 'variable_id': 'sic'}
    for hadisst_url in hadisst_urls
]

In [None]:
c = cdsapi.Client()

# ERA5 monthly-mean fields to fetch. For each: the CDS request name, the
# re-grid preset to apply later, and the variable id recorded with the file.
era5_variables = [
    { 'name': '2m_temperature', 'regrid_kwargs': regrid_bil_s2d, 'variable_id': 't2m' },
    { 'name': 'evaporation', 'regrid_kwargs': regrid_bil_s2d, 'variable_id': 'e' },
    { 'name': 'sea_surface_temperature', 'regrid_kwargs': regrid_s2d, 'variable_id': 'sst' },
    { 'name': 'snowfall', 'regrid_kwargs': regrid_s2d, 'variable_id': 'sf' },
    { 'name': 'total_precipitation', 'regrid_kwargs': regrid_s2d, 'variable_id': 'tp' }
]

# Every month of 1980-2020 inclusive, as the zero-padded strings the request uses.
era5_years = [str(year) for year in range(1980, 2021)]
era5_months = [f'{month:02d}' for month in range(1, 13)]

# The retrieval is handed a target path inside the cache directory, and nothing
# earlier in the notebook creates that directory, so make sure it exists first.
Path(obs_base_path).mkdir(parents=True, exist_ok=True)

for v in era5_variables:
    v_name = v['name']
    filename = f'era5_{v_name}_1980-2020.nc'
    dest = f'{obs_base_path}/{filename}'

    # Only hit the CDS API when the file is not already cached.
    if not Path(dest).exists():
        c.retrieve(
            'reanalysis-era5-single-levels-monthly-means',
            {
                'format': 'netcdf',
                'product_type': 'monthly_averaged_reanalysis',
                'variable': v_name,
                'year': era5_years,
                'month': era5_months,
                'time': '00:00',
            },
            dest
        )

    # Queue the file for processing alongside the other observations. The bare
    # file name is stored under 'url'; the later download step resolves it
    # against `obs_base_path`, where the file now already exists.
    # Skip names that are already queued so that re-running this cell does not
    # add the same file twice.
    if all(queued['url'] != filename for queued in variables_obs):
        variables_obs.append({
            'regrid_kwargs':  v['regrid_kwargs'],
            'url': filename,
            'variable_id': v['variable_id'],
        })

In [None]:
def download_url(url, base_path=None):
    """Download `url` into the local cache directory and return the local path.

    The local file name is the last component of the URL's path. If a file of
    that name is already present in the cache directory it is returned without
    downloading anything.

    Parameters
    ----------
    url : str
        Address to fetch. A bare file name is also accepted: it resolves to the
        cached file when that exists, and otherwise fails like any bad URL.
    base_path : str or pathlib.Path, optional
        Cache directory. Defaults to the notebook-level `obs_base_path`.

    Returns
    -------
    pathlib.Path or None
        Path of the cached file, or None if the download failed (the error is
        printed rather than raised).
    """
    # `import urllib` on its own does not guarantee that the `parse` and
    # `request` submodules are loaded, so import them explicitly here.
    import urllib.parse
    import urllib.request

    if base_path is None:
        base_path = obs_base_path

    filename = urllib.parse.urlparse(url).path.split('/')[-1]
    local_filename = Path(base_path, filename)
    local_filename.parent.mkdir(parents=True, exist_ok=True)

    if local_filename.exists():
        print(f'   -> Already exists, skipping: {local_filename}')
        return local_filename

    # Download under a temporary name and rename only on success, so that an
    # interrupted transfer is never mistaken for a complete cached file on the
    # next run.
    partial_filename = local_filename.with_name(local_filename.name + '.part')
    try:
        print('   -> Downloading:')
        print(f'   -> {url}')
        print(f'   -> {local_filename}')
        urllib.request.urlretrieve(url, partial_filename)
        partial_filename.replace(local_filename)
    except Exception as e:
        print('An error occurred during initial query', e, sep='\n')
        if partial_filename.exists():
            partial_filename.unlink()
        return None

    return local_filename


# Processed files whose variable matches `plot_variable`; plotted in the next cell.
files = []
plot_variable = 'sic'

for v in variables_obs:
    local_file = download_url(v['url'])
    if local_file is None:
        # Download failed (already reported by download_url); skip this product.
        continue

    # Derive "<name>_processed.nc" from the file name only, so that a ".nc"
    # appearing elsewhere in the path is never rewritten.
    local_file_processed = local_file.with_name(f'{local_file.stem}_processed.nc')

    if local_file_processed.exists():
        print('   -> Processed file already exists, skipping write')
    else:
        # Keep a handle on the dataset that was actually opened and close it
        # via the context manager, including when a later step raises.
        # NOTE(review): closing the derived (sliced/re-gridded) dataset, as was
        # done before, presumably does not release the source file -- confirm.
        with xarray.open_mfdataset(
            paths=local_file,
            autoclose=True,
            use_cftime=True
        ) as source:
            arr = source.sel(time=time_slice)
            arr = libs.utils.regrid(arr, **v['regrid_kwargs'])

            # Write to file; the write is built lazily and then computed under
            # a progress bar while the source is still open.
            print(f'   -> Writing to {local_file_processed}')
            write = arr.to_netcdf(
                local_file_processed,
                compute=False,
                engine='netcdf4',
                unlimited_dims=['time']
            )
            with ProgressBar():
                write.compute()

        # Finally, compress as to_netcdf() seems to produce large file sizes
        local_file_processed, diff = libs.utils.compress_nc_file(local_file_processed, local_file_processed)
        print(f'   -> Compressed (Savings: {diff})')

    if plot_variable == v['variable_id']:
        files.append(local_file_processed)

In [None]:
import libs.plot
import warnings

# Suppress all warnings from here on (this affects the whole kernel session,
# not just this cell).
warnings.filterwarnings('ignore')

# One panel per processed file collected in `files`.
file_data = []
for file in files:
    item_data = xarray.open_mfdataset(paths=file, combine='by_coords', use_cftime=True)
    # Take the slice at position 6 of the leading axis.
    # NOTE(review): presumably the 7th time step of the record -- confirm the
    # dimension order and which month this is meant to show.
    snapshot = item_data[plot_variable][6, :, :]
    file_data.append({'data': snapshot, 'label': plot_variable})

# Shared colour-mesh settings: fixed 0-1 range, plotted against the
# 'longitude' / 'latitude' coordinates.
colormesh_kwargs = dict(
    cmap='RdBu_r',
    extend='both',
    levels=21,
    vmin=0,
    vmax=1,
    x='longitude',
    y='latitude',
)

# Single row with one column per dataset; the trailing `;` hides the cell's
# return value.
libs.plot.nstereo(
    file_data,
    title=f'Ensemble {plot_variable}, (1) HadISST1, (2) HadISST2.2',
    colorbar_label=plot_variable,
    colormesh_kwargs=colormesh_kwargs,
    shape=(1, len(file_data))
);