# This notebook contains:
* Functions to regrid the Himawari-8 channel 13 brightness temperature onto a regular lat/lon grid for the study domain and save the regridded dataset as a NetCDF4 file

In [1]:
# import libraries
import xarray as xr
import xesmf as xe
import numpy as np
from numpy import s_
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import os
from collections import defaultdict
import metpy
import metpy.calc as mpcalc
from metpy.units import units
from scipy.interpolate import griddata
from datetime import datetime
import pandas as pd

# Functions to open, regrid, and save satellite data

In [14]:
def list_files_in_directory(directory: str) -> list[str]:
    """Recursively collect the paths of every ``.nc`` file below a directory.

    Args:
        directory (str): Root folder that is walked (including sub-folders) for netcdf files

    Returns:
        list[str]: Paths (root joined with file name) of all files ending in ".nc",
            in the order ``os.walk`` visits them (not sorted)
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith(".nc")  # keep netcdf files only
    ]


def regrid_H8_dataset(file_paths: list[str],attrs_note: str) -> list[xr.Dataset]:
    """Function to regrid the satellite files one by one onto a regular lat/lon grid

    Each file is subset to the study domain in its native (lines, elements)
    coordinates, bilinearly regridded with xesmf onto a 0.018 degree grid
    (lon 143 to 152, lat -22 to -14) and given a length-1 time dimension parsed
    from the file name.

    Args:
        file_paths (list[str]): List of file paths as strings. Characters 20-29 of each base name must hold the timestamp formatted as "%Y%j.%H" (year, day of year, hour)
        attrs_note (str): Note to add to produced dataset attributes about time period and resolution of regridded dataset e.g. JFM ch13BT for study domain with natural satellite res (2km). File created with xarray and xesmf.

    Returns:
        list[xr.Dataset]: One regridded channel 13 BT dataset (variable 'ch13BT' with time, lat and lon coordinates) per input file, in input order. Empty list if file_paths is empty.
    """
    # The target lat/lon grid does not depend on the input file, so build it once
    lon = np.arange(143, 152.018, 0.018)
    lon = xr.DataArray(lon, dims=('lon',), coords={'lon': lon}, attrs={'name': 'Longitude', 'units': 'degree_east'})
    lat = np.arange(-22, -13.982, 0.018)
    lat = xr.DataArray(lat, dims=('lat',), coords={'lat': lat}, attrs={'name': 'Latitude', 'units': 'degree_north'})
    out_grid = xr.Dataset({'lat': lat, 'lon': lon})

    date_format = "%Y%j.%H"                                             # timestamp layout inside the file name

    output = []
    for file in file_paths:
        # select elements and lines (x,y) for the study domain of interest (reduces computational time)
        with xr.open_dataset(file, engine="h5netcdf").sel(elements=slice(2500,3500),lines=slice(1800,2320)) as ds:
            # xesmf can only recognise 'lat' and 'lon', so rename pixel_latitude and pixel_longitude variables
            ds_renamed = ds.rename({
                "pixel_latitude": "lat",
                "pixel_longitude": "lon"
            })
            # chunk native coordinates of dataset before regridding
            ds_chunked = ds_renamed.chunk({'elements': 'auto', 'lines': 'auto'})
            # NOTE(review): the regridding weights are recomputed for every file; if all
            # files share the same pixel lat/lon the regridder could be built once and
            # reused - confirm against the data before changing.
            regridder = xe.Regridder(ds_chunked, out_grid, 'bilinear')
            dr = ds_chunked["himawari_8_ahi_channel_13_brightness_temperature"]
            dr_out = regridder(dr)
            # latitude values are negative in the southern hemisphere, i.e. expressed in
            # degrees north ('degree_south' is not a CF-recognised unit)
            dr_out.lon.attrs['units'] = 'degree_east'
            dr_out.lat.attrs['units'] = 'degree_north'
            fn = os.path.basename(file)                                 # file name without directories
            print(fn)
            date_time_obj = datetime.strptime(fn[20:30], date_format)   # convert him8 filename date info to a datetime object
            time_index = pd.to_datetime([date_time_obj])
            ds0 = xr.Dataset({'ch13BT':dr_out})
            # add time to dims
            ds_time = ds0.expand_dims(time=[time_index[0]])
            # create xarray dataset carrying the user-supplied note
            ds_him8 = xr.Dataset({'ch13BT':ds_time.ch13BT},
                            attrs={'note':attrs_note})
            output.append(ds_him8)
    return output


def save_to_netcdf(regridded_ds: list[xr.Dataset],file_name: str) -> None:
    """Function to concatenate the regridded satellite (ch13BT) datasets along time and save them to a netcdf4 file

    Args:
        regridded_ds (list[xr.Dataset]): Regridded datasets produced by the function named: regrid_H8_dataset
        file_name (str): Name of the saved file, without extension ('.nc' is appended)
    """
    # concatenate files by time
    concat_regridded_ds = xr.concat(regridded_ds,'time')
    ch13bt = concat_regridded_ds['ch13BT']
    # one chunk per time step covering the whole spatial grid; taken from the data
    # itself so the encoding stays valid if the target grid size changes
    chunksizes = [1 if dim == 'time' else ch13bt.sizes[dim] for dim in ch13bt.dims]
    concat_regridded_ds.to_netcdf(file_name+'.nc', format='NETCDF4',
             encoding={'ch13BT':{
                       'shuffle':True,              # increases effectiveness of compression when True
                       'chunksizes':chunksizes,     # one entry per dimension of ch13BT
                       'zlib':True,                 # compression type, required to be True to specify compression levels below
                       'complevel':5                # specifies compression levels, range(0,9) with 0: no compression, 9: fully compressed
            }})

# Results

In [4]:
# Regrid JFM 2016-2020 for climatology work
directory_clim = "/g/data/gy85/Himawari8_AusGeo1-0-3/L1/" 
file_paths_clim = sorted(list_files_in_directory(directory_clim))

# Group the Jan-Mar file paths by year (the Coral Bleaching Season)
year_filenames = {}
for file in file_paths_clim:
    fn = os.path.basename(file)
    ND_day_str = fn[24:27]        # day of year in file name
    ND_days = int(ND_day_str)     # convert day string to integer
    if ND_days < 100:             # day-of-year 100 and later is skipped (removes the nov-dec files)
        year = fn[20:24]          # year information from the file name
        # start a new list the first time a year is seen, then add the file to that year
        year_filenames.setdefault(year, []).append(file)

# One list of file paths per year, in the order the years were first encountered
yearly_file_lists = list(year_filenames.values())

# Regrid each year of JFM hourly files - this will take some time (~1.5hours for each year)
# ch13bt_2016 = regrid_H8_dataset(yearly_file_lists[0],'2016 JFM regridded ch13BT for study domain, natural satellite res (2km). Dataset created with xarray and xesmf.')
# ch13bt_2017 = regrid_H8_dataset(yearly_file_lists[1],'2017 JFM regridded ch13BT for study domain, natural satellite res (2km). Dataset created with xarray and xesmf.')
# ch13bt_2018 = regrid_H8_dataset(yearly_file_lists[2],'2018 JFM regridded ch13BT for study domain, natural satellite res (2km). Dataset created with xarray and xesmf.')
# ch13bt_2019 = regrid_H8_dataset(yearly_file_lists[3],'2019 JFM regridded ch13BT for study domain, natural satellite res (2km). Dataset created with xarray and xesmf.')
# ch13bt_2020 = regrid_H8_dataset(yearly_file_lists[4],'2020 JFM regridded ch13BT for study domain, natural satellite res (2km). Dataset created with xarray and xesmf.')

# Save as netcdf4 file
# save_to_netcdf(ch13bt_2020,'2020_ch13BT_regridded_2kmres') #...

In [11]:
# Regrid DJF for case study work
directory_case_study = "/g/data/v46/lb5963/HIMAWARI/08_V46_SUMM_2021-22/L1/" # may no longer be stored here?
file_paths_case_study = sorted(list_files_in_directory(directory_case_study))

# get filepaths for DJF for 2021-2022
# (positional slice of the sorted listing, so it depends on the exact directory contents)
filepaths = file_paths_case_study[720:2804] 

# Regrid
# ch13bt_2022 = regrid_H8_dataset(filepaths,'2021-2022 DJF regridded ch13BT for study domain, natural satellite res (2km). Dataset created with xarray and xesmf.')
# save_to_netcdf(ch13bt_2022,'2022_ch13BT_regridded')