In [1]:
import xarray as xr
import xesmf as xe
import numpy as np
from numpy import s_
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import os
from collections import defaultdict
import metpy
import metpy.calc as mpcalc
from metpy.units import units
from scipy.interpolate import griddata
from datetime import datetime
import pandas as pd

In [20]:
def list_files_in_directory(directory):
    """Recursively collect the paths of all ``.nc`` files below ``directory``.

    Parameters
    ----------
    directory : str
        Root directory to walk.

    Returns
    -------
    list of str
        Each path is the containing directory joined with the file name, in the
        order ``os.walk`` visits them (no sorting is applied here).
    """
    return [
        os.path.join(dirpath, name)
        for dirpath, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith(".nc")  # keep netCDF files only
    ]
directory = "/g/data/gy85/Himawari8_AusGeo1-0-3/L1/"
file_paths = sorted(list_files_in_directory(directory))

# Group the file paths by year, keeping only files whose day-of-year is below 100.
# The slices assume names laid out like
# "geocatL1.HIMAWARI-8.2016001.000000.FLDK.R20.nc": characters 20-23 hold the
# year and characters 24-26 the day-of-year.
# NOTE(review): other cells describe this selection as JFM, but days 1-99 run
# from 1 January into early April - confirm the intended cutoff.
year_filenames = {}
for file in file_paths:
    fn = os.path.basename(file)
    ND_day_str = fn[24:27]
    ND_days = int(ND_day_str)
    if ND_days < 100:
        year = fn[20:24]
        # start a new list the first time a year is seen, then add the path to it
        year_filenames.setdefault(year, []).append(file)

# One list of file paths per year, in the order the years were first encountered
yearly_file_lists = list(year_filenames.values())

In [29]:
# Regrid Himawari-8 channel-13 brightness temperature onto a regular lat/lon grid,
# producing one dataset per input file, each with a length-1 time dimension.
def _ch13_target_grid():
    """Build the 1-D lat/lon target grid (0.2 degree spacing) used for regridding."""
    lon = np.arange(143, 152.2, 0.2)
    lon = xr.DataArray(lon, dims=('lon',), coords={'lon': lon},
                       attrs={'name': 'Longitude', 'units': 'degree_east'})
    lat = np.arange(-22, -13.8, 0.2)
    lat = xr.DataArray(lat, dims=('lat',), coords={'lat': lat},
                       attrs={'name': 'Latitude', 'units': 'degree_north'})
    return xr.Dataset({'lat': lat, 'lon': lon})


def _him8_time_from_filename(filename):
    """Parse year, day-of-year and hour from characters 20-29 of a file name.

    For "geocatL1.HIMAWARI-8.2016001.000000.FLDK.R20.nc" that slice is
    "2016001.00", which is read with the format "%Y%j.%H".
    """
    return datetime.strptime(filename[20:30], "%Y%j.%H")


def diurnal_regrid_av_ch13(files):
    """Regrid channel-13 brightness temperature from each file onto the target grid.

    Every file is opened with the h5netcdf engine, subsampled to every 15th
    element and line, and bilinearly regridded with xESMF onto the grid from
    ``_ch13_target_grid``. The observation time is taken from the file name.

    Parameters
    ----------
    files : iterable of str
        Paths of the input netCDF files. Their base names must carry the
        timestamp at characters 20-29 (see ``_him8_time_from_filename``).

    Returns
    -------
    list of xarray.Dataset
        One dataset per input file, holding the variable ``ch13BT`` with a
        length-1 ``time`` dimension. An empty input gives an empty list.

    Raises
    ------
    ValueError
        If a file name does not contain a parsable timestamp.
    """
    files = list(files)
    if not files:
        return []

    # The target grid is identical for every file, so build it once.
    out_grid = _ch13_target_grid()

    output = []
    for file in files:
        with xr.open_dataset(file, engine="h5netcdf").isel(elements=s_[::15], lines=s_[::15]) as ds:
            # xesmf can only recognise 'lat' and 'lon'
            renamed = ds.rename({
                "pixel_latitude": "lat",
                "pixel_longitude": "lon"
            })
            # chunking drastically decreases computational time for regridding
            ds_chunked = renamed.chunk({'elements': 'auto', 'lines': 'auto'})

            # NOTE(review): the regridding weights are recomputed for every file.
            # If all files share the same pixel lat/lon grid, a single Regridder
            # could be reused - confirm before changing.
            regridder = xe.Regridder(ds_chunked, out_grid, 'bilinear')
            dr_out = regridder(ds_chunked["himawari_8_ahi_channel_13_brightness_temperature"])

            # Label the output coordinates; latitude uses 'degree_north' to match
            # the target grid (the values are negative for the southern hemisphere).
            dr_out.lon.attrs['units'] = 'degree_east'
            dr_out.lat.attrs['units'] = 'degree_north'

            fn = os.path.basename(file)
            print(fn)
            date_time_obj = _him8_time_from_filename(fn)
            time_index = pd.to_datetime([date_time_obj])

            # add a length-1 time dimension carrying the observation time
            ch13_with_time = dr_out.expand_dims(time=[time_index[0]])

            # The year in the note comes from the file's timestamp, so the label
            # stays correct when another year's files are processed.
            output.append(
                xr.Dataset(
                    {'ch13BT': ch13_with_time},
                    attrs={'note': f'{date_time_obj.year} JFM ch13BT with smaller domain, file created with xarray'},
                )
            )
    return output

In [30]:
%%time
# run set of yearly JFM hourly files through function
bt_2016 = diurnal_regrid_av_ch13(yearly_file_lists[0])

geocatL1.HIMAWARI-8.2016001.000000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.010000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.020000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.030000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.040000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.050000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.060000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.070000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.080000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.090000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.100000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.110000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.120000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.130000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.140000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.150000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.160000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.170000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.180000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.190000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2016001.200000.FLDK.R20.nc
geocatL1.HIMA

In [31]:
%%time
bt_2016_concat=xr.concat(bt_2016,'time')
# convert to netcdf
bt_2016_concat.to_netcdf('2016_ch13BT_regridded_lowres.nc', format='NETCDF4', 
             encoding={'ch13BT':{
                       'shuffle':True,         # increases effectiveness of compression when True
                       'chunksizes':[1,41,46], # time, lat, lon
                       'zlib':True,            # compression type, required to be True to specify compression levels below
                       'complevel':5           # specifies compression levels, range(0,9) with 0: no compression, 9: fully compressed
            }})

CPU times: user 12min 26s, sys: 3min 50s, total: 16min 17s
Wall time: 34min 8s
