# This notebook contains:
* Functions to regrid Himawari-8 channel 13 brightness temperature onto the study domain and save the result as a NetCDF4 file

In [1]:
import xarray as xr
import xesmf as xe
import numpy as np
from numpy import s_
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import os
from collections import defaultdict
import metpy
import metpy.calc as mpcalc
from metpy.units import units
from scipy.interpolate import griddata
from datetime import datetime
import pandas as pd

In [2]:
# creating a function which lists all file paths
def list_files_in_directory(directory):
    """Recursively collect the paths of every ``.nc`` file below ``directory``.

    Parameters
    ----------
    directory : str
        Root folder to walk with ``os.walk``.

    Returns
    -------
    list of str
        One joined path per file whose name ends in ".nc", in the order
        ``os.walk`` visits them (not sorted). Empty when nothing matches.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith(".nc")  # keep netCDF files only
    ]
# directory = "/g/data/gy85/Himawari8_AusGeo1-0-3/L1/"
directory = "/g/data/v46/lb5963/HIMAWARI/08_V46_SUMM_2021-22/L1"
file_paths = list_files_in_directory(directory)
file_paths.sort()

# Earlier multi-year workflow (one list of Jan-Mar files per year), kept for reference.
# # get filepaths for Jan-Mar separating each year (only interested in Jan-Mar)
# year_filenames = {}  
# for file in file_paths:
#     fn = os.path.basename(file)
#     ND_day_str = fn[24:27]        # day of year in filepath string
#     ND_days = int(ND_day_str)     # convert day string to integer
#     if ND_days >= 100:            # removing nov-dec files
#         pass
#     else:
#         year = fn[20:24]          # Extract the year information from the file name
#         # Check if the year is already in the dictionary, if not, create a new list
#         if year not in year_filenames:
#             year_filenames[year] = []
#         # Append the file name to the list associated with that year
#         year_filenames[year].append(file)

# # Convert the dictionary values (lists of file names) to lists
# yearly_file_lists = list(year_filenames.values())

# get filepaths for DJF for 2021-2022
# The window is selected from the timestamp embedded in each file name rather
# than by positional indices (previously file_paths[720:2804]), so the selection
# does not silently shift when files are missing from, or added to, the directory.
DJF_START = datetime(2021, 12, 1)   # inclusive: 1 Dec 2021 00 UTC
DJF_END = datetime(2022, 3, 1)      # exclusive: first hour after 28 Feb 2022


def _scan_time_from_path(path):
    """Return the datetime encoded in characters 20:30 of the file name, or None.

    Uses the same "<year><day-of-year>.<hour>" layout ("%Y%j.%H") that the
    regridding function parses; names that do not match yield None.
    """
    try:
        return datetime.strptime(os.path.basename(path)[20:30], "%Y%j.%H")
    except ValueError:
        return None


filepaths = []
for path in file_paths:
    scan_time = _scan_time_from_path(path)
    if scan_time is not None and DJF_START <= scan_time < DJF_END:
        filepaths.append(path)

In [3]:
# create xarray files of output grid, with time dimensions
def diurnal_regrid_av_ch13(files):
    """Regrid channel 13 brightness temperature from each file onto a regular lat/lon grid.

    For every path in ``files`` the dataset is opened with the h5netcdf engine,
    subset with ``.sel(elements=slice(2500, 3500), lines=slice(1800, 2320))``,
    and the variable ``himawari_8_ahi_channel_13_brightness_temperature`` is
    bilinearly regridded with xESMF onto the grid built by ``_ch13_target_grid``.
    The observation time is parsed from characters 20:30 of the file name
    (format ``%Y%j.%H``) and added as a length-1 ``time`` dimension.

    Parameters
    ----------
    files : iterable of str
        Paths of the input netCDF files. Each base name is printed as it is
        processed.

    Returns
    -------
    list of xarray.Dataset
        One dataset per input file, each holding the variable ``ch13BT`` with a
        single time step. Empty list when ``files`` is empty.

    Raises
    ------
    ValueError
        If a file name does not carry a ``%Y%j.%H`` timestamp at positions 20:30.
    """
    output = []
    out_grid = None            # target grid, built once on first use
    regridder = None           # xESMF regridder, reused while the source grid is unchanged
    src_lat = src_lon = None   # source coordinates the current regridder was built for
    for file in files:
        # select elements and lines (x,y) of smaller domain size of interest (otherwise computational time dramatically increases)
        with xr.open_dataset(file, engine="h5netcdf").sel(elements=slice(2500,3500),lines=slice(1800,2320)) as ds:
            # xesmf can only recognise 'lat' and 'lon', so rename pixel_latitude and pixel_longitude variables
            ds = ds.rename({
            "pixel_latitude": "lat",
            "pixel_longitude": "lon"
            })
            # chunk native coordinates of dataset to drastically decrease computational time for regridding
            ds_chunked = ds.chunk({'elements': 'auto', 'lines': 'auto'})

            if out_grid is None:
                out_grid = _ch13_target_grid()

            # Computing regridding weights is the expensive step, so only redo it
            # when this file's source coordinates differ from the ones the current
            # regridder was built for (identical coordinates give identical weights).
            lat_now = np.asarray(ds["lat"].values)
            lon_now = np.asarray(ds["lon"].values)
            same_source_grid = (
                regridder is not None
                and np.array_equal(lat_now, src_lat, equal_nan=True)
                and np.array_equal(lon_now, src_lon, equal_nan=True)
            )
            if not same_source_grid:
                regridder = xe.Regridder(ds_chunked, out_grid, 'bilinear')
                src_lat, src_lon = lat_now, lon_now

            dr = ds_chunked["himawari_8_ahi_channel_13_brightness_temperature"]
            dr_out = regridder(dr)
            dr_out.lon.attrs['units'] = 'degree_east'
            # Latitude values are negative degrees north (-22 .. -14), matching the
            # units declared on the target grid; 'degree_south' was incorrect.
            dr_out.lat.attrs['units'] = 'degree_north'

            fn = os.path.basename(file)                                     # file name without directories
            print(fn)
            date_string = fn[20:30]                                         # "<year><day-of-year>.<hour>"
            date_time_obj = datetime.strptime(date_string, "%Y%j.%H")       # convert him8 filename date info to a datetime object
            time_index = pd.to_datetime([date_time_obj])

            # add a length-1 time dimension carrying the observation time
            ds_time = xr.Dataset({'ch13BT': dr_out}).expand_dims(time=[time_index[0]])
            # NOTE(review): the note below says "JFM" regardless of which months are
            # passed in; confirm it is still accurate for the files being processed.
            ds_him8 = xr.Dataset({'ch13BT': ds_time.ch13BT},
                            attrs={'note':'JFM ch13BT with smaller domain, natural satellite res (2km) file created with xarray'})
            output.append(ds_him8)
    return output


def _ch13_target_grid():
    """Build the regular 0.018-degree target grid (lon 143..152, lat -22..-14) as a Dataset."""
    lon = np.arange(143, 152.018, 0.018)
    lon = xr.DataArray(lon, dims=('lon',), coords={'lon': lon}, attrs={'name': 'Longitude', 'units': 'degree_east'})
    lat = np.arange(-22, -13.982, 0.018)
    lat = xr.DataArray(lat, dims=('lat',), coords={'lat': lat}, attrs={'name': 'Latitude', 'units': 'degree_north'})
    return xr.Dataset({'lat': lat, 'lon': lon})

In [4]:
%%time
# Regrid every file in `filepaths` (the DJF 2021-22 selection) and collect the
# per-file datasets in a list. This is slow: the original author's estimate was
# ~1.5 hours per year of hourly files.
# Earlier per-year (JFM) runs, kept for reference:
# bt_2016 = diurnal_regrid_av_ch13(yearly_file_lists[0])
# bt_2017 = diurnal_regrid_av_ch13(yearly_file_lists[1])
# bt_2018 = diurnal_regrid_av_ch13(yearly_file_lists[2])
# bt_2019 = diurnal_regrid_av_ch13(yearly_file_lists[3])
# bt_2020 = diurnal_regrid_av_ch13(yearly_file_lists[4])
bt_2022 = diurnal_regrid_av_ch13(filepaths)

[gadi-cpu-bdw-0602.gadi.nci.org.au:2995977] shmem: mmap: an error occurred while determining whether or not /jobfs/119785810.gadi-pbs/ompi.gadi-cpu-bdw-0602.17388/jf.0/3139764224/shared_mem_cuda_pool.gadi-cpu-bdw-0602 could be created.
[gadi-cpu-bdw-0602.gadi.nci.org.au:2995977] create_and_attach: unable to create shared memory BTL coordinating structure :: size 134217728 


geocatL1.HIMAWARI-8.2021335.000000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.010000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.020000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.030000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.040000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.050000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.060000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.070000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.080000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.090000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.100000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.110000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.120000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.130000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.140000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.150000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.160000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.170000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.180000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.190000.FLDK.R20.nc
geocatL1.HIMAWARI-8.2021335.200000.FLDK.R20.nc
geocatL1.HIMA

KeyboardInterrupt: 

In [31]:
%%time
# concatenate files by time
# bt_2016_concat=xr.concat(bt_2016,'time')
# bt_2017_concat=xr.concat(bt_2017,'time')
# bt_2018_concat=xr.concat(bt_2018,'time')
# bt_2019_concat=xr.concat(bt_2019,'time')
# bt_2020_concat=xr.concat(bt_2020,'time')
bt_2022_concat=xr.concat(bt_2022,'time')

# e.g.(2016) convert each year to new netcdf files and save to home
bt_2022_concat.to_netcdf('2022_ch13BT_regridded_2kmres.nc', format='NETCDF4', 
             encoding={'ch13BT':{
                       'shuffle':True,         # increases effectiveness of compression when True
                       'chunksizes':[1,446,502], # length of time, lat, lon
                       'zlib':True,            # compression type, required to be True to specify compression levels below
                       'complevel':5           # specifies compression levels, range(0,9) with 0: no compression, 9: fully compressed
            }})

CPU times: user 12min 26s, sys: 3min 50s, total: 16min 17s
Wall time: 34min 8s
