In [51]:
import datetime
import os
import sys
import numpy as np
import pandas as pd
from osgeo import gdal
import xarray as xr
from pathlib import Path

from tqdm import tqdm

from joblib import Parallel, delayed

## Convert GSMaP daily .dat files to netCDF

##### funcs

In [52]:
def read_dat(dat_file, data_shape=None, neg_to_zero=True):
    """Read a flat binary file of little-endian 32-bit floats into an array.

    Parameters
    ----------
    dat_file : str or pathlib.Path
        Path of the raw binary file to read.
    data_shape : tuple of int, optional
        Shape the flat array is reshaped to, e.g. ``(rows, cols)``. When
        omitted (or empty) the 1-D array is returned as read.
    neg_to_zero : bool, default True
        When true, every value below zero is replaced by 0 in the returned
        array. (Presumably negative values are missing-data flags in the
        source product - confirm against the product documentation.)

    Returns
    -------
    numpy.ndarray
        float32 array holding the file contents.

    Raises
    ------
    FileNotFoundError
        If ``dat_file`` does not exist.
    ValueError
        If the number of values in the file does not fit ``data_shape``.
    """
    # np.fromfile opens and closes the file on its own, so no surrounding
    # open() is needed; count=-1 (the default) reads the whole file.
    dat_data = np.fromfile(dat_file, dtype='<f4')

    # reshape only when a shape was actually requested
    if data_shape:
        dat_data = dat_data.reshape(data_shape)

    # clamp negative values to zero
    if neg_to_zero:
        dat_data[dat_data < 0] = 0

    return dat_data


def to_1day_netcdf(data, time, geotransform, nc_var_name):
    """Wrap one 2-D precipitation grid in an ``xarray.Dataset``.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array laid out as (latitude, longitude).
    time : datetime-like
        Attached as a scalar ``time`` coordinate; the dataset gets no time
        dimension.
    geotransform : sequence of float
        Six-element sequence of which only four entries are read: index 0 and
        1 are the longitude origin and step, index 3 and 5 are the latitude
        origin and step.
    nc_var_name : str
        Product label written into the ``title`` and ``description``
        attributes.

    Returns
    -------
    xarray.Dataset
        Dataset with one variable, ``precipitation``, on the
        ``latitude``/``longitude`` dimensions.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D.
    """
    # get data shape
    lat_len, lon_len = data.shape

    lon_origin, lon_step = geotransform[0], geotransform[1]
    lat_origin, lat_step = geotransform[3], geotransform[5]

    # Build each axis as origin + step * index. This always yields exactly as
    # many coordinates as the data has cells, whereas np.arange(start, stop,
    # step) with a fractional step can produce one element too many.
    # NOTE(review): the first coordinate is the origin value itself; if the
    # origin marks a cell's outer corner rather than its centre, the axes are
    # offset by half a step - confirm against the product's grid definition.
    lon = np.round(lon_origin + lon_step * np.arange(lon_len), 2)
    lat = np.round(lat_origin + lat_step * np.arange(lat_len), 2)

    # create xarray dataset
    dst = xr.Dataset(
        {
            'precipitation': (['latitude', 'longitude'], data)
        },
        coords={
            'longitude': lon,
            'latitude': lat,
            'time': time
        }
    )

    # Global attributes (a separating space was missing from the title, and
    # the product name was misspelled 'GSMap' in the description).
    dst.attrs['title'] = f'{nc_var_name} Daily Precipitation'
    dst.attrs['description'] = f'GSMaP {nc_var_name} Daily Precipitation'

    dst['precipitation'].attrs['units'] = 'mm/day'
    dst['precipitation'].attrs['long_name'] = 'Daily Precipitation'

    return dst


##### run

In [53]:
# Grid geometry passed to to_1day_netcdf, which reads index 0/1 as the
# longitude origin/step and index 3/5 as the latitude origin/step: longitude
# starts at 0 with step 0.1, latitude starts at 60 with step -0.1.
# NOTE(review): the six-element layout looks like a GDAL geotransform - confirm.
glob_gsmap_geotrans = (0, 0.1, 0, 60, 0, -0.1)
# Product label used in the dataset attributes and in the output file names.
glob_varname = 'GSMaP-STD-V8-G'
# Directory that is searched for the input *.dat files.
# NOTE(review): absolute machine-specific path - adjust before running elsewhere.
glob_datdir = Path(r'D:\DATA\gsmap_testdata\dat')
# Directory the generated netCDF files are written to.
glob_ncdir = Path(r'D:\DATA\gsmap_testdata\output_nc')


In [54]:
# Slice bounds applied to each file name ([st:ed]) to extract the YYYYMMDD
# date string, e.g. 'gsmap_gauge.20181201.0.1d...' -> '20181201'.
st, ed = 12, 20

In [55]:
# Collect the input files in sorted order: Path.glob gives no ordering
# guarantee, and sorting keeps the processing order stable across platforms.
dat_files = sorted(glob_datdir.glob('*.dat'))
if not dat_files:
    # Fail with a clear message instead of an IndexError on dat_files[0].
    raise FileNotFoundError(f'No .dat files found in {glob_datdir}')

# Sanity-check the file-name slice: the extracted string should be a date
# such as 20190101; adjust st/ed if strptime below fails.
test_dt = dat_files[0].name[st:ed]
print(test_dt)
datetime.datetime.strptime(test_dt, '%Y%m%d')

20181201


datetime.datetime(2018, 12, 1, 0, 0)

In [56]:
# Convert every .dat grid into its own single-day netCDF file.

# (rows, cols) each .dat file is reshaped to; to_1day_netcdf treats the two
# axes as (latitude, longitude).
grid_shape = (1200, 3600)

# Create the output directory up front so the first write cannot fail on a
# missing folder.
glob_ncdir.mkdir(parents=True, exist_ok=True)

for d in dat_files:
    print(f'[ INFO ] Convert - {d.stem}')
    # read dat and convert to mm/day (multiply by 24)
    data = read_dat(d, data_shape=grid_shape) * 24

    # create time from the date string embedded in the file name;
    # pd.to_datetime already returns the timestamp, no date_range needed
    dt_str = d.name[st:ed]
    time = pd.to_datetime(dt_str, format='%Y%m%d')

    # to netcdf
    nc_var_name = glob_varname
    dst = to_1day_netcdf(data, time, glob_gsmap_geotrans, nc_var_name)

    # save
    nc_file = glob_ncdir.joinpath(f'{nc_var_name}_{dt_str}.nc')
    dst.to_netcdf(nc_file)

[ INFO ] Convert - gsmap_gauge.20181201.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181202.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181203.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181204.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181205.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181206.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181207.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181208.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181209.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181210.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181211.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181212.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181213.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.20181214.0.1d.daily.00Z-23Z.v8.0000.1
[ INFO ] Convert - gsmap_gauge.201