Note on 2020.1.10:
- Build a whole-year (2019) JMA radar dataset for testing kakuho.
- Crop each image to shape (1000, 1000) and reduce the data type from float32 to uint8.
- Statistical study of rain coverage; note that 132396 of the 1 M pixels are masked.
- Use 2 thresholds (> 0.05 and > 1.0 mm/h) to make 2 binary lists of length 288*365 = 105120, or monthly lists of length about 8640.
- Make a recommendation list of which data are suitable for testing, based on criteria such as rain coverage and duration.

In [1]:
import glob
import numpy as np
from datetime import datetime,timedelta
import calendar
from netCDF4 import Dataset
import joblib
import os
print(os.getcwd())

# Convert 5-minute JMA radar GRIB2 files into one uint8 array per day.
# Every frame is cropped to 1000x1000 pixels, multiplied by 10 and stored as
# uint8; each day's (frames, 1000, 1000) array is written with joblib.

#data_folder = "/Volumes/own4T/jma_radar_401300210"
data_folder = "/Users/jiang/data/jma_radar"
output_folder = "/Users/jiang/data/jma_radar"

# A day's array is only written when the loop reaches the first frame of the
# NEXT day, so dt_end has to lie just past midnight of the last day to export.
# Frames processed after that last midnight are never written to disk.
dt     = datetime(2020,1,29,0,0)  # included
dt_end = datetime(2020,1,31,0,5)  # included (the loop uses <=)
time_step = 5 * 60 # seconds

frames_per_day = 24 * 60 * 60 // time_step  # 288 frames at 5-minute steps
crop_rows = slice(1500, 2500)  # 1000 rows cut out of the full radar grid
crop_cols = slice(1000, 2000)  # 1000 columns cut out of the full radar grid
nc_file = "temp1.nc"           # scratch file, rewritten for every frame


def _daily_output_path(day):
    """Return the path of the .joblib file that stores the frames of `day`."""
    file_name = f"jma_radar_uint8_{day.strftime('%Y_%m_%d')}.joblib"
    return os.path.join(output_folder, file_name)


def _empty_day():
    """Return a zero-filled uint8 buffer holding one day of cropped frames."""
    return np.zeros(shape=(frames_per_day, 1000, 1000), dtype=np.uint8)


# initialize
print(f"today: {dt}")
today = dt.day
daily_data = _empty_day()
output_path = _daily_output_path(dt)

while dt <= dt_end:
    # on the first frame of a new day: store the finished day and start over
    if dt.day != today:
        joblib.dump(daily_data, output_path)
        del daily_data  # release the old buffer before allocating the next

        print(f"today: {dt}")
        today = dt.day
        daily_data = _empty_day()
        output_path = _daily_output_path(dt)

    # grib2 file to nc file (only the 'surface' records are converted)
    grib_file = dt.strftime('%Y%m%d_%H%M00.000')
    source_folder = os.path.join(data_folder, dt.strftime("%Y/%m/%d"))
    source_path = os.path.join(source_folder, grib_file)
    cmd = f"wgrib2 {source_path} -s | egrep 'surface'|wgrib2 -i {source_path} -netcdf {nc_file}"
    fail = os.system(cmd)
    if fail:
        # leave this frame as zeros and move on to the next time step
        print("wgrib2 wrong at ",source_path)
        dt += timedelta(seconds = time_step)
        continue

    # nc file to data; the context manager closes the file handle, which
    # would otherwise leak once per frame
    with Dataset(nc_file, "r") as root:
        rain_reduced = root['var0_1_203_surface'][0, crop_rows, crop_cols]

    # masked pixels become 0; np.ma.filled also accepts a plain ndarray
    rain_filled = np.ma.filled(rain_reduced, 0.0)

    # index of this frame within its day
    cnt = (dt.hour * 3600 + dt.minute * 60 + dt.second) // time_step

    # store value*10 as uint8; saturate at 255 instead of letting values
    # above 25.5 wrap around in the cast
    daily_data[cnt] = np.clip(rain_filled * 10, 0, 255).astype(np.uint8)

    # delete nc
    os.remove(nc_file)
    dt += timedelta(seconds = time_step)

/Users/jiang/github_all/rainymotion/docs/notebooks
today: 2020-01-29 00:00:00
today: 2020-01-30 00:00:00
today: 2020-01-31 00:00:00


In [None]:
# Echo the current value of dt as the cell output.
dt

In [5]:
# for download kakuho data or jma radar data
# The remote day folders must exist on the server; the local day folders
# (base_goal/YYYY/MM/DD) are created on demand.
from datetime import datetime,timedelta
import os
import urllib
# `import urllib` alone does not guarantee that the request submodule is
# loaded, so import it explicitly.
import urllib.request

# jma radar
#base_URL  = "http://stock1.wni.co.jp/stock_2020/401300210/0000300100200012/"
base_URL  = "http://stock1.wni.co.jp/stock/401300210/0000300100200012"
base_goal = "/Users/jiang/data/jma_radar"

dt =     datetime(2020,1,28,1,25)  # not included
dt_end = datetime(2020,1,29,0,15)  # included
time_step = 5 * 60 # seconds
while dt < dt_end:
    # advance first: the start time itself is skipped, the end time is fetched
    dt += timedelta(seconds = time_step)
    file_string = dt.strftime('%Y%m%d_%H%M00.000')
    day_string = dt.strftime("%Y/%m/%d")

    # URLs always use '/', independent of the local path separator
    source_folder = "/".join((base_URL.rstrip("/"), day_string))
    source_path = "/".join((source_folder, file_string))

    goal_folder = os.path.join(base_goal, day_string)
    os.makedirs(goal_folder, exist_ok=True)
    goal_path = os.path.join(goal_folder, file_string)

    try:
        urllib.request.urlretrieve(source_path, goal_path)
    except OSError as err:
        # URLError/HTTPError and socket errors are OSError subclasses; report
        # the failure and keep going with the next time step (best effort)
        print(f"not exist:{source_path} ({err})")
