In [1]:
%%time
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import pandas as pd
import dask
# import cmaps

# 国内政区图的绘制
# Load the border data; CN-border-La.dat is downloaded from
# https://gmt-china.org/data/CN-border-La.dat
import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.patches as mpatches

CPU times: user 954 ms, sys: 267 ms, total: 1.22 s
Wall time: 1.23 s



# 进行风和比湿拆分的绘制

2021.09.25

使用shumflux中进行前处理的脚本，将VR,RCM,OBS读作相同坐标的变量，之后再使用dict进行变量区域的筛选，筛选出850hPa风以及比湿之后放入REFERENCE中待下一步绘制

原本在49.19的绘图服务器上进行计算，但是有占用，IO大文件速度太慢。在存储数据的服务器进行NC变量的拆分处理

2021.10.08

改为使用 delayed 数组，在最后统一计时并写出；输出单层 850hPa 数据的耗时由此前先 load 再写出的约 20 mins 缩短到现在的约 5 mins，但是这样做的内存开销较大。44G 的多层数据写出的开销也只有 11 mins 左右。

## 数据读取、筛选部分

In [2]:
ds_wind = {}
ds_qv   = {}

# The diag data contains 9-1 (presumably 1 September -- confirm), so the tail
# is trimmed by keeping only the time steps that fall in April through August.
kept_months = [4,5,6,7,8]

# One row per dataset: (target dict, model key, input directory, file glob).
# Rows are listed in the order the datasets are opened.
input_table = (
    (ds_wind, 'vr',
     "/raid52/yycheng/MPAS/VR_postprocess/VR_merge_large/ke_daily_vi/",
     "????_VR_ke_daily_vi.nc"),
    (ds_qv, 'vr',
     "/raid52/yycheng/MPAS/VR_postprocess/VR_merge_large/hum_theta_daily_vi/",
     "????_VR_hum_theta_daily_vi.nc"),
    (ds_wind, 'rcm',
     "/raid52/yycheng/MPAS/RCM_postprocess/RCM_merge_large/ke_daily_vi/",
     "????_RCM_ke_daily_vi.nc"),
    (ds_qv, 'rcm',
     "/raid52/yycheng/MPAS/RCM_postprocess/RCM_merge_large/hum_theta_daily_vi/",
     "????_RCM_hum_theta_daily_vi.nc"),
)

for target, model_key, dir_in, file_glob in input_table:
    # Open every file matching the glob as a single dataset.
    opened = xr.open_mfdataset(dir_in + file_glob, parallel=True)
    # Boolean month mask along Time; only the filtered dataset is stored.
    target[model_key] = opened.sel(Time = opened.Time.dt.month.isin(kept_months))

In [3]:
# ----- select data range -----
# Every mask below is built from the coordinates of the VR humidity dataset.
coord_ref = ds_qv['vr']

# Horizontal window: latitude 5..60 and longitude 70..140 (inclusive bounds).
lat_sel = (coord_ref.latitude >= 5) & (coord_ref.latitude <= 60)
lon_sel = (coord_ref.longitude >= 70) & (coord_ref.longitude <= 140)

# Keep only the level whose `plevels` value equals 850.
# Alternatives that were tried and left disabled:
#   (ds_qv['vr'].plevels >= 200) & (ds_qv['vr'].plevels <= 925)
#   (ds_qv['vr'].plevels == 100)
plevels_sel = (coord_ref.plevels == 850)

# The time axis is fairly long, so the first trials used the single year 1998.
# NOTE(review): as written, this mask keeps 1998 and every later year -- confirm
# which of the two is intended.
time_year    = (coord_ref.Time.dt.year >= 1998)
# Seasonal masks: April-May and June-July-August.
time_sel_am  = coord_ref.Time.dt.month.isin([4,5])
time_sel_jja = coord_ref.Time.dt.month.isin([6,7,8])

# Named sets of per-dimension boolean masks; only 'alltime' is active.
sel_dict = {
    'alltime': {
        'longitude': lon_sel,
        "latitude": lat_sel,
        "plevels": plevels_sel,
        "Time": (time_year),
    },
}
# Disabled seasonal variants:
# sel_dict['am']    = {'longitude':lon_sel, "latitude":lat_sel, "plevels":plevels_sel, "Time":(time_sel_am & time_year)}
# sel_dict['jja']   = {'longitude':lon_sel, "latitude":lat_sel, "plevels":plevels_sel, "Time":(time_sel_jja & time_year)}

## 读写测试部分

写出单个文件所需要的时间

- test 1 Time chunks到3060

CPU times: user 3min 54s, sys: 25min 4s, total: 28min 59s

Wall time: 21min 2s


- test 2 使用自定义的chunks

CPU times: user 3min 41s, sys: 17min 42s, total: 21min 23s

Wall time: 16min 13s

- test 3 使用 default 的 chunks，不进行 load，直接写出：先调用 to_netcdf(compute = False)，之后再以 delayed 方式写出

CPU times: user 1min 48s, sys: 4min 22s, total: 6min 10s

Wall time: 5min 17s

In [4]:
%%time
# ----- select variables -----
mod_list    = ['vr', 'rcm']
season_list = ['am', 'jja']

# Per-model containers for the selected fields, keyed by model name.
u_sel, v_sel, qv_sel = {}, {}, {}

# The same set of per-dimension masks is applied to every model.
alltime_masks = sel_dict['alltime']

for imod in mod_list:
    zonal_wind = ds_wind[imod]['uReconstructZonal']
    u_sel[imod] = zonal_wind.isel(alltime_masks)
    # Meridional wind and qv selections are currently disabled:
#     v_sel[imod]  = ds_wind[imod]['uReconstructMeridional'].isel(sel_dict['alltime'])
#     qv_sel[imod] = ds_qv[imod]['qv'].isel(sel_dict['alltime'])

CPU times: user 13.5 ms, sys: 998 µs, total: 14.5 ms
Wall time: 12.8 ms


## 创建临时输出
输出前先进行load，测试load的时间，输出变量不大，相对速度更快

In [5]:
%%time
from dask.diagnostics import ProgressBar

# Output directory for the write-timing test.
dir_out = "/raid52/yycheng/MPAS/REFERENCE/TEMP_DATA_large/dyn/wind_shum_850hPa/writing_test/"
test_file = dir_out + "writting_test1.nc"

# compute=False defers the write: the call hands back an object whose
# .compute() performs the actual output, so the write itself can be timed.
vr_zonal_wind = u_sel['vr']
delayed_obj = vr_zonal_wind.to_netcdf(test_file, compute = False)

# Run the deferred write while showing a dask progress bar.
with ProgressBar():
    results = delayed_obj.compute()
    
    
# var_squeues = [u_sel, v_sel, qv_sel]
# var_squeues = [qv_sel]

# var_squeues_path = {}

# for single_var in var_squeues:
#     for imod in ['vr']:
#         print("deal with: " + imod)
# #         single_var[imod].chunk((3060/10,220,280,1)).to_netcdf(dir_out + 'uwnd/'  + imod + "_uwind.nc")
# #         v_sel[imod].chunk((3060/10,220,280,1)).to_netcdf(dir_out + 'vwnd/'  + imod + "_vwind.nc")

[########################################] | 100% Completed |  8min 42.5s
CPU times: user 2min 8s, sys: 8min 23s, total: 10min 31s
Wall time: 8min 42s
