In [1]:
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import pandas as pd
import proplot as pplt
import xclim as xclim

In [2]:
# Only log CF-convention validation problems (the author's way of switching the CF checks off).
xclim.set_options(data_validation="log")
# Indices are computed on slices of a year (e.g. April-August): use the "pct" missing-value
# check with a tolerance of 1.0 so that the NaN-padded part of each year is tolerated.
xclim.set_options(
    check_missing="pct",
    missing_options={"pct": {"tolerance": 1.0}},
)

<xclim.core.options.set_options at 0x2b694b047b80>

# XCLIM指标计算
2021.12.30

renew:
- 2022.01.05
    
暂时不考虑百分比指数的绘制

- 2022.01.10

如果处理不完整的一年存在NAN的问题，需要扩充非整年的数据到整年上，然后交给xclim处理，并且先关闭xclim的nan的容忍检查

In [3]:
def expand_timecoords(da, new_time_coords):
    """
    Pre-processing step for xclim: put a 3-D variable onto a longer time axis.

    A new array named 'empty3dvar' is created on ``new_time_coords`` together with the
    ``lat`` / ``lon`` coordinates of ``da``; the values of ``da`` are then written at
    their own time stamps. Time steps that ``da`` does not cover (the missing part of
    an incomplete year) stay NaN, so they are not mistaken for real data.

    da dims: time lat lon
    """
    target_coords = {"time": new_time_coords, "lat": da.lat, "lon": da.lon}
    expanded = xr.DataArray(name = 'empty3dvar', coords = target_coords, dims = ['time', 'lat', 'lon'])
    # Label-based assignment: only the time stamps present in `da` are filled in.
    expanded.loc[dict(time = da.time)] = da
    return expanded

## 降水部分

### 数据读入
日降水数据，修改坐标名称

In [None]:
dir_in          = "/raid52/yycheng/MPAS/REFERENCE/TEMP_DATA_large/pre/ordata/"
filename_obs    = "obsmerge_pre_98-17.nc"
filename_vr     = "vr_pre_98-17.nc"
filename_rcm    = "rcm_pre_98-17.nc"

# Open the three precipitation files, keyed by data source.
ds_or = {
    'obs': xr.open_dataset(dir_in + filename_obs),
    'vr':  xr.open_dataset(dir_in + filename_vr),
    'rcm': xr.open_dataset(dir_in + filename_rcm),
}

# Extract the precipitation variable of each source.
# The obs variable carries an extra 'lev' coordinate, which is dropped here.
var = {
    'obs': ds_or['obs']['premerge'].reset_coords(names = 'lev', drop = True),
    'vr':  ds_or['vr']['precip_MPAS'],
    'rcm': ds_or['rcm']['precip_MPAS'],
}

# change coords
var_list = ['obs', 'vr', 'rcm']
for mod_key in var_list:
    # Pair this source's coordinate names with the obs names by position.
    rename_dict = dict(zip(var[mod_key].coords.keys(), var['obs'].coords.keys()))
    # show converting coords
    for old_name, new_name in rename_dict.items():
        print(old_name + " -----converting to----- " + new_name)

    var[mod_key] = var[mod_key].rename(rename_dict)
    # Replace the coordinates with the obs coordinates (private xarray attribute).
    var[mod_key]._coords = var['obs']._coords
    # Name the DataArray after its source key.
    var[mod_key] = var[mod_key].rename(mod_key)

### 极端指数的计算

In [None]:
def calc_extreme_pre( var_temp, fillna = -1e30 ):
    """
    Compute extreme-precipitation indices with xclim from one precipitation DataArray.

    The input is renamed to "pr", tagged with units "mm/d" and passed to the xclim
    ICCLIM indicators RX5day, SDII, CDD and CWD with freq = 'AS'. NaNs in every
    result are replaced by ``fillna``.

    Parameters
    ----------
    var_temp : xr.DataArray
        Precipitation series (treated as mm/d).
    fillna : float
        Value substituted for NaN in each index.

    Returns
    -------
    xr.Dataset
        One data variable per index, created on the coords of the first index.
    """
    import xclim as xclim
    xclim.core.options.set_options(data_validation='log')

    pr = var_temp.rename("pr")
    pr.attrs['units'] = "mm/d"

    # R95p is currently left out; the recipe is kept for reference:
    # wet_days = xr.where(pr < 1.0, np.nan, pr)
    # pr_95th = wet_days.reduce(np.nanpercentile, dim='time', q=95)
    # pr_95th.attrs['units'] = "mm/d"
    # R95p = xclim.indicators.icclim.R95p(pr, pr_95th, freq='AS')

    icclim = xclim.indicators.icclim
    index_names = ('RX5day', 'SDII', 'CDD', 'CWD')
    dict_out_temp = {
        name: getattr(icclim, name)(pr, freq='AS').fillna(fillna)
        for name in index_names
    }

    # The output dataset takes its coordinates from the first index computed.
    first_index = next(iter(dict_out_temp.values()))
    ds_out_temp = xr.Dataset(coords = first_index.coords)
    for name, index in dict_out_temp.items():
        ds_out_temp[name] = index

    return ds_out_temp

In [None]:
# Daily time axis covering every day of 1998-2017; it is the same for all sources,
# so it is built once before the loop.
full_days   = pd.date_range('1998-01-01', '2017-12-31', freq = 'D')
allyeartime = xr.DataArray(full_days, name = 'time', coords = {"time":full_days}, dims = 'time')

out_dir = "/raid52/yycheng/MPAS/REFERENCE/TEMP_DATA_large/pre/extreme/"
comp    = {'_FillValue' : -1e30}

# To process a single source only, loop over ['obs'] instead of var.
for mod_name in var:
    print("dealing " + mod_name)
    pr_daily = var[mod_name]
    month    = pr_daily.time.dt.month

    # Keep April-May and June-August days respectively.
    var_4to5 = pr_daily.isel(time = month.isin([4,5]))
    var_6to8 = pr_daily.isel(time = month.isin([6,7,8]))

    # Pad each seasonal slice to the full daily axis (uncovered days become NaN).
    var_4to5_expand = expand_timecoords(var_4to5, allyeartime)
    var_6to8_expand = expand_timecoords(var_6to8, allyeartime)

    ds_out_temp_4to5 = calc_extreme_pre(var_4to5_expand, -1e30)
    ds_out_temp_6to8 = calc_extreme_pre(var_6to8_expand, -1e30)

    # Same _FillValue encoding for every index variable.
    encoding = {name: comp for name in ds_out_temp_4to5.data_vars}

    ds_out_temp_4to5.to_netcdf(out_dir + "extreme_" + mod_name + "_4-5.nc", encoding = encoding)
    ds_out_temp_6to8.to_netcdf(out_dir + "extreme_" + mod_name + "_6-8.nc", encoding = encoding)

    del ds_out_temp_4to5, ds_out_temp_6to8

### 降水部分的检查
2022.01.08 检查已经结束

原始输出的dataset含有CDD、CWD两个 units attrs 为 days 的数据；dtype=“float64”看上去是一个以 天数 为单位的整数

将上述dataset输出到netcdf之后，再使用xarray读入，会将 dtype为float64的天数 识别为 *‘<m8[ns]’* 的时间dtype

需要在 open_dataset 中添加参数 decode_times=False（或 decode_timedelta=False），使以 天数 为单位的变量保持为数字，而不被解码为时间类型

官方的文档说：
```
decode_times ( bool, optional ) -- 如果为 True，则将以标准 NetCDF 日期时间格式编码的时间解码为日期时间对象。否则，将它们编码为数字。并非所有后端都支持此关键字。
```

In [None]:
# NOTE(review): `ds_out_temp_4to5` is deleted at the end of the export loop above,
# so it has to be recreated before this check cell can run.
comp = {'_FillValue' : -1e30}
encoding = {name: comp for name in ds_out_temp_4to5.data_vars}
# To test a single variable only: encoding = {'CDD': comp}

In [None]:
# Test: replace the plain "days" unit of CDD, which (per the notes above) makes xarray
# decode the variable as a timedelta when the file is read back.
ds_out_temp_4to5['CDD'].attrs.update(units = "days but float64")

In [None]:
# Write the test dataset with the _FillValue encoding defined in the check cells above.
test_path = "/raid52/yycheng/MPAS/REFERENCE/TEMP_DATA_large/pre/extreme/extreme_testwrite_obs_4-5.nc"
ds_out_temp_4to5.to_netcdf(test_path, encoding = encoding)

## 气温部分

In [4]:
dir_in = "/raid52/yycheng/MPAS/REFERENCE/TEMP_DATA/mask_t2m/mask_res/"


def _open_t2m_group(obs_file, obs_var, vr_file, rcm_file):
    """
    Open one obs / VR / RCM triplet of temperature files from ``dir_in``.

    The obs variable is used as stored; the model 't2m' fields have 273.15
    subtracted (presumably Kelvin -> degrees Celsius; confirm against the files).
    """
    return {
        'obs': xr.open_dataset(dir_in + obs_file)[obs_var],
        'vr':  xr.open_dataset(dir_in + vr_file)['t2m'] - 273.15,
        'rcm': xr.open_dataset(dir_in + rcm_file)['t2m'] - 273.15,
    }


# t2m mean
ds_tm = _open_t2m_group(
    "mask_sel_CN05.1_Tm_1961_2018_daily_025x025.nc", 'tm',
    "mask_mean_t2m_98-17_VR.nc",
    "mask_mean_t2m_98-17_RCM.nc",
)

# t2m max
ds_max = _open_t2m_group(
    "mask_sel_CN05.1_Tmax_1961_2018_daily_025x025.nc", 'tmax',
    "mask_max_t2m_98-17_VR.nc",
    "mask_max_t2m_98-17_RCM.nc",
)

# min temp
ds_min = _open_t2m_group(
    "mask_sel_CN05.1_Tmin_1961_2018_daily_025x025.nc", 'tmin',
    "mask_min_t2m_98-17_VR.nc",
    "mask_min_t2m_98-17_RCM.nc",
)



In [5]:
# change coords
var_list = ['obs', 'vr', 'rcm']


def _align_coords_to_obs(group, keys):
    """
    Give every DataArray in ``group`` the coordinate names and coordinates of ``group['obs']``.

    For each key the coordinate names are paired with the obs names by position and
    renamed, the coordinates are then replaced by the obs coordinates, and the array
    is named after its key. ``group`` is updated in place; each rename is printed.
    """
    for key in keys:
        rename_dict = dict(zip(group[key].coords.keys(), group['obs'].coords.keys()))
        # show converting coords
        for old_name, new_name in rename_dict.items():
            print(old_name + " -----converting to----- " + new_name)

        group[key] = group[key].rename(rename_dict)
        # Replace the coordinates with the obs coordinates (private xarray attribute).
        group[key]._coords = group['obs']._coords
        group[key] = group[key].rename(key)


# The loop variable is deliberately NOT called `var`: that name holds the precipitation
# dict, and rebinding it here would break re-running the precipitation cells.
for t2m_group in [ds_min, ds_max, ds_tm]:
    _align_coords_to_obs(t2m_group, var_list)

time -----converting to----- time
lon -----converting to----- lon
lat -----converting to----- lat
Time -----converting to----- time
longitude -----converting to----- lon
latitude -----converting to----- lat
Time -----converting to----- time
longitude -----converting to----- lon
latitude -----converting to----- lat
time -----converting to----- time
lon -----converting to----- lon
lat -----converting to----- lat
Time -----converting to----- time
longitude -----converting to----- lon
latitude -----converting to----- lat
Time -----converting to----- time
longitude -----converting to----- lon
latitude -----converting to----- lat
time -----converting to----- time
lon -----converting to----- lon
lat -----converting to----- lat
Time -----converting to----- time
longitude -----converting to----- lon
latitude -----converting to----- lat
Time -----converting to----- time
longitude -----converting to----- lon
latitude -----converting to----- lat


### 气温极端指数计算

In [10]:
def calc_extreme_t2m( varmin_temp, varmax_temp, varmean_temp ):
    """
    Compute extreme-temperature indices with xclim, freq = 'AS'.

    Parameters
    ----------
    varmin_temp : xr.DataArray
        Minimum temperature series (treated as degC).
    varmax_temp : xr.DataArray
        Maximum temperature series (treated as degC).
    varmean_temp : xr.DataArray
        Mean temperature series. Kept so existing three-argument calls still work;
        none of the indices below uses it.

    Returns
    -------
    xr.Dataset
        Variables 'daily_temperature_range', 'daily_temperature_range_variability',
        'heat_wave_total_length' and 'heat_wave_max_length', created on the coords
        of the first index.
    """
    import xclim as xclim
    xclim.core.options.set_options(data_validation='log')

    # Build relabelled copies explicitly. Renaming inside a `for` loop only rebinds the
    # loop variable, so the parameters were never replaced and the units reached xclim
    # only through attribute sharing with the caller's arrays. `assign_attrs` returns a
    # new object, so the inputs are left untouched.
    tasmin = varmin_temp.rename("t2m").assign_attrs(units = "degC")
    tasmax = varmax_temp.rename("t2m").assign_attrs(units = "degC")

    # thresh_tasmin of -273.15 degC is met by any physical temperature, so only the
    # 35 degC tasmax threshold decides what counts as a heat-wave day.
    heat_wave_kwargs = dict(
        freq = "AS",
        thresh_tasmin = '-273.15 degC',
        thresh_tasmax = '35.0 degC',
        window = 3,
    )

    dict_out_temp = {}
    dict_out_temp['daily_temperature_range'] = xclim.indicators.atmos.daily_temperature_range(
        tasmin, tasmax, freq = "AS")
    dict_out_temp['daily_temperature_range_variability'] = xclim.indicators.atmos.daily_temperature_range_variability(
        tasmin, tasmax, freq = "AS")
    # NOTE(review): this one goes through xclim.indices while the others use
    # xclim.indicators.atmos - confirm that is intentional.
    dict_out_temp['heat_wave_total_length'] = xclim.indices.heat_wave_total_length(
        tasmin, tasmax, **heat_wave_kwargs)
    dict_out_temp['heat_wave_max_length'] = xclim.indicators.atmos.heat_wave_max_length(
        tasmin, tasmax, **heat_wave_kwargs)

    # Not computed at the moment: heat_wave_frequency (22 / 30 degC thresholds, window 3)
    # and first_day_above (22 degC, after 04-01, window 1).

    # The output dataset takes its coordinates from the first index computed.
    first_index = next(iter(dict_out_temp.values()))
    ds_out_temp = xr.Dataset(coords = first_index.coords)
    for name, index in dict_out_temp.items():
        ds_out_temp[name] = index

    return ds_out_temp

def wrapper(func, args): # without star
    """
    Call ``func`` with the items of ``args`` unpacked as positional arguments,
    so a whole argument list can be passed as one object.
    """
    positional = tuple(args)
    return func(*positional)

In [11]:
# Daily time axis covering every day of 1998-2017; it is the same for all sources,
# so it is built once before the loop.
full_days   = pd.date_range('1998-01-01', '2017-12-31', freq = 'D')
allyeartime = xr.DataArray(full_days, name = 'time', coords = {"time":full_days}, dims = 'time')

out_dir = "/raid52/yycheng/MPAS/REFERENCE/TEMP_DATA_large/t2m/extreme/"

for mod_name in ['obs', 'vr', 'rcm']:
    print("dealing " + mod_name)

    mod_time  = ds_tm[mod_name].time
    # Order matters: it must match calc_extreme_t2m(varmin, varmax, varmean).
    list_minmaxmean = [ds_min[mod_name], ds_max[mod_name], ds_tm[mod_name]]

    # Keep April-May and June-August days respectively.
    var_4to5 = [i.isel({"time":mod_time.dt.month.isin([4,5])}) for i in list_minmaxmean]
    var_6to8 = [i.isel({"time":mod_time.dt.month.isin([6,7,8])}) for i in list_minmaxmean]

    # Pad each seasonal slice to the full daily axis (uncovered days become NaN).
    var_4to5_expand = [expand_timecoords(i, allyeartime) for i in var_4to5]
    var_6to8_expand = [expand_timecoords(i, allyeartime) for i in var_6to8]

    # Pass the padded arrays on, as the precipitation workflow does. The un-padded
    # slices were passed before, leaving the expansion unused and letting day-to-day
    # differences run across the gap between one year's season and the next.
    ds_out_temp_4to5 = calc_extreme_t2m(*var_4to5_expand)
    ds_out_temp_6to8 = calc_extreme_t2m(*var_6to8_expand)

    ds_out_temp_4to5.to_netcdf(out_dir + "extreme_" + mod_name + "_4-5.nc")
    ds_out_temp_6to8.to_netcdf(out_dir + "extreme_" + mod_name + "_6-8.nc")

dealing obs


  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])


dealing vr


  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])


dealing rcm


  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
  _check_cell_methods(
  check_valid(vardata, "standard_name", data["standard_name"])
