In [None]:
import xarray as xr
import numpy  as np
import pandas as pd
import proplot as pplt
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity
import copy

# 进行空间pattern的PDF分布的绘制
2021.07.15

需要先计算四个区域各自的区域平均时间序列，并拆分为 AM 和 JJA 两个时段；然后分别计算 AM、JJA 的经验分布函数并绘图。

读取数据计算平均的函数

-   计算am jja两个时间段
-   处理为空间pattern

2022.01.30

-   计算整体的MJJA时间段的情况

2022.02.26

-   不再使用核密度估计，直接使用bins + histogram统计一定范围内的频率
-   合并T2m + Precip

## 数据读取

### 日降水

In [None]:
# Daily precipitation: for every sub-region and every data source (obs / vr / rcm),
# keep only the May-August (MJJA) time steps and average over the time dimension.
# Results are collected in a nested dict laid out as
#   partial_mean['precip'][region][source]['mjja']
partial_list = ["NE", "NC","NWC", "YZ", "SC","SW"]
path_in = "/raid52/yycheng/MPAS/REFERENCE/TEMP_DATA_large/partial_pre"
file_ref= "obsmerge_pre_98-17.nc"
file_vr = "vr_pre_98-17.nc"
file_rcm = "rcm_pre_98-17.nc"

partial_mean = {'precip': {}}  # top-level container, keyed by variable name

for iregion in partial_list:
    path_in_partial = "/".join([path_in, iregion, "ordata"])  # per-region data directory
    print(path_in_partial)

    # obs: variable 'premerge'; its time dimension is named 'time'
    xr_obs = xr.open_dataset("/".join([path_in_partial, file_ref]))['premerge']
    xr_obs = xr_obs.reset_coords(names = 'lev', drop = True)  # drop the superfluous 'lev' coordinate
    obs_in_mjja = xr_obs.time.dt.month.isin([5,6,7,8])

    # vr: variable 'precip_MPAS'; its time dimension is named 'Time'
    xr_vr = xr.open_dataset("/".join([path_in_partial, file_vr]))['precip_MPAS']
    vr_in_mjja = xr_vr.Time.dt.month.isin([5,6,7,8])

    # rcm: same variable / dimension names as vr
    xr_rcm = xr.open_dataset("/".join([path_in_partial, file_rcm]))['precip_MPAS']
    rcm_in_mjja = xr_rcm.Time.dt.month.isin([5,6,7,8])

    # Insertion order (obs, vr, rcm) is kept because later cells iterate over these keys.
    partial_mean['precip'][iregion] = {
        'obs': {'mjja': xr_obs.sel(time = obs_in_mjja).mean(['time'])},
        'vr':  {'mjja': xr_vr.sel(Time = vr_in_mjja).mean(['Time'])},
        'rcm': {'mjja': xr_rcm.sel(Time = rcm_in_mjja).mean(['Time'])},
    }

### 日平均气温

In [None]:
# Daily-mean 2 m air temperature: for every sub-region and every data source
# (obs / vr / rcm), keep only the May-August (MJJA) time steps and average over
# the time dimension. Results are stored next to the precipitation entries as
#   partial_mean['t2m'][region][source]['mjja']
# (partial_mean itself is created by the precipitation cell above).
partial_list = ["NE", "NC", "YZ", "SC",'SW','NWC']
path_in = "/raid52/yycheng/MPAS/REFERENCE/TEMP_DATA_large/partial_t2m"
file_ref= "mask_sel_CN05.1_Tm_1961_2018_daily_025x025.nc"
file_vr = "mask_mean_t2m_98-17_VR.nc"
file_rcm = "mask_mean_t2m_98-17_RCM.nc"

partial_mean['t2m'] = {}

for iregion in partial_list:
    path_in_partial = "/".join([path_in, iregion, "ordata"])  # per-region data directory
    print(path_in_partial)

    # obs: variable 'tm'; its time dimension is named 'time'
    # NOTE(review): the obs file name says 1961-2018 while the model files say 98-17;
    # no year filter is applied here -- confirm the obs file was already cut to the same period.
    xr_obs = xr.open_dataset("/".join([path_in_partial, file_ref]))['tm']
    obs_in_mjja = xr_obs.time.dt.month.isin([5,6,7,8])

    # vr: variable 't2m'; its time dimension is named 'Time'
    xr_vr = xr.open_dataset("/".join([path_in_partial, file_vr]))['t2m']
    vr_in_mjja = xr_vr.Time.dt.month.isin([5,6,7,8])

    # rcm: same variable / dimension names as vr
    xr_rcm = xr.open_dataset("/".join([path_in_partial, file_rcm]))['t2m']
    rcm_in_mjja = xr_rcm.Time.dt.month.isin([5,6,7,8])

    # 273.15 is subtracted from the model means only (presumably Kelvin -> degC,
    # with obs already in degC -- TODO confirm units of the input files).
    # Insertion order (obs, vr, rcm) is kept because later cells iterate over these keys.
    partial_mean['t2m'][iregion] = {
        'obs': {'mjja': xr_obs.sel(time = obs_in_mjja).mean(['time'])},
        'vr':  {'mjja': xr_vr.sel(Time = vr_in_mjja).mean(['Time']) - 273.15},
        'rcm': {'mjja': xr_rcm.sel(Time = rcm_in_mjja).mean(['Time']) - 273.15},
    }

## 计算bins下的频率分布

In [None]:
def get_bins_interval(bins):
    """
    Build one text label per histogram bin from a sequence of bin edges.

    Consecutive edges are paired into intervals. Every label is written as
    left-closed / right-open, e.g. "[0,2)", except the last one, which is
    written as closed on both sides, e.g. "[4,6]".

    Parameters
    ----------
    bins : sliceable sequence of bin edges (e.g. a list or a 1-D array).

    Returns
    -------
    list of str
        len(bins) - 1 labels; an empty list when fewer than two edges are given.
    """
    lower_edges = bins[:-1]
    upper_edges = bins[1:]
    last_pos = len(lower_edges) - 1  # only this interval gets the closing "]"
    return [
        f"[{lower!s},{upper!s}" + ("]" if pos == last_pos else ")")
        for pos, (lower, upper) in enumerate(zip(lower_edges, upper_edges))
    ]

# ----- containers for the frequency estimates -----
# Each one starts as a deep copy of partial_mean so that it already has the same
# [variable][region][source][season] layout; the leaves are overwritten below.
var_hist   = copy.deepcopy(partial_mean)  # normalised histograms
var_kernel = copy.deepcopy(partial_mean)  # slot for kernel-density results (not filled in this cell)
var_bins   = copy.deepcopy(partial_mean)  # bin edges (x axis)

# ----- bin edges and their text labels, per variable -----
freq_bins = {
    'precip': np.arange(0,42,2),
    't2m': np.arange(-12,40,4),
}
str_bins = {name: get_bins_interval(edges) for name, edges in freq_bins.items()}

# ----- one table per (variable, region): rows = bin labels, columns = data sources -----
df_plot = {
    name: {
        region: pd.DataFrame(
            columns=pd.Index(["obs", "vr","rcm"], name='freq bins'),
            index=str_bins[name])
        for region in regions
    }
    for name, regions in partial_mean.items()
}

# ----- fill the containers -----
for vartype, regions in partial_mean.items():
    edges = freq_bins[vartype]
    for iregion, sources in regions.items():
        for imod, seasons in sources.items():
            for iseason, field in seasons.items():
                # Count the values per bin; np.histogram ignores values outside the edges,
                # so the frequencies below are relative to the in-range values only.
                counts, _edges_out = np.histogram(field.values, bins=edges)
                freq = counts / counts.sum()  # manual normalisation to relative frequency

                var_hist[vartype][iregion][imod][iseason] = freq
                var_bins[vartype][iregion][imod][iseason] = edges
                # column of the plotting table for this data source
                df_plot[vartype][iregion][imod] = freq

## 绘图部分

### 绘制频率图

In [None]:
# 4 x 3 grid of panels with independent axes (share = 0):
#   panels 0-5  -> precipitation frequency, one panel per region
#   panels 6-11 -> 2 m temperature frequency, same region order
fig, axs = pplt.subplots(ncols = 3, nrows = 4, share = 0)

# Rows (bins) of the t2m frequency table that are drawn for each region.
t2m_rows = {
    'NE': slice(4, None), 'NC': slice(4, None),
    'SC': slice(5, None), 'SW': slice(5, None), 'YZ': slice(5, None),
    'NWC': slice(1, -1),
}
source_colors = ['g','r','b']  # obs, vr, rcm

for axs_index, iregion in enumerate(['NE', 'NC', 'NWC', 'SC', 'SW', 'YZ']):
    ax_precip = axs[axs_index]
    ax_t2m = axs[axs_index + 6]

    # precip: only the first 10 bins are shown
    ax_precip.bar(df_plot['precip'][iregion][0:10], cycle = source_colors)
    ax_precip.format(title = f'{iregion}' ,titleweight='bold',titleloc='uc')

    # t2m: region-dependent subset of bins; the handles of the last panel feed the legend
    barplot = ax_t2m.bar(df_plot['t2m'][iregion][t2m_rows[iregion]], cycle = source_colors)
    ax_t2m.format(title = f'{iregion}' ,titleweight='bold',titleloc='uc')

# ----- section titles and axis labels -----
axs[0].format(ltitle = 'daily precipitation')
axs[4].format(xlabel = "units:[mm/d]")
axs[6].format(ltitle = 'surface air temperature')
axs[10].format(xlabel = "units:[" + r"$^{\circ}C$" + "]")
axs[:,0].format(ylabel = "frequency")

# ----- settings common to all panels -----
axs.format(
    abc = True,
    abcloc="ul",
    ylim = (0,0.95),
    ylocator = 0.1,
    xticklabelsize = 5,
)

# ----- add legend -----
axs[2].legend(barplot, loc='lr', ncols=1, frame=False, labels = ['OBS','VR','RCM'], title = '')

# ----- save figure -----
plt.savefig("./output_pic/pre&t2m_SpatialFreq.2022.02.16.png", dpi = 600, facecolor = "white")