In [1]:
#!/usr/bin/env python
# coding: utf-8

from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
from pyproj import Proj
import os, shutil
import numpy as np
import xarray as xr
from itertools import chain

def plot_basemap(llcrnrlon,llcrnrlat,urcrnrlon,urcrnrlat,ax,lat_0,lon_0,ny,nx):

    m = Basemap(llcrnrlon,llcrnrlat,urcrnrlon,urcrnrlat,resolution='l',projection='cyl', ax=ax)   

#     m.drawmapboundary(color='grey', linewidth=1.5)
    m.drawstates(linewidth=1, linestyle='solid', color='grey')
    m.drawcountries(linewidth=1, linestyle='solid', color='k')
    m.drawcoastlines(linewidth=.75, linestyle='solid', color='k')
#     m.drawrivers(linewidth=0.5, linestyle='solid', color='blue')
   
    return m

# ===============================================================================
root_dir = '/glade/u/home/hongli/scratch/2020_04_21nldas_gmet'
grid_info_file = os.path.join(root_dir,'data/nldas_topo/conus_ens_grid_eighth.nc')

outfolder = 'scripts/step1_sample_stnlist_random'
if os.path.exists(os.path.join(root_dir, outfolder)):
    shutil.rmtree(os.path.join(root_dir, outfolder))
os.makedirs(os.path.join(root_dir, outfolder))
ofile_name_base = 'stnlist'
np.random.seed(40)
dpi_value = 90

# ==========================================================================================
# read NLDAS grid info
f = xr.open_dataset(os.path.join(root_dir,grid_info_file))
mask = f['mask'].values[:] # 1 is valid. 0 is invalid. 
latitude = f['latitude'].values[:] 
longitude = f['longitude'].values[:] 
elev = f['elev'].values[:] 
gradient_n_s = f['gradient_n_s'].values[:] 
gradient_w_e = f['gradient_w_e'].values[:] 

(ny,nx)=np.shape(mask)
(y_ids,x_ids)=np.where(mask==1)
total_stn_num = len(y_ids)

stn_index = np.arange(total_stn_num)
choice_perctls = np.arange(0.1,1.1,0.1) # (start, end, interval)
choice_nums = [int(round(p*total_stn_num)) for p in choice_perctls]

# ==========================================================================================
# uniformly sample and save
print('save stnlist.txt')
choice_num_previous = 0
for i,choice_num in enumerate(choice_nums):
    
    choice_perctl = choice_perctls[i]
    choice_index = np.random.choice(stn_index, size=choice_num, replace=False)
    
    if choice_num!=choice_num_previous:
        print('choice perctl = '+str(choice_perctl)+', choice num = '+str(choice_num))

        ofile = ofile_name_base +'_'+str('%05d' %(choice_num))+'grids'+ '_perctl'+str(int(choice_perctl*100))+'.txt'
        f_out = open(os.path.join(root_dir, outfolder, ofile), 'w') 
        f_out.write('NSITES\t'+str(choice_num)+'\n') # total number line
        f_out.write('STA_ID LAT LON ELEV SLP_N SLP_E STA_NAME\n') # title line
        for j in range(len(choice_index)):
            choice_row = y_ids[choice_index[j]]
            choice_col = x_ids[choice_index[j]]
            
            sta_id = 'Row'+str('%03d' %(choice_row))+'Col'+str('%03d' %(choice_col))
            lat_i=latitude[choice_row,choice_col]
            lon_i=longitude[choice_row,choice_col]
            ele_i=elev[choice_row,choice_col]
            gradient_n_s_i=gradient_n_s[choice_row,choice_col]
            gradient_w_e_i=gradient_w_e[choice_row,choice_col]
            stn_name = '"'+sta_id+'"'
            f_out.write('%s, %f, %f, %f, %f, %f, %s\n' \
                        % (sta_id, lat_i, lon_i, ele_i, gradient_n_s_i, gradient_w_e_i, stn_name)) 
        f_out.close()
        choice_num_previous=choice_num        

# ==========================================================================================
print('plot distribution')
llcrnrlon=longitude[0,0]-1
llcrnrlat=latitude[0,0]-1
urcrnrlon=longitude[-1,-1]+1
urcrnrlat=latitude[-1,-1]+1

lat_0=0.5*(llcrnrlat+urcrnrlat)
lon_0=0.5*(llcrnrlon+urcrnrlon)

stnlist_files = [f for f in os.listdir(os.path.join(root_dir, outfolder)) if ofile_name_base in f]
stnlist_files = sorted(stnlist_files)

# plot
ncol = 3
nrow = int(np.ceil(len(stnlist_files)/ncol))

fig, ax = plt.subplots(nrow, ncol)
fig.set_figwidth(4.5*ncol) 
fig.set_figheight(4.5*0.75*nrow)

for i in range(nrow):
    for j in range(ncol):

        k = i*ncol+j            
        if k<len(stnlist_files):  

            # read sampled stnlist.txt
            stnlist_file = os.path.join(root_dir, outfolder, stnlist_files[k])
            perctl = int(stnlist_files[k].split('.')[0].split('_')[2].split('perctl')[1])
            data = np.loadtxt(stnlist_file, skiprows=2, usecols=[1,2],delimiter=',') #STA_ID[0], LAT[1], LON[2], ELEV[3], SLP_N[4], SLP_E[5], STA_NAME[6]
            stn_num = len(data)
            stn_lons = [float(data[i][1]) for i in range(stn_num)]
            stn_lats = [float(data[i][0]) for i in range(stn_num)]
            print(str(stn_num) +' Grids')

            m = plot_basemap(llcrnrlon,llcrnrlat,urcrnrlon,urcrnrlat,ax[i,j],lat_0,lon_0,ny,nx) # plot Basemap 

            x, y = m(stn_lons,stn_lats) # convert the lat/lon values to x/y projections.
            m.plot(x, y, 'bs', markersize=0.5) # plot sampeld grid points

            # set title
            title_str = '('+chr(ord('a') + k) +') ' + str(stn_num)  +' samples ('+str(perctl)+'%)'
            ax[i,j].set_title(title_str, fontsize='small', fontweight='semibold')

        else: # blank axis
            ax[i,j].axis('off')

# save plot
fig.tight_layout()
ofile = 'sample_grids_dist.png'
fig.savefig(os.path.join(root_dir, outfolder, ofile), dpi=dpi_value)
plt.close(fig)    

print('Done')


KeyError: 'PROJ_LIB'

In [4]:
choice_index

array([70878, 48739, 70824, ..., 19265, 60509, 57141])

In [10]:
mask

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1]], dtype=int32)

In [15]:
y_ids,x_ids=np.where(mask==1)
y_ids,x_ids

(array([  0,   0,   0, ..., 223, 223, 223]),
 array([101, 102, 103, ..., 461, 462, 463]))

In [23]:
mask[72,378],elev[0,0]

(1, nan)