In [24]:
import os, argparse
import numpy as np
import matplotlib as mpl
import matplotlib.rcsetup as rcsetup
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
from netCDF4 import Dataset, num2date 
from datetime import datetime
from descartes.patch import PolygonPatch
from PIL import Image

# Path of the configuration file; the main block passes it to read_confg_file()
# instead of taking the path from a command-line argument.
# NOTE(review): absolute, user-specific path - adjust before running elsewhere.
conf_file = '/glade/u/home/hongli/work/sharp/basins/tayprk_huc12/scripts_hongli/step9_config_upco.txt'

def process_command_line():
    '''Build the argument parser and return the parsed command line.

    The only (positional) argument is ``conf_file``, the path of the
    configuration file; it is available as ``.conf_file`` on the result.
    '''
    cli = argparse.ArgumentParser(
        description='Script to plot netcdf variables for user specified HUCs and time period.')
    cli.add_argument('conf_file', help='path of configure file.')
    return cli.parse_args()

def read_confg_file(conf_file):
    '''Return the entries of a configuration file as a list of strings.

    Blank lines and lines whose first non-blank character is '!' are skipped.
    On every other line the text from the first '!' onward is discarded and
    the remainder is stripped of surrounding whitespace. File order is kept.
    '''
    with open(conf_file, 'r') as handle:
        stripped_lines = (raw.strip() for raw in handle)
        return [entry.partition('!')[0].strip()
                for entry in stripped_lines
                if entry and not entry.startswith('!')]

def _stack_images(image_paths, out_path, vertical):
    '''Paste several image files into one RGB image and save it.

    image_paths: paths of the images, pasted in the given order.
    out_path:    path of the combined image to write.
    vertical:    True stacks the images top to bottom (left-aligned);
                 False places them left to right (top-aligned).
    '''
    # First pass: collect the sizes so the canvas can be allocated once.
    sizes = []
    for path in image_paths:
        with Image.open(path) as im:  # context manager releases the file handle
            sizes.append(im.size)
    widths = [size[0] for size in sizes]
    heights = [size[1] for size in sizes]

    if vertical:
        canvas_size = (max(widths), sum(heights))
    else:
        canvas_size = (sum(widths), max(heights))
    canvas = Image.new('RGB', canvas_size)

    # Second pass: paste each image at the running offset along the stack axis.
    offset = 0
    for path in image_paths:
        with Image.open(path) as im:
            if vertical:
                canvas.paste(im, (0, offset))
                offset += im.height
            else:
                canvas.paste(im, (offset, 0))
                offset += im.width
    canvas.save(out_path)


if __name__ == '__main__':

    # Read the configuration. In a notebook the path comes from the module-level
    # conf_file; when run as a script, process_command_line().conf_file can be
    # passed to read_confg_file() instead.
    conf = read_confg_file(conf_file)
    if len(conf) < 7:
        raise ValueError('Configuration file must provide 7 entries, found %d.' % len(conf))

    geo_file = conf[0]
    nc_file = conf[1]
    hucID_file = conf[2]
    var_names = [name.strip() for name in conf[3].split(',')]
    start_time_str = conf[4]
    end_time_str = conf[5]
    output_dir = conf[6]

    start_time = datetime.strptime(start_time_str, '%Y-%m-%d %H:%M:%S')
    end_time = datetime.strptime(end_time_str, '%Y-%m-%d %H:%M:%S')

    # __file__ is undefined in a notebook kernel, hence the hard-coded fallback.
    try:
        root_dir = os.path.dirname(os.path.realpath(__file__))
    except NameError:
        root_dir = '/glade/u/home/hongli/work/sharp/basins/tayprk_huc12/scripts_hongli'
    out_dir = os.path.join(root_dir, output_dir)
    os.makedirs(out_dir, exist_ok=True)

    # read the IDs to subset (blank lines, e.g. a trailing newline, are ignored)
    with open(hucID_file, 'r') as id_file:
        ids = [int(line) for line in id_file if line.strip()]
    id_set = set(ids)

    # subset shapefile based on hucID
    geo_handle = gpd.read_file(geo_file)
    criterion = geo_handle['HUC12'].map(lambda x: int(x) in id_set)
    geo_selected = geo_handle[criterion]

    var_figs = []
    # The context manager closes the netCDF file even if plotting fails.
    with Dataset(nc_file) as nc:
        # subset netcdf based on hucID and time
        hruId = np.ma.getdata(nc.variables['hruId'][:])
        time_var = nc.variables['time']
        time = num2date(time_var[:], time_var.units)

        hruId_index = np.isin(hruId, ids)
        time_index = np.array([(x >= start_time) and (x <= end_time) for x in time],
                              dtype=bool)

        hruId_matched = hruId[hruId_index]
        hruId_subset = [str(x) for x in hruId_matched]
        time_subset = time[time_index]
        if len(hruId_subset) == 0 or len(time_subset) == 0:
            raise SystemExit('Please provide a valid hruId or time period.')

        for var_name in var_names:
            print(var_name)

            # subset variable data; the indexing below treats axis 0 as time
            # and axis 1 as HRU
            nc_var = nc.variables[var_name]
            var_value = nc_var[:]
            var_unit = nc_var.units
            var_longname = nc_var.long_name
            if var_unit == "K":
                var_value = var_value - 273.15
                var_unit = r"$^\circ$C"
                cmap_str = 'Reds'
            elif var_unit == "kg m-2 s-1":
                # rate -> depth per time step
                # NOTE(review): assumes 3-hourly data - confirm
                var_value = var_value * 3 * 3600
                var_unit = "mm/month"  # unit after the monthly sum below
                cmap_str = 'Blues'
            else:
                cmap_str = 'jet'
            var_subset = var_value[:, hruId_index][time_index, :]

            # construct a dataframe with hruId
            df1 = pd.DataFrame(data={'HUC12': hruId_subset})

            # calculate monthly average for T / sum for everything else.
            # Columns are labelled with the IDs actually matched in the netCDF
            # file (not the raw ID list), so a missing or duplicated ID in the
            # ID file cannot cause a shape mismatch.
            df3 = pd.DataFrame(var_subset, index=time_subset, columns=hruId_matched)
            if var_name == 'airtemp':
                df4 = df3.resample('M', label='right').mean()
            else:
                df4 = df3.resample('M', label='right').sum()

            # one row per HRU (same order as df1), one column per month
            df5 = df4.T
            df5.index = df1.index
            df5.columns = df5.columns.map(lambda t: t.strftime('%Y-%m'))

            var_frame = pd.concat([df1, df5], axis=1)
            # shared colour range for all months; NaN-aware in case of masked data
            var_min = np.nanmin(df5.values)
            var_max = np.nanmax(df5.values)

            # join netcdf dataframe with shapefile geo-dataframe (once for each variable)
            geo_subset = geo_selected.merge(var_frame, on='HUC12')

            # plot each time step
            temp_figs = []
            cbar_label = var_longname.capitalize() + ' (' + var_unit + ')'
            for t in var_frame.columns[1:]:  # the first column is HUC12, so skip it
                temp_fig = os.path.join(out_dir, 'image_' + t + '.png')

                ax = geo_subset.plot(column=t, legend=False, vmin=var_min, vmax=var_max,
                                     cmap=cmap_str, alpha=0.9, edgecolor='grey',
                                     linewidth=0.5)
                fig = ax.get_figure()

                # the polygon collection carries the colormap and value range
                fig.colorbar(ax.collections[0], ax=ax, orientation='horizontal',
                             label=cbar_label, shrink=0.7)

                ax.set_xlabel('Longitude', fontsize='large')
                ax.set_ylabel('Latitude', fontsize='large')
                ax.set_title(var_longname.capitalize() + '  ' + t, fontsize='large')

                fig.set_size_inches(10, 10)
                fig.savefig(temp_fig)
                plt.close(fig)
                temp_figs.append(temp_fig)

            # concatenate different time step plots into one plot (per variable)
            var_fig = os.path.join(out_dir, var_name + '.png')
            _stack_images(temp_figs, var_fig, vertical=True)
            var_figs.append(var_fig)

            # remove only the temporary images created above
            for temp_fig in temp_figs:
                os.remove(temp_fig)
            # df1, df4, geo_subset and var_frame are intentionally left in scope:
            # the consistency-check cells further down read them.

    del geo_handle

    # concatenate different variable plots into one plot
    output_fig_file = '{0:%Y}'.format(time_subset[0]) + '_monthly.png'
    _stack_images(var_figs, os.path.join(out_dir, output_fig_file), vertical=False)

    print('Done')

airtemp
pptrate
Done


In [72]:
# Check HUC12 order/consistency before concatenating the two dataframes.
# Count the positions where the IDs differ (0 means consistent). A plain sum of
# (a1 - a2) can be zero for mismatched IDs because positive and negative
# differences cancel out.
a1 = df4.T.index.values
a2 = np.asarray([int(x) for x in df1['HUC12'].values])
np.count_nonzero(a1 != a2)

In [19]:
# check the HUC12 and variable value consistency after geopandas merging
# locate the row(s) holding this HUC12 in the merged geo-dataframe ...
np.where(geo_subset['HUC12']=='130100011008')
# ... and in the plain variable dataframe
np.where(var_frame['HUC12']=='130100011008')[0]
# compare the '2014-01' value of that HUC12 in both frames
# NOTE(review): 535 and 0 are hard-coded row labels, presumably taken from the
# two lookups above - re-check them if the inputs change
geo_subset['2014-01'][535], var_frame['2014-01'][0]

(array([535]),)