In [1]:
%matplotlib inline
import argparse
import os
import subprocess
import time

import matplotlib.pyplot as plt
import numpy as np
from netCDF4 import Dataset, default_fillvals
from scipy.spatial import cKDTree

# Fill values for missing data.
# FILL_VALUE is a plain integer sentinel; FILLVALUE_F is the netCDF4 default
# fill value registered under the 'f8' type code stored in NC_DOUBLE.
# NOTE(review): none of these three names is referenced in the cells visible
# here -- confirm whether they are still needed.
FILL_VALUE = -9999
NC_DOUBLE = 'f8'
FILLVALUE_F = default_fillvals[NC_DOUBLE]

import pandas as pd
import xarray as xr

from collections import OrderedDict

In [2]:
# Path of the 25km NetCDF parameter file (only the path is built here;
# nothing is opened in this cell)
ncparams_25km_filename = "vic_params_wr25b_vic4.dev_20180307.nc"
direc = '/Users/diana/Dropbox/UW/Research/rasm/data/25km'
ncparams_25km = os.path.join(direc, ncparams_25km_filename)

# Paths of the ASCII parameter files, which all sit in the same directory
ascii_params_direc = '/Users/diana/Dropbox/UW/Research/rasm/data/25km/new_25km_ascii_params'
soil_file, veg_file, snow_file = (
    os.path.join(ascii_params_direc, ascii_name)
    for ascii_name in ('soil_params_25km.txt',
                       'veg_params_25km.txt',
                       'snow_params_25km.txt')
)

In [3]:
# Read the 50km soil parameter file: whitespace-separated values with no
# header row, so the columns get default integer labels.
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
soil_50km = pd.read_csv(os.path.join('/Users/diana/Dropbox/UW/Research/rasm/data/25km',
                                     'rasm.vic.soil.1090.20140818'),
                        sep=r'\s+',
                        index_col=None,
                        header=None
                        )

In [4]:
# Read the 50km snow band file: whitespace-separated values with no header
# row, so the columns get default integer labels.
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
snow_50km = pd.read_csv(os.path.join('/Users/diana/Dropbox/UW/Research/rasm/data/25km',
                                     'snow_param_wr50a.062012'),
                        sep=r'\s+',
                        index_col=None,
                        header=None
                        )

In [46]:
# Load the 50km vegetation parameter file with tab as the separator and no
# header row.
# NOTE(review): later cells only read column 0 and split it on whitespace,
# so each line presumably arrives as one string -- confirm the file holds
# no tab characters.
veg_50km_path = os.path.join('/Users/diana/Dropbox/UW/Research/rasm/data/25km',
                             'veg_param_wr50a.100924')
veg_50km = pd.read_csv(veg_50km_path, header=None, sep='\t')

In [6]:
# Read the 50km initial-state file: whitespace-separated values with no
# header row, so the columns get default integer labels.
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
initdata_50km = pd.read_csv(os.path.join('/Users/diana/Dropbox/UW/Research/rasm/data/25km',
                                         'vic_initdata_wr50a.08311989'),
                            sep=r'\s+',
                            index_col=None,
                            header=None
                            )

In [7]:
direc = '/Users/diana/Dropbox/UW/Research/rasm/data/25km'

# 25km domain: longitudes ('xc'), latitudes ('yc') and mask as plain arrays
domain_25km_filename = 'domain.lnd.wr25b_ar9v4.170413.nc'
domain_25km = xr.open_dataset(os.path.join(direc, domain_25km_filename))
lon_25km, lat_25km, mask_25km = (
    domain_25km[field].values for field in ('xc', 'yc', 'mask')
)

# 50km domain: the same three fields
domain_50km_filename = 'domain.lnd.wr50a_ar9v4.100920.nc'
domain_50km = xr.open_dataset(os.path.join(direc, domain_50km_filename))
lon_50km, lat_50km, mask_50km = (
    domain_50km[field].values for field in ('xc', 'yc', 'mask')
)

In [8]:
def lon_lat_to_cartesian(lon, lat, R = 1):
    """
    Convert longitude/latitude given in degrees to Cartesian x, y, z
    coordinates of the matching point on a sphere of radius R centered
    on the origin.

    lon and lat may be scalars or arrays; they are passed straight to
    numpy, so the three returned values follow numpy's rules for the
    inputs' shapes.
    """
    lon_r = np.radians(lon)
    lat_r = np.radians(lat)

    # distance of the point from the z (polar) axis
    axis_dist = R * np.cos(lat_r)

    return axis_dist * np.cos(lon_r), axis_dist * np.sin(lon_r), R * np.sin(lat_r)

In [9]:
# Cartesian coordinates (default radius) of the coarser 50km grid points
xs, ys, zs = lon_lat_to_cartesian(lon_50km.ravel(), lat_50km.ravel())

# Cartesian coordinates of the higher-resolution 25km grid points
xt, yt, zt = lon_lat_to_cartesian(lon_25km.ravel(), lat_25km.ravel())

# KD tree over the 50km points, one (x, y, z) row per point
zipped_50km = np.column_stack((xs, ys, zs))
tree = cKDTree(zipped_50km)

# For every 25km point: distance to its nearest 50km point and that point's
# index in the flattened 50km array
zipped_25km = np.column_stack((xt, yt, zt))
d, inds = tree.query(zipped_25km, k=1)

soil parameters file

In [10]:
# create an empty DataFrame that the following cells fill, column by column,
# with the 25km soil parameters
soil_params = pd.DataFrame()

In [11]:
# Copy every 50km soil column onto the 25km grid: each 25km point takes the
# value of its nearest 50km point (positions in `inds`). The reshape only
# succeeds when a column holds exactly 205 * 275 values.
num_columns = len(soil_50km.columns)
for column, label_50km in enumerate(soil_50km.columns):
    var_50km = soil_50km[label_50km].values.reshape(205, 275)
    nearest_var = var_50km.ravel()[inds]
    soil_params[column] = nearest_var

In [12]:
# Gridcell numbers 1..N for the 25km grid, one per 25km point. N comes from
# `inds` (one nearest-neighbor index per 25km point) instead of the
# `nearest_var` variable left behind by the column loop of the previous cell,
# so this cell no longer depends on that loop having run.
num_gridcells_25km = len(inds)
gridcell_nums_25km = np.linspace(1, num_gridcells_25km, num=num_gridcells_25km, dtype='int')

# replace gridcell number column
# NOTE(review): this writes column label 2, not the first column -- confirm
# against the layout of the soil parameter file
soil_params[2] = gridcell_nums_25km

# replace 50km lats with 25km lats
soil_params[3] = lat_25km.flatten()

# replace 50km lons with 25km lons
soil_params[4] = lon_25km.flatten()

In [18]:
# Round the listed columns to 4 decimal places, then write the 25km soil
# parameter file as space-separated text without index or header
direc = '/Users/diana/Dropbox/UW/Research/rasm/data/25km'
for rounded_column in (3, 4, 26, 32, 33, 41, 44):
    soil_params[rounded_column] = soil_params[rounded_column].round(4)
soil_params_path = os.path.join(direc, 'soil_params_20190329.txt')
soil_params.to_csv(soil_params_path, sep=' ', header=False, index=False)

snow bands file 

In [21]:
# create an empty DataFrame that the following cells fill, column by column,
# with the 25km snow band parameters
snow_params = pd.DataFrame()

In [22]:
# Copy every 50km snow band column onto the 25km grid: each 25km point takes
# the value of its nearest 50km point (positions in `inds`). The reshape only
# succeeds when a column holds exactly 205 * 275 values.
num_columns = len(snow_50km.columns)
for column, label_50km in enumerate(snow_50km.columns):
    var_50km = snow_50km[label_50km].values.reshape(205, 275)
    nearest_var = var_50km.ravel()[inds]
    snow_params[column] = nearest_var

In [23]:
# column 0 was filled from the nearest 50km gridcell like every other column;
# overwrite it with the 25km gridcell numbers instead
snow_params[0] = gridcell_nums_25km

In [24]:
# Write the 25km snow bands file as space-separated text without index or
# header
direc = '/Users/diana/Dropbox/UW/Research/rasm/data/25km'
snow_bands_path = os.path.join(direc, 'snow_bands_20190124.txt')
# column 6 is rounded to 5 decimal places to match the original file
snow_params[6] = snow_params[6].round(5)
snow_params.to_csv(snow_bands_path, sep=' ', header=False, index=False)

init data file (initial state)

In [25]:
# create an empty DataFrame that the following cell fills, column by column,
# with the 25km initial state
init_state = pd.DataFrame()

In [26]:
# Copy every 50km initial-state column onto the 25km grid: each 25km point
# takes the value of its nearest 50km point (positions in `inds`). The
# reshape only succeeds when a column holds exactly 205 * 275 values.
num_columns = len(initdata_50km.columns)
for column, label_50km in enumerate(initdata_50km.columns):
    var_50km = initdata_50km[label_50km].values.reshape(205, 275)
    nearest_var = var_50km.ravel()[inds]
    init_state[column] = nearest_var

# column 0 must hold the 25km gridcell number, not the number copied from
# the nearest 50km gridcell
init_state[0] = gridcell_nums_25km

In [27]:
# Write the 25km initial state file as space-separated text without index
# or header
direc = '/Users/diana/Dropbox/UW/Research/rasm/data/25km'
# (column, decimal places) pairs, chosen to match the original file
for state_column, decimals in ((1, 4), (2, 4), (3, 4), (4, 3)):
    init_state[state_column] = init_state[state_column].round(decimals)
init_state_path = os.path.join(direc, 'init_state_20190124.txt')
init_state.to_csv(init_state_path, sep=' ', header=False, index=False)

veg parameter file

In [47]:
# Split each line of the veg file (column 0) on single spaces into separate
# columns, then name the column of leading tokens "gridcell_number"
veg_50km_split = veg_50km.iloc[:, 0].str.split(' ', expand=True)
veg_50km_split = veg_50km_split.rename(
    columns={veg_50km_split.columns[0]: "gridcell_number"}
)

In [31]:
# `inds` holds positions in the flattened 50km array; adding 1 turns them
# into the gridcell numbers that the veg cell below looks up
nn_gridcells = inds + 1

In [23]:
# Build the lines of the 25km veg parameter file. Disabled by default; set
# `veg = True` to run it. Every 25km gridcell gets a header line
# "<25km gridcell number> <second token of the matching 50km line>", followed
# by the lines copied from the 50km file that sit below the matching line.
veg = False
if veg:
    veg_params = OrderedDict()

    # array of 25km gridcell nums: gridcell_nums_25km
    # array of nearest neighbor indices: inds
    # veg param gridcells go from 1-56375. So nearest neighbor gridcells are inds + 1

    # time.clock() was removed in Python 3.8; perf_counter() replaces it
    start = time.perf_counter()

    # Loop invariants, computed once rather than once per 25km gridcell
    veg_lines_50km = veg_50km.values
    num_veg_lines_50km = len(veg_lines_50km)

    # Row of the first line whose leading token equals a given string. This
    # replaces a scan of the whole veg_50km_split frame per 25km gridcell.
    # setdefault keeps the first match, exactly as `.index[0]` did; a
    # gridcell number that is absent now raises KeyError (was IndexError).
    first_row_for_token = {}
    for row_label, token in zip(veg_50km_split.index,
                                veg_50km_split['gridcell_number'].values):
        first_row_for_token.setdefault(token, row_label)

    veg_params_row_count = 0
    for i, num_gc_25km in enumerate(gridcell_nums_25km):
        nn_gc = nn_gridcells[i]

        if nn_gc <= 34:
            # NOTE(review): nearest neighbors numbered 34 or lower are always
            # written with 0 veg types; the reason is not visible here
            veg_params[veg_params_row_count] = np.array('%s 0' %num_gc_25km)
            veg_params_row_count += 1
        else:
            row_num = first_row_for_token[str(nn_gc)]
            active_veg_types = veg_lines_50km[row_num][0].split()[1]
            print("gridcell num is %f, nn gc is %f, row num is %f and avt is %s" %(num_gc_25km, 
                                                                                   nn_gc, 
                                                                                   row_num, 
                                                                                   active_veg_types))
            if active_veg_types == '0':
                veg_params[veg_params_row_count] = np.array('%s 0' %num_gc_25km)
                veg_params_row_count += 1
            else:
                veg_params[veg_params_row_count] = np.array('%s %s' %(num_gc_25km, active_veg_types))
                veg_params_row_count += 1
                # Copy the lines below the header for as long as they have
                # more than two tokens. The bound check stops the copy at the
                # end of the file instead of indexing past the last line.
                param_loop_num = row_num + 1
                while (param_loop_num < num_veg_lines_50km
                       and len(veg_lines_50km[param_loop_num][0].split()) > 2):
                    veg_params[veg_params_row_count] = veg_lines_50km[param_loop_num][0]
                    param_loop_num += 1
                    veg_params_row_count += 1

    # elapsed time in seconds
    print(time.perf_counter() - start)

    df_veg_params = pd.DataFrame.from_dict(veg_params, orient='index')

In [24]:
# Write the veg parameter lines to disk; runs only when the veg cell above
# was enabled and has produced df_veg_params
if veg:
    direc = '/Users/diana/Dropbox/UW/Research/rasm/data/25km'
    veg_params_path = os.path.join(direc, 'veg_params_20190327.txt')
    # neither the index nor a header row is written
    df_veg_params.to_csv(veg_params_path, header=False, index=False)
    print("finished making veg parameter file")