In [5]:
import os
from datetime import datetime
import numpy as np
import pandas as pd
import xarray as xr

def read_snow17_sac_forcing(file):
    """Read a Snow17/SAC lumped forcing text file into a date-indexed DataFrame.

    The first 4 lines of the file are skipped as header. Every remaining line
    is whitespace-separated: columns 0-2 hold year, month and day, column 3 is
    not used, and columns 4-10 hold the seven forcing variables.

    Parameters
    ----------
    file : str, path-like or file-like
        Anything accepted by ``np.loadtxt``.

    Returns
    -------
    pandas.DataFrame
        Float columns 'Dayl(s)', 'PRCP(mm/day)', 'SRAD(W/m2)', 'SWE(mm)',
        'Tmax(C)', 'Tmin(C)', 'Vp(Pa)', indexed by 'Date'.
    """
    # Parse the file once, as text. ndmin=2 keeps the table two-dimensional
    # even when the file holds a single data row, so the column slicing below
    # cannot fail with "too many indices".
    table = np.loadtxt(file, dtype=str, skiprows=4, ndmin=2)

    # convert time: year/month/day columns -> datetime objects
    dates = [datetime.strptime(' '.join(row[:3]), '%Y %m %d') for row in table]

    # numeric forcing columns 4..10 (inclusive)
    values = table[:, 4:10+1].astype(float)

    # create dataframe (time, data)
    df = pd.DataFrame(values, columns=['Dayl(s)','PRCP(mm/day)','SRAD(W/m2)',
                                       'SWE(mm)','Tmax(C)','Tmin(C)','Vp(Pa)'])
    df['Date'] = dates
    df = df.set_index('Date')
    return df

# ---- Run configuration ----
# NOTE(review): every path below is absolute and user-specific; edit before running elsewhere.
# Directory holding the weight file and the forcing template.
source_code_dir = '/glade/u/home/hongli/github/2020_04_21nldas_gmet/snow17_sac'

# Alternative basin (region 17 files), kept for reference:
# target_polyid = '13310700'
# weight_file = os.path.join(source_code_dir, 'region_17_lump_weights.nc')
# forcing_tpl = os.path.join(source_code_dir, '13310700_lump_nldas_forcing_leap.txt')
# # source (hydro-c1):/d2/anewman/region_weights/nldas/region_17_lump_weights.nc
# # source (hydro-c1):/d5/anewman/basin_forcing_data/nldas/17/13310700_lump_nldas_forcing_leap.txt

# Basin processed by this run: polygon id, its weight file and its forcing template.
target_polyid = '09081600' 
weight_file = os.path.join(source_code_dir, 'region_14_lump_weights.nc')
forcing_tpl = os.path.join(source_code_dir, '09081600_lump_nldas_forcing_leap.txt')
# source: /d2/anewman/region_weights/nldas/region_14_lump_weights.nc
# source (hydro-c1):/d5/anewman/basin_forcing_data/nldas/14/09081600_lump_nldas_forcing_leap.txt

# Gridded ensemble input: one file per (year, member), named <basename>.<year>.<member>.nc
source_ens_dr = '/glade/u/home/hongli/scratch/2020_04_21nldas_gmet/test_uniform_perturb/18212grids/gmet_ens_bc'
forcing_basename = 'ens_forc' #ens_forc.2000.086.nc

# Period (inclusive on both ends) whose forcings are replaced in the template.
start_time = '2005/10/01'
end_time = '2006/09/30'
time_fmt = '%Y/%m/%d'
start_time_obj = datetime.strptime(start_time,time_fmt)
end_time_obj = datetime.strptime(end_time,time_fmt)

start_yr = start_time_obj.year
end_yr = end_time_obj.year
yr_num = end_yr-start_yr+1                        # calendar years touched by the period
day_num = (end_time_obj-start_time_obj).days+1    # days in the period, both ends included
ens_num = 100                                     # ensemble members; files are numbered 1..ens_num

# Output: one sub-directory per basin.
root_dir = '/glade/u/home/hongli/scratch/2020_04_21nldas_gmet/test_uniform_perturb/18212grids/gmet_ens_snow17_sac'
out_dir = os.path.join(root_dir, target_polyid)
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(out_dir, exist_ok=True)
file_basename = target_polyid+'_lump_nldas_forcing_leap'

# PART1. identify overlapping grid lat/lon and weight
print('# PART1. identify overlapping grid lat/lon and weight')
# Pull the needed variables into memory inside a `with` block so the file
# handle is closed afterwards (previously it was left open).
with xr.open_dataset(weight_file) as f:
    polyid = f.variables['polyid'].values
    weight = f.variables['weight'].values
    overlaps = f.variables['overlaps'].values
    latitude = f.variables['latitude'].values
    longitude = f.variables['longitude'].values

# Row of the target polygon; list.index raises ValueError if the id is absent.
target_index = list(polyid).index(target_polyid)
# Number of grid cells overlapping the target polygon; only the first
# `overlap_num` entries of each row are used. int() makes it a plain Python
# integer so it is always a valid slice bound.
overlap_num = int(overlaps[target_index])

overlap_lats = latitude[target_index, 0:overlap_num]
overlap_lons = longitude[target_index, 0:overlap_num]
overlap_weights = weight[target_index, 0:overlap_num]

# PART2. identify y/x index for overlapping grid lat/lon and weight
print('# PART2. identify y/x index ')
# Member 1 of the first year is used only to read the grid coordinates.
forcing_file = os.path.join(source_ens_dr,('%s.%d.%003d.nc')%(forcing_basename,start_yr,1))
# Read the coordinate axes and close the file right away (the handle used to leak).
with xr.open_dataset(forcing_file) as f:
    latitude = list(f.variables['latitude'].values[:,0])   #(y,x)=(224,464) -> (y)=(224)
    longitude = list(f.variables['longitude'].values[0,:]) #(y,x)=(224,464) -> (x)=(464)

# Exact-match lookup after rounding the weight-file coordinates to 4 decimals;
# list.index raises ValueError when a coordinate is not on the forcing grid.
# NOTE(review): relies on the grid coordinates being exactly equal to the rounded
# values - confirm if the grid or its dtype changes.
y_index = [latitude.index(round(lat,4)) for lat in overlap_lats]
x_index = [longitude.index(round(lon,4)) for lon in overlap_lons]

# PART3. extract ensemble forcings
print('# PART3. extract ensemble forcings')

# The template is the same for every member: parse it and read its lines once
# instead of once per member.
n_header = 4  # header lines copied verbatim (same count the template reader skips)
tpl_df = read_snow17_sac_forcing(forcing_tpl)
with open(forcing_tpl, 'r') as f_in:
    tpl_lines = f_in.readlines()

for mb in range(ens_num):
    print(mb+1)

    # Collect each year's arrays and concatenate once after the loop.
    time_parts, pcp_parts, t_min_parts, t_max_parts = [], [], [], []
    for yr in range(start_yr, end_yr+1):
        forcing_file = os.path.join(source_ens_dr,('%s.%d.%003d.nc')%(forcing_basename,yr,mb+1))
        # `with` closes each yearly file; previously one handle leaked per (member, year).
        with xr.open_dataset(forcing_file) as f:
            time_parts.append(f.variables['time'].values)
            # extract overlapping grids (time,overlap_num): y_index/x_index are
            # paired, so this picks one cell per overlap. Subsetting per year
            # avoids concatenating whole-grid arrays.
            pcp_parts.append(f.variables['pcp'].values[:,y_index,x_index])
            t_min_parts.append(f.variables['t_min'].values[:,y_index,x_index])
            t_max_parts.append(f.variables['t_max'].values[:,y_index,x_index])

    # extract useful time period (time_useful,overlap_num)
    time_concat = pd.DatetimeIndex(np.concatenate(time_parts, axis=0))
    mask = (time_concat>=start_time_obj) & (time_concat<=end_time_obj)
    overlap_pcp = np.concatenate(pcp_parts, axis=0)[mask,:]
    overlap_t_min = np.concatenate(t_min_parts, axis=0)[mask,:]
    overlap_t_max = np.concatenate(t_max_parts, axis=0)[mask,:]

    # calculate weighted sum (time_useful)
    lump_pcp = np.matmul(overlap_pcp,overlap_weights)
    lump_t_min = np.matmul(overlap_t_min,overlap_weights)
    lump_t_max = np.matmul(overlap_t_max,overlap_weights)

    # update reference forcing data on a fresh copy of the template.
    # .loc is required here: .at only accepts scalar row/column labels and
    # rejects a slice with an array value on current pandas.
    df = tpl_df.copy()
    df.loc[start_time_obj:end_time_obj,'PRCP(mm/day)'] = lump_pcp
    df.loc[start_time_obj:end_time_obj,'Tmax(C)'] = lump_t_max
    df.loc[start_time_obj:end_time_obj,'Tmin(C)'] = lump_t_min

    prcp_vals = df['PRCP(mm/day)'].values
    t_max_vals = df['Tmax(C)'].values
    t_min_vals = df['Tmin(C)'].values

    # write ensemble forcing txt
    ofile = os.path.join(out_dir, ('%s_%003d.txt')%(file_basename,mb+1)) #'13310700_lump_nldas_forcing_leap.txt'
    with open(ofile,'w') as f_out:
        irow = 0  # index of the current data row in df
        for iline, line in enumerate(tpl_lines):
            if iline < n_header:
                f_out.write(line)
                continue
            splits = line.split()
            if not splits:
                # A blank line has no fields to patch and np.loadtxt skipped it
                # when building df; copy it through unchanged.
                f_out.write(line)
                continue
            # Field 5 is PRCP; the third- and second-to-last fields are Tmax and Tmin.
            splits[5] = ('%.2f')%(prcp_vals[irow])
            splits[-3] = ('%.2f')%(t_max_vals[irow])
            splits[-2] = ('%.2f')%(t_min_vals[irow])
            f_out.write('\t'.join(splits))
            f_out.write('\n')
            irow += 1

print('Done')

# PART1. identify overlapping grid lat/lon and weight
# PART2. identify y/x index 
# PART3. extract ensemble forcings
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
Done


In [2]:
# Inspect the polygon ids read from the weight file's 'polyid' variable.
# NOTE(review): this cell's execution count (In [2]) precedes the cell that defines
# `polyid` (In [5]); on a fresh kernel it must be run after that cell.
polyid

array(['12010000', '12056500', '14092750', '12145500', '14020000',
       '12186000', '12381400', '13340000', '12383500', '14303200',
       '12043000', '14138800', '14400000', '13011900', '12115500',
       '14362250', '13331500', '12092000', '14158790', '12189500',
       '13083000', '12141300', '14137000', '14166500', '14187000',
       '14301000', '10396000', '13340600', '12025000', '14138900',
       '13338500', '12041200', '12143600', '14185900', '12095000',
       '12048000', '12147600', '14141500', '12374250', '13161500',
       '12115000', '12013500', '12082500', '14306500', '12411000',
       '12375900', '14306340', '12488500', '12020000', '12167000',
       '12040500', '12388400', '12073500', '12054000', '13240000',
       '14182500', '14158500', '13235000', '12025700', '12390700',
       '13313000', '14096850', '14154500', '12451000', '12377150',
       '14139800', '12114500', '13011500', '12414500', '12147500',
       '14222500', '14138870', '12035000', '14316700', '124473

In [4]:
# Inspect `df`, the forcing DataFrame built inside the ensemble loop.
# NOTE(review): execution count In [4] precedes In [5], so the output shown came
# from kernel state of an earlier run.
df

Unnamed: 0_level_0,Dayl(s),PRCP(mm/day),SRAD(W/m2),SWE(mm),Tmax(C),Tmin(C),Vp(Pa)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1980-01-01,31236.10,9.68,131.43,0.0,-1.03,-1.03,517.04
1980-01-02,31308.86,1.31,188.29,0.0,-3.96,-3.96,402.22
1980-01-03,31407.67,2.68,194.72,0.0,-2.30,-2.30,450.44
1980-01-04,31449.60,5.02,144.25,0.0,-2.80,-2.80,453.40
1980-01-05,31449.60,27.12,115.70,0.0,-1.29,-1.29,487.83
...,...,...,...,...,...,...,...
2014-12-27,31104.00,0.00,146.20,0.0,-10.40,-10.40,240.21
2014-12-28,31104.00,12.71,164.88,0.0,-5.86,-5.86,337.70
2014-12-29,31104.00,2.17,176.54,0.0,-8.05,-8.05,288.81
2014-12-30,31118.72,0.16,218.80,0.0,-14.12,-14.12,166.80


In [6]:
# Show the rows of `df` between start_time_obj and end_time_obj,
# i.e. the period whose forcings the ensemble loop overwrites.
df[start_time_obj:end_time_obj] 

Unnamed: 0_level_0,Dayl(s),PRCP(mm/day),SRAD(W/m2),SWE(mm),Tmax(C),Tmin(C),Vp(Pa)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
