In [1]:
### Make CSV of CONUS2 flow at matched gages ###
### DTT, 05/23

# This script is split into two main parts: 1) reading in CONUS2 gridded total (aggregated) daily flow for the full domain and creating a dataframe of CONUS2 flow for cells that have been matched with USGS gages in the `NWM_Gage_Adjustments_final.csv`. 2) matching the gages that both have flow between the PF csv and USGS csv retrieved from hydrodata. 
# Note that flow is converted in this script from daily accumulated flow in [m^3/h] to daily mean flow in cms or [m^3/s].

### Inputs:
# - `NWM_Gage_Adjustments_final.csv` - this can be found on the CONUS2 Dropbox or in /glade/p/univ/ucsm0002/CONUS2/domain_files
# - Daily total streamflow PFCLM outputs as PFBs - processed using `compute_daily_PF_averages.py`
# - USGS daily flow csv - from the hydrodata repository on Verde

### Outputs:
# - CSV of PFCLM daily mean flow (cms) with gage ID, lat/long, and CONUS2 cell location
# - two flow-matched CSVs for PF and USGS flow
# - note that the CSV outputs label days starting at 'day 001', which is the first day of the water year (001 == October 1)***

# Notes:
# - need to fix no_days: currently it is only accurate if the run starts at the beginning of a water year. Need to add a dictionary or if-statement that specifies the number of days in each month (or something similar).
# - ***need to change day headings so that they are more descriptive than 'day 001' and have an actual date

import sys
from parflow.tools.io import read_pfb,write_pfb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime


# Directory where the daily PF flow PFBs are saved
directory_out = '/glade/p/univ/ucsm0002/CONUS2/CONUS2.spinup.WY2003/averages'
# Directory holding the organized USGS CSVs read by this notebook and the CSVs it writes
organized_dir = '/glade/work/tijerina/PFCONUS2-analysis/scripts/Validation/Streamflow/Organized_Daily_Flow'

obs_data_file = 'Streamflow_USGS_obs_daily_avg_WY2003.csv' #csv of USGS flow from hydrodata
metadata_file = 'Streamflow_USGS_obs_metadata_daily_avg_WY2003.csv' #csv of USGS gage metadata from hydrodata

# CONUS2 grid dimensions: the squeezed daily PFB is indexed [y, x] with shape (ny, nx)
ny = 3256
nx = 4442

# water year to process (change this to process a different year)
water_year = 2003

# date variables for datetime for wateryear
# A water year is named for the calendar year in which it ENDS, so WY2003 runs
# 2002-10-01 .. 2003-09-30 (these are the first/last date columns of the USGS csv).
start_date = datetime.date(water_year - 1, 10, 1)
end_date = datetime.date(water_year, 9, 30)

# Inclusive number of days in the water year, derived from the dates so that it
# is 365 for a normal year and 366 when the water year contains Feb 29.
no_days = (end_date - start_date).days + 1

# daily time step for reading daily files (this could change depending on how you are analyzing)
# delta time
delta = datetime.timedelta(days=1)

In [2]:
# Read observation data and organize (one row per gage, indexed by site_id)
obs_data = pd.read_csv(f'{organized_dir}/{obs_data_file}', index_col=['site_id'])
obs_data = obs_data.drop(columns=['Unnamed: 0'])
# remove sites with less than 365 days of observations
obs_data = obs_data.loc[(obs_data['num_obs']==365)]
# Read metadata and organize
metadata = pd.read_csv(f'{organized_dir}/{metadata_file}', index_col=['site_id'])
metadata = metadata.drop(columns=['Unnamed: 0'])
# also remove the sites with less than 365 obs from the metadata
# .copy() makes the filtered frame independent of the frame it was selected from,
# so the column assignment below does not raise SettingWithCopyWarning
metadata = metadata[metadata.index.isin(obs_data.index)].copy()

# add number of observations column from the obs_data df (aligned on site_id)
metadata['num_obs'] = obs_data['num_obs']
# remove num_obs from data so we can sum and calc stats
obs_data = obs_data.drop(columns=['num_obs'])

In [3]:
# Display the gage metadata that was kept after filtering to sites with 365 observations
metadata

Unnamed: 0_level_0,site_type,agency,site_name,site_id_agency,site_query_url,date_metadata_last_updated,latitude,longitude,state,huc,...,topo_cd,instruments_cd,construction_dt,inventory_dt,drain_area_va,contrib_drain_area_va,local_time_fg,reliability_cd,project_no,num_obs
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,stream gauge,USGS,"Allagash River near Allagash, Maine",1011000,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,47.069722,-69.079444,ME,1010002,...,,YNNNYNNNNYNNNNNNYNNNNNNNNNNNNN,,,1478.00,1229.00,Y,,442300100.0,365
1013500,stream gauge,USGS,"Fish River near Fort Kent, Maine",1013500,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,47.237500,-68.582778,ME,1010003,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,873.00,873.00,Y,,442300100.0,365
1015800,stream gauge,USGS,"Aroostook River near Masardis, Maine",1015800,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.523056,-68.371667,ME,1010004,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,892.00,892.00,Y,,442300100.0,365
1017000,stream gauge,USGS,"Aroostook River at Washburn, Maine",1017000,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.777222,-68.157222,ME,1010004,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,1654.00,1654.00,Y,,442300100.0,365
1017550,stream gauge,USGS,"Williams Brook at Phair, Maine",1017550,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.628056,-67.953056,ME,1010005,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,3.82,3.82,Y,,442300100.0,365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14400000,stream gauge,USGS,"CHETCO RIVER NEAR BROOKINGS, OR",14400000,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,42.123443,-124.187311,OR,17100312,...,C,NNNNYNNNNNNNNNNNNNNNNNNNNNNNNN,,,271.00,,Y,C,,365
393109104464500,stream gauge,USGS,"CHERRY CREEK NEAR PARKER, CO",393109104464500,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,39.519156,-104.779697,CO,10190003,...,,YYNNYNNNNYNNNNNNNNNNNNNNNNNNNN,,,287.00,,Y,,460800120,365
394839104570300,stream gauge,USGS,"SAND CREEK AT MOUTH NR COMMERCE CITY,CO",394839104570300,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,39.810972,-104.951583,CO,10190003,...,C,NYNNYNNNNYNNNNNNNNNNNNNNNNNNNN,,19920128.0,187.00,,Y,,460800120,365
401733105392404,stream gauge,USGS,THE LOCH OUTLET - LOCH VALE,401733105392404,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,40.293056,-105.654444,CO,10190006,...,,NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN,,,2.62,,Y,,,365


In [65]:
# Display the observed daily flow table: one row per gage (site_id), one column per date
obs_data

Unnamed: 0_level_0,2002-10-01,2002-10-02,2002-10-03,2002-10-04,2002-10-05,2002-10-06,2002-10-07,2002-10-08,2002-10-09,2002-10-10,...,2003-09-21,2003-09-22,2003-09-23,2003-09-24,2003-09-25,2003-09-26,2003-09-27,2003-09-28,2003-09-29,2003-09-30
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,7.414600,7.386300,6.990100,6.565600,6.367500,6.197700,5.744900,5.065700,4.528000,4.414800,...,12.650100,12.367100,11.942600,12.706700,13.697200,13.612300,13.159500,14.008500,66.222000,76.127000
1013500,3.509200,3.424300,3.169600,3.113000,3.339400,2.858300,2.858300,2.631900,2.405500,2.504550,...,14.376400,13.810400,13.612300,13.555700,13.159500,12.253900,11.942600,15.763100,58.298000,51.223000
1015800,6.707100,6.226000,5.716600,5.377000,5.150600,5.037400,4.754400,4.499700,4.160100,4.188400,...,3.763900,3.650700,3.537500,3.594100,3.735600,3.792200,3.707300,4.216700,25.413400,60.845000
1017000,9.876700,9.367300,8.461700,7.952300,7.810800,7.131600,7.046700,6.678800,6.282600,6.282600,...,7.075000,6.905200,6.792000,6.820300,6.820300,6.848600,6.763700,7.895700,50.657000,147.726000
1017550,0.023772,0.024338,0.020659,0.018395,0.019810,0.019810,0.018678,0.018678,0.018961,0.021791,...,0.080655,0.043016,0.049525,0.073580,0.053204,0.026319,0.023206,0.063675,0.113200,0.105559
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14400000,1.845160,1.842330,1.831010,1.847990,1.876290,1.870630,1.839500,1.811200,1.785730,1.774410,...,2.462100,2.433800,2.433800,2.433800,2.405500,2.405500,2.405500,2.377200,2.377200,2.377200
393109104464500,0.048110,0.281868,0.249889,0.219325,0.191591,0.169517,0.162442,0.158197,0.150273,0.145179,...,0.173762,0.171215,0.176309,0.178573,0.181120,0.180554,0.180554,0.178573,0.181686,0.177441
394839104570300,1.267840,2.810190,1.239540,0.687690,0.478270,0.447140,0.427330,0.413180,0.393370,0.416010,...,0.280736,0.267718,0.242248,0.239701,0.238852,0.256398,0.302810,0.260360,0.265737,0.248757
401733105392404,0.071316,0.071316,0.069618,0.068203,0.066788,0.065373,0.063675,0.062260,0.060845,0.059147,...,0.156499,0.125086,0.093390,0.087447,0.080655,0.078674,0.078674,0.078674,0.074995,0.070467


In [4]:
# (number of gages, number of daily columns)
obs_data.shape

(5714, 365)

# Need to finish the Obs data annual average

In [5]:
# Sum the daily values across all date columns for each gage (axis=1 collapses the columns).
# NOTE: this is the sum of the daily values, not an average; the annual average
# mentioned in the note above this cell is not computed yet.
obs_data_sum = obs_data.sum(axis=1)
obs_data_sum

site_id
1011000            15270.340400
1013500            14383.027860
1015800            12261.456100
1017000            24401.816500
1017550               52.182936
                       ...     
14400000           25202.148990
393109104464500      105.331185
394839104570300      559.956535
401733105392404       74.228070
402114105350101      596.102427
Length: 5714, dtype: float64

In [6]:
### set up pandas dataframe for PFCONUS2 flow, indexed by the same gage ids as obs_data
# The frame starts with no columns; passing the obs_data date columns is left commented out below.
pf_flow_df = pd.DataFrame(index = obs_data.index)#, columns = obs_data.columns

In [7]:
# Display the (column-less) PF flow frame to confirm its index holds the gage ids
pf_flow_df

1011000
1013500
1015800
1017000
1017550
...
14400000
393109104464500
394839104570300
401733105392404
402114105350101


In [20]:
# pf_flow = np.zeros(obs_data.shape) #pf_wtd_df = np.zeros((no_days, ny, nx))
# pf_flow.shape


In [67]:
# Read the first daily flow PFB of the water year as a sanity check.
# The file holds daily accumulated flow in m^3/h: /3600 converts to m^3/s (cms)
# and /24 converts the daily accumulation to a daily mean.
# The year in the file name comes from `water_year` instead of being hard-coded.
test = np.squeeze(read_pfb(f'{directory_out}/flow.{water_year}.daily.001.pfb'))/3600/24
test.shape

(3256, 4442)

In [68]:
# Spot-check the day-001 daily mean flow (cms) at grid cell y=2783, x=4210 (array is indexed [y, x])
test[2783,4210]

37.266463978539505

In [69]:
# Spot-check the day-001 daily mean flow (cms) at grid cell y=2810, x=4237 (array is indexed [y, x])
test[2810,4237]

18.686342036256757

In [70]:
# Spot-check the day-001 daily mean flow (cms) at grid cell y=2747, x=4276 (array is indexed [y, x])
test[2747,4276]

30.46872761299625

In [36]:
# Scratch 1-D array of float zeros with 5714 entries
# (5714 is the number of gage rows reported by obs_data.shape)
testarray = np.zeros((5714,), dtype=float)
testarray.shape

(5714,)

In [42]:
# Pre-allocate the gage-by-day PF flow table with the same shape as obs_data (gages x days)
pf_flow_matched = np.zeros(obs_data.shape)
pf_flow_matched.shape

(5714, 365)

In [None]:
# make CONUS2 x and y into integer arrays so they can index the grid
# NOTE(review): assumes the rows of `metadata` are in the same gage order as
# `obs_data` (row j of both is the same site_id) -- confirm.
conusy = np.asarray(metadata['conus2_y'],dtype = 'int')
conusx = np.asarray(metadata['conus2_x'],dtype = 'int')

# Read one file per observation column so the PF table lines up with obs_data
# day-for-day (the previous `range(3)` debug limit left every later day as zeros).
n_days = obs_data.shape[1]

# NOTE: the full-domain array is large (n_days * ny * nx float64 values).
pf_flow_array = np.zeros((n_days, ny, nx))
pf_flow_matched = np.zeros(obs_data.shape)

# Gages flagged with a negative grid index are outside of the CONUS2 domain:
# mark their whole row as missing once instead of once per day.
in_domain = (conusy >= 0) & (conusx >= 0)
n_outside = int(np.count_nonzero(~in_domain))
if n_outside:
    print(f'{n_outside} gages are outside of CONUS2 range and are set to NaN')
pf_flow_matched[~in_domain, :] = np.nan
gage_y = conusy[in_domain]
gage_x = conusx[in_domain]

### READ STREAMFLOW PFBs ###
#Read in CONUS2 daily streamflow PFBs, convert from total accumulated in m^3/h to mean daily in cms
for i in range(n_days):
    step = str(int(i+1)).rjust(3, '0')  # zero-padded day of water year: 001, 002, ...
    pfb_path = f'{directory_out}/flow.{water_year}.daily.{step}.pfb'
    print(pfb_path)
    pf_flow_pfb = np.squeeze(read_pfb(pfb_path))
    pf_flow_array[i,...] = pf_flow_pfb/3600/24 # CONVERT FROM m^3/h to cms AND from daily accumulated to daily mean
    print(f'reading flow for day {step} and converting from total accumulated flow in m^3/h, to daily mean flow in cms')
    # pick the matched grid cell for every in-domain gage in one vectorized lookup
    pf_flow_matched[in_domain, i] = pf_flow_array[i, gage_y, gage_x]
    
    
    

# for i in range(5):
#     step = str(int(i+1)).rjust(3, '0')
#     print(f'{directory_out}/flow.2003.daily.{step}.pfb')
#     pf_flow_pfb = np.squeeze(read_pfb(f'{directory_out}/flow.2003.daily.{step}.pfb'))
#     pf_flow_df[start_date] = pf_flow_pfb[metadata.conus2_y,metadata.]/3600/24 # CONVERT FROM m^3/h to cms AND from daily accumulated to daily mean
#     print(f'reading flow for day {step} and converting from total accumulated flow in m^3/h, to daily mean flow in cms')
#     start_date += delta

    
#     # if np.isnan(conus2_y[i]) == True:
#     #     pf_flow_df[i,...] = 'nan'
#     #     print('Gage is outside of CONUS2 range')
#     # else:
#     pf_flow[...,i] = pf_flow_pfb[int(conus2_y[i]),int(conus2_x[i])]/3600/24 # CONVERT FROM m^3/h to cms AND from daily accumulated to daily mean
#     #pf_flow_df[start_date] = pf_flow_pfb[int(conus2_y),int(conus2_x)]/3600/24 # CONVERT FROM m^3/h to cms AND from daily accumulated to daily mean
#     
#     start_date += delta
#     #pf_flow_df[i] = pf_flow[i]
                    

        
# for index, row in pf_flow_df.iterrows():
#     step = str(int(i+1)).rjust(3, '0')
#     print(step)
#     print(pf_flow_df.index[i])
#     print(f'{directory_out}/flow.2003.daily.{step}.pfb')
#     pf_flow_pfb = np.squeeze(read_pfb(f'{directory_out}/flow.2003.daily.{step}.pfb'))
#     if np.isnan(conus2_y[i]) == True:
#         pf_flow_df[i,...] = 'nan'
#         print('Gage is outside of CONUS2 range')
#     else:
#         pf_flow_df[i] = pf_flow_pfb[int(conus2_y[i]),int(conus2_x[i])]/3600/24 # CONVERT FROM m^3/h to cms AND from daily accumulated to daily mean
    
    
# SAVE OUT PANDAS DF FOR CONUS2 FLOW
### save csv file of all matching gage locations and CONUS2 daily flow, note the USGS STNID's drop the leading zeros when saving
#pf_flow_df.to_csv(f'{organized_dir}/CONUS2_matched_flow_{water_year}.csv', sep = ",")

In [81]:
# Wrap the matched PF flow array in a DataFrame that shares obs_data's
# gage index (rows) and date headers (columns)
column_headers = list(obs_data.columns.values)
pf_flow_match_df = pd.DataFrame(
    pf_flow_matched,
    index=obs_data.index,
    columns=column_headers,
)
# write the table to csv
pf_flow_match_df.to_csv(f'{organized_dir}/PFCONUS2_Daily_matched_flow_cms{water_year}.csv', sep = ",")

In [None]:
# Calculate Annual Average Flow for PFCLM and save
# Mean over axis 0 (the day axis of pf_flow_array) gives one annual-mean value per grid cell.
# NOTE: any day of pf_flow_array that was never filled is still zero and is included in the mean.
pf_flow_avg = np.mean(pf_flow_array, axis = 0)
# file name is built from water_year so it stays consistent with the other outputs
np.savetxt(f'{organized_dir}/PFCONUS2_Annual_Avg_Flow_cms_WY{water_year}.csv', pf_flow_avg, delimiter=",")

In [None]:
# (days, ny, nx) of the full-domain daily mean flow array
pf_flow_array.shape

In [None]:
# NOTE(review): `pf_flow_match` is first assigned in the cell below, so on a fresh
# kernel (Restart & Run All) this line raises NameError -- confirm the intended cell order.
pf_flow_match.shape

In [None]:
# Extract the PF flow at every gage cell from the already-loaded pf_flow_array
# (rows = gages in obs_data order, columns = days).
pf_flow_match = np.zeros(obs_data.shape)

# Gages flagged with a negative grid index are outside of the CONUS2 domain -> NaN
valid_gage = (conusy >= 0) & (conusx >= 0)
pf_flow_match[~valid_gage, :] = np.nan

# never read past either array if their day counts differ
days_available = min(pf_flow_array.shape[0], pf_flow_match.shape[1])

for j in range(days_available):
    day_array = pf_flow_array[j,...]
    # The lookup uses the gage's own cell from conusy/conusx; the previous version
    # indexed with the stale loop variable `i` and the undefined conus2_y/conus2_x.
    pf_flow_match[valid_gage, j] = day_array[conusy[valid_gage], conusx[valid_gage]]

# `start_date` is intentionally not advanced here: incrementing it once per day
# shifted the configured start date every time this cell was run.
                    

In [None]:
###
### read in csv of gage locations and CONUS2 flow
# NOTE(review): no cell in this notebook writes `CONUS2_NWM_matched_flow_{water_year}.csv`
# (the save above writes `PFCONUS2_Daily_matched_flow_cms{water_year}.csv`) -- confirm
# that this is the file, index column and column set intended here.
pf_flow_df = pd.read_csv(f'{organized_dir}/CONUS2_NWM_matched_flow_{water_year}.csv',index_col=['STNID'])
pf_flow_df = pf_flow_df.drop(columns=['Unnamed: 0','matched']) #drop unnecessary columns for now
pf_flow_df.index.names = ['site_id']

### read in USGS flow for all gages with flow in October
# streamflow from hydrodata has already been converted to cms!!!
# `obs_data_file` is the USGS daily flow csv defined in the config cell; the name
# `usgs_data` used here before was never defined and raised NameError.
usgs_flow_df = pd.read_csv(f'{organized_dir}/{obs_data_file}',index_col=['site_id'])
usgs_flow_df = usgs_flow_df.drop(columns=['Unnamed: 0', 'num_obs'])

# # merge the CONUS2 and USGS dataframes so that we remove all gage locations that don't match between the two
# combine_df = pf_flow_df.merge(usgs_flow_df, on='site_id', how='inner')
# combine_df

# find matching gage locations (index = 'site_id') between the PF flow and USGS flow
# result is a list of matching indices ('match_index') which is then used to parse down the PF and USGS flow dataframes
pf_flow_index = pf_flow_df.index
usgs_flow_index = usgs_flow_df.index
match_index = pf_flow_index.intersection(usgs_flow_index)
print(f'There are {len(match_index)} matching gages between PF and USGS!')

In [None]:
# Keep only the PF rows whose gage id also appears in the USGS table
# (note: this rebinds `pf_flow_matched`, which held a numpy array earlier in the notebook)
pf_in_both = pf_flow_df.index.isin(match_index)
pf_flow_matched = pf_flow_df.loc[pf_in_both]
print(pf_flow_matched.shape)
pf_flow_matched.head()

In [None]:
# Keep only the USGS rows whose gage id also appears in the PF table, then put
# them in the same row order as the PF dataframe
usgs_in_both = usgs_flow_df.index.isin(match_index)
usgs_flow_matched = (
    usgs_flow_df
    .loc[usgs_in_both]
    .reindex(index=pf_flow_matched.index)
)
print(usgs_flow_matched.shape)
usgs_flow_matched

### Reorganize metadata for flow matched gages

In [None]:
### load the USGS streamflow gage metadata (all gages with flow in October), indexed by site_id
usgs_gage_metadata = (
    pd.read_csv(f'{organized_dir}/USGS_WY2003_oct_raw_metadata.csv',index_col=['site_id'])
    .drop(columns=['Unnamed: 0'])  # leftover index column written by an earlier to_csv
)
usgs_gage_metadata.shape

In [None]:
# Keep only the metadata rows for gages present in both PF and USGS, then put
# them in the same row order as the PF dataframe
meta_in_both = usgs_gage_metadata.index.isin(match_index)
meta_flow_matched = (
    usgs_gage_metadata
    .loc[meta_in_both]
    .reindex(index=pf_flow_matched.index)
)
meta_flow_matched.shape

In [None]:
# Sanity check: the PF and USGS tables must hold the same gage ids in the same order
pf_usgs_aligned = pf_flow_matched.index.equals(usgs_flow_matched.index)
if not pf_usgs_aligned:
    print('Gages DO NOT match between PF and USGS! Go back and check the dataframes...')
else:
    print('PF and USGS gages MATCH!!! Now, save the dataframes for use in other scripts.')


# the metadata table must line up with those same gages as well
meta_usgs_aligned = meta_flow_matched.index.equals(usgs_flow_matched.index)
if not meta_usgs_aligned:
    print('Gages DO NOT match between metadata dataframe and USGS! Go back and check the metadata workflow...')
else:
    print('Metadata and USGS gages MATCH!!! Now, save the dataframes for use in other scripts.')

In [None]:
# Write the three flow-matched tables (PF flow, USGS flow, gage metadata) to csv
flow_match_outputs = [
    (pf_flow_matched, f'{organized_dir}/FlowMatch_CONUS2_daily_cms_{water_year}.csv'),
    (usgs_flow_matched, f'{organized_dir}/FlowMatch_USGS_daily_cms_{water_year}.csv'),
    (meta_flow_matched, f'{organized_dir}/FlowMatch_metadata_{water_year}.csv'),
]
for matched_frame, csv_path in flow_match_outputs:
    matched_frame.to_csv(csv_path, sep = ",")