In [1]:
### Make CSV of CONUS2 flow at matched gages ###
### DTT, 10/22

# This script is split into two main parts: 1) reading in CONUS2 gridded total (aggregated) daily flow for the full domain and creating a dataframe of CONUS2 flow for cells that have been matched with USGS gages in the `NWM_Gage_Adjustments_final.csv`. 2) matching the gages that both have flow between the PF csv and USGS csv retrieved from hydrodata. 
# Note that flow is converted in this script from daily accumulated flow in [m^3/h] to daily mean flow in cms or [m^3/s].

### Inputs:
# - `NWM_Gage_Adjustments_final.csv` - this can be found on the CONUS2 Dropbox or in /glade/p/univ/ucsm0002/CONUS2/domain_files
# - Daily total streamflow PFCLM outputs as PFBs - processed using `compute_daily_PF_averages.py`
# - USGS daily flow csv - from the hydrodata repository on Verde

### Outputs:
# - CSV of PFCLM daily mean flow (cms) with gage ID, lat/long, and CONUS2 cell location
# - two flow-matched CSVs for PF and USGS flow
# - note that the CSV outputs with 'day 001' which starts at the water year (001 == October 1)***

# Notes:
# - need to fix the no_days, currently this will only be accurate if this is started at the begninning of a water year and need to add in some dictionary or if statement to specify num days in a month or something like that.
# - ***need to change day headings so that they are more descriptive than 'day 001' and have an actual date

import sys
from parflow.tools.io import read_pfb,write_pfb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Directory where PF flow PFBs are saved in
directory_out = '/glade/p/univ/ucsm0002/CONUS2/CONUS2.spinup.WY2003/averages'
organized_dir = '/glade/work/tijerina/PFCONUS2-analysis/scripts/Validation/Streamflow/Organized_Daily_Flow'

obs_data_file = 'Streamflow_USGS_obs_daily_avg_WY2003.csv' #csv of USGS flow from hydrodata
metadata_file = 'Streamflow_USGS_obs_metadata_daily_avg_WY2003.csv' #csv of USGS flow from hydrodata

# need to change water year and number of days
water_year = 2003
no_days = 364 

# ### check gage locations for daily flow
# NWM_gage_csv = pd.read_csv('/glade/p/univ/ucsm0002/CONUS2/domain_files/NWM_Gage_Adjustments_final.csv')


# ### set up pandas dataframe of gage ID, lat/long, CONUS2 x and y indices ###
# pf_flow_df = pd.DataFrame(columns = ['STNID', 'USGS_lat', 'USGS_lon', 'x_new_adj', 'y_new_adj'])
# pf_flow_df['STNID'] = NWM_gage_csv['STNID'].astype(int)
# pf_flow_df['USGS_lat'] = NWM_gage_csv['USGS_lat']
# pf_flow_df['USGS_lon'] = NWM_gage_csv['USGS_lon']
# pf_flow_df['x_new_adj'] = NWM_gage_csv['x_new_adj']
# pf_flow_df['y_new_adj'] = NWM_gage_csv['y_new_adj']

# # add leading zeros to USGS gages
# pf_flow_df['STNID'] = pf_flow_df['STNID'].astype('str').str.zfill(8)

# pf_flow_df


In [2]:
# Read observation data and organize
obs_data = pd.read_csv(f'{organized_dir}/{obs_data_file}', index_col=['site_id'])
obs_data  = obs_data.drop(columns=['Unnamed: 0'])
# Read metadata and organize
metadata = pd.read_csv(f'{organized_dir}/{metadata_file}', index_col=['site_id'])
metadata  = metadata.drop(columns=['Unnamed: 0'])

# add number of observations column from the obs_data df
metadata['num_obs'] = obs_data['num_obs']
# remove num_obs from data so we can sum and calc stats
obs_data = obs_data.drop(columns=['num_obs'])

In [3]:
metadata.head()

Unnamed: 0_level_0,site_type,agency,site_name,site_id_agency,site_query_url,date_metadata_last_updated,latitude,longitude,state,huc,...,topo_cd,instruments_cd,construction_dt,inventory_dt,drain_area_va,contrib_drain_area_va,local_time_fg,reliability_cd,project_no,num_obs
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,stream gauge,USGS,"Allagash River near Allagash, Maine",1011000,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,47.069722,-69.079444,ME,1010002,...,,YNNNYNNNNYNNNNNNYNNNNNNNNNNNNN,,,1478.0,1229.0,Y,,442300100.0,365
1013500,stream gauge,USGS,"Fish River near Fort Kent, Maine",1013500,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,47.2375,-68.582778,ME,1010003,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,873.0,873.0,Y,,442300100.0,365
1015800,stream gauge,USGS,"Aroostook River near Masardis, Maine",1015800,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.523056,-68.371667,ME,1010004,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,892.0,892.0,Y,,442300100.0,365
1017000,stream gauge,USGS,"Aroostook River at Washburn, Maine",1017000,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.777222,-68.157222,ME,1010004,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,1654.0,1654.0,Y,,442300100.0,365
1017550,stream gauge,USGS,"Williams Brook at Phair, Maine",1017550,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.628056,-67.953056,ME,1010005,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,3.82,3.82,Y,,442300100.0,365


In [4]:
obs_data.head()

Unnamed: 0_level_0,2002-10-01,2002-10-02,2002-10-03,2002-10-04,2002-10-05,2002-10-06,2002-10-07,2002-10-08,2002-10-09,2002-10-10,...,2003-09-21,2003-09-22,2003-09-23,2003-09-24,2003-09-25,2003-09-26,2003-09-27,2003-09-28,2003-09-29,2003-09-30
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,7.4146,7.3863,6.9901,6.5656,6.3675,6.1977,5.7449,5.0657,4.528,4.4148,...,12.6501,12.3671,11.9426,12.7067,13.6972,13.6123,13.1595,14.0085,66.222,76.127
1013500,3.5092,3.4243,3.1696,3.113,3.3394,2.8583,2.8583,2.6319,2.4055,2.50455,...,14.3764,13.8104,13.6123,13.5557,13.1595,12.2539,11.9426,15.7631,58.298,51.223
1015800,6.7071,6.226,5.7166,5.377,5.1506,5.0374,4.7544,4.4997,4.1601,4.1884,...,3.7639,3.6507,3.5375,3.5941,3.7356,3.7922,3.7073,4.2167,25.4134,60.845
1017000,9.8767,9.3673,8.4617,7.9523,7.8108,7.1316,7.0467,6.6788,6.2826,6.2826,...,7.075,6.9052,6.792,6.8203,6.8203,6.8486,6.7637,7.8957,50.657,147.726
1017550,0.023772,0.024338,0.020659,0.018395,0.01981,0.01981,0.018678,0.018678,0.018961,0.021791,...,0.080655,0.043016,0.049525,0.07358,0.053204,0.026319,0.023206,0.063675,0.1132,0.105559


In [6]:
# compute annual average for observations
obs_data_sum = obs_data.sum(axis=1)
obs_data_sum

site_id
1011000            15270.340400
1013500            14383.027860
1015800            12261.456100
1017000            24401.816500
1017550               52.182936
                       ...     
393109104464500      105.331185
394308105413800      868.082690
394839104570300      559.956535
401733105392404       74.228070
402114105350101      596.102427
Length: 6101, dtype: float64

In [7]:
### set up pandas dataframe with gage id and dates for PFCONUS2 flow
pf_flow_df = pd.DataFrame(index = obs_data.index, columns = obs_data.columns)

In [5]:
### READ STREAMFLOW PFBs ###
# Read in CONUS2 daily streamflow PFBs and save as df in flow_df, convert to total accumulated in m^3/h to mean daily in cms
for i in range(no_days):
    step = str(int(i+1)).rjust(3, '0')
    pf_flow_pfb = np.squeeze(read_pfb(f'{directory_out}/flow.2003.daily.{step}.pfb'))
    pf_flow_df[f'day {step}'] = pf_flow_pfb[metadata['conus2_y'],metadata['conus2_x']]/3600/24 # CONVERT FROM m^3/h to cms AND from daily accumulated to daily mean
    print(f'reading flow for day {step} and converting from total accumulated flow in m^3/h, to daily mean flow in cms')

    
# Create column for matching/have flow (=1) and not matching/have no flow (=0) gages
pf_flow_df['matched'] = np.where(pf_flow_df['day 001']>0, 1, 0)

# remove cells with no flow and make new pandas df with matching flow at CONUS2 cells and USGS gages
pf_flow_df_NWM_match = pf_flow_df[pf_flow_df.matched != 0]

# SAVE OUT PANDAS DF FOR CONUS2 FLOW
### save csv file of all matching gage locations and CONUS2 daily flow, note the USGS STNID's drop the leading zeros when saving
pf_flow_df_NWM_match.to_csv(f'{organized_dir}/CONUS2_NWM_matched_flow_{water_year}.csv', sep = ",")

reading flow for day 001 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 002 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 003 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 004 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 005 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 006 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 007 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 008 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 009 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 010 and converting from total accumulated f

  


reading flow for day 096 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 097 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 098 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 099 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 100 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 101 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 102 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 103 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 104 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 105 and converting from total accumulated f

  # This is added back by InteractiveShellApp.init_path()


In [6]:
###
### read in csv that was just created of gage locations and CONUS2 flow
pf_flow_df = pd.read_csv(f'{organized_dir}/CONUS2_NWM_matched_flow_{water_year}.csv',index_col=['STNID'])
pf_flow_df = pf_flow_df.drop(columns=['Unnamed: 0','matched']) #drop unnecessary columns for now
pf_flow_df.index.names = ['site_id']
                          
### read in USGS flow for all gages with flow in October
# streamflow from hydrodata has already been converted to cms!!!
usgs_flow_df = pd.read_csv(f'{organized_dir}/{usgs_data}',index_col=['site_id']) 
usgs_flow_df = usgs_flow_df.drop(columns=['Unnamed: 0', 'num_obs'])
                          
# # merge the CONUS2 and USGS dataframes so that we remove all gage locations that don't match between the two
# combine_df = pf_flow_df.merge(usgs_flow_df, on='site_id', how='inner')
# combine_df

# find matching gage locations (index = 'site_id') between the PF flow and USGS flow 
# result is a list of matching indices ('match_index') which is then used to parse down the PF and USGS flow dataframes 
pf_flow_index = pf_flow_df.index
usgs_flow_index = usgs_flow_df.index
match_index = pf_flow_index.intersection(usgs_flow_index)
print(f'There are {len(match_index)} matching gages between PF and USGS!')

There are 4999 matching gages between PF and USGS!


In [8]:
# reorganize PF flow dataframe to keep only gages that match with USGS
pf_flow_matched = pf_flow_df[pf_flow_df.index.isin(match_index)]
print(pf_flow_matched.shape)
pf_flow_matched.head()

(4999, 35)


Unnamed: 0_level_0,USGS_lat,USGS_lon,x_new_adj,y_new_adj,day 001,day 002,day 003,day 004,day 005,day 006,...,day 022,day 023,day 024,day 025,day 026,day 027,day 028,day 029,day 030,day 031
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,47.069611,-69.079544,4210,2783,37.266464,34.151593,31.949418,28.746042,24.794453,21.037749,...,26.36649,42.798333,53.046882,43.618294,34.649396,32.664747,28.395134,27.1413,25.50489,25.276254
1013500,47.237394,-68.582642,4237,2810,18.686342,17.994405,17.17183,14.470901,14.358695,12.452931,...,25.460269,25.297993,25.892275,26.625347,25.821571,29.422915,27.471481,23.582704,20.091685,17.37602
1015800,46.523003,-68.371764,4276,2747,30.468728,29.614383,27.459663,24.822553,22.551353,19.456863,...,58.912677,71.743174,63.942664,52.078266,42.038084,36.339844,30.665801,28.090918,26.373447,24.965231
1017000,46.777294,-68.157194,4281,2773,64.752051,64.284799,62.117018,57.702024,52.771326,48.076338,...,67.899391,94.396899,100.104568,110.776426,108.378785,96.324758,83.791588,75.230466,67.490577,59.05152
1017550,46.628056,-67.953056,4300,2762,0.044492,0.028687,0.010159,0.003003,0.00438,0.001411,...,0.155844,0.092079,0.059114,0.039842,0.053557,0.127573,0.071467,0.049377,0.033976,0.025259


In [9]:
# reorganize USGS flow dataframe to keep only gages that match with PF
usgs_flow_matched = usgs_flow_df[usgs_flow_df.index.isin(match_index)]
# reindex so that USGS is in the same order as the PF dataframe
usgs_flow_matched = usgs_flow_matched.reindex(index=pf_flow_matched.index)
print(usgs_flow_matched.shape)
usgs_flow_matched

(4999, 31)


Unnamed: 0_level_0,2002-10-01,2002-10-02,2002-10-03,2002-10-04,2002-10-05,2002-10-06,2002-10-07,2002-10-08,2002-10-09,2002-10-10,...,2002-10-22,2002-10-23,2002-10-24,2002-10-25,2002-10-26,2002-10-27,2002-10-28,2002-10-29,2002-10-30,2002-10-31
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,7.414600,7.386300,6.990100,6.565600,6.367500,6.197700,5.744900,5.065700,4.528000,4.414800,...,9.310700,8.574900,8.546600,8.631500,9.820100,12.508600,15.508400,16.527200,15.791400,14.319800
1013500,3.509200,3.424300,3.169600,3.113000,3.339400,2.858300,2.858300,2.631900,2.405500,2.504550,...,5.263800,5.405300,5.660000,5.914700,6.197700,6.990100,7.556100,7.301400,7.188200,7.273100
1015800,6.707100,6.226000,5.716600,5.377000,5.150600,5.037400,4.754400,4.499700,4.160100,4.188400,...,22.300400,19.074200,16.725300,14.914100,13.725500,12.904800,12.848200,13.725500,13.725500,13.074600
1017000,9.876700,9.367300,8.461700,7.952300,7.810800,7.131600,7.046700,6.678800,6.282600,6.282600,...,38.205000,33.394000,28.583000,24.507800,21.762700,20.376000,20.715600,21.791000,21.734400,20.347700
1017550,0.023772,0.024338,0.020659,0.018395,0.019810,0.019810,0.018678,0.018678,0.018961,0.021791,...,0.044997,0.036790,0.036224,0.035092,0.030847,0.041884,0.051789,0.039620,0.031979,0.027451
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242354750,0.882960,0.755610,0.642410,0.588640,0.537700,0.520720,6.480700,2.363050,1.417830,1.092380,...,0.667880,0.608450,0.577320,0.554680,0.554680,0.543360,1.313120,1.652720,1.426320,1.075400
344894205,0.718820,0.585810,0.486760,0.424500,0.396200,0.348090,0.305640,0.283000,0.272246,0.273944,...,0.452800,0.407520,0.373560,0.370730,0.399030,0.370730,0.582980,0.795230,1.058420,0.911260
351706800,1.015970,0.885790,0.809380,0.741460,0.979180,0.829190,1.013140,0.942390,0.772590,0.730140,...,0.843340,0.769760,0.732970,0.741460,0.837680,0.766930,1.267840,1.859310,4.273300,2.447950
422026250,0.166404,0.167819,0.190742,0.171781,0.175743,0.169517,0.170366,0.166121,0.167253,0.166404,...,0.191874,0.253568,0.198666,0.190742,0.251870,0.210552,0.199515,0.198383,0.197251,0.190459


### Reorganize metadata for flow matched gages

In [10]:
### read in USGS streamflow gage metadata for all gages with flow in October
usgs_gage_metadata = pd.read_csv(f'{organized_dir}/USGS_WY2003_oct_raw_metadata.csv',index_col=['site_id'])#, 
usgs_gage_metadata = usgs_gage_metadata.drop(columns=['Unnamed: 0'])
#usgs_all_gages = usgs_all_gages.transpose() #transpose df for easier plotting
usgs_gage_metadata.shape

(5874, 82)

In [11]:
# reorganize USGS metadata dataframe to keep only gages that match with PF and USGS
meta_flow_matched = usgs_gage_metadata[usgs_gage_metadata.index.isin(match_index)]
# reindex so that metadata is in the same order as the PF dataframe
meta_flow_matched = meta_flow_matched.reindex(index=pf_flow_matched.index)
meta_flow_matched.shape

(4999, 82)

In [12]:
# Do a check to make sure that all dataframes have the same indexes
if pf_flow_matched.index.equals(usgs_flow_matched.index) == True: 
    print('PF and USGS gages MATCH!!! Now, save the dataframes for use in other scripts.')
else:
    print('Gages DO NOT match between PF and USGS! Go back and check the dataframes...')
    
    
# check metadata also has the same gages as these
if meta_flow_matched.index.equals(usgs_flow_matched.index):
    print('Metadata and USGS gages MATCH!!! Now, save the dataframes for use in other scripts.')
else:
    print('Gages DO NOT match between metadata dataframe and USGS! Go back and check the metadata workflow...')

PF and USGS gages MATCH!!! Now, save the dataframes for use in other scripts.
Metadata and USGS gages MATCH!!! Now, save the dataframes for use in other scripts.


In [13]:
# Save Flow Matched CSVs
pf_flow_matched.to_csv(f'{organized_dir}/FlowMatch_CONUS2_daily_cms_{water_year}.csv', sep = ",")
usgs_flow_matched.to_csv(f'{organized_dir}/FlowMatch_USGS_daily_cms_{water_year}.csv', sep = ",")#usgs_flow_matched
meta_flow_matched.to_csv(f'{organized_dir}/FlowMatch_metadata_{water_year}.csv', sep = ",")