In [4]:
### Make CSV of CONUS2 flow at matched gages ###
### DTT, 10/22

# This script is split into two main parts: 1) reading in CONUS2 gridded total (aggregated) daily flow for the full domain and creating a dataframe of CONUS2 flow for cells that have been matched with USGS gages in the `NWM_Gage_Adjustments_final.csv`. 2) keeping only the gages that have flow in both the PF csv and the USGS csv retrieved from hydrodata. 
# Note that flow is converted in this script from daily accumulated flow in [m^3/h] to daily mean flow in cms or [m^3/s].

### Inputs:
# - `NWM_Gage_Adjustments_final.csv` - this can be found on the CONUS2 Dropbox or in /glade/p/univ/ucsm0002/CONUS2/domain_files
# - Daily total streamflow PFCLM outputs as PFBs - processed using `compute_daily_PF_averages.py`
# - USGS daily flow csv - from the hydrodata repository on Verde

### Outputs:
# - CSV of PFCLM daily mean flow (cms) with gage ID, lat/long, and CONUS2 cell location
# - two flow-matched CSVs for PF and USGS flow
# - note that the daily columns in the CSV outputs are labelled 'day 001', 'day 002', ..., counted from the start of the water year (001 == October 1)***

# Notes:
# - need to fix no_days: currently it is only accurate if the run starts at the beginning of a water year. Need to add a dictionary or if statement that specifies the number of days in each month (or something similar).
# - ***need to change day headings so that they are more descriptive than 'day 001' and have an actual date

import sys
from parflow.tools.io import read_pfb,write_pfb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# --- Run configuration ---
# Folder holding the daily PF flow PFBs, and folder where the organized CSVs are read/written
directory_out = '/glade/scratch/tijerina/CONUS2/spinup_WY2003/averages'
organized_dir = '/glade/p/univ/ucsm0002/CONUS2/Validation/Streamflow/Organized_Daily_Flow'

# File name of the USGS flow csv from hydrodata (looked up inside organized_dir)
usgs_data = 'USGS_WY2003_oct_mean_flow_HydroData.csv'

# Update these two together when processing a different period
water_year = 2003
no_days = 31

# --- Gage table ---
# Gage locations and their adjusted CONUS2 cell indices
NWM_gage_csv = pd.read_csv('/glade/p/univ/ucsm0002/CONUS2/domain_files/NWM_Gage_Adjustments_final.csv')

# Keep only gage ID, lat/long, and the CONUS2 x and y indices
gage_columns = ['STNID', 'USGS_lat', 'USGS_lon', 'x_new_adj', 'y_new_adj']
pf_flow_df = NWM_gage_csv[gage_columns].copy()

# Gage IDs: integer -> string, left-padded with zeros to 8 characters.
# NOTE(review): zfill(8) leaves IDs that already have more than 8 digits unchanged,
# so any longer ID that should start with a zero is not restored here -- confirm.
pf_flow_df['STNID'] = pf_flow_df['STNID'].astype(int).astype('str').str.zfill(8)

pf_flow_df


Unnamed: 0,STNID,USGS_lat,USGS_lon,x_new_adj,y_new_adj
0,01011000,47.069611,-69.079544,4210,2783
1,01013500,47.237394,-68.582642,4237,2810
2,01015800,46.523003,-68.371764,4276,2747
3,01017000,46.777294,-68.157194,4281,2773
4,01017060,46.783611,-67.989167,4295,2780
...,...,...,...,...,...
7537,11455350,38.212778,-121.669167,142,1797
7538,11447905,38.238804,-121.522731,151,1792
7539,11447903,38.237222,-121.517500,149,1790
7540,11455420,38.149044,-121.688944,139,1789


In [5]:
### READ STREAMFLOW PFBs ###
# For every day, read the CONUS2 daily streamflow PFB, sample it at each gage cell and
# store the values in pf_flow_df as a 'day NNN' column.
# Values are converted from daily accumulated flow in m^3/h to daily mean flow in cms.

# The gage cell indices are the same for every day, so pull them out once
# (assumes the squeezed PFB is a 2D array indexed [y, x] -- TODO confirm)
gage_rows = pf_flow_df['y_new_adj'].to_numpy()
gage_cols = pf_flow_df['x_new_adj'].to_numpy()

for day in range(1, no_days + 1):
    step = f'{day:03d}'  # zero-padded day counter used in the PFB file names
    # The year in the file name follows `water_year` rather than a hard-coded 2003,
    # so changing the configuration cannot silently re-read another year's files
    pf_flow_pfb = np.squeeze(read_pfb(f'{directory_out}/flow.{water_year}.daily.{step}.pfb'))
    # /3600: m^3/h -> m^3/s, /24: daily accumulated -> daily mean
    pf_flow_df[f'day {step}'] = pf_flow_pfb[gage_rows, gage_cols]/3600/24
    print(f'reading flow for day {step} and converting from total accumulated flow in m^3/h, to daily mean flow in cms')


# Create column for matching/have flow (=1) and not matching/have no flow (=0) gages
# (only the first day, 'day 001', is checked)
pf_flow_df['matched'] = np.where(pf_flow_df['day 001']>0, 1, 0)

# remove cells with no flow and make new pandas df with matching flow at CONUS2 cells and USGS gages
pf_flow_df_NWM_match = pf_flow_df[pf_flow_df.matched != 0]

# SAVE OUT PANDAS DF FOR CONUS2 FLOW
### save csv file of all matching gage locations and CONUS2 daily flow.
### Note: the STNID strings are written with their leading zeros; the zeros are lost
### when the file is read back with pd.read_csv, which parses the column as integers
### unless a string dtype is requested.
pf_flow_df_NWM_match.to_csv(f'{organized_dir}/CONUS2_NWM_matched_flow_{water_year}.csv', sep = ",")

reading flow for day 001 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 002 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 003 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 004 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 005 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 006 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 007 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 008 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 009 and converting from total accumulated flow in m^3/h, to daily mean flow in cms
reading flow for day 010 and converting from total accumulated f

In [7]:
### Reload the CONUS2 flow csv written in the previous step, indexed by gage ID
# STNID is parsed as a number on read, so the index shows the IDs without leading zeros
conus2_csv_path = f'{organized_dir}/CONUS2_NWM_matched_flow_{water_year}.csv'
pf_flow_df = (
    pd.read_csv(conus2_csv_path, index_col=['STNID'])
    .drop(columns=['Unnamed: 0', 'matched'])  # saved row counter and match flag are not needed for now
    .rename_axis(['site_id'])  # use the same index name as the USGS table
)

### Read in USGS flow for all gages with flow in October
# streamflow from hydrodata has already been converted to cms!!!
usgs_csv_path = f'{organized_dir}/{usgs_data}'
usgs_flow_df = (
    pd.read_csv(usgs_csv_path, index_col=['site_id'])
    .drop(columns=['Unnamed: 0', 'num_obs'])
)

# Inner merge on the gage ID: only gages present in both the CONUS2 and the USGS
# dataframes are kept; CONUS2 columns come first, USGS columns after them
combine_df = pd.merge(pf_flow_df, usgs_flow_df, on='site_id', how='inner')
combine_df


Unnamed: 0_level_0,USGS_lat,USGS_lon,x_new_adj,y_new_adj,day 001,day 002,day 003,day 004,day 005,day 006,...,2002-10-22,2002-10-23,2002-10-24,2002-10-25,2002-10-26,2002-10-27,2002-10-28,2002-10-29,2002-10-30,2002-10-31
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,47.069611,-69.079544,4210,2783,4.339120,3.903565,3.598231,3.739801,3.472192,2.964904,...,9.310700,8.574900,8.546600,8.631500,9.820100,12.508600,15.508400,16.527200,15.791400,14.319800
1013500,47.237394,-68.582642,4237,2810,2.607411,2.592114,2.714427,2.122667,2.426324,1.780095,...,5.263800,5.405300,5.660000,5.914700,6.197700,6.990100,7.556100,7.301400,7.188200,7.273100
1015800,46.523003,-68.371764,4276,2747,3.538157,3.513982,3.389648,3.025167,2.979538,2.794375,...,22.300400,19.074200,16.725300,14.914100,13.725500,12.904800,12.848200,13.725500,13.725500,13.074600
1017000,46.777294,-68.157194,4281,2773,8.474701,8.578840,8.433320,8.120163,7.786723,7.233890,...,38.205000,33.394000,28.583000,24.507800,21.762700,20.376000,20.715600,21.791000,21.734400,20.347700
1017550,46.628056,-67.953056,4300,2762,0.000110,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.044997,0.036790,0.036224,0.035092,0.030847,0.041884,0.051789,0.039620,0.031979,0.027451
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
214657975,35.158610,-80.713330,3650,1291,0.000002,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.073297,0.030564,0.026885,0.027451,0.030564,0.028300,0.141500,0.079523,0.054336,0.043865
242354750,33.313445,-86.806378,3138,998,0.048062,0.012137,0.000002,0.000000,0.000000,0.000000,...,0.667880,0.608450,0.577320,0.554680,0.554680,0.543360,1.313120,1.652720,1.426320,1.075400
344894205,35.683333,-82.333056,3498,1321,0.000323,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.452800,0.407520,0.373560,0.370730,0.399030,0.370730,0.582980,0.795230,1.058420,0.911260
351706800,35.438333,-83.918889,3363,1269,0.629557,0.336959,0.127858,0.018200,0.288745,0.166981,...,0.843340,0.769760,0.732970,0.741460,0.837680,0.766930,1.267840,1.859310,4.273300,2.447950


In [10]:
# Separate the CONUS2 flow from the USGS flow in combine_df.
# The split is made on column names instead of hard-coded positions (0:35 / 35:66),
# so it keeps working when the number of days changes:
#   - CONUS2 side: the gage location/cell columns plus every 'day NNN' flow column
#   - USGS side:   every remaining column (the dated USGS flow columns)
gage_info_columns = ['USGS_lat', 'USGS_lon', 'x_new_adj', 'y_new_adj']
is_pf_column = (
    combine_df.columns.isin(gage_info_columns)
    | combine_df.columns.str.startswith('day ')
)

pf_flow_matched = combine_df.loc[:, is_pf_column].copy() # get only CONUS2 flow
usgs_flow_matched = combine_df.loc[:, ~is_pf_column].copy() # get only USGS flow

# Save Flow Matched CSVs
pf_flow_matched.to_csv(f'{organized_dir}/CONUS2_daily_FlowMatch_cms_{water_year}.csv', sep = ",")
usgs_flow_matched.to_csv(f'{organized_dir}/USGS_daily_FlowMatch_cms_{water_year}.csv', sep = ",")