In [1]:
### Make CSV of CONUS2 flow at matched gages ###
### DTT, 05/23

# This script is split into two main parts: 1) reading in CONUS2 gridded total (aggregated) daily flow for the full domain and creating a dataframe of CONUS2 flow for cells that have been matched with USGS gages in the `NWM_Gage_Adjustments_final.csv`. 2) matching the gages that both have flow between the PF csv and USGS csv retrieved from hydrodata. 
# Note that flow is converted in this script from daily accumulated flow in [m^3/h] to daily mean flow in cms or [m^3/s].

### Inputs:
# - `NWM_Gage_Adjustments_final.csv` - this can be found on the CONUS2 Dropbox or in /glade/p/univ/ucsm0002/CONUS2/domain_files
# - Daily total streamflow PFCLM outputs as PFBs - processed using `compute_daily_PF_averages.py`
# - USGS daily flow csv - from the hydrodata repository on Verde

### Outputs:
# - CSV of PFCLM daily mean flow (in Cubic Meters per Second) with gage ID, lat/long, and CONUS2 cell location
# - two flow-matched CSVs for PF and USGS flow
# - note that the CSV output columns are labeled starting at 'day 001', which is the first day of the water year (001 == October 1)***

# Notes:
# - need to fix no_days: currently it is only accurate if the run starts at the beginning of a water year. Need to add a dictionary or an if statement that specifies the number of days in each month (or something similar).
# - ***need to change day headings so that they are more descriptive than 'day 001' and have an actual date

import sys
from parflow.tools.io import read_pfb,write_pfb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime


### Directories: where the daily PF PFBs are read from, and where the organized CSVs live ###
## Cheyenne
#directory_out = '/glade/p/univ/ucsm0002/CONUS2/CONUS2.spinup.WY2003/averages'
#organized_dir = '/glade/work/tijerina/PFCONUS2-analysis/scripts/Validation/Streamflow/Organized_Daily_Flow'
## Hydrodata
directory_out = '/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/averages'
organized_dir = './Organized_LH'

# Observation and site-metadata CSVs, read from organized_dir.
# (File names indicate hourly latent-heat-flux observations retrieved from hydrodata.)
obs_data_file = 'Hydrodata_latent-heat-flux_obs_hourly_total_WY2003.csv' # csv of site observations
metadata_file = 'Hydrodata_latent-heat-flux_metadata_hourly_total_WY2003.csv' # csv of site metadata

# Grid dimensions used to size the (no_days, ny, nx) array of daily PF output
ny = 3256
nx = 4442

# need to change water year and number of days
water_year = 2003
# Number of daily PFB files to read (days 001 .. no_days).
# NOTE(review): a complete WY2003 has 365 days; 364 presumably reflects the
# outputs that are available - confirm before reusing for another year.
no_days = 364 

# date variables for datetime for wateryear
# A water year is named for the calendar year in which it ENDS, so it starts on
# October 1 of the previous calendar year (WY2003 = 2002-10-01 .. 2003-09-30).
start_date = datetime.date(water_year - 1, 10, 1)
end_date = datetime.date(water_year, 9, 30)
# daily time step for reading daily files (this could change depending on how you are analyzing)
# delta time
delta = datetime.timedelta(days=1)

In [4]:
# Read observation data and organize: one row per site, indexed by site_id.
obs_data = pd.read_csv(f'{organized_dir}/{obs_data_file}', index_col=['site_id'])
# 'Unnamed: 0' is presumably a stray positional-index column from when the CSV was written
obs_data = obs_data.drop(columns=['Unnamed: 0'])
# # remove sites with less than 365 days of observations
# obs_data = obs_data.loc[(obs_data['num_obs']==365)]

# Read metadata and organize the same way
metadata = pd.read_csv(f'{organized_dir}/{metadata_file}', index_col=['site_id'])
metadata = metadata.drop(columns=['Unnamed: 0'])
# Keep only the sites that are present in obs_data (the num_obs filter above is
# currently disabled, so this only drops sites that have no observation row).
# .copy() gives an independent frame so that adding a column below does not
# raise pandas' SettingWithCopyWarning.
metadata = metadata[metadata.index.isin(obs_data.index)].copy()

# add number of observations column from the obs_data df (aligned on site_id)
metadata['num_obs'] = obs_data['num_obs']
# remove num_obs from data so we can sum and calc stats
obs_data = obs_data.drop(columns=['num_obs'])
obs_data

Unnamed: 0_level_0,2002-10-01 00:00:00,2002-10-01 01:00:00,2002-10-01 02:00:00,2002-10-01 03:00:00,2002-10-01 04:00:00,2002-10-01 05:00:00,2002-10-01 06:00:00,2002-10-01 07:00:00,2002-10-01 08:00:00,2002-10-01 09:00:00,...,2003-09-29 15:00:00,2003-09-29 16:00:00,2003-09-29 17:00:00,2003-09-29 18:00:00,2003-09-29 19:00:00,2003-09-29 20:00:00,2003-09-29 21:00:00,2003-09-29 22:00:00,2003-09-29 23:00:00,2003-09-30 00:00:00
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
US-ARM,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
US-Blo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,81.698,86.234,249.731,228.428,260.023,271.887,317.925,316.995,169.443,134.388
US-Bo1,3.0,-1.0,-3.0,-2.0,0.0,-7.0,1.0,-6.0,-6.0,0.0,...,159.0,165.0,202.0,207.0,85.0,159.0,129.0,84.0,33.0,14.0
US-CMW,71.359402,12.017639,-13.517526,-15.882821,10.088744,25.00629,2.864254,-32.073075,1.52734,-0.757087,...,401.584104,611.891544,597.124792,687.145139,619.86866,765.009194,521.543262,428.275266,302.769649,59.134963
US-Cop,23.545,12.115,19.318,20.19,4.659,15.651,14.044,13.244,10.372,5.184,...,7.583,14.748,18.875,17.06,17.502,14.541,13.846,9.938,4.663,2.174
US-Dk1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,389.12,420.34,404.18,413.74,324.44,284.43,150.76,4.406,-10.362,-0.269
US-Dk2,-5.257,1.389,8.37,-0.776,2.366,3.876,12.231,-1.407,-0.79,-1.118,...,318.28,240.06,320.88,254.16,229.425,330.625,161.128,35.272,-5.248,20.13
US-Dk3,-1.214,2.062,1.726,6.202,152.107,-8.449,0.397,-169.54,-17.505,-11.044,...,352.93,351.11,330.15,365.93,269.62,383.07,58.06,37.313,-14.458,-4.576
US-GBT,57.383,16.06,20.064,16.409,9.397,7.879,6.932,1.525,2.223,-0.262,...,43.262,65.246,81.29,70.601,55.896,98.452,124.522,42.92,21.389,6.531
US-GLE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Display the site metadata table for inspection
metadata

Unnamed: 0_level_0,site_type,agency,site_name,site_id_agency,site_query_url,date_metadata_last_updated,latitude,longitude,state,huc,...,site_funding,acknowledgement,acknowledgement_comment,doi_citation,alternate_url,conus1_x,conus1_y,conus2_x,conus2_y,num_obs
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
US-ARM,flux tower,AmeriFlux,ARM Southern Great Plains site- Lamont,US-ARM,https://ameriflux.lbl.gov/sites/siteinfo/US-ARM,2023-05-02,36.6058,-97.4888,OK,,...,DOE/ARM,This research was supported by the Office of B...,,"Sebastien Biraud, Marc Fischer, Stephen Chan, ...",http://www.arm.gov/sites/sgp,1752.0,342.0,2165,1301,6523
US-Blo,flux tower,AmeriFlux,Blodgett Forest,US-Blo,https://ameriflux.lbl.gov/sites/siteinfo/US-Blo,2023-05-02,38.8953,-120.6328,CA,,...,"DOE/NIGEC, EPA, University of California",,,"Allen Goldstein (2019), AmeriFlux BASE US-Blo ...",http://www.cnr.berkeley.edu/~ahg/tgbl/group.html,,,248,1840,8737
US-Bo1,flux tower,AmeriFlux,Bondville,US-Bo1,https://ameriflux.lbl.gov/sites/siteinfo/US-Bo1,2023-05-02,40.0062,-88.2904,IL,,...,NOAA/GEWEX,,,"Tilden Meyers (2016), AmeriFlux BASE US-Bo1 Bo...",http://www.life.illinois.edu/bernacchi/,2539.0,744.0,2926,1708,8737
US-CMW,flux tower,AmeriFlux,Charleston Mesquite Woodland,US-CMW,https://ameriflux.lbl.gov/sites/siteinfo/US-CMW,2023-05-02,31.6637,-110.1777,AZ,,...,"USDA, NSF",,,"Russell Scott (2022), AmeriFlux BASE US-CMW Ch...",,,,975,862,8737
US-Cop,flux tower,AmeriFlux,Corral Pocket,US-Cop,https://ameriflux.lbl.gov/sites/siteinfo/US-Cop,2023-05-02,38.09,-109.39,UT,,...,University of Utah and USGS,,,"David Bowling (2019), AmeriFlux BASE US-Cop Co...",http://esp.cr.usgs.gov/info/sw/clim-met/corral...,720.0,591.0,1156,1544,8737
US-Dk1,flux tower,AmeriFlux,Duke Forest-open field,US-Dk1,https://ameriflux.lbl.gov/sites/siteinfo/US-Dk1,2023-05-02,35.9712,-79.0934,NC,,...,DOE/TCP,,,"Chris Oishi, Kim Novick, Paul Stoy (2018), Ame...",http://www.nicholas.duke.edu/other/AMERIFLUX/a...,,,3772,1408,8737
US-Dk2,flux tower,AmeriFlux,Duke Forest-hardwoods,US-Dk2,https://ameriflux.lbl.gov/sites/siteinfo/US-Dk2,2023-05-02,35.9736,-79.1004,NC,,...,DOE/NIGEC,,,"Chris Oishi, Kim Novick, Paul Stoy (2018), Ame...",http://www.nicholas.duke.edu/other/AMERIFLUX/a...,,,3771,1408,8737
US-Dk3,flux tower,AmeriFlux,Duke Forest - loblolly pine,US-Dk3,https://ameriflux.lbl.gov/sites/siteinfo/US-Dk3,2023-05-02,35.9782,-79.0942,NC,,...,DOE/TCP,,,"Chris Oishi, Kim Novick, Paul Stoy (2018), Ame...",http://www.nicholas.duke.edu/other/AMERIFLUX/a...,,,3771,1408,8737
US-GBT,flux tower,AmeriFlux,GLEES Brooklyn Tower,US-GBT,https://ameriflux.lbl.gov/sites/siteinfo/US-GBT,2023-05-02,41.3658,-106.2397,WY,,...,USDA/Forest Service,,,"Bill Massman (2016), AmeriFlux BASE US-GBT GLE...",http://www.fs.fed.us/rmrs/experimental-forests...,1034.0,915.0,1463,1859,8737
US-GLE,flux tower,AmeriFlux,GLEES,US-GLE,https://ameriflux.lbl.gov/sites/siteinfo/US-GLE,2023-05-02,41.3665,-106.2399,WY,,...,USDA/Forest Service,,,"John Frank, Bill Massman (2021), AmeriFlux BAS...",https://www.fs.usda.gov/rmrs/experimental-fore...,1034.0,915.0,1463,1859,8737


In [5]:
# Display the observation table for inspection
obs_data

Unnamed: 0_level_0,2002-10-01 00:00:00,2002-10-01 01:00:00,2002-10-01 02:00:00,2002-10-01 03:00:00,2002-10-01 04:00:00,2002-10-01 05:00:00,2002-10-01 06:00:00,2002-10-01 07:00:00,2002-10-01 08:00:00,2002-10-01 09:00:00,...,2003-09-29 15:00:00,2003-09-29 16:00:00,2003-09-29 17:00:00,2003-09-29 18:00:00,2003-09-29 19:00:00,2003-09-29 20:00:00,2003-09-29 21:00:00,2003-09-29 22:00:00,2003-09-29 23:00:00,2003-09-30 00:00:00
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
US-ARM,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
US-Blo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,81.698,86.234,249.731,228.428,260.023,271.887,317.925,316.995,169.443,134.388
US-Bo1,3.0,-1.0,-3.0,-2.0,0.0,-7.0,1.0,-6.0,-6.0,0.0,...,159.0,165.0,202.0,207.0,85.0,159.0,129.0,84.0,33.0,14.0
US-CMW,71.359402,12.017639,-13.517526,-15.882821,10.088744,25.00629,2.864254,-32.073075,1.52734,-0.757087,...,401.584104,611.891544,597.124792,687.145139,619.86866,765.009194,521.543262,428.275266,302.769649,59.134963
US-Cop,23.545,12.115,19.318,20.19,4.659,15.651,14.044,13.244,10.372,5.184,...,7.583,14.748,18.875,17.06,17.502,14.541,13.846,9.938,4.663,2.174
US-Dk1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,389.12,420.34,404.18,413.74,324.44,284.43,150.76,4.406,-10.362,-0.269
US-Dk2,-5.257,1.389,8.37,-0.776,2.366,3.876,12.231,-1.407,-0.79,-1.118,...,318.28,240.06,320.88,254.16,229.425,330.625,161.128,35.272,-5.248,20.13
US-Dk3,-1.214,2.062,1.726,6.202,152.107,-8.449,0.397,-169.54,-17.505,-11.044,...,352.93,351.11,330.15,365.93,269.62,383.07,58.06,37.313,-14.458,-4.576
US-GBT,57.383,16.06,20.064,16.409,9.397,7.879,6.932,1.525,2.223,-0.262,...,43.262,65.246,81.29,70.601,55.896,98.452,124.522,42.92,21.389,6.531
US-GLE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# make CONUS2 x and y into arrays for the matching step.
# Select the metadata rows by site_id in obs_data order so that row j of the
# coordinate arrays is guaranteed to describe row j of obs_data.
site_metadata = metadata.loc[obs_data.index]
conusy = np.asarray(site_metadata['conus2_y'],dtype = 'int')
conusx = np.asarray(site_metadata['conus2_x'],dtype = 'int')

# A site can be matched only if BOTH indices fall inside the grid. Checking the
# upper bounds avoids an IndexError, and checking conusx >= 0 avoids NumPy's
# negative-index wraparound silently reading a cell from the far edge.
in_domain = (conusy >= 0) & (conusy < ny) & (conusx >= 0) & (conusx < nx)
site_y = conusy[in_domain]
site_x = conusx[in_domain]

# Set up arrays 
# pf_flow_array keeps every daily grid (no_days x ny x nx float64, roughly 42 GB
# with the values configured above); pf_flow_matched has one row per site and
# one column per obs_data column.
# NOTE(review): only the first no_days columns of pf_flow_matched are filled;
# any further columns stay 0 - confirm obs_data has one column per day.
pf_flow_array = np.zeros((no_days, ny, nx))
pf_flow_matched = np.zeros(obs_data.shape)
# Sites outside the grid get NaN for the whole row
pf_flow_matched[~in_domain, :] = np.nan

### READ DAILY PFBs ###
# Read the daily eflx_lh_tot PFBs and pull out the value at each site's cell.
# Values are stored exactly as read (no unit conversion is applied here).
# The pf_flow_* names are historical; the files read are eflx_lh_tot outputs.
for i in range(no_days):
    step = f'{i + 1:03d}'  # file day counter is 1-based and zero-padded to 3 digits
    pfb_path = f'{directory_out}/eflx_lh_tot.{water_year}.daily.{step}.pfb'
    print(pfb_path)
    pf_flow_array[i,...] = np.squeeze(read_pfb(pfb_path))
    print(f'reading PF-CONUS2 output for day {step}')
    # One fancy-indexing lookup replaces the per-site Python loop
    pf_flow_matched[in_domain, i] = pf_flow_array[i, site_y, site_x]

# NOTE(review): the statement below was left at the end of this cell, but it
# references names (clm_output, path_outputs, runname, timestamp_reading) that
# are never defined in this notebook, so running it raised NameError. It is kept
# commented out for reference only.
# clm_output += read_pfb(f'{path_outputs}{runname}.out.clm_output.{timestamp_reading}.C.pfb')

In [6]:
# make CONUS2 x and y into arrays for the matching step.
# Select the metadata rows by site_id in obs_data order so that row j of the
# coordinate arrays is guaranteed to describe row j of obs_data.
site_metadata = metadata.loc[obs_data.index]
conusy = np.asarray(site_metadata['conus2_y'],dtype = 'int')
conusx = np.asarray(site_metadata['conus2_x'],dtype = 'int')

# A site can be matched only if BOTH indices fall inside the grid. Checking the
# upper bounds avoids an IndexError, and checking conusx >= 0 avoids NumPy's
# negative-index wraparound silently reading a cell from the far edge.
in_domain = (conusy >= 0) & (conusy < ny) & (conusx >= 0) & (conusx < nx)
site_y = conusy[in_domain]
site_x = conusx[in_domain]

# Set up arrays 
# pf_flow_array keeps every daily grid (no_days x ny x nx float64, roughly 42 GB
# with the values configured above); pf_flow_matched has one row per site and
# one column per obs_data column.
# NOTE(review): only the first no_days columns of pf_flow_matched are filled;
# any further columns stay 0 - confirm obs_data has one column per day.
pf_flow_array = np.zeros((no_days, ny, nx))
pf_flow_matched = np.zeros(obs_data.shape)
# Sites outside the grid get NaN for the whole row
pf_flow_matched[~in_domain, :] = np.nan

### READ DAILY PFBs ###
# Read the daily eflx_lh_tot PFBs and pull out the value at each site's cell.
# Values are stored exactly as read (no unit conversion is applied here).
# The pf_flow_* names are historical; the files read are eflx_lh_tot outputs.
for i in range(no_days):
    step = f'{i + 1:03d}'  # file day counter is 1-based and zero-padded to 3 digits
    pfb_path = f'{directory_out}/eflx_lh_tot.{water_year}.daily.{step}.pfb'
    print(pfb_path)
    pf_flow_array[i,...] = np.squeeze(read_pfb(pfb_path))
    print(f'reading PF-CONUS2 output for day {step}')
    # One fancy-indexing lookup replaces the per-site Python loop
    pf_flow_matched[in_domain, i] = pf_flow_array[i, site_y, site_x]

/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/averages/swe_out.2003.daily.001.pfb
reading flow for day 001 and converting from m^3/h to m^3/s
/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/averages/swe_out.2003.daily.002.pfb
reading flow for day 002 and converting from m^3/h to m^3/s
/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/averages/swe_out.2003.daily.003.pfb
reading flow for day 003 and converting from m^3/h to m^3/s
/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/averages/swe_out.2003.daily.004.pfb
reading flow for day 004 and converting from m^3/h to m^3/s
/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/averages/swe_out.2003.daily.005.pfb
reading flow for day 005 and converting from m^3/h to m^3/s
/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/averages/swe_out.2003.daily.006.pfb
reading flow for day 006 and converting from m^3/h to m^3/s
/hydrodata/temp/CONUS2_transfers/CONUS2/spinup_WY2003/averages/swe_out.2003.daily.007.pfb
reading fl

In [7]:
# Wrap the matched PF array in a DataFrame that carries the same site index
# and the same column labels as obs_data, so the two tables line up.
column_headers = list(obs_data.columns.values)
pf_flow_match_df = pd.DataFrame(
    pf_flow_matched,
    index=obs_data.index,
    columns=column_headers,
)
pf_flow_match_df.shape

(640, 365)

In [8]:
# Keep only the sites (rows) that have no NaN in any column
pf_flow_match_df_new = pf_flow_match_df.dropna(axis='index', how='any')
pf_flow_match_df_new

Unnamed: 0_level_0,2002-10-01,2002-10-02,2002-10-03,2002-10-04,2002-10-05,2002-10-06,2002-10-07,2002-10-08,2002-10-09,2002-10-10,...,2003-09-21,2003-09-22,2003-09-23,2003-09-24,2003-09-25,2003-09-26,2003-09-27,2003-09-28,2003-09-29,2003-09-30
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000:OR:SNTL,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1005:CO:SNTL,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1006:NV:SNTL,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1008:MT:SNTL,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009:MT:SNTL,0.0,0.0,0.0,0.925932,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
984:WA:SNTL,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
985:WA:SNTL,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
988:ID:SNTL,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
989:ID:SNTL,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Restrict the observations to the sites that remain in the matched PF table
obs_site_mask = obs_data.index.isin(pf_flow_match_df_new.index)
obs_data_new = obs_data.loc[obs_site_mask]
obs_data_new

Unnamed: 0_level_0,2002-10-01,2002-10-02,2002-10-03,2002-10-04,2002-10-05,2002-10-06,2002-10-07,2002-10-08,2002-10-09,2002-10-10,...,2003-09-21,2003-09-22,2003-09-23,2003-09-24,2003-09-25,2003-09-26,2003-09-27,2003-09-28,2003-09-29,2003-09-30
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000:OR:SNTL,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1005:CO:SNTL,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1006:NV:SNTL,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1008:MT:SNTL,0.0,0.0,0.0,0.0,2.54,7.62,2.54,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1009:MT:SNTL,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
984:WA:SNTL,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
985:WA:SNTL,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
988:ID:SNTL,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
989:ID:SNTL,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Restrict the metadata to the sites that remain in the matched PF table
meta_site_mask = metadata.index.isin(pf_flow_match_df_new.index)
metadata_new = metadata.loc[meta_site_mask]
metadata_new

Unnamed: 0_level_0,site_type,agency,site_name,site_id_agency,site_query_url,date_metadata_last_updated,latitude,longitude,state,huc,...,file_path,county_name,elevation,shef_id,acton_id,conus1_x,conus1_y,conus2_x,conus2_y,num_obs
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000:OR:SNTL,SNOTEL station,NRCS,Annie Springs,1000:OR:SNTL,https://wcc.sc.egov.usda.gov/awdbWebService/we...,2023-03-07,42.87007,-122.16518,OR,180102030101,...,/hydrodata/national_obs/swe/data/daily/1000:OR...,Klamath,6010.0,ANSO3,22G06S,,,258,2286,365
1005:CO:SNTL,SNOTEL station,NRCS,Ute Creek,1005:CO:SNTL,https://wcc.sc.egov.usda.gov/awdbWebService/we...,2023-03-07,37.61480,-105.37322,CO,130100020604,...,/hydrodata/national_obs/swe/data/daily/1005:CO...,Costilla,10734.0,UTCC2,05M17S,1063.0,495.0,1490,1448,365
1006:NV:SNTL,SNOTEL station,NRCS,Lewis Peak,1006:NV:SNTL,https://wcc.sc.egov.usda.gov/awdbWebService/we...,2023-03-07,40.35720,-116.86470,NV,160401070903,...,/hydrodata/national_obs/swe/data/daily/1006:NV...,Lander,7370.0,LWPN2,16J01S,137.0,957.0,592,1908,365
1008:MT:SNTL,SNOTEL station,NRCS,Onion Park,1008:MT:SNTL,https://wcc.sc.egov.usda.gov/awdbWebService/we...,2023-03-07,46.91348,-110.85360,MT,100301030801,...,/hydrodata/national_obs/swe/data/daily/1008:MT...,Meagher,7410.0,ONPM8,10C22S,753.0,1575.0,1196,2499,365
1009:MT:SNTL,SNOTEL station,NRCS,Stringer Creek,1009:MT:SNTL,https://wcc.sc.egov.usda.gov/awdbWebService/we...,2023-03-07,46.92690,-110.90198,MT,100301030801,...,/hydrodata/national_obs/swe/data/daily/1009:MT...,Meagher,6550.0,STCM8,10C23S,750.0,1577.0,1193,2501,365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
984:WA:SNTL,SNOTEL station,NRCS,Spruce Springs,984:WA:SNTL,https://wcc.sc.egov.usda.gov/awdbWebService/we...,2023-03-07,46.18287,-117.54155,WA,170601030201,...,/hydrodata/national_obs/swe/data/daily/984:WA:...,Garfield,5700.0,SPGW1,17C04S,233.0,1598.0,697,2528,365
985:WA:SNTL,SNOTEL station,NRCS,Sourdough Gulch,985:WA:SNTL,https://wcc.sc.egov.usda.gov/awdbWebService/we...,2023-03-07,46.23718,-117.39438,WA,170601030202,...,/hydrodata/national_obs/swe/data/daily/985:WA:...,Asotin,4000.0,SGUW1,17C06S,245.0,1601.0,709,2531,365
988:ID:SNTL,SNOTEL station,NRCS,Hidden Lake,988:ID:SNTL,https://wcc.sc.egov.usda.gov/awdbWebService/we...,2023-03-07,48.89370,-116.75748,ID,170101040502,...,/hydrodata/national_obs/swe/data/daily/988:ID:...,Boundary,5040.0,HDLI1,16A12S,361.0,1879.0,825,2796,365
989:ID:SNTL,SNOTEL station,NRCS,Moscow Mountain,989:ID:SNTL,https://wcc.sc.egov.usda.gov/awdbWebService/we...,2023-03-07,46.80500,-116.85350,ID,170603061002,...,/hydrodata/national_obs/swe/data/daily/989:ID:...,Latah,4700.0,MSCI1,16C02S,300.0,1653.0,763,2580,365


In [12]:
# Write the matched PF values, the matched observations and the matched site
# metadata to organized_dir as comma-separated files.
# NOTE(review): the output names say "SWE_mm" / "SNOTEL", but the inputs
# configured at the top of this notebook are latent-heat-flux files - confirm
# the intended file names before relying on these outputs.
matched_outputs = (
    (pf_flow_match_df_new, f'{organized_dir}/PFCONUS2_Daily_matched_SWE_mm_{water_year}.csv'),
    (obs_data_new, f'{organized_dir}/SNOTEL_Daily_matched_SWE_mm_{water_year}.csv'),
    (metadata_new, f'{organized_dir}/SNOTEL_metadata_matched_SWE_mm_{water_year}.csv'),
)
for matched_frame, csv_path in matched_outputs:
    matched_frame.to_csv(csv_path, sep=",")

In [None]:
# # Calculate Annual Average Flow for PFCLM and save
# pf_flow_avg = np.mean(pf_flow_array, axis = 0) # mean over the day axis (annual average, not monthly)
# pf_flow_avg.shape
# np.savetxt(f'{organized_dir}/PFCONUS2_Annual_Avg_Flow_cms_WY2003.csv', pf_flow_avg, delimiter=",")

In [None]:
# Sanity check: shape of the full daily grid stack, allocated as (no_days, ny, nx)
pf_flow_array.shape

In [None]:
# Sanity check: shape of the site-matched array, allocated with obs_data.shape.
# (The original referenced `pf_flow_match`, which is never defined in this
# notebook and raised NameError; the array is named `pf_flow_matched`.)
pf_flow_matched.shape