### Compare CONUS2 and USGS streamflow ###
DTT, 10/22

This notebook reads in CSVs of CONUS2 daily mean flow [cms] and USGS daily mean flow [cms] and creates comparison hydrographs. Gages were matched between the two CSVs in `1_Organize_Streamflow.ipynb` to make sure that we're comparing viable gage locations.

Inputs:
- CONUS2 daily mean flow csv in [cms], output from `1_Organize_Streamflow.ipynb`
- USGS daily mean flow csv in [cms], output from `1_Organize_Streamflow.ipynb`
- USGS gage metadata csv, output from `1_Organize_Streamflow.ipynb`

Outputs:
- Hydrographs of USGS and CONUS2 flow

Notes:
- As of 11/22 there are no stats included in these hydrographs. Need to either add a calculation here or pull them from `SaveStreamflowStats.ipynb` and add them to the plots.

In [1]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from parflow.tools.io import read_pfb,write_pfb

In [2]:
# Water year to analyze; the input csv names below are derived from it.
water_year = 2003
# Directory containing output csv's from `Make_CONUS2_streamflow_csv.py`
# NOTE(review): the notebook header attributes these csv's to
# `1_Organize_Streamflow.ipynb` -- confirm which script actually writes them.
organized_dir = '/glade/work/tijerina/PFCONUS2-analysis/scripts/Validation/Streamflow/Organized_Daily_Flow'
# Where you want to save plots, etc. to
plot_dir = '/glade/work/tijerina/PFCONUS2-analysis/scripts/Validation/Streamflow/Figures'

# names of csv files -- built from `water_year` so that changing the year above
# switches all three inputs together instead of silently re-reading 2003
usgs_csv = f'USGS_Daily_matched_flow_cms_{water_year}.csv'
pf_csv = f'PFCONUS2_Daily_matched_flow_cms_{water_year}.csv'
metadata_csv = f'USGS_metadata_matched_flow_{water_year}.csv'

### Read in files
Read in PF flow, USGS flow, and USGS metadata. Calculated stats from `SaveStreamflowStats.ipynb` are not read in yet (see Notes above).

In [4]:
# Load the matched CONUS2 daily flow csv, using the `site_id` column as the row index
pf_flow_path = f'{organized_dir}/{pf_csv}'
pf_flow_matched = pd.read_csv(pf_flow_path, index_col=['site_id'])
# preview the first few gages
pf_flow_matched.head()

Unnamed: 0_level_0,2002-10-01,2002-10-02,2002-10-03,2002-10-04,2002-10-05,2002-10-06,2002-10-07,2002-10-08,2002-10-09,2002-10-10,...,2003-09-21,2003-09-22,2003-09-23,2003-09-24,2003-09-25,2003-09-26,2003-09-27,2003-09-28,2003-09-29,2003-09-30
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,37.266464,34.151593,31.949418,28.746042,24.794453,21.037749,17.72566,14.927488,12.823219,11.328315,...,0.38303,0.248542,0.270354,0.370566,0.447849,0.437744,0.346303,1.857882,32.173826,0.0
1013500,18.686342,17.994405,17.17183,14.470901,14.358695,12.452931,11.061765,10.040905,9.119768,8.49928,...,0.542694,0.269608,0.231789,0.451696,0.32128,0.249696,0.204132,1.203276,30.743592,0.0
1015800,30.468728,29.614383,27.459663,24.822553,22.551353,19.456863,16.647201,14.524086,12.689513,11.405875,...,0.397749,0.306602,0.316849,0.505316,0.353142,0.297726,0.204833,0.469169,6.413734,0.0
1017000,64.752051,64.284799,62.117018,57.702024,52.771326,48.076338,43.703179,39.512687,35.268489,31.090277,...,1.916867,1.738682,1.658574,1.623249,1.463889,1.393643,1.3411,1.439029,3.284965,0.0
1017550,0.044492,0.028687,0.010159,0.003003,0.00438,0.001411,0.0,0.0,0.0,3.6e-05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Load the matched USGS daily flow csv, using the `site_id` column as the row index
usgs_flow_path = f'{organized_dir}/{usgs_csv}'
usgs_flow_matched = pd.read_csv(usgs_flow_path, index_col=['site_id'])
# preview the first few gages
usgs_flow_matched.head()

Unnamed: 0_level_0,2002-10-01,2002-10-02,2002-10-03,2002-10-04,2002-10-05,2002-10-06,2002-10-07,2002-10-08,2002-10-09,2002-10-10,...,2003-09-21,2003-09-22,2003-09-23,2003-09-24,2003-09-25,2003-09-26,2003-09-27,2003-09-28,2003-09-29,2003-09-30
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,7.4146,7.3863,6.9901,6.5656,6.3675,6.1977,5.7449,5.0657,4.528,4.4148,...,12.6501,12.3671,11.9426,12.7067,13.6972,13.6123,13.1595,14.0085,66.222,76.127
1013500,3.5092,3.4243,3.1696,3.113,3.3394,2.8583,2.8583,2.6319,2.4055,2.50455,...,14.3764,13.8104,13.6123,13.5557,13.1595,12.2539,11.9426,15.7631,58.298,51.223
1015800,6.7071,6.226,5.7166,5.377,5.1506,5.0374,4.7544,4.4997,4.1601,4.1884,...,3.7639,3.6507,3.5375,3.5941,3.7356,3.7922,3.7073,4.2167,25.4134,60.845
1017000,9.8767,9.3673,8.4617,7.9523,7.8108,7.1316,7.0467,6.6788,6.2826,6.2826,...,7.075,6.9052,6.792,6.8203,6.8203,6.8486,6.7637,7.8957,50.657,147.726
1017550,0.023772,0.024338,0.020659,0.018395,0.01981,0.01981,0.018678,0.018678,0.018961,0.021791,...,0.080655,0.043016,0.049525,0.07358,0.053204,0.026319,0.023206,0.063675,0.1132,0.105559


In [5]:
# Load the USGS gage metadata csv, using the `site_id` column as the row index
metadata_path = f'{organized_dir}/{metadata_csv}'
usgs_gage_metadata = pd.read_csv(metadata_path, index_col=['site_id'])
# preview the first few gages
usgs_gage_metadata.head()

Unnamed: 0_level_0,site_type,agency,site_name,site_id_agency,site_query_url,date_metadata_last_updated,latitude,longitude,state,huc,...,topo_cd,instruments_cd,construction_dt,inventory_dt,drain_area_va,contrib_drain_area_va,local_time_fg,reliability_cd,project_no,num_obs
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,stream gauge,USGS,"Allagash River near Allagash, Maine",1011000,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,47.069722,-69.079444,ME,1010002,...,,YNNNYNNNNYNNNNNNYNNNNNNNNNNNNN,,,1478.0,1229.0,Y,,442300100.0,365
1013500,stream gauge,USGS,"Fish River near Fort Kent, Maine",1013500,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,47.2375,-68.582778,ME,1010003,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,873.0,873.0,Y,,442300100.0,365
1015800,stream gauge,USGS,"Aroostook River near Masardis, Maine",1015800,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.523056,-68.371667,ME,1010004,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,892.0,892.0,Y,,442300100.0,365
1017000,stream gauge,USGS,"Aroostook River at Washburn, Maine",1017000,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.777222,-68.157222,ME,1010004,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,1654.0,1654.0,Y,,442300100.0,365
1017550,stream gauge,USGS,"Williams Brook at Phair, Maine",1017550,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.628056,-67.953056,ME,1010005,...,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,3.82,3.82,Y,,442300100.0,365


In [8]:
# number of columns in the CONUS2 flow table (one column per date in the header)
pf_flow_matched.shape[1]

365

In [10]:
# spot-check the full CONUS2 row for a single gage
pf_flow_matched.loc[1011000, :]

2002-10-01    37.266464
2002-10-02    34.151593
2002-10-03    31.949418
2002-10-04    28.746042
2002-10-05    24.794453
                ...    
2003-09-26     0.437744
2003-09-27     0.346303
2003-09-28     1.857882
2003-09-29    32.173826
2003-09-30     0.000000
Name: 1011000, Length: 365, dtype: float64

In [12]:
### Make a plot of CONUS2 vs observed
# Writes one PNG per gage in `usgs_flow_matched`, overlaying that gage's row
# from the CONUS2 table and from the USGS table.

# Output folder follows `water_year` from the config cell instead of a
# hard-coded year; create it up front so savefig does not fail when it is missing.
timeseries_dir = f'{plot_dir}/Timeseries_WY{water_year}'
os.makedirs(timeseries_dir, exist_ok=True)

# x-axis is a plain 0..N-1 position over the flow columns, not calendar dates
xlength = range(len(pf_flow_matched.columns))

for gage in usgs_flow_matched.index:
    # single .loc lookup (row label, column label) rather than chained indexing
    station_name = usgs_gage_metadata.loc[gage, 'site_name']

    # make hydrographs
    fig, ax = plt.subplots(figsize=(7,5), dpi = 200)
    ax.plot(xlength, pf_flow_matched.loc[gage], label='CONUS2')
    ax.plot(xlength, usgs_flow_matched.loc[gage], label='USGS')
    ax.set_ylabel('flow [cms]')
    ax.set_xlabel('Days')
    ax.set_title(f'Mean Daily Flow - {station_name} [cms]')
    ax.legend()
    fig.savefig(f'{timeseries_dir}/Timeseries_{gage}_cms.png')
    # close this specific figure so the loop does not accumulate open figures
    plt.close(fig)