### Compare CONUS2 and USGS streamflow ###
DTT, 10/22

This notebook reads in CSVs of CONUS2 daily mean flow [cms] and USGS daily mean flow [cms], creates comparison hydrographs, and runs comparison statistics. Gages are matched between the two CSVs so that only gage locations present in both datasets are compared.

Inputs:
- CONUS2 daily mean flow csv in [cms] - the output of `Make_CONUS2_streamflow_csv.py`
- USGS daily mean flow csv in [cms] - the output of `point_obs_CONUS2_Streamflow.ipynb`
- USGS gage metadata csv - the output of `point_obs_CONUS2_Streamflow.ipynb`

Outputs:
- Hydrographs of USGS and CONUS2 flow
- CSV of statistics

In [10]:
import sys
from parflow.tools.io import read_pfb,write_pfb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [11]:
# Water year being evaluated; used below to build the CONUS2 flow CSV filename.
water_year = 2003
# Directory where averages are saved in
# NOTE(review): not referenced by any cell visible in this notebook -- confirm it is still needed.
directory_out = '/glade/scratch/tijerina/CONUS2/spinup_WY2003/averages'
# Where you want to save plots, etc. to
# (the hydrograph loop writes one PNG per gage into this directory)
plot_dir = '/glade/p/univ/ucsm0002/CONUS2/Validation/Streamflow/Figures'

### Compare CONUS2 to USGS

In [12]:
### Load the CONUS2 daily mean flow table (one row per gage, one column per
### simulated day), keyed by the gage ID stored in the 'STNID' column.
conus2_csv = f'CONUS2_daily_flow_cms_{water_year}.csv'
# Columns carried along in the CSV that are not daily flow values
conus2_non_flow_cols = ['Unnamed: 0','USGS_lat','USGS_lon','x_new','y_new','matched']
conus2_all_gages_cms = (
    pd.read_csv(conus2_csv, index_col=['STNID'])
    .drop(columns=conus2_non_flow_cols)
    .rename_axis(index='site_id')  # same index name the USGS tables use
)
conus2_all_gages_cms

Unnamed: 0_level_0,day 001,day 002,day 003,day 004,day 005,day 006,day 007,day 008,day 009,day 010,...,day 022,day 023,day 024,day 025,day 026,day 027,day 028,day 029,day 030,day 031
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,4.339120,3.903565,3.598231,3.739801,3.472192,2.964904,2.711756,2.519148,2.262331,2.004412,...,8.415931,9.427277,9.732380,19.499970,22.328073,21.454524,17.873519,15.879472,14.244763,12.567175
1013500,2.607411,2.592114,2.714427,2.122667,2.426324,1.780095,1.482548,1.307613,1.202425,1.285236,...,10.531182,10.841114,10.288957,10.600749,12.061306,16.507953,16.798331,14.819481,13.234019,11.639035
1015800,3.538157,3.513982,3.389648,3.025167,2.979538,2.794375,2.514015,2.242985,2.058773,2.187055,...,23.623506,33.084342,38.403587,35.184230,30.383635,27.200526,22.780802,20.339741,18.833875,17.107330
1017000,8.474701,8.578840,8.433320,8.120163,7.786723,7.233890,6.638142,6.070441,5.608575,5.285559,...,17.952113,33.277058,46.507889,49.141900,53.410219,58.835318,57.151723,53.937779,50.008866,44.393264
1017060,9.365423,9.230022,8.846584,8.886665,8.997490,8.595441,8.483485,7.985592,7.376596,6.927071,...,30.045680,27.118460,30.716634,47.878220,55.283767,61.190188,66.408090,65.556763,61.238056,56.564409
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11313440,50.773669,49.613323,48.062815,46.003073,43.441176,40.875480,38.542183,36.399452,34.203651,31.898496,...,9.928659,8.950403,8.103057,7.364269,6.698057,6.052058,5.462674,4.928310,4.433904,3.987403
11425500,116.148481,112.907463,109.381179,105.021334,99.839985,94.592943,89.220363,82.997513,75.778338,68.308351,...,20.874158,18.800698,16.983615,15.405093,14.005518,12.747374,11.621958,10.624358,9.723645,8.921323
11447830,139.874860,136.197904,131.543324,127.058090,121.711475,115.287256,108.343587,101.499093,94.540471,86.909916,...,27.412672,24.851737,22.568657,20.514646,18.671976,16.973208,15.445735,14.102672,12.903022,11.833389
11447850,140.278566,137.044614,132.543953,128.079305,123.055965,116.928999,110.087605,103.254751,96.436137,89.122187,...,28.590975,25.941239,23.592657,21.483862,19.568796,17.794967,16.202855,14.793646,13.534206,12.408862


In [13]:
### Load USGS daily mean flow for all gages with flow in October.
# Streamflow from hydrodata has already been converted to cms, so no unit
# conversion is applied here.
usgs_csv = 'USGS_WY2003_oct_mean_flow.csv'
usgs_all_gages_cms = (
    pd.read_csv(usgs_csv, index_col=['site_id'])
    .drop(columns=['Unnamed: 0', 'num_obs'])  # leave only the per-day flow columns
)
usgs_all_gages_cms

Unnamed: 0_level_0,2002-10-01,2002-10-02,2002-10-03,2002-10-04,2002-10-05,2002-10-06,2002-10-07,2002-10-08,2002-10-09,2002-10-10,...,2002-10-22,2002-10-23,2002-10-24,2002-10-25,2002-10-26,2002-10-27,2002-10-28,2002-10-29,2002-10-30,2002-10-31
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,7.414600,7.386300,6.990100,6.565600,6.367500,6.197700,5.744900,5.065700,4.528000,4.414800,...,9.310700,8.574900,8.546600,8.631500,9.820100,12.508600,15.508400,16.527200,15.791400,14.319800
1013500,3.509200,3.424300,3.169600,3.113000,3.339400,2.858300,2.858300,2.631900,2.405500,2.504550,...,5.263800,5.405300,5.660000,5.914700,6.197700,6.990100,7.556100,7.301400,7.188200,7.273100
1015800,6.707100,6.226000,5.716600,5.377000,5.150600,5.037400,4.754400,4.499700,4.160100,4.188400,...,22.300400,19.074200,16.725300,14.914100,13.725500,12.904800,12.848200,13.725500,13.725500,13.074600
1017000,9.876700,9.367300,8.461700,7.952300,7.810800,7.131600,7.046700,6.678800,6.282600,6.282600,...,38.205000,33.394000,28.583000,24.507800,21.762700,20.376000,20.715600,21.791000,21.734400,20.347700
1017550,0.023772,0.024338,0.020659,0.018395,0.019810,0.019810,0.018678,0.018678,0.018961,0.021791,...,0.044997,0.036790,0.036224,0.035092,0.030847,0.041884,0.051789,0.039620,0.031979,0.027451
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
393109104464500,0.048110,0.281868,0.249889,0.219325,0.191591,0.169517,0.162442,0.158197,0.150273,0.145179,...,0.119143,0.119143,0.119143,0.119143,0.118577,0.121407,0.121690,0.121973,0.121690,0.118294
394308105413800,0.619770,0.815040,0.888620,0.701840,0.648070,0.608450,0.792400,0.636750,0.713160,0.591470,...,0.489590,0.432990,0.481100,0.441480,0.407520,0.399030,0.430160,0.345260,0.342430,0.345260
394839104570300,1.267840,2.810190,1.239540,0.687690,0.478270,0.447140,0.427330,0.413180,0.393370,0.416010,...,0.500910,0.469780,0.444310,0.432990,0.416010,0.684860,0.469780,2.595110,1.448960,0.580150
401733105392404,0.071316,0.071316,0.069618,0.068203,0.066788,0.065373,0.063675,0.062260,0.060845,0.059147,...,0.041318,0.039903,0.038205,0.036790,0.035375,0.033677,0.032262,0.030847,0.030281,0.030281


In [15]:
### Load the site metadata for all USGS gages with flow in October,
### indexed by 'site_id' like the flow tables.
metadata_csv = 'USGS_WY2003_oct_metadata.csv'
usgs_gage_metadata = (
    pd.read_csv(metadata_csv, index_col=['site_id'])
    .drop(columns=['Unnamed: 0'])  # leftover positional index written by to_csv
)
usgs_gage_metadata.head()

Unnamed: 0_level_0,site_type,agency,site_name,site_id_agency,site_query_url,site_query_access_date,latitude,longitude,state,huc,...,basin_cd,topo_cd,instruments_cd,construction_dt,inventory_dt,drain_area_va,contrib_drain_area_va,local_time_fg,reliability_cd,project_no
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,stream gauge,USGS,"Allagash River near Allagash, Maine",1011000,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,47.069722,-69.079444,ME,1010002,...,,,YNNNYNNNNYNNNNNNYNNNNNNNNNNNNN,,,1478.0,1229.0,Y,,442300100.0
1013500,stream gauge,USGS,"Fish River near Fort Kent, Maine",1013500,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,47.2375,-68.582778,ME,1010003,...,,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,873.0,873.0,Y,,442300100.0
1015800,stream gauge,USGS,"Aroostook River near Masardis, Maine",1015800,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.523056,-68.371667,ME,1010004,...,,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,892.0,892.0,Y,,442300100.0
1017000,stream gauge,USGS,"Aroostook River at Washburn, Maine",1017000,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.777222,-68.157222,ME,1010004,...,,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,1654.0,1654.0,Y,,442300100.0
1017550,stream gauge,USGS,"Williams Brook at Phair, Maine",1017550,https://waterservices.usgs.gov/nwis/site/?form...,2022-08-30,46.628056,-67.953056,ME,1010005,...,,,YNNNYNYNNYNNNNNNYNNNNNNNNNNNNN,,,3.82,3.82,Y,,442300100.0


In [16]:
# Inner-join the CONUS2 and USGS flow tables on 'site_id' so that any gage
# missing from either table is dropped from the comparison.
combine_df = pd.merge(
    conus2_all_gages_cms,
    usgs_all_gages_cms,
    how='inner',
    on='site_id',
)

In [26]:
# (rows, columns) of the merged table: one row per gage found in both the
# CONUS2 and USGS tables, with the CONUS2 and USGS daily columns side by side.
combine_df.shape

(4418, 62)

In [23]:
## THIS WAS TO TRIM METADATA DF DOWN TO SHARED USGS-CONUS2 GAGES, BUT DON'T NEED IF REFERENCING THE PANDAS INDEX
# combine_df_meta = combine_df.merge(usgs_gage_metadata, on='site_id', how='inner')
# combine_df_meta
# metadata_oct = pd.DataFrame(combine_df_meta.iloc[:,63:])
# metadata_oct

In [27]:
# Separate the CONUS2 flow from the USGS flow in combine_df.
# The merge places the CONUS2 columns first and the USGS columns after them, so
# the split point is the number of CONUS2 columns. Deriving it from the source
# table (instead of hard-coding 31 days) keeps this correct for other months.
n_conus2_days = conus2_all_gages_cms.shape[1]
# .copy() makes this an independent frame rather than a slice of combine_df
conus2_flow_oct = combine_df.iloc[:, :n_conus2_days].copy() # get only CONUS2 flow
conus2_flow_oct.head()

Unnamed: 0_level_0,day 001,day 002,day 003,day 004,day 005,day 006,day 007,day 008,day 009,day 010,...,day 022,day 023,day 024,day 025,day 026,day 027,day 028,day 029,day 030,day 031
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,4.33912,3.903565,3.598231,3.739801,3.472192,2.964904,2.711756,2.519148,2.262331,2.004412,...,8.415931,9.427277,9.73238,19.49997,22.328073,21.454524,17.873519,15.879472,14.244763,12.567175
1013500,2.607411,2.592114,2.714427,2.122667,2.426324,1.780095,1.482548,1.307613,1.202425,1.285236,...,10.531182,10.841114,10.288957,10.600749,12.061306,16.507953,16.798331,14.819481,13.234019,11.639035
1015800,3.538157,3.513982,3.389648,3.025167,2.979538,2.794375,2.514015,2.242985,2.058773,2.187055,...,23.623506,33.084342,38.403587,35.18423,30.383635,27.200526,22.780802,20.339741,18.833875,17.10733
1017000,8.474701,8.57884,8.43332,8.120163,7.786723,7.23389,6.638142,6.070441,5.608575,5.285559,...,17.952113,33.277058,46.507889,49.1419,53.410219,58.835318,57.151723,53.937779,50.008866,44.393264
1017550,0.00011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.126239,0.07756,0.050756,0.034315,0.048476,0.110372,0.062056,0.042434,0.028833,0.021246


In [19]:
# Everything after the CONUS2 columns in combine_df is USGS flow. The offset is
# taken from the CONUS2 table's column count (instead of the hard-coded 31:62)
# so the split stays correct when the number of days changes.
n_conus2_days = conus2_all_gages_cms.shape[1]
# .copy() makes this an independent frame rather than a slice of combine_df
usgs_flow_oct = combine_df.iloc[:, n_conus2_days:].copy() # get only USGS flow
usgs_flow_oct.head()

Unnamed: 0_level_0,2002-10-01,2002-10-02,2002-10-03,2002-10-04,2002-10-05,2002-10-06,2002-10-07,2002-10-08,2002-10-09,2002-10-10,...,2002-10-22,2002-10-23,2002-10-24,2002-10-25,2002-10-26,2002-10-27,2002-10-28,2002-10-29,2002-10-30,2002-10-31
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011000,7.4146,7.3863,6.9901,6.5656,6.3675,6.1977,5.7449,5.0657,4.528,4.4148,...,9.3107,8.5749,8.5466,8.6315,9.8201,12.5086,15.5084,16.5272,15.7914,14.3198
1013500,3.5092,3.4243,3.1696,3.113,3.3394,2.8583,2.8583,2.6319,2.4055,2.50455,...,5.2638,5.4053,5.66,5.9147,6.1977,6.9901,7.5561,7.3014,7.1882,7.2731
1015800,6.7071,6.226,5.7166,5.377,5.1506,5.0374,4.7544,4.4997,4.1601,4.1884,...,22.3004,19.0742,16.7253,14.9141,13.7255,12.9048,12.8482,13.7255,13.7255,13.0746
1017000,9.8767,9.3673,8.4617,7.9523,7.8108,7.1316,7.0467,6.6788,6.2826,6.2826,...,38.205,33.394,28.583,24.5078,21.7627,20.376,20.7156,21.791,21.7344,20.3477
1017550,0.023772,0.024338,0.020659,0.018395,0.01981,0.01981,0.018678,0.018678,0.018961,0.021791,...,0.044997,0.03679,0.036224,0.035092,0.030847,0.041884,0.051789,0.03962,0.031979,0.027451


In [25]:
### just testing some indexing here...
#usgs_flow_oct.index[7]
#usgs_flow_oct.loc[1011000]
#len(usgs_flow_oct.index)

In [24]:
### Make a hydrograph of CONUS2 vs observed (USGS) flow for each gage and save it as a PNG
n_plots = 1000  # cap on the number of gages plotted; set to len(usgs_flow_oct.index) to plot all

# Slicing the index (rather than indexing it with range(1000)) avoids an
# IndexError when fewer than n_plots gages survive the merge.
for gage in usgs_flow_oct.index[:n_plots]:
    # Single .loc lookup instead of chained ['site_name'][gage] indexing
    name = usgs_gage_metadata.loc[gage, 'site_name']

    fig, ax = plt.subplots(figsize=(7,5))
    # Passing bare values plots against 0..N-1, so the x axis follows the
    # number of days in the data instead of a hard-coded range(31).
    ax.plot(conus2_flow_oct.loc[gage].to_numpy(), label='CONUS2')
    ax.plot(usgs_flow_oct.loc[gage].to_numpy(), label='USGS')
    ax.set_ylabel('flow [cms]')
    ax.set_xlabel('Days')
    ax.set_title(f'Mean Daily Flow - {name} [cms]')
    ax.legend()
    fig.savefig(f'{plot_dir}/Timeseries_{gage}_cms.png')
    plt.close(fig)  # release the figure so memory does not grow across the loop

### Calculate statistics

In [53]:
### Print the paired CONUS2 and USGS daily flow series for each gage
# (no plotting and no statistics are computed in this cell; it only dumps the series)
n_preview = 1000  # cap on the number of gages printed; lower this to keep the output readable

# Slicing the index avoids an IndexError when fewer than n_preview gages exist.
for gage in usgs_flow_oct.index[:n_preview]:
    print(conus2_flow_oct.loc[gage])
    print(usgs_flow_oct.loc[gage])

day 001     4.339120
day 002     3.903565
day 003     3.598231
day 004     3.739801
day 005     3.472192
day 006     2.964904
day 007     2.711756
day 008     2.519148
day 009     2.262331
day 010     2.004412
day 011     1.773122
day 012     1.548321
day 013     1.385107
day 014     1.637810
day 015     1.481148
day 016     1.381073
day 017     1.832310
day 018     2.283993
day 019     3.026683
day 020     5.563889
day 021     6.369144
day 022     8.415931
day 023     9.427277
day 024     9.732380
day 025    19.499970
day 026    22.328073
day 027    21.454524
day 028    17.873519
day 029    15.879472
day 030    14.244763
day 031    12.567175
Name: 1011000, dtype: float64
2002-10-01     7.4146
2002-10-02     7.3863
2002-10-03     6.9901
2002-10-04     6.5656
2002-10-05     6.3675
2002-10-06     6.1977
2002-10-07     5.7449
2002-10-08     5.0657
2002-10-09     4.5280
2002-10-10     4.4148
2002-10-11     4.1884
2002-10-12     4.0469
2002-10-13     3.9337
2002-10-14     4.1601
2002-10-15 

In [None]:
# Gage to inspect individually, and the label used in the plot title.
# NOTE(review): confirm that this site number and site name refer to the same gage.
gage = 1451000
name = 'Milk River at Nashua MT'
################################################

start_date = '2002-10-01'
end_date = '2002-10-31'
gage0 = str(gage).zfill(8) #add leading zeros again (they were dropped when saved to CSV)

# import streamflow and gage data #
# NWIS 'uv' request in tab-separated RDB format for the date window above.
url = (
    'https://nwis.waterdata.usgs.gov/usa/nwis/uv/'
    f'?cb_00060=on&cb_00065=on&format=rdb&site_no={gage0}'
    f'&period=&begin_date={start_date}&end_date={end_date}'
)
#print(url)
# NOTE(review): skiprows=29 assumes a fixed-length header in the response, and the
# six column names below assume exactly six columns come back -- confirm both
# hold for other sites before reusing this cell.
obs_flow = pd.read_csv(url, sep='\t', skiprows=29)
obs_flow.columns = ['agency','site','datetime','timezone','discharge (cfs)','discharge approval']
print(obs_flow.head())
obs_flow = obs_flow.drop(columns = ['agency', 'site', 'timezone', 'discharge approval'])
# The default integer row index is kept on purpose: the aggregation cell groups
# on `obs_flow.index // 46` and reads 'datetime' as a regular column. The former
# `obs_flow.set_index('datetime')` call here discarded its result, so it had no
# effect and has been removed.

#convert cfs to cms (1 cubic foot = 0.028316847 cubic metres)
obs_flow['discharge (cms)'] = obs_flow['discharge (cfs)']*0.028316847

obs_flow

In [None]:
# Number of observation rows divided by 48.
# NOTE(review): presumably an estimate of the days of record assuming 48 readings
# per day; the aggregation cell mentions 15-min data and groups by 46 rows --
# confirm the actual sampling interval.
len(obs_flow.index)/48

In [None]:
# Collapse each consecutive run of 46 rows into a single row: keep the first
# timestamp of the run and sum both discharge columns.
# NOTE(review): the bin size of 46 agrees with neither the "15 min" / "hour"
# wording in the comments nor the /48 check in the previous cell, and summing a
# flow rate is not a mean flow -- confirm the intended interval and statistic.
# NOTE(review): this overwrites obs_flow, so re-running the cell aggregates the
# already-aggregated table again.
agg_rules = {'datetime': 'first','discharge (cfs)': 'sum', 'discharge (cms)': 'sum'}
row_bin = obs_flow.index // 46  # integer bin label for every row
obs_flow = obs_flow.groupby(row_bin).agg(agg_rules)
obs_flow

In [None]:
### Hydrograph of CONUS2 vs observed flow for the single gage selected above
# NOTE(review): `conus2_flow` is not defined in any cell visible in this
# notebook -- it must come from an earlier session or a removed cell; confirm
# where it is built before relying on Restart & Run All.
fig, ax = plt.subplots(figsize=(7,5))

# x positions are simple 0..N-1 counters for each series
conus2_steps = range(len(conus2_flow.index))
usgs_steps = range(len(obs_flow['datetime']))

ax.plot(conus2_steps, conus2_flow[gage], label='CONUS2')
ax.plot(usgs_steps, obs_flow['discharge (cms)'], label='USGS')
ax.set_ylabel('flow [cms]')
ax.set_xlabel('Simulated Hours')
ax.set_title(f'{name} [cms]')
#plt.savefig(f'test_{name}_cms.png')
ax.legend()

In [None]:
# Only the last expression in a cell is displayed, so this Series is evaluated
# but not shown.
obs_flow['discharge (cms)']
# Number of rows in conus2_flow.
# NOTE(review): `conus2_flow` is not defined in any cell visible in this notebook.
len(conus2_flow.index)

In [None]:
# Display the conus2_flow entry keyed by the selected gage.
# NOTE(review): `conus2_flow` is not defined in any cell visible in this notebook.
conus2_flow[gage]

In [None]:
# Quick line plot of daily_flow against a 0..N-1 step counter.
# NOTE(review): `daily_flow` is not defined in any cell visible in this
# notebook, and the x label says "hours" while the title says daily flow --
# confirm which time step is intended.
n_steps = len(daily_flow)
step_axis = range(n_steps)
plt.plot(step_axis, daily_flow)
plt.ylabel("flow (m^3/s)")
plt.xlabel("hours")
plt.title("Total Daily Flow")