In [1]:
import datetime as dt
import sys
import warnings

import geopandas
import matplotlib as mpl
import matplotlib.colors as pltc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from scipy.stats import skew
from sklearn import preprocessing
from tqdm import tqdm

## Plotting parameters
## pdf.fonttype 42 makes matplotlib embed fonts as Type 42 (TrueType) in saved PDFs
mpl.rcParams['pdf.fonttype'] = 42

## Cubic feet to cubic meters conversion factor:
## multiply a flow in cfs by this value to get cms, divide a flow in cms by it to get cfs
cfs_2_cms = 0.0283168466

In [2]:
def readModelFile(i):
    '''
    Load the streamflow CSV for the i-th gage listed in ``pnwNP`` and tag each row
    with the time periods used by the statistics functions.

    Parameters
    ----------
    i : label used to look up ``pnwNP['gage'][i]``; the gage id is the file name.

    Returns
    -------
    pandas.DataFrame with the added columns ``climate_year``, ``flow_season``,
    ``season``, ``month`` and ``DOY``, restricted to rows where
    ``streamflow_NWIS`` is not null (index reset to 0..n-1);
    or ``None`` when the file cannot be read or processed.
    '''
    try:
        df = pd.read_csv('../data/pnwNP_modeledData_NWM3/'+str(pnwNP['gage'][i])+".csv")
        df['time'] = pd.to_datetime(df['time'])
        month = df['time'].dt.month
        year = df['time'].dt.year

        ### Delineate time periods
        ## Jan-Mar keep the calendar year, Apr-Dec are assigned to the following year
        df['climate_year'] = year.where(month < 4, year + 1)
        ## Aug-Sep only; every other row keeps a missing flow_season
        df.loc[(month > 7) & (month < 10), "flow_season"] = "late_summer"
        df.loc[(month > 2) & (month < 6), "season"] = "spring"
        df.loc[(month > 5) & (month < 9), "season"] = "summer"
        df.loc[(month > 8) & (month < 12), "season"] = "fall"
        df.loc[(month > 11), "season"] = "winter"
        df.loc[(month < 3), "season"] = "winter"
        df['month'] = month
        df["DOY"] = df['time'].dt.day_of_year

        ## Keep only the rows that have an NWIS value
        df = df[df["streamflow_NWIS"].notna()].reset_index(drop=True)
        return df

    except Exception as exc:
        ## Best effort: a gage that cannot be loaded yields None, but the reason is
        ## reported instead of being silently discarded (and Ctrl-C is no longer swallowed).
        print(f"readModelFile: could not load gage at index {i}: {exc!r}", file=sys.stderr)
        return None

In [3]:
def calc7Q10(df):
    '''
    Estimate the 7Q10 statistic (7-day mean low flow at a 10-year return interval)
    for every streamflow column of one gage.

    For each column: take the 7-row rolling mean, reduce it to the minimum per
    ``climate_year``, fit a log-Pearson type 3 distribution to the log10 of those
    minima (mean, population standard deviation, skew) and evaluate it at the
    return interval with the Wilson-Hilferty frequency factor.

    Parameters
    ----------
    df : DataFrame with ``gage``, ``climate_year`` and the six ``streamflow_*``
         columns. It is not modified.

    Returns
    -------
    One-row DataFrame: ``gage`` followed by one 7Q10 value per streamflow column.
    A value is NaN when the fit cannot be computed (e.g. no usable years).

    Relies on the notebook-level ``cfs_2_cms`` and ``epsil``.
    '''
    stat = "7Q10"
    return_interval = 10
    ## NOTE(review): "NWM3"+stat has no underscore, unlike the other names. Kept as-is
    ## because the column name is part of the returned table; confirm before renaming.
    columns = ["gage","VIC_"+stat,"PRMS_"+stat,"NWM2d0_"+stat,"NWM2d1_"+stat,"NWM3"+stat,"NWIS_"+stat]
    flow_columns = ["streamflow_VIC","streamflow_PRMS",'streamflow_NWM2d0','streamflow_NWM2d1','streamflow_NWM3','streamflow_NWIS']

    ## Approximate standard normal deviate for a probability of 1/return_interval
    z = 4.91 * ((1 / return_interval)**0.14 - (1 - 1 / return_interval)**0.14)

    out = []
    ## Silence numeric warnings (log10 / skew on degenerate input) only while fitting,
    ## instead of switching warnings off for the rest of the session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for column in flow_columns:
            ## Work on a local Series so the caller's frame is left untouched
            flows = df[column]

            #### Round NWIS to nearest 0.1 cfs, therefore anything less than 0.05 cfs is q=0
            if column == "streamflow_NWIS":
                flows = np.round(flows / cfs_2_cms, 1) * cfs_2_cms

            ## Offset every column by epsil exactly once so zero flows survive log10
            flows = flows + epsil

            ## Calculate 7-day rolling average and its minimum per climate year
            rolling7 = flows.rolling(window=7).mean()
            annual_min = rolling7.groupby(df['climate_year']).min()

            ## Keep years with at most 36.6 missing 7-day means.
            ## NOTE(review): this counts NaN values among the rows present; days that are
            ## absent from the frame altogether are not counted - confirm that is intended.
            n_missing = rolling7.isnull().groupby(df['climate_year']).sum()
            ## A year whose minimum is NaN carries no information and would turn the skew into NaN
            annual_min = annual_min[n_missing <= 36.6].dropna()

            # Measures of the distribution
            log_min = np.log10(annual_min)
            Xbar = np.mean(log_min)
            S = np.std(log_min)
            g = skew(log_min)

            ## Pearson type 3 frequency factor (Wilson-Hilferty); as g -> 0 it tends to z,
            ## which the closed form cannot evaluate (division by zero gives NaN).
            if np.isfinite(g) and abs(g) < 1e-8:
                K = z
            else:
                K = (2/g) * ((((z - (g/6))*(g/6)+1)**3)-1)

            out.append(10**(Xbar + (K * S)))

    out.insert(0,int(df["gage"].iloc[0]))
    out = pd.DataFrame(out).transpose()
    out.columns = columns
    return out


In [4]:
def calc7Q2(df):
    '''
    Estimate the 7Q2 statistic (7-day mean low flow at a 2-year return interval)
    for every streamflow column of one gage.

    For each column: take the 7-row rolling mean, reduce it to the minimum per
    ``climate_year``, fit a log-Pearson type 3 distribution to the log10 of those
    minima (mean, population standard deviation, skew) and evaluate it at the
    return interval with the Wilson-Hilferty frequency factor.

    Parameters
    ----------
    df : DataFrame with ``gage``, ``climate_year`` and the six ``streamflow_*``
         columns. It is not modified.

    Returns
    -------
    One-row DataFrame: ``gage`` followed by one 7Q2 value per streamflow column.
    A value is NaN when the fit cannot be computed (e.g. no usable years).

    Relies on the notebook-level ``cfs_2_cms`` and ``epsil``.
    '''
    stat = "7Q2"
    return_interval = 2
    ## NOTE(review): "NWM3"+stat has no underscore, unlike the other names. Kept as-is
    ## because the column name is part of the returned table; confirm before renaming.
    columns = ["gage","VIC_"+stat,"PRMS_"+stat,"NWM2d0_"+stat,"NWM2d1_"+stat,"NWM3"+stat,"NWIS_"+stat]
    flow_columns = ["streamflow_VIC","streamflow_PRMS",'streamflow_NWM2d0','streamflow_NWM2d1','streamflow_NWM3','streamflow_NWIS']

    ## Approximate standard normal deviate for a probability of 1/return_interval
    z = 4.91 * ((1 / return_interval)**0.14 - (1 - 1 / return_interval)**0.14)

    out = []
    ## Silence numeric warnings (log10 / skew on degenerate input) only while fitting,
    ## instead of switching warnings off for the rest of the session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for column in flow_columns:
            ## Work on a local Series so the caller's frame is left untouched
            flows = df[column]

            #### Round NWIS to nearest 0.1 cfs, therefore anything less than 0.05 cfs is q=0
            if column == "streamflow_NWIS":
                flows = np.round(flows / cfs_2_cms, 1) * cfs_2_cms

            ## Offset every column by epsil exactly once so zero flows survive log10
            flows = flows + epsil

            ## Calculate 7-day rolling average and its minimum per climate year
            rolling7 = flows.rolling(window=7).mean()
            annual_min = rolling7.groupby(df['climate_year']).min()

            ## Keep years with at most 36.6 missing 7-day means.
            ## NOTE(review): this counts NaN values among the rows present; days that are
            ## absent from the frame altogether are not counted - confirm that is intended.
            n_missing = rolling7.isnull().groupby(df['climate_year']).sum()
            ## A year whose minimum is NaN carries no information and would turn the skew into NaN
            annual_min = annual_min[n_missing <= 36.6].dropna()

            # Measures of the distribution
            log_min = np.log10(annual_min)
            Xbar = np.mean(log_min)
            S = np.std(log_min)
            g = skew(log_min)

            ## Pearson type 3 frequency factor (Wilson-Hilferty); as g -> 0 it tends to z,
            ## which the closed form cannot evaluate (division by zero gives NaN).
            if np.isfinite(g) and abs(g) < 1e-8:
                K = z
            else:
                K = (2/g) * ((((z - (g/6))*(g/6)+1)**3)-1)

            out.append(10**(Xbar + (K * S)))

    out.insert(0,int(df["gage"].iloc[0]))
    out = pd.DataFrame(out).transpose()
    out.columns = columns
    return out


In [5]:
def calcZeroFlow(df):
    '''
    Average number of flagged "zero flow" rows per (season, climate_year) group,
    computed separately for each of the six streamflow columns of one gage.

    A row is flagged when, for ``streamflow_NWIS``, the value divided by
    ``cfs_2_cms`` and rounded to one decimal equals 0; for every other column,
    when the value is at or below 10% of that column's mean.

    Returns a one-row DataFrame: ``gage`` followed by one value per streamflow
    column (NaN when no row of that column is flagged).

    Side effects: switches Python warnings off globally, and on the first loop
    pass writes the helper column ``new`` into the DataFrame object passed in
    (``df`` is only rebound to a copy by the ``drop`` at the end of that pass).
    NOTE(review): other code in this notebook may rely on that leftover ``new``
    column - verify before removing the side effect.
    '''
    import warnings
    warnings.filterwarnings("ignore")
    ## NOTE: the string below comes after other statements, so Python does not treat it
    ## as the docstring; its wording (late-summer low flow days) does not match the
    ## per-season zero-flow count computed here.
    '''
    Read in data files for gages and calculate number of late-summer low flow days
    '''
    

    stat = "ZeroFlow"
    ## NOTE(review): "NWM3"+stat has no underscore, unlike the other column names
    columns = ["gage","VIC_"+stat,"PRMS_"+stat,"NWM2d0_"+stat,"NWM2d1_"+stat,"NWM3"+stat,"NWIS_"+stat]
    out = []

    ## Iterating a DataFrame yields its column names
    for column in df[["streamflow_VIC","streamflow_PRMS",'streamflow_NWM2d0','streamflow_NWM2d1','streamflow_NWM3','streamflow_NWIS']]:
        
        #### Round columns to nearest 0.1 cfs, therefore anything less than 0.05 cfs is q=0
        #### Round columns to add zeros
        if column == "streamflow_NWIS":
            ## The column is left divided by cfs_2_cms and rounded (it is not converted back)
            df[column] = df[column]/cfs_2_cms
            df[column] = np.round(df[column],1)
            df.loc[df[column]==0,"new"] = 0
        else:
            ## Other columns: flag values at or below 10% of the column mean
            df.loc[df[column]<=(df[column].mean()*.1),"new"] = 0

        ## Calculate number of zero flow days by season
        lsq10d = df[['climate_year','season','new']]
        if df["new"].isnull().all():
            ## Nothing flagged for this column
            lsq10d = np.nan
        else:
            ## count() tallies the non-null flags per group; the statistic is the mean of those tallies
            lsq10d = lsq10d.groupby(['season',"climate_year"]).count()
            lsq10d = lsq10d['new'].mean()
        ## drop() returns a new frame, so from here on df is a local copy without the flag column
        df = df.drop(['new'],axis=1,errors='ignore')
        out.extend([lsq10d])

    ## Prepend the gage id (label 0 of the "gage" column) and shape the list into a single row
    out.insert(0,int(df["gage"][0]))
    out = pd.DataFrame(out).transpose()
    out.columns = columns
    return out


In [6]:
def calcLowFlow(df):
    '''
    Average number of low-flow rows per (season, climate_year) group, computed
    separately for each of the six streamflow columns of one gage.

    A row counts as low flow when its value is at or below the column's 25th
    percentile (taken over the whole column). ``streamflow_NWIS`` is first
    rounded to the nearest 0.1 cfs.

    Parameters
    ----------
    df : DataFrame with ``gage``, ``climate_year``, ``season`` and the six
         ``streamflow_*`` columns. It is not modified, and any helper column
         already present on it (such as ``new``) is ignored.

    Returns
    -------
    One-row DataFrame: ``gage`` followed by one value per streamflow column
    (NaN when no row of that column is flagged).

    Relies on the notebook-level ``cfs_2_cms``.
    '''
    stat = "LowFlow"
    ## NOTE(review): "NWM3"+stat has no underscore, unlike the other names. Kept as-is
    ## because the column name is part of the returned table; confirm before renaming.
    columns = ["gage","VIC_"+stat,"PRMS_"+stat,"NWM2d0_"+stat,"NWM2d1_"+stat,"NWM3"+stat,"NWIS_"+stat]
    flow_columns = ["streamflow_VIC","streamflow_PRMS",'streamflow_NWM2d0','streamflow_NWM2d1','streamflow_NWM3','streamflow_NWIS']
    out = []

    for column in flow_columns:
        ## Local Series: nothing is written back to the caller's frame
        flows = df[column]

        #### Round NWIS to the nearest 0.1 cfs
        if column == "streamflow_NWIS":
            flows = np.round(flows / cfs_2_cms, 1) * cfs_2_cms

        ## Flag is rebuilt from scratch for every column, so flags left over from an
        ## earlier column or an earlier function can never leak into the count
        is_low = flows <= flows.quantile(0.25)

        ## Calculate number of low flow days by season and climate year
        if not is_low.any():
            n_days = np.nan
        else:
            ## Rows with a missing season or climate_year fall out of the grouping
            n_days = is_low.groupby([df['season'], df['climate_year']]).sum().mean()

        out.append(n_days)

    out.insert(0,int(df["gage"].iloc[0]))
    out = pd.DataFrame(out).transpose()
    out.columns = columns
    return out



In [7]:
def calcZeroFlowSummer(df):
    '''
    Average number of flagged "zero flow" rows per (climate_year, flow_season)
    group, computed separately for each of the six streamflow columns of one gage.

    A row is flagged when, for ``streamflow_NWIS``, the value converted to cfs
    and rounded to 0.1 cfs equals 0; for every other column, when the value is
    at or below 10% of that column's mean. Rows without a ``flow_season`` are
    not part of any group.

    Parameters
    ----------
    df : DataFrame with ``gage``, ``climate_year``, ``flow_season`` and the six
         ``streamflow_*`` columns. It is not modified, and any helper column
         already present on it (such as ``new``) is ignored.

    Returns
    -------
    One-row DataFrame: ``gage`` followed by one value per streamflow column
    (NaN when no row of that column is flagged).

    Relies on the notebook-level ``cfs_2_cms``.
    '''
    stat = "ZeroFlowSummer"
    ## NOTE(review): "NWM3"+stat has no underscore, unlike the other names. Kept as-is
    ## because the column name is part of the returned table; confirm before renaming.
    columns = ["gage","VIC_"+stat,"PRMS_"+stat,"NWM2d0_"+stat,"NWM2d1_"+stat,"NWM3"+stat,"NWIS_"+stat]
    flow_columns = ["streamflow_VIC","streamflow_PRMS",'streamflow_NWM2d0','streamflow_NWM2d1','streamflow_NWM3','streamflow_NWIS']
    out = []

    for column in flow_columns:
        ## Local Series: nothing is written back to the caller's frame
        flows = df[column]

        ## Flag is rebuilt from scratch for every column, so flags left over from an
        ## earlier column or an earlier function can never leak into the count
        if column == "streamflow_NWIS":
            #### Round to nearest 0.1 cfs, therefore anything less than 0.05 cfs is q=0
            is_zero = np.round(flows / cfs_2_cms, 1) == 0
        else:
            is_zero = flows <= (flows.mean()*.1)

        ## Calculate number of zero flow days by climate year and flow season
        if not is_zero.any():
            n_days = np.nan
        else:
            n_days = is_zero.groupby([df['climate_year'], df['flow_season']]).sum().mean()

        out.append(n_days)

    out.insert(0,int(df["gage"].iloc[0]))
    out = pd.DataFrame(out).transpose()
    out.columns = columns
    return out


In [8]:
def calcLowFlowSummer(df):
    '''
    Average number of low-flow rows per (climate_year, flow_season) group,
    computed separately for each of the six streamflow columns of one gage.

    A row counts as low flow when its value is at or below the column's 25th
    percentile (taken over the whole column). ``streamflow_NWIS`` is first
    rounded to the nearest 0.1 cfs. Rows without a ``flow_season`` are not part
    of any group.

    Parameters
    ----------
    df : DataFrame with ``gage``, ``climate_year``, ``flow_season`` and the six
         ``streamflow_*`` columns. It is not modified, and any helper column
         already present on it (such as ``new``) is ignored.

    Returns
    -------
    One-row DataFrame: ``gage`` followed by one value per streamflow column
    (NaN when no row of that column is flagged).

    Relies on the notebook-level ``cfs_2_cms``.
    '''
    stat = "LowFlowSummer"
    ## NOTE(review): "NWM3"+stat has no underscore, unlike the other names. Kept as-is
    ## because the column name is part of the returned table; confirm before renaming.
    columns = ["gage","VIC_"+stat,"PRMS_"+stat,"NWM2d0_"+stat,"NWM2d1_"+stat,"NWM3"+stat,"NWIS_"+stat]
    flow_columns = ["streamflow_VIC","streamflow_PRMS",'streamflow_NWM2d0','streamflow_NWM2d1','streamflow_NWM3','streamflow_NWIS']
    out = []

    for column in flow_columns:
        ## Local Series: nothing is written back to the caller's frame
        flows = df[column]

        #### Round NWIS to the nearest 0.1 cfs
        if column == "streamflow_NWIS":
            flows = np.round(flows / cfs_2_cms, 1) * cfs_2_cms

        ## Flag is rebuilt from scratch for every column, so flags left over from an
        ## earlier column or an earlier function can never leak into the count
        is_low = flows <= flows.quantile(0.25)

        ## Calculate number of low flow days by climate year and flow season
        if not is_low.any():
            n_days = np.nan
        else:
            n_days = is_low.groupby([df['climate_year'], df['flow_season']]).sum().mean()

        out.append(n_days)

    out.insert(0,int(df["gage"].iloc[0]))
    out = pd.DataFrame(out).transpose()
    out.columns = columns
    return out



In [9]:
def calclsBFI(df):
    '''
    Late-summer baseflow index for each of the six streamflow columns of one gage.

    For each column: take the 7-row rolling mean, reduce it to the minimum per
    (climate_year, flow_season) group, divide by the mean of the whole column
    and average the ratios over the groups. Rows without a ``flow_season`` are
    not part of any group.

    Parameters
    ----------
    df : DataFrame with ``gage``, ``climate_year``, ``flow_season`` and the six
         ``streamflow_*`` columns. It is not modified and no helper column is
         required on it.

    Returns
    -------
    One-row DataFrame: ``gage`` followed by one index value per streamflow
    column (NaN for a column that holds no data).

    Relies on the notebook-level ``cfs_2_cms`` and ``epsil``.
    '''
    stat = "LateSumBFI"
    ## NOTE(review): "NWM3"+stat has no underscore, unlike the other names. Kept as-is
    ## because the column name is part of the returned table; confirm before renaming.
    columns = ["gage","VIC_"+stat,"PRMS_"+stat,"NWM2d0_"+stat,"NWM2d1_"+stat,"NWM3"+stat,"NWIS_"+stat]
    flow_columns = ["streamflow_VIC","streamflow_PRMS",'streamflow_NWM2d0','streamflow_NWM2d1','streamflow_NWM3','streamflow_NWIS']
    out = []

    for column in flow_columns:
        ## Local Series: nothing is written back to the caller's frame
        flows = df[column]

        #### Round NWIS to the nearest 0.1 cfs, then offset by epsil
        if column == "streamflow_NWIS":
            flows = (np.round(flows / cfs_2_cms, 1) * cfs_2_cms) + epsil

        ## Calculate late-summer baseflow index.
        ## The emptiness test looks at the column being processed, not at a flag column
        ## that some other function may or may not have left on the frame.
        if flows.isnull().all():
            lsBFI = np.nan
        else:
            rolling7 = flows.rolling(window=7).mean()
            season_min = rolling7.groupby([df['climate_year'], df['flow_season']]).min()
            lsBFI = (season_min / flows.mean()).mean()

        out.append(lsBFI)

    out.insert(0,int(df["gage"].iloc[0]))
    out = pd.DataFrame(out).transpose()
    out.columns = columns
    return out

In [12]:
## Gage list: one row per gage; the "gage" column supplies the file names read by readModelFile
pnwNP = pd.read_csv("../data/pnwNP_StatsLocations_HydroClass.csv")
## Small offset that the statistic functions add to flows (e.g. before taking log10)
epsil = 1e-4

## Statistics merged onto the calc7Q10 result, in the order the notebook has always called them
## (the order is kept because the functions receive the same DataFrame object one after another)
later_stats = [calc7Q2, calcZeroFlow, calcLowFlow, calclsBFI, calcZeroFlowSummer, calcLowFlowSummer]

gage_tables = []
for i in tqdm(range(len(pnwNP))):
    df = readModelFile(i)
    ## readModelFile returns None when a gage file cannot be loaded; an empty frame has
    ## no gage id to report. Skip both instead of crashing the whole run.
    if df is None or df.empty:
        continue

    out = calc7Q10(df)
    for stat_function in later_stats:
        out = out.merge(stat_function(df), how='left',on='gage')
    gage_tables.append(out)

## Concatenate once at the end rather than growing the table inside the loop
output = pd.concat(gage_tables, ignore_index=True) if gage_tables else pd.DataFrame([])
    


100%|████████████████████████████████████████████████████████████████████████████████| 467/467 [01:30<00:00,  5.15it/s]


In [13]:
## Save the per-gage statistics table (to_csv defaults are used, so the row index is written as well)
output.to_csv("../data/streamflowStats_NWM3.csv")