In [1]:
import netCDF4 as nc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas
import xarray as xr
import matplotlib.colors as pltc
import geopandas
import datetime as dt
from scipy import stats
from sklearn import preprocessing
import s3fs
import sys, os, glob,re
import multiprocessing as mp
import time as time
import fsspec
from joblib_progress import joblib_progress
from joblib import Parallel, delayed

## Conversion factor from cubic feet to cubic meters (1 ft^3 = 0.0283168466 m^3, rounded).
## Multiplying a flow in cubic feet per second (cfs) by it gives cubic meters per second (cms).
cfs_2_cms = 0.0283168466

In [2]:
# Site statistics table; its 'comid' column is the join key used below.
pnwNP = pd.read_csv("../data/pnwNPall_InfowStats.csv")

### Attach river-segment attributes to every site
# Left join on comid == POI_ID: every row of pnwNP is kept, and rows without a
# matching segment get missing values in the shapefile columns.
columbia_segments = geopandas.read_file("../data/VIC_UW/shapefiles/columbia_seg.shp")
shp = pd.merge(pnwNP, columbia_segments, how='left', left_on='comid', right_on='POI_ID')


In [3]:
## Routed-runoff model output (VIC and PRMS), restricted to reach ID and runoff
routed_vars = ['reachID','IRFroutedRunoff']
pnwVIC = xr.open_mfdataset('../data/VIC_UW/vic_historical_first_route_all.nc')[routed_vars]
pnwPRMS = xr.open_mfdataset('../data/VIC_UW/prms_historical_first_route_all.nc')[routed_vars]

## NWM 2.0 retrospective, read anonymously from the NOAA AWS bucket
s3_path = 's3://noaa-nwm-retro-v2-zarr-pds' #nwm 2.0
s3 = s3fs.S3FileSystem(anon=True)
store = s3fs.S3Map(root=s3_path, s3=s3, check=False)
# Opening with consolidated metadata; the result is the dataset handle used for NWM 2.0
ds = xr.open_zarr(store=store, consolidated=True)

## NWM 2.1 retrospective (chrtout store), also through anonymous S3 access
fs = fsspec.filesystem('s3', anon=True)
_file = fs.glob('noaa-nwm-retrospective-2-1-zarr-pds/chrtout.zarr')
dsd1 = xr.open_dataset(
    fs.get_mapper(_file[0]),
    engine='zarr',
    backend_kwargs={'consolidated': True},
)


In [4]:
def _add_time_column(frame, value_column):
    """Reshape a time-indexed, single-column frame into ``[value_column, "time"]``.

    The index is copied into a timezone-naive ``time`` column, the index itself
    is replaced by a plain 0..n-1 range, and the two resulting columns are
    named ``value_column`` and ``"time"`` so the frame can be merged with the
    other sources on ``time``.
    """
    frame['time'] = pd.to_datetime(frame.index)
    frame['time'] = frame['time'].dt.tz_localize(None)
    frame = frame.reset_index(drop=True)
    frame.columns = [value_column, "time"]
    return frame


def _routed_runoff_frame(dataset, seg_id, value_column):
    """Extract the routed-runoff series of one reach from a VIC/PRMS dataset.

    Keeps only the entries whose ``reachID`` equals ``seg_id``, drops the
    ``reachID`` column and the ``seg`` index level, and returns the result as a
    ``[value_column, "time"]`` frame.
    """
    frame = dataset.where(dataset['reachID'] == seg_id, drop=True).to_dataframe()
    frame = frame.drop(['reachID'], axis=1).droplevel('seg')
    return _add_time_column(frame, value_column)


def getModels(i):
    """Assemble modeled and observed daily streamflow for one site and save it.

    For row ``i`` of the notebook-level ``shp`` table this merges, on ``time``
    (outer joins), the VIC and PRMS routed runoff (``pnwVIC`` / ``pnwPRMS``),
    the daily mean of NWM 2.0 (``ds``) and NWM 2.1 (``dsd1``) streamflow, and
    the NWIS daily record scaled by ``cfs_2_cms``. The merged table is written to
    ``../data/pnwNP_modeledData/<gage>.csv``.

    Parameters
    ----------
    i : int
        Row label in ``shp`` identifying the site (uses its ``gage``,
        ``seg_id`` and ``comid`` columns).

    Returns
    -------
    None
        The result is the CSV side effect. If the NWM or NWIS data cannot be
        obtained, a message is printed and no file is written for that gage.
    """
    gage = shp['gage'][i]
    seg_id = shp['seg_id'][i]
    comid = shp['comid'][i]
    print(gage)

    ## VIC and PRMS routed runoff for this reach, combined on time
    datMain = pd.merge(
        _routed_runoff_frame(pnwVIC, seg_id, "streamflow_VIC"),
        _routed_runoff_frame(pnwPRMS, seg_id, "streamflow_PRMS"),
        on='time', how='outer')

    try:
        ## NWM 2.0: select the reach, then aggregate to daily means.
        # The resample step is where the data is actually pulled, so it is slow.
        nwm2d0 = ds.sel(feature_id=comid).streamflow.persist()
        nwm2d0 = nwm2d0.resample(time='1d').mean()
        NWM = _add_time_column(pd.DataFrame(nwm2d0.to_pandas()), "streamflow_NWM2d0")
        datMain = pd.merge(datMain, NWM, on='time', how='outer')

        ## NWM 2.1: same reach, daily means computed on the pandas side
        nwm2d1 = dsd1.sel(feature_id=comid).streamflow.persist()
        NWM2d1 = _add_time_column(
            pd.DataFrame(nwm2d1.to_pandas()).resample('1d').mean(),
            "streamflow_NWM2d1")
        datMain = pd.merge(datMain, NWM2d1, on='time', how='outer')

        ## NWIS observations: first daily file whose name contains the gage id
        matches = glob.glob("../data/NWIS_streamflow/daily/" + '*' + str(gage) + '*')
        if not matches:
            print(f"No NWIS daily file found for gage {gage}; skipping")
            return

        # read_csv ignores the order of `usecols`, so rename by column name
        # rather than assigning names positionally.
        NWIS = pd.read_csv(matches[0], usecols=['datetime', '00060_Mean'])
        NWIS = NWIS.rename(columns={'datetime': 'time',
                                    '00060_Mean': 'streamflow_NWIS'})
        NWIS = NWIS[['time', 'streamflow_NWIS']]
        NWIS['time'] = pd.to_datetime(NWIS['time']).dt.tz_localize(None)
        NWIS["streamflow_NWIS"] = NWIS["streamflow_NWIS"]*cfs_2_cms

        ## Combine all the data and write one CSV per gage
        datMain = pd.merge(datMain, NWIS, on='time', how='outer')
        datMain["gage"] = gage

        datMain.sort_values(by='time', ascending=True) \
            .reset_index(drop=True) \
            .to_csv('../data/pnwNP_modeledData/'+str(gage)+".csv")

    except KeyError:
        # A label-based .sel raises KeyError when the comid is not a feature_id
        # of the NWM store; the site is skipped as before.
        print("No NWM data")
    except Exception as err:
        # Still best-effort (one bad site must not stop the batch), but report
        # the real cause. KeyboardInterrupt/SystemExit are deliberately not
        # caught so the long-running loop can be stopped.
        print(f"Skipping gage {gage}: {type(err).__name__}: {err}")

In [None]:
from tqdm import tqdm

# Run getModels for each row of `shp` from row 6 to the end, with a progress bar.
# NOTE(review): the start index 6 skips the first six rows -- presumably a resume
# of an earlier partial run; confirm before re-running from scratch.
for site_row in tqdm(range(6, len(shp))):
    getModels(site_row)

  0%|          | 0/612 [00:00<?, ?it/s]

10390001


  0%|          | 1/612 [04:53<49:53:44, 293.98s/it]

10393500


  0%|          | 2/612 [10:17<52:45:56, 311.40s/it]

10395000


  0%|          | 3/612 [14:48<49:34:25, 293.05s/it]

10395500


  1%|          | 4/612 [19:20<48:05:38, 284.77s/it]

10402000


  1%|          | 5/612 [23:48<46:58:26, 278.59s/it]

10403000


  1%|          | 6/612 [28:34<47:18:22, 281.03s/it]

10406500


  1%|          | 7/612 [33:07<46:49:04, 278.59s/it]

11533000


  1%|▏         | 8/612 [33:24<32:44:06, 195.11s/it]

No NWM data
12020900


  1%|▏         | 9/612 [39:47<42:32:33, 253.99s/it]

12025300


  2%|▏         | 10/612 [46:04<48:49:27, 291.97s/it]

12030000


  2%|▏         | 11/612 [52:50<54:32:30, 326.71s/it]

12034200


  2%|▏         | 12/612 [59:01<56:42:16, 340.23s/it]

12036650


  2%|▏         | 13/612 [1:05:19<58:30:16, 351.61s/it]

12043173


  2%|▏         | 14/612 [1:12:05<61:09:04, 368.13s/it]

12043190


  2%|▏         | 15/612 [1:16:40<56:25:18, 340.23s/it]

12057500


  3%|▎         | 16/612 [1:21:54<54:59:42, 332.19s/it]

12058000


  3%|▎         | 17/612 [1:26:39<52:33:53, 318.04s/it]

12058500


  3%|▎         | 18/612 [1:31:28<51:01:53, 309.28s/it]

12059000


  3%|▎         | 19/612 [1:36:25<50:21:14, 305.69s/it]

12063000


  3%|▎         | 20/612 [1:41:13<49:23:12, 300.33s/it]

12064500


  3%|▎         | 21/612 [1:46:02<48:45:01, 296.96s/it]

12065000


  4%|▎         | 22/612 [1:50:55<48:26:17, 295.56s/it]

12065500


  4%|▍         | 23/612 [1:55:47<48:11:36, 294.56s/it]

12066000


  4%|▍         | 24/612 [2:00:44<48:15:54, 295.50s/it]

12067000


  4%|▍         | 25/612 [2:05:46<48:28:35, 297.30s/it]

12067500


  4%|▍         | 26/612 [2:10:38<48:07:26, 295.64s/it]

12072000


  4%|▍         | 27/612 [2:15:02<46:31:12, 286.28s/it]

12078650


  5%|▍         | 28/612 [2:15:19<33:18:13, 205.30s/it]

No NWM data
12089208


  5%|▍         | 29/612 [2:15:34<24:02:47, 148.49s/it]

No NWM data
12090200


  5%|▍         | 30/612 [2:20:33<31:16:52, 193.49s/it]

12090365


  5%|▌         | 31/612 [2:25:07<35:08:56, 217.79s/it]

12090400


  5%|▌         | 32/612 [2:29:40<37:43:58, 234.20s/it]

12090500


  5%|▌         | 33/612 [2:34:02<38:59:15, 242.41s/it]

12091050


  6%|▌         | 34/612 [2:39:20<42:35:28, 265.27s/it]

12091060


  6%|▌         | 35/612 [2:44:12<43:48:14, 273.30s/it]

12091070


  6%|▌         | 36/612 [2:48:19<42:28:13, 265.44s/it]

12091100


  6%|▌         | 37/612 [2:52:40<42:10:13, 264.02s/it]

12091180


  6%|▌         | 38/612 [2:57:11<42:24:29, 265.97s/it]

12091200


  6%|▋         | 39/612 [3:02:28<44:46:24, 281.30s/it]

12091700


  7%|▋         | 40/612 [3:07:40<46:11:19, 290.70s/it]

12098920


  7%|▋         | 41/612 [3:12:32<46:08:16, 290.89s/it]

12099000


  7%|▋         | 42/612 [3:12:52<33:11:26, 209.63s/it]

No NWM data
12101100


  7%|▋         | 43/612 [3:13:11<24:05:47, 152.46s/it]

No NWM data
12102025


  7%|▋         | 44/612 [3:18:01<30:35:44, 193.92s/it]

12102190


  7%|▋         | 45/612 [3:23:14<36:09:36, 229.59s/it]

12102900


  8%|▊         | 46/612 [3:27:24<37:02:59, 235.65s/it]

12102920


  8%|▊         | 47/612 [3:31:49<38:22:23, 244.50s/it]

12103210


  8%|▊         | 48/612 [3:36:00<38:35:51, 246.37s/it]

12103212


  8%|▊         | 49/612 [3:36:16<27:43:06, 177.24s/it]

No NWM data
12103220


  8%|▊         | 50/612 [3:40:33<31:25:38, 201.31s/it]

12105710


  8%|▊         | 51/612 [3:44:35<33:16:44, 213.55s/it]

12106000


  8%|▊         | 52/612 [3:48:37<34:32:29, 222.05s/it]

12107300


  9%|▊         | 53/612 [3:53:33<37:54:56, 244.18s/it]

12111500


  9%|▉         | 54/612 [3:57:38<37:51:56, 244.30s/it]

12113346


  9%|▉         | 55/612 [4:01:53<38:20:07, 247.77s/it]

12113347


  9%|▉         | 56/612 [4:06:00<38:13:31, 247.50s/it]

12113349


  9%|▉         | 57/612 [4:06:16<27:26:17, 177.98s/it]

No NWM data
12115700


  9%|▉         | 58/612 [4:10:53<31:57:38, 207.69s/it]

12115800


 10%|▉         | 59/612 [4:15:25<34:52:42, 227.06s/it]

12116100


 10%|▉         | 60/612 [4:20:06<37:18:11, 243.28s/it]

12116500


 10%|▉         | 61/612 [4:24:41<38:40:17, 252.66s/it]

12116800


 10%|█         | 62/612 [4:29:12<39:26:33, 258.17s/it]

12117800


 10%|█         | 63/612 [4:34:00<40:43:16, 267.03s/it]

12117820


 10%|█         | 64/612 [4:38:52<41:48:15, 274.63s/it]

12118300


 11%|█         | 65/612 [4:39:08<29:55:31, 196.95s/it]

No NWM data
12119730


 11%|█         | 66/612 [4:39:23<21:36:18, 142.45s/it]

No NWM data
12120500


 11%|█         | 67/612 [4:44:03<27:48:11, 183.65s/it]

12121700


 11%|█         | 68/612 [4:48:44<32:09:19, 212.79s/it]

12121720


 11%|█▏        | 69/612 [4:53:18<34:52:43, 231.24s/it]

12121830


 11%|█▏        | 70/612 [4:58:03<37:13:28, 247.25s/it]

12125500


 12%|█▏        | 71/612 [5:02:44<38:42:08, 257.54s/it]

12126900


 12%|█▏        | 72/612 [5:07:36<40:10:35, 267.84s/it]

12128000


 12%|█▏        | 73/612 [5:12:13<40:31:31, 270.67s/it]

12142200


 12%|█▏        | 74/612 [5:16:25<39:35:46, 264.96s/it]

12143700


 12%|█▏        | 75/612 [5:21:29<41:16:22, 276.69s/it]

12148700


 12%|█▏        | 76/612 [5:25:53<40:37:01, 272.80s/it]

12153000


 13%|█▎        | 77/612 [5:30:53<41:46:49, 281.14s/it]

12154000


 13%|█▎        | 78/612 [5:35:45<42:10:36, 284.34s/it]

12154500


 13%|█▎        | 79/612 [5:40:02<40:52:34, 276.09s/it]

12157025


 13%|█▎        | 80/612 [5:44:18<39:55:22, 270.16s/it]

12157250


 13%|█▎        | 81/612 [5:48:21<38:36:54, 261.80s/it]

12158010


 13%|█▎        | 82/612 [5:52:41<38:28:51, 261.38s/it]

12158032


 14%|█▎        | 83/612 [5:56:54<38:01:46, 258.80s/it]

12169500


 14%|█▎        | 84/612 [6:01:18<38:10:39, 260.30s/it]

12181090


 14%|█▍        | 85/612 [6:05:41<38:13:39, 261.14s/it]

12181100


 14%|█▍        | 86/612 [6:11:26<41:50:24, 286.36s/it]

12181200


 14%|█▍        | 87/612 [6:16:32<42:36:50, 292.21s/it]

12197020


 14%|█▍        | 88/612 [6:21:03<41:37:36, 285.99s/it]

12197040


 15%|█▍        | 89/612 [6:25:23<40:24:28, 278.14s/it]

12197110


 15%|█▍        | 90/612 [6:29:50<39:50:06, 274.72s/it]

12197680


 15%|█▍        | 91/612 [6:34:11<39:11:04, 270.76s/it]

12197700


 15%|█▌        | 92/612 [6:38:36<38:52:12, 269.10s/it]

12199800


 15%|█▌        | 93/612 [6:42:57<38:25:17, 266.51s/it]

12201950


 15%|█▌        | 94/612 [6:47:35<38:51:32, 270.06s/it]

12201960


 16%|█▌        | 95/612 [6:52:22<39:30:25, 275.10s/it]

12202000


 16%|█▌        | 96/612 [6:57:00<39:32:00, 275.82s/it]

12202300


 16%|█▌        | 97/612 [7:01:44<39:48:53, 278.32s/it]

12202310


 16%|█▌        | 98/612 [7:06:34<40:13:52, 281.77s/it]

12202400


 16%|█▌        | 99/612 [7:06:51<28:51:46, 202.55s/it]

No NWM data
12202450


 16%|█▋        | 100/612 [7:07:08<20:51:47, 146.69s/it]

No NWM data
12203000


 17%|█▋        | 101/612 [7:11:55<26:49:39, 189.00s/it]

12210900


 17%|█▋        | 102/612 [7:17:58<34:09:16, 241.09s/it]

12212430


 17%|█▋        | 103/612 [7:18:14<24:32:05, 173.53s/it]

No NWM data
12214000


 17%|█▋        | 104/612 [7:22:59<29:13:23, 207.09s/it]

12301550


 17%|█▋        | 105/612 [7:27:00<30:35:56, 217.27s/it]

12323170


 17%|█▋        | 106/612 [7:31:41<33:13:59, 236.44s/it]

12323200


 17%|█▋        | 107/612 [7:35:56<33:55:35, 241.85s/it]

12323700


 18%|█▊        | 108/612 [7:40:22<34:52:54, 249.16s/it]

12323710


 18%|█▊        | 109/612 [7:44:36<35:01:17, 250.65s/it]

12323770


 18%|█▊        | 110/612 [7:49:05<35:42:27, 256.07s/it]

12323840


 18%|█▊        | 111/612 [7:53:46<36:41:49, 263.69s/it]

12327100


 18%|█▊        | 112/612 [7:58:09<36:33:47, 263.26s/it]

12341000


 18%|█▊        | 113/612 [8:02:37<36:41:15, 264.68s/it]

12342500


 19%|█▊        | 114/612 [8:06:57<36:26:46, 263.47s/it]

12345000


 19%|█▉        | 115/612 [8:11:31<36:47:05, 266.45s/it]

12345500


 19%|█▉        | 116/612 [8:11:47<26:21:01, 191.25s/it]

No NWM data
12353820


 19%|█▉        | 117/612 [8:16:02<28:57:25, 210.60s/it]

12370900


 19%|█▉        | 118/612 [8:20:21<30:52:48, 225.04s/it]

12374800


 19%|█▉        | 119/612 [8:25:47<34:59:01, 255.46s/it]

12388650


 20%|█▉        | 120/612 [8:30:11<35:15:09, 257.95s/it]

12392155


 20%|█▉        | 121/612 [8:34:27<35:05:18, 257.27s/it]

12392895


 20%|█▉        | 122/612 [8:38:40<34:51:16, 256.07s/it]

12402500


 20%|██        | 123/612 [8:42:51<34:33:12, 254.38s/it]

12407000


 20%|██        | 124/612 [8:47:04<34:27:13, 254.17s/it]

12407700


 20%|██        | 125/612 [8:51:17<34:20:29, 253.86s/it]

12408420


 21%|██        | 126/612 [8:55:27<34:06:08, 252.61s/it]

12413140


 21%|██        | 127/612 [9:00:17<35:33:14, 263.91s/it]

12413360


 21%|██        | 128/612 [9:04:38<35:20:51, 262.92s/it]

12415250


 21%|██        | 129/612 [9:08:46<34:39:16, 258.30s/it]

12415285


 21%|██        | 130/612 [9:13:02<34:29:26, 257.61s/it]

12415290


 21%|██▏       | 131/612 [9:17:28<34:45:19, 260.12s/it]

12418000


 22%|██▏       | 132/612 [9:17:43<24:54:53, 186.86s/it]

No NWM data
12418500


 22%|██▏       | 133/612 [9:17:59<18:01:21, 135.45s/it]

No NWM data
12422950


 22%|██▏       | 134/612 [9:22:15<22:46:12, 171.49s/it]

12422990


 22%|██▏       | 135/612 [9:26:24<25:50:19, 195.01s/it]

12424000


 22%|██▏       | 136/612 [9:30:37<28:03:39, 212.23s/it]

12433100


 22%|██▏       | 137/612 [9:34:45<29:24:42, 222.91s/it]

12433561


 23%|██▎       | 138/612 [9:38:50<30:14:10, 229.64s/it]

12435500


 23%|██▎       | 139/612 [9:42:57<30:51:07, 234.82s/it]

12438900


 23%|██▎       | 140/612 [9:47:16<31:44:11, 242.06s/it]

12438905


 23%|██▎       | 141/612 [9:51:24<31:53:26, 243.75s/it]

12439300


 23%|██▎       | 142/612 [9:55:38<32:15:03, 247.03s/it]

12439500


 23%|██▎       | 143/612 [9:59:56<32:36:48, 250.34s/it]

12442200


 24%|██▎       | 144/612 [10:00:13<23:25:21, 180.17s/it]

No NWM data
12444100


 24%|██▎       | 145/612 [10:04:24<26:08:14, 201.49s/it]

12444290


 24%|██▍       | 146/612 [10:08:40<28:11:30, 217.79s/it]

12444490
