In [None]:
import pandas as pd
import numpy as np
import xarray as xr
import datetime as dt
import s3fs
import sys, os, glob,re
import multiprocessing as mp
import time as time
import fsspec
import geopandas

## Conversion factor from cubic feet to cubic meters (1 ft^3 = 0.3048^3 m^3 ~= 0.0283168466 m^3),
## i.e. multiply a flow in ft^3/s by this to get m^3/s.
cfs_2_cms = 0.0283168466

In [None]:
## Site table (provides the 'comid' column used as the join key below)
pnwNP = pd.read_csv("../data/pnwNPall_InfowStats.csv")

### Pull out sites: attach the Columbia segment attributes to each site row.
### Left join on comid == POI_ID, so every row of pnwNP is kept even when
### no segment matches.
shp = pnwNP.merge(
    geopandas.read_file("../data/VIC_UW/shapefiles/columbia_seg.shp"),
    how='left',
    left_on='comid',
    right_on='POI_ID',
)

In [None]:
## Open NWM3.0
## Anonymous (unsigned) access to the S3 bucket holding the retrospective run
fs = fsspec.filesystem('s3', anon=True)
_file = fs.glob('noaa-nwm-retrospective-3-0-pds/CONUS/zarr/chrtout.zarr')

## Open the first (expected: only) match as a zarr store using consolidated metadata
nwm3_store = fs.get_mapper(_file[0])
ds3 = xr.open_dataset(
    nwm3_store,
    engine='zarr',
    backend_kwargs=dict(consolidated=True),
)

In [None]:
def getModels(i):
    """Add daily-mean NWM 3.0 streamflow to one site's modeled-data CSV.

    Reads the row ``i`` of the module-level ``shp`` table, pulls the
    ``streamflow`` series for that row's ``comid`` from the module-level
    ``ds3`` dataset, averages it to daily values, outer-merges it on ``time``
    with ``../data/pnwNP_modeledData/<gage>.csv`` and writes the result to
    ``../data/pnwNP_modeledData_NWM3/<gage>.csv``.

    The function is best-effort so that one bad site does not abort a batch
    run: failures are printed and the site is skipped. Unlike a bare
    ``except:``, the real cause is reported and KeyboardInterrupt/SystemExit
    are not swallowed.

    Parameters
    ----------
    i : int
        Row label in ``shp`` of the site to process.

    Returns
    -------
    None
    """
    gage = shp['gage'][i]
    print(gage)

    try:
        ## ## Get NWM 3.0 data
        try:
            # slice all data using a specific reach identifier
            flow = ds3.sel(feature_id=shp['comid'][i]).streamflow.persist()
        except KeyError:
            ## .sel raises KeyError when the comid is not among the NWM feature_ids
            print("No NWM data")
            return

        ## Average to daily values
        NWM3 = pd.DataFrame(flow.to_pandas()).resample('1d').mean()

        ## Move the time index into a tz-naive 'time' column so it can be merged on
        NWM3['time'] = pd.to_datetime(NWM3.index)
        NWM3['time'] = NWM3['time'].dt.tz_localize(None)
        NWM3.columns = ["streamflow_NWM3","time"]
        NWM3 = NWM3.reset_index(drop=True)

        ## Read in previously constructed streamflow and simulation data
        dat = pd.read_csv("../data/pnwNP_modeledData/"+str(gage)+".csv")
        dat['time'] = pd.to_datetime(dat['time'])

        ## Merge NWM3 and data (outer join keeps dates present in either source)
        datMain = pd.merge(dat,NWM3, on='time',how='outer')

        ## Weird rounding going on that writes to CSV so taking care of that here
        datMain["streamflow_NWIS"] = np.round(datMain["streamflow_NWIS"],4)

        ## Write to new directory
        datMain.to_csv('../data/pnwNP_modeledData_NWM3/'+str(gage)+".csv")

    except Exception as err:
        ## Anything else (missing input CSV, missing column, network error, ...):
        ## report the actual cause instead of mislabelling it as "No NWM data".
        print(f"{gage}: skipped ({type(err).__name__}: {err})")

In [None]:
import multiprocess
## Leave four cores free, but never ask for fewer than one worker:
## joblib rejects n_jobs=0 and interprets negative n_jobs as "all CPUs + 1 + n_jobs",
## so an unclamped value on a small machine would either fail or over-subscribe.
cores = max(1, multiprocess.cpu_count()-4)

In [None]:
from joblib import Parallel, delayed

## Process the sites in parallel, one getModels call per row of shp.
## NOTE(review): the range starts at row 18, so rows 0-17 are not processed here --
## presumably left over from resuming an earlier partial run; confirm before a fresh run.
site_rows = range(18, len(shp))
site_jobs = (delayed(getModels)(row) for row in site_rows)
Parallel(n_jobs=cores)(site_jobs)