# Russian River Step 2 -- simulation geometry 

In this step we will form the map-view simulation geometry.


In [None]:
# these can be turned on for development work
%load_ext autoreload
%autoreload 2

%matplotlib qt

In [None]:
# setting up logging first or else it gets preempted by another package
import watershed_workflow.ui
watershed_workflow.ui.setup_logging(1)

In [None]:
import os,sys
import logging
import numpy as np
from matplotlib import pyplot as plt
import shapely
import pandas as pd
import geopandas as gpd
pd.options.display.max_columns = None

import watershed_workflow 
import watershed_workflow.config
import watershed_workflow.sources
import watershed_workflow.mesh
import watershed_workflow.regions
import watershed_workflow.resampling

# set the default figure size for notebooks
plt.rcParams["figure.figsize"] = (8, 6)


## Input: Parameters and other source data


In [None]:
# Force Watershed Workflow to pull data from this directory rather than a shared data directory.
# This picks up the datasets staged under this project's input_data directory to avoid large
# file downloads for demonstration purposes.
#
def splitPathFull(path):
    """
    Break a path into its individual components.

    The returned list satisfies
    os.path.join(*splitPathFull(path)) == path
    so the root (or drive) of an absolute path is kept as the first entry.
    """
    # components are collected leaf-first and flipped once at the end
    reversed_parts = []
    remaining = path
    while True:
        parent, leaf = os.path.split(remaining)
        if parent == remaining:
            # nothing left to strip: root on Unix or a drive such as C:\ on Windows
            reversed_parts.append(parent)
            break
        reversed_parts.append(leaf)
        if leaf == remaining:
            # a bare file or directory name with no parent part
            break
        remaining = parent
    reversed_parts.reverse()
    return reversed_parts

# The notebook must be run from the project's 'workflow' directory; the
# components of its parent directory are kept as the project root.
cwd = splitPathFull(os.getcwd())
assert cwd[-1] == 'workflow'
cwd = cwd[:-1]

# Note, this directory is where downloaded data will be put as well
data_dir = os.path.join(*cwd, 'input_data')
def toInput(filename):
    """Return the path of `filename` inside the input data directory."""
    input_path = os.path.join(data_dir, filename)
    return input_path

# outputs are written to 'output_data' under the same project root
output_dir = os.path.join(*cwd, 'output_data')
# maps a role name to the filename registered for it through toOutput()
output_filenames = {}
def fromOutput(filename):
    """Return the path of `filename` inside the output directory."""
    output_path = os.path.join(output_dir, filename)
    return output_path

def toOutput(role, filename):
    """Register `filename` under `role` and return its output-directory path.

    The role -> filename pair is stored in the module-level
    `output_filenames` dict; registering an existing role overwrites it.
    """
    output_filenames[role] = filename
    registered_path = fromOutput(filename)
    return registered_path

# make sure the input and output dirs exist
# (exist_ok=True makes this a no-op for directories that are already there)
os.makedirs(data_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)
       

In [None]:
# Set the data directory to the local space to get the locally downloaded files
# REMOVE THIS CELL for general use outside of Coweeta
# NOTE(review): this comment mentions Coweeta, but this notebook targets the
# Russian River -- likely inherited from another example; confirm and reword.
watershed_workflow.config.setDataDirectory(data_dir)


In [None]:
## Parameters cell -- this provides all parameters that can be changed via pipelining to generate a new watershed.
name = 'RussianRiver'
hucs = ['18010110'] # a list of HUCs to run

# Geometric parameters
# -- parameters to clean and reduce the river network prior to meshing
prune_by_area = 20               # km^2
simplify = 200                   # length scale to target average edge (presumably in units of the working CRS -- TODO confirm)

# -- mesh triangle refinement control
# refine_d0 / refine_d1 are passed to the simplify step below as the river
# "close" and "far" distances, respectively.
refine_d0 = 200
refine_d1 = 600

# refine_L0 / refine_L1 are passed to the simplify step below as the target
# segment lengths for reaches and HUC boundaries, respectively; refine_L0 is
# also used there as the triple-junction snapping tolerance.
refine_L0 = 200
refine_L1 = 500

# areas derived from the target lengths as L**2 / 2
# (presumably triangle-area limits for later meshing -- TODO confirm)
refine_A0 = refine_L0**2 / 2
refine_A1 = refine_L1**2 / 2

# smooth angles
# used below as both min_angle and junction_min_angle in the simplify step
min_angle = 20


# Note that, by default, we tend to work in the DayMet CRS because this allows us to avoid
# reprojecting meteorological forcing datasets.
crs = watershed_workflow.crs.default_crs

## Load data from the previous step

In [None]:
# Read the previous step's outputs from the output directory: the watershed
# polygons and the river reaches, both stored as parquet files.
watershed_polys = gpd.read_parquet(fromOutput('01_watershed_polys.parquet'))
reaches = gpd.read_parquet(fromOutput('01_rivers.parquet'))

In [None]:
# display the watershed polygons table read from the previous step's output
watershed_polys

In [None]:
# remove a few points that cause trouble later...
def removeSecondToLastCoordinate(reach_id):
    """Drop the second-to-last coordinate of the reach whose 'ID' is `reach_id`.

    Looks up the first row of the module-level `reaches` table whose 'ID'
    column equals `reach_id`, rebuilds its LineString without the
    second-to-last coordinate, and stores the new geometry back in place.
    Raises IndexError if no row matches.
    """
    matching_rows = reaches.index[reaches['ID'] == reach_id]
    row_label = matching_rows.values[0]

    coords = list(reaches.loc[row_label, 'geometry'].coords)
    # keep everything up to (not including) the last two points, then the endpoint
    trimmed = coords[:-2] + [coords[-1]]
    reaches.loc[row_label, 'geometry'] = shapely.geometry.LineString(trimmed)

# '8272363' is listed twice intentionally: each call removes the point just
# before the endpoint, so two calls remove the last two interior points.
for _reach_id in ('8271033', '8272363', '8272363'):
    removeSecondToLastCoordinate(_reach_id)


#removeSecondToLastCoordinate('8272379')
#removeSecondToLastCoordinate('8270663')
#removeSecondToLastCoordinate('8269099')
#removeSecondToLastCoordinate('8269105')
#removeSecondToLastCoordinate('8270661')
#removeSecondToLastCoordinate('8273681')
#removeSecondToLastCoordinate('8273681') # intentionally called twice
#removeSecondToLastCoordinate('8273271')


In [None]:
# display the watershed polygons table again for reference
watershed_polys

In [None]:
# Build rivers from the individual reaches, printing the number of reaches
# going in and the number of rivers coming out.
print(len(reaches))
rivers = watershed_workflow.river_tree.createRivers(reaches, method='native')
print(len(rivers))

In [None]:
# mark the outlet reaches as do-not-merge
# every reach starts out with a flag of 0
for river in rivers:
    river.df['do-not-merge'] = [0] * len(river)

# for each watershed polygon, fetch the reach node it refers to through its
# river_index / reach_index columns
roots = [
    rivers[watershed_polys.loc[poly_id, 'river_index']].getNode(
        watershed_polys.loc[poly_id, 'reach_index'])
    for poly_id in watershed_polys.index
]

# location_on_reach must be exactly 0 or 1 and selects the sign of the flag
# (presumably which end of the reach the outlet sits on -- TODO confirm)
for outlet, root in zip(watershed_polys.index, roots):
    location = watershed_polys.loc[outlet].location_on_reach
    if location == 0:
        root['do-not-merge'] = -1
        continue
    if location == 1:
        root['do-not-merge'] = 1
        continue
    assert False


## Compute disjoint subcatchments

In [None]:
# Build a SplitHUCs object from the watershed polygons; it is used below for
# exploring, editing (through its linestrings), and simplifying the boundaries.
watersheds = watershed_workflow.split_hucs.SplitHUCs(watershed_polys)

In [None]:
# this generates a zoomable map, showing different reaches and watersheds,
# with discrete points.  Problem areas are clickable to get IDs for manual
# modifications.
# start the map from the watershed boundaries...
m = watersheds.explore(marker=True, marker_size=10)

# ...then add each river to the same map object
for river in rivers:
    m = river.explore(column=None, m=m, color='black', name=river['name'], marker=True, marker_size=10)

#m = gpd.GeoDataFrame(geometry=[shapely.geometry.Point(point)], crs=watersheds.crs).explore(m=m, color='k', marker_size=100)

# pass the map through makeMap and leave it as the last expression so the
# notebook displays it
m = watershed_workflow.makeMap(m)
m

#fig, ax = plt.subplots(1,1)
#watersheds.plot(ax=ax, color='k')
#rivers[0].plot(ax=ax, color='b')
#plt.show()

In [None]:
# remove bad points on watersheds
# Each entry is (linestring index, first dropped coordinate, first kept
# coordinate after the gap): coordinates 80-84 of linestring 30 and
# coordinate 96 of linestring 44 are removed.
for ls_index, drop_from, keep_from in ((30, 80, 85), (44, 96, 97)):
    old_ls = list(watersheds.linestrings[ls_index].coords)
    new_ls = shapely.geometry.LineString(old_ls[:drop_from] + old_ls[keep_from:])
    watersheds.linestrings[ls_index] = new_ls

# must update as we have modified geometry
watersheds.update()

In [None]:
# simplifying
# Simplify the watershed boundaries and rivers together, using the refinement
# values from the parameters cell.  The return value is not used, so the call
# is relied on to modify `watersheds` and `rivers` in place.
watershed_workflow.simplify(watersheds, rivers, 
                            reach_segment_target_length=refine_L0,
                            huc_segment_target_length=refine_L1,
                            river_close_distance=refine_d0,
                            river_far_distance=refine_d1,
                            snap_triple_junctions_tol=refine_L0,
                            min_angle=min_angle,
                            junction_min_angle=min_angle
                           )

# greatly shrunk the rivers... shrink the dataframe too
for river in rivers:
    river.resetDataFrame()



In [None]:
# display the watersheds' dataframe after the simplify step
watersheds.df

In [None]:
# this generates a zoomable map, showing different reaches and watersheds,
# with discrete points.  Problem areas are clickable to get IDs for manual
# modifications.
# start the map from the watershed boundaries...
m = watersheds.explore(marker=True, marker_size=10)

# ...then add each river to the same map object
for river in rivers:
    m = river.explore(column=None, m=m, color='black', name=river['name'], marker=True, marker_size=10)

# pass the map through makeMap and leave it as the last expression so the
# notebook displays it
m = watershed_workflow.makeMap(m)
m

In [None]:
# move one last point... Santa Rosa does weird stuff...
# the endpoint of linestring 57 is replaced by coordinate 7 of linestring 62
snap_point = list(watersheds.linestrings[62].coords)[7]
old_ls = list(watersheds.linestrings[57].coords)
new_ls = shapely.geometry.LineString(old_ls[:-1] + [snap_point])
watersheds.linestrings[57] = new_ls

# refresh after editing the geometry
watersheds.update()

In [None]:
# save this final version of rivers and watersheds to disk
import pickle

# the watersheds object is pickled as a whole
watersheds_path = toOutput('watershed_polys', '02_watersheds.pickle')
with open(watersheds_path, 'wb') as fid:
    pickle.dump(watersheds, fid)

# the rivers are concatenated into one GeoDataFrame and written as parquet
river_frames = [r.to_dataframe() for r in rivers]
river_df = gpd.GeoDataFrame(pd.concat(river_frames), crs=crs)
rivers_path = toOutput('rivers', '02_rivers.parquet')
river_df.to_parquet(rivers_path)

In [None]:
# save output filenames
# NOTE: despite the .txt extension this file is a binary pickle of the
# role -> filename dict; toOutput() registers this file's own role before the dump.
filenames_path = toOutput('02_output_filenames', '02_output_filenames.txt')
with open(filenames_path, 'wb') as fid:
    pickle.dump(output_filenames, fid)