In [None]:
# Import the packages needed
import ee
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import glob
import os
import numpy as np
from scipy.spatial import cKDTree

# Start the Earth Engine (`ee`) client session.
# NOTE(review): no cell visible in this notebook calls `ee` after this line — confirm the initialization is still required.
ee.Initialize()

In [None]:
## This notebook pulls in the RWC-Temp data, snaps each observation to its nearest SWORD node (the nearest-node query in this notebook uses k=1), and saves one CSV file per dam ##

In [None]:
##################################
###### Bring in & Prep the Data #######
##################################

In [None]:
### Load the dam locations ###
Dams = gpd.read_file(r"F:\Insert_File_Path_of_Shapefile_with_Dam_Locations.shp") # Update this file path

# `grod_id` of every dam, cast to int and kept in file order; the per-dam loop iterates over this list
Dams_List = [int(grod_id) for grod_id in Dams['grod_id'].tolist()]

In [None]:
## Load the SWORD nodes
SWORD_Nodes = gpd.read_file(r"Insert_File_Path_of_the_Shapefile_Containing_the_Selected_SWORD_Nodes.shp") # Update this file path

## Prep the data ##
# 1) "width" -> "SWD_wid" so it cannot be confused with the RWC width column later on
# 2) "Dam_Dist_km" = "Dam_Dist" divided by 1000
SWORD_Nodes = (
    SWORD_Nodes
    .rename(columns={"width": "SWD_wid"})
    .assign(Dam_Dist_km=lambda nodes: nodes['Dam_Dist'] / 1000)
)

## Add in Upstream & Downstream flags
# Define Up/DS Function
def get_upds(Dam_Distance):
    """Classify a node by the sign of its distance to the dam.

    Returns 'Dam' for exactly 0, 'Downstream' for any positive value and
    'Upstream' for everything else (negative values, and also NaN because
    both comparisons are False for it).
    """
    if Dam_Distance == 0:
        return 'Dam'
    return 'Downstream' if Dam_Distance > 0 else 'Upstream'

# Label every node 'Dam', 'Downstream' or 'Upstream' from its signed dam distance
SWORD_Nodes['Up_Ds'] = SWORD_Nodes['Dam_Dist'].map(get_upds)

In [None]:
# Folder holding the RWC-Temp output CSVs.
# The per-dam loop globs this folder for "*_<dam id>.csv", so each file name must end in "_<grod_id>.csv".
FilePath = r"Insert_File_Path_of_the_Temperature_Outputs_from_RWC" # Update this file path

In [None]:
#### Define Functions ###
#  Define Season Function
def get_season(date):
    """Return the meteorological season name for a date.

    Parameters
    ----------
    date : datetime.date, datetime.datetime or pandas.Timestamp
        Anything exposing a ``.month`` attribute. A time-of-day or timezone
        on the value does not affect the result.

    Returns
    -------
    str
        'Spring' (Mar-May), 'Summer' (Jun-Aug), 'Fall' (Sep-Nov) or
        'Winter' (Dec-Feb).

    Raises
    ------
    ValueError
        If the month is not 1-12 (e.g. ``pd.NaT``, whose month is NaN), so a
        missing date is never silently labelled 'Winter'.
    """
    # Classify on the month number directly. Building date ranges and testing
    # string membership mislabels any timestamp that carries a time component
    # as 'Winter' and is very slow when mapped over a whole column.
    month = date.month
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    if month in (9, 10, 11):
        return 'Fall'
    if month in (12, 1, 2):
        return 'Winter'
    raise ValueError("get_season: cannot determine a season for " + repr(date))
    
# Define Nearest Node Function
def Closest_Nodes(gdA, gdB):
    """Attach to every row of ``gdA`` the attributes of its nearest point in ``gdB``.

    Nearness is the straight-line (Euclidean) distance between the ``x``/``y``
    values of the two ``geometry`` columns, found with a KD-tree built on
    ``gdB``. The distance is therefore in whatever units the coordinates use.

    Parameters
    ----------
    gdA : frame with a ``geometry`` column of points
        Points to be matched; one output row per input row, in the same order.
    gdB : frame with a ``geometry`` column of points
        Candidate points. Its ``geometry`` column is dropped from the output.

    Returns
    -------
    Frame with the columns of ``gdA``, the non-geometry columns of the matched
    ``gdB`` row, and ``dist`` (distance to that match), on a fresh 0..n-1 index.
    An empty ``gdA`` yields an empty frame with those columns.

    Raises
    ------
    ValueError
        If ``gdA`` has rows but ``gdB`` has no points to match against.
    """
    # reshape(-1, 2) keeps the (n, 2) layout even when a frame has no rows;
    # without it an empty input collapses to a 1-D array and the query fails.
    nA = np.array([(pt.x, pt.y) for pt in gdA.geometry], dtype=float).reshape(-1, 2)
    nB = np.array([(pt.x, pt.y) for pt in gdB.geometry], dtype=float).reshape(-1, 2)

    if len(nA) == 0:
        # Nothing to match: produce empty results so the concat below still
        # returns a correctly-shaped (zero-row) frame.
        dist = np.empty(0, dtype=float)
        idx = np.empty(0, dtype=np.intp)
    elif len(nB) == 0:
        raise ValueError("Closest_Nodes: gdB has no points to match against")
    else:
        btree = cKDTree(nB)
        dist, idx = btree.query(nA, k=1)

    # idx holds positional row numbers into gdB, hence iloc
    gdB_nearest = gdB.iloc[idx].drop(columns="geometry").reset_index(drop=True)
    gdf = pd.concat(
        [
            gdA.reset_index(drop=True),
            gdB_nearest,
            pd.Series(dist, name='dist')
        ],
        axis=1)
    return gdf

In [None]:
#### Create the Combined Dam CSV Files ###

In [None]:
## Loop thru the dams to pull in RWC-Temp csvs, combine, snap to SWORD nodes and export one CSV per dam

# Folder the per-dam CSVs are written to (they are used in Accuracy_Assessment.ipynb)
Export_File_Path = r"F:\Insert_File_Path_Here"  # Update the Filepath here

# Approximate metres per degree, used to convert the degree-based nearest-node distance.
# NOTE: the nearest-node distance is Euclidean in lon/lat degrees, so the metre value is an
# approximation (a degree of longitude is shorter than this away from the equator).
DEG_TO_M = 111139

for i in Dams_List[:]: # Update range for a given run's selection
    # Filter SWORD Nodes for the Dam
    SWORD_Nodes_Dam = SWORD_Nodes[SWORD_Nodes['Assgn_dam'] == i]

    # Without candidate nodes the nearest-node search cannot run -- skip instead of crashing the whole run
    if SWORD_Nodes_Dam.empty:
        print("No SWORD nodes assigned to dam " + str(i) + "; skipped.")
        continue

    # Get a list of the CSV files for the dam (sorted so the output row order is reproducible)
    CSVFiles = sorted(glob.glob(os.path.join(FilePath, "*_" + str(i) + ".csv")))

    # Read every file first and concatenate once (avoids re-copying the growing frame per file)
    frames = []
    for csv_path in CSVFiles:
        try:
            frames.append(pd.read_csv(csv_path))
        except pd.errors.EmptyDataError:
            print(csv_path, " is empty and has been skipped.") # Some of the images create blank csvs -- could be ice/clouds/mask issues

    ## If nothing was read skip to next dam
    if not frames:
        continue
    Combined_Dam = pd.concat(frames, axis=0, ignore_index=True)
    if Combined_Dam.shape[0] == 0:
        continue

    #### Prep the RWC-TEMP Data ###
    # Fix Date Time -- GEE_time is Unix epoch time in milliseconds, so parse it as such
    # (no float division that could introduce rounding noise)
    Combined_Dam['Date_Time'] = pd.to_datetime(Combined_Dam['GEE_time'], unit='ms', utc=True)
    Combined_Dam['Date'] = Combined_Dam['Date_Time'].dt.date
    Combined_Dam['Month'] = Combined_Dam['Date_Time'].dt.month
    Combined_Dam['Year'] = Combined_Dam['Date_Time'].dt.year

    ## Add in Season (mapped over the plain dates, not the timestamps)
    Combined_Dam['Season'] = Combined_Dam['Date'].map(get_season)

    ## Rename Width Column -- Clarity for Later
    Combined_Dam = Combined_Dam.rename(columns={"width": "RWC_wid"})

    ## Filter out Ice (keep only temperatures above 0); copy so later edits never touch Combined_Dam
    Combined_Dam_noice = Combined_Dam[Combined_Dam['GEE_temp'] > 0].copy()
    if Combined_Dam_noice.empty:
        print("No ice-free observations for dam " + str(i) + "; skipped.")
        continue

    ## Make it a GDF
    geometry = [Point(xy) for xy in zip(Combined_Dam_noice['longitude'], Combined_Dam_noice['latitude'])]
    Combined_Dam_gdf = gpd.GeoDataFrame(Combined_Dam_noice, geometry=geometry, crs="EPSG:4326")

    ### Get Each RWC Node's Nearest SWORD node  ####
    Nearest_Nodes = Closest_Nodes(Combined_Dam_gdf, SWORD_Nodes_Dam)

    # Nearest Distance is calculated in Degrees b/c both gdf are in WGS 84 --> convert degrees to meters
    Nearest_Nodes['NDist_m'] = Nearest_Nodes['dist'] * DEG_TO_M

    # Export to CSV
    Dam_Name = i
    Nearest_Nodes.to_csv(os.path.join(Export_File_Path, "Dam_" + str(Dam_Name) + "RWCT.csv")) ## Used in the Accuracy_Assessment.ipynb
    print("CSV Exported: " + str(Dam_Name))