In [None]:
########## This Code is Used to filter UP/DS SWORD profiles to only include single dams ##########
########## Snaps Temperatures to their nearest SWORD nodes and saves the CSV file  ##########
########## Does not filter ANY GROD Dams (Except to run in loops) ##########

In [None]:
# Import the packages needed
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import glob
import os
import numpy as np
from scipy.spatial import cKDTree

In [None]:
##################################
###### Bring in & Prep the Data #######
##################################

In [None]:
### Load the dam locations ###
# This shapefile holds all the dams used to pull temperatures, with information
# from HILARRI matched to it (the matching was completed in ArcGIS).
Dams = gpd.read_file(r"F:\Insert_File_Path_of_Shapefile_with_Dam_Locations.shp") # Update this file path
# Sorted list of the values in the "grod_id" column of the dam shapefile
Dams_List = sorted(Dams["grod_id"].tolist())

In [None]:
# Set up the location of the RWC-Temp data
# Folder holding the temperature CSVs exported from GEE. The per-dam loop further
# down searches this folder for files whose names end in "_<dam id>.csv".
TempsFilePath = r"F:\Insert_File_Path_of_Temperature_CSVs_from_GEE"

In [None]:
## Load the selected SWORD nodes ##
SWORD_Nodes = gpd.read_file(r"Insert_File_Path_of_the_Shapefile_Containing_the_Selected_SWORD_Nodes.shp") # Update this file path

## Prepare the node table ##
# Relabel the SWORD "width" column as "SWD_wid"
SWORD_Nodes = SWORD_Nodes.rename(columns={"width": "SWD_wid"})

# Dam distance expressed in km (Dam_Dist divided by 1000)
SWORD_Nodes["Dam_Dist_km"] = SWORD_Nodes["Dam_Dist"] / 1000

## Add in Upstream & Downstream flags
# Define Up/DS Function
def get_upds(Dam_Distance):
    """Label a node from the sign of its distance to the dam.

    Parameters
    ----------
    Dam_Distance : number
        Signed distance of the node from its dam.

    Returns
    -------
    str
        'Downstream' for a positive distance, 'Dam' for a distance equal to 0,
        and 'Upstream' for everything else. "Everything else" covers negative
        distances and also NaN, because both comparisons are false for NaN.
    """
    if Dam_Distance > 0:
        return 'Downstream'
    return 'Dam' if Dam_Distance == 0 else 'Upstream'

# Tag every node as 'Dam', 'Downstream', or 'Upstream' from its Dam_Dist value
SWORD_Nodes['Up_Ds'] = SWORD_Nodes['Dam_Dist'].map(get_upds)

In [None]:
## Subset SWORD -- slicing the dam list by hand is how this script is "parallelized" across runs
# Dams handled in this run; change the slice bounds for a different grouping (e.g. 0:410)
Dams_List_Group = Dams_List[:]

# Keep only the nodes whose assigned dam is in this run's group
Nodes_In_Group = SWORD_Nodes["Assgn_dam"].isin(Dams_List_Group)
Profile_Subset = SWORD_Nodes.loc[Nodes_In_Group]
Profile_Subset

In [None]:
## Cut every dam's profile off before the next dam downstream (DS) ##
# Row labels of the nodes that survive the downstream cut
Updated_Profile_Indices = []

for dam_id in Dams_List_Group:
    # All nodes assigned to this dam, selected once and reused below
    dam_nodes = Profile_Subset[Profile_Subset["Assgn_dam"] == dam_id]
    # Nodes of this profile that are downstream AND flagged as a dam
    is_ds_dam = (dam_nodes["Up_Ds"] == "Downstream") & (dam_nodes["Dam_Flag"] == "Dam")

    if is_ds_dam.any():
        # Downstream distances are positive, so the closest other dam has the smallest value
        nearest_ds_dam_km = dam_nodes.loc[is_ds_dam, "Dam_Dist_km"].min()
        # Strictly less than the cutoff: keeps everything to the left of that dam
        nodes_before_dam = dam_nodes[dam_nodes["Dam_Dist_km"] < nearest_ds_dam_km]
        Updated_Profile_Indices.extend(nodes_before_dam.index.tolist())
    else:
        # No other dam downstream: the whole profile is kept as-is
        print("Dam ", str(dam_id), " has no downstream dams.")
        Updated_Profile_Indices.extend(dam_nodes.index.tolist())
        print("Original Dam ", str(dam_id), " nodes included.")

# Select the surviving rows by the labels gathered in the loop
Updated_Profiles = Profile_Subset[Profile_Subset.index.isin(Updated_Profile_Indices)]
Updated_Profiles

In [None]:
## Cut every dam's profile off before the next dam upstream (UP), starting from the downstream-trimmed profiles ##
# Row labels of the nodes that survive the upstream cut
Updated_Profile_Indices_UP = []

for dam_id in Dams_List_Group:
    # All remaining nodes assigned to this dam, selected once and reused below
    dam_nodes = Updated_Profiles[Updated_Profiles["Assgn_dam"] == dam_id]
    # Nodes of this profile that are upstream AND flagged as a dam
    is_up_dam = (dam_nodes["Up_Ds"] == "Upstream") & (dam_nodes["Dam_Flag"] == "Dam")

    if is_up_dam.any():
        # Upstream distances are negative, so the closest other dam has the largest (max) value
        nearest_up_dam_km = dam_nodes.loc[is_up_dam, "Dam_Dist_km"].max()
        # Strictly greater than the cutoff: keeps everything to the right of that dam
        nodes_after_dam = dam_nodes[dam_nodes["Dam_Dist_km"] > nearest_up_dam_km]
        Updated_Profile_Indices_UP.extend(nodes_after_dam.index.tolist())
    else:
        # No other dam upstream: the whole profile is kept as-is
        print("Dam ", str(dam_id), " has no upstream dams.")
        Updated_Profile_Indices_UP.extend(dam_nodes.index.tolist())
        print("Original Dam ", str(dam_id), " nodes included.")

# Select the surviving rows by the labels gathered in the loop
Updated_Profiles_UPDS = Updated_Profiles[Updated_Profiles.index.isin(Updated_Profile_Indices_UP)]
Updated_Profiles_UPDS

In [None]:
#### Define Functions ####
#  Define Season Function
def get_season(date):
    """Return the season name for a calendar date.

    The season is decided by the month alone, which gives exactly the same
    boundaries as explicit date ranges (01 Mar-31 May, 01 Jun-31 Aug,
    01 Sep-30 Nov) without building three date ranges on every call. That
    matters because this function is mapped over every row of a table.

    Parameters
    ----------
    date : datetime.date, datetime.datetime or pandas.Timestamp
        Any object exposing an integer-like ``month`` attribute.

    Returns
    -------
    str
        'Spring' for March-May, 'Summer' for June-August, 'Fall' for
        September-November, and 'Winter' for December-February.

    Raises
    ------
    ValueError
        If ``date.month`` is NaN (for example a missing timestamp), so that
        missing dates fail loudly instead of being labelled 'Winter'.
    """
    # int() rejects NaN, so a missing date raises instead of falling through
    month = int(date.month)
    if 3 <= month <= 5:
        return 'Spring'
    if 6 <= month <= 8:
        return 'Summer'
    if 9 <= month <= 11:
        return 'Fall'
    return 'Winter'
    
# Define Nearest Node Function
def Closest_Nodes(gdA, gdB):
    """Attach to every row of gdA the attributes of its nearest point in gdB.

    Parameters
    ----------
    gdA, gdB : table with a ``geometry`` column
        Each geometry must expose ``x`` and ``y`` attributes.

    Returns
    -------
    DataFrame
        The rows of gdA (index reset), side by side with the columns of the
        matched gdB row (its ``geometry`` column dropped) and a ``dist`` column
        holding the distance to that match, measured in the units of the
        x/y coordinates.
    """
    def _coords(frame):
        # (n, 2) array of x/y pairs taken from the geometry column
        return np.array([(geom.x, geom.y) for geom in frame.geometry])

    source_xy = _coords(gdA)
    target_xy = _coords(gdB)

    # k=1: a single nearest neighbour per source point
    nearest_dist, nearest_pos = cKDTree(target_xy).query(source_xy, k=1)

    # nearest_pos holds row positions in gdB, hence positional selection
    matched_rows = gdB.iloc[nearest_pos].drop(columns="geometry").reset_index(drop=True)
    distances = pd.Series(nearest_dist, name='dist')

    # All three pieces share a fresh 0..n-1 index, so they line up row by row
    return pd.concat([gdA.reset_index(drop=True), matched_rows, distances], axis=1)

In [None]:
## Loop through the dams: read the RWC-Temp CSVs, snap them to SWORD nodes, average, and export
# Folder that receives one averaged-temperature CSV per dam (set once, outside the loop).
# These outputs will be used for analysis in Dammed_River_Temperatures_Analysis.ipynb
Export_File_Path = r"F:\Insert_File_Path_Here" # Update the Filepath here

# Dam list needs to be int for file names
Dam_List_File = list(map(int, Dams_List_Group))

for i in Dam_List_File:
    # Filter SWORD Nodes for the Dam
    SWORD_Nodes_Dam = Updated_Profiles_UPDS[(Updated_Profiles_UPDS['Assgn_dam'] == i)]

    # The nearest-node search cannot be built on zero nodes, so skip such dams
    if SWORD_Nodes_Dam.shape[0] == 0:
        print("Dam ", str(i), " has no SWORD nodes after filtering and has been skipped.")
        continue

    # Get a list of the CSV files for the dam
    CSVFiles = glob.glob(os.path.join(TempsFilePath, "*_"+ str(i)+".csv"))

    # Read every file for the dam, then build one dataframe with a single concat
    Dam_Frames = []
    for CSVFile in CSVFiles:
        try:
            Dam_Frames.append(pd.read_csv(CSVFile))
        except pd.errors.EmptyDataError:
            print(CSVFile, " is empty and has been skipped.") # Some of the images create blank csvs -- could be ice/clouds/mask issues

    # ignore_index gives the combined table unique row labels
    Combined_Dam = pd.concat(Dam_Frames, axis=0, ignore_index=True) if Dam_Frames else pd.DataFrame()

    ## If this is empty skip to next dam
    if Combined_Dam.shape[0] == 0:
        continue

    #### Prep the RWC-TEMP Data ###
    # Fix Date Time  -- GEE uses Unix Epoch time (GEE_time is divided by 1000 and read as seconds)
    Combined_Dam['Date_Time'] = pd.to_datetime((Combined_Dam['GEE_time']/1000), unit = 's', utc=True)
    Combined_Dam['Date'] = Combined_Dam['Date_Time'].map(pd.Timestamp.date)
    Combined_Dam['Month'] = pd.DatetimeIndex(Combined_Dam['Date']).month
    Combined_Dam['Day'] = pd.DatetimeIndex(Combined_Dam['Date']).day
    Combined_Dam['Year'] =  pd.DatetimeIndex(Combined_Dam['Date']).year

    ## Add in Season
    Combined_Dam['Season'] = Combined_Dam.Date.map(get_season)

    ## Rename Width Column -- Clarity for Later
    Combined_Dam.rename(columns={"width":"RWC_wid"}, inplace = True)

    ## Filter out Ice (keeps only rows with GEE_temp above 0)
    Combined_Dam_noice = Combined_Dam[Combined_Dam['GEE_temp']>0]

    # Nothing left to snap once ice is removed: the nearest-node query needs at least one point
    if Combined_Dam_noice.shape[0] == 0:
        print("Dam ", str(i), " has no temperatures above 0 and has been skipped.")
        continue

    ## Make it a GDF
    geometry = [Point(xy) for xy in zip(Combined_Dam_noice['longitude'], Combined_Dam_noice['latitude'])]
    Combined_Dam_gdf = gpd.GeoDataFrame(Combined_Dam_noice, geometry=geometry, crs="EPSG:4326")

    ### Get Each RWC Node's Nearest SWORD node  ####
    Nearest_Nodes = Closest_Nodes(Combined_Dam_gdf, SWORD_Nodes_Dam)

    # Nearest Distance is calculated in Degrees b/c both gdf are in WGS 84 --> (Convert Degrees to meters * 111139)
    # NOTE(review): a single degrees-to-metres factor ignores that a degree of longitude
    # shrinks with latitude -- confirm the approximation is acceptable for the 200 m cutoff below.
    Nearest_Nodes['NDist_m'] = Nearest_Nodes['dist']*111139

    ## Filtering to the closest nodes for averaging temps ##
    # Get each SWORD node's 5 nearest RWC points for each date
    Nearest_Nodes_grp = Nearest_Nodes.groupby(['Join_Node','Date'])['dist'].nsmallest(5)
    Nearest_Nodes_grp = Nearest_Nodes_grp.reset_index()

    # Get a list of Index numbers
    # 'level_2' is the original row label of Nearest_Nodes; Closest_Nodes returns a
    # 0..n-1 index, so the label is also the row position used by iloc below
    Node_list5 = Nearest_Nodes_grp['level_2'].tolist()

    # Select nodes by Index
    Select_Nodes = Nearest_Nodes.iloc[Node_list5]

    # Remove extra nodes -- If it is farther than 200m drop it  (distance to next SWORD node)
    Nearest_Nodes_filt = Select_Nodes[Select_Nodes['NDist_m'] <= 200]

    #### Get the Temperature Averages ####
    ## Group By Node, Month, Day, and Year -- Get Average Temp and Average RWC width
    Date_Near_Temps = Nearest_Nodes_filt.groupby(['Join_Node','Month','Day','Year']).agg({'GEE_temp': ['mean'],'RWC_wid': ['mean']})
    Date_Near_Temps.columns = ['Avg_Temp', 'Avg_RWC_Wid']
    Date_Near_Temps = Date_Near_Temps.reset_index()

    # Convert to Dataframe
    AvgDateTemps_df = pd.DataFrame(Date_Near_Temps)

    # Join the SWORD node attributes back on by node id
    AvgDateTemps_xy = pd.merge(AvgDateTemps_df, SWORD_Nodes_Dam, on='Join_Node', how='inner')
    AvgDateTemps_xy = AvgDateTemps_xy[['Join_Node','Month','Day','Year', 'Avg_Temp','Avg_RWC_Wid', 'x','y','reach_id','lakeflag','Assgn_dam','Dam_Flag','Up_Ds', 'Dam_Dist', 'Dam_Dist_km']]

    #### Export the Average Temps CSV ####
    Dam_Name = i
    # os.path.join replaces the "\Dam_" literal, whose "\D" is an invalid escape sequence.
    # The file name itself (no separator between the dam id and "Avg") is left unchanged.
    AvgDateTemps_xy.to_csv(os.path.join(Export_File_Path, "Dam_"+ str(Dam_Name)+"Avg_Img_Temps.csv"))