In [1]:
# Written 8/11/2022 (updated 6/27/2024) to automate the filtering process and assign dam order to all dams in the nation.
# The starting file for this script is a csv file of the NID database dams for the entire nation 
# downloaded from https://nid.sec.usace.army.mil/#/downloads. You should be able to run this for only a subset
# of the dams as well.

In [2]:
# Run with Python 3 and ArcGIS Pro v 3.2.2. User needs and ArcGIS Pro installation to use the arcpy package in this code.

# 1. Import databases and filter them. Combine databases as necessary.

In [3]:
# Import packages
import pandas as pd
import numpy as np
import matplotlib as plt
import math
from simpledbf import Dbf5
import os
import re

# Import arcpy packages
import archook
arcgis_python_path = r'C:\Program Files\ArcGIS\Pro\bin\Python\envs\arcgispro-py3' #The path on your computer where your ArcGIS Python installation is located

# Add the Python environment to the path
archook.arcpy = arcgis_python_path

# Locate arcgis and access arcpy
# # archook.get_arcpy(pro=True) # pro=True argument may not be needed depending on archook version. If so, use:
# archook.get_arcpy() 

import arcpy

pd.set_option('display.max_columns',None)

PyTables is not installed. No support for HDF output.


*You may need to clone your arcgis environment and run from this activated environment for the line <archook.get_arcpy(pro=True)> to run. Otherwise you may receive an ImportError. More info on archook found here: https://pypi.org/project/archook-dbc/*

#### Load data here. You will need to replace all of the input files with the locations of your copies of the file. The output_folder variable is the folder where all of the output files will be saved to.

In [4]:
## Load data. See the files included with the report for formatting. Also load in the raw files (not the cross-ref files) and cross-ref
## in the code.

# Load NID data (NID downloaded 06/19/2024)
NIDs = pd.read_csv('E:/ResSed/MediumResolution_DamLinkages/Manuscript/FinalInputFiles/NID2024.csv', header=1, low_memory=False) 

# Load removed dams file, if using
removed = pd.read_csv('E:/ResSed/MediumResolution_DamLinkages/Manuscript/FinalInputFiles/RemovedDams.csv')

# Load GeoDAR data to use as location where no NID location (GeoDAR v11)
geoDAR = pd.read_csv('E:/ResSed/MediumResolution_DamLinkages/Manuscript/FinalInputFiles/GeoDAR_v11_dams.csv')
# geoDAR = pd.read_csv('E:/ResSed/MediumResolution_DamLinkages/Manuscript/FinalInputFiles/GeoDAR_crossref.csv')

#Load GDAT data to use for additional storage and year complete (GDAT v1)
GDAT = pd.read_csv('E:/ResSed/MediumResolution_DamLinkages/Manuscript/FinalInputFiles/GDAT_v1_dams.csv')

# Load the dam attribute and location file for if you have sites with additional data or better locations that you want
# included. This can include completion year or storage data. This is where you tag dams for the SiteTag function.
sites = pd.read_csv('E:/ResSed/MediumResolution_DamLinkages/Manuscript/FinalInputFiles/attributes_Mel.csv') 

# Load NHD Plus Medium Resolution flowline shapefile (NHDPlus v2)
NHDFlowline = 'E:/ResSed/MediumResolution_DamLinkages/Manuscript/FinalInputFiles/NHDFlowline_Network_NHDPlus_Countries.gdb/NHDFlowline_Network_NHDPlus_Countries'  # Path for NHD flowline shapefile

# Load GRanD data. (GRanD v1.3 with modifications to locations that places GRanD on NHD Flowlines)
GRanD = pd.read_csv('E:/ResSed/MediumResolution_DamLinkages/Manuscript/FinalInputFiles/GRanD_dams_v1_3.csv')

# Load the most recent version of DamNet to transfer ShortIDs for the new run for consistency.
damnet = pd.read_csv(r"E:\ResSed\MediumResolution_DamLinkages\Manuscript\Outputs_Melissa\NIDsMappedInBasins_MedRes_102524.csv")

# Load the dataset cross-reference file.
crossref = pd.read_csv(r'E:\ResSed\MediumResolution_DamLinkages\Manuscript\FilesToUpload\damnet_datacrossreference.csv')

# Assign output folder directory where you want to save the output files.
out_folder = 'E:/ResSed/MediumResolution_DamLinkages/Manuscript/Outputs_Nov7' # Write full path to this folder for arcgis outputs to be saved properly

# Print original numbers of dams in various databases for tracking:
print('Original number of NID in database:',NIDs.shape)
print('Number of USBR in database:',sites.loc[sites.IsUSBR==1].shape)
print('Number of USACE in database:',sites.loc[sites.IsUSACE==1].shape)
print('Number of sites in database:',sites.loc[sites.IsSite==1].shape)

  GDAT = pd.read_csv('E:/ResSed/MediumResolution_DamLinkages/Manuscript/FinalInputFiles/GDAT_v1_dams.csv')
  damnet = pd.read_csv(r"E:\ResSed\MediumResolution_DamLinkages\Manuscript\Outputs_Melissa\NIDsMappedInBasins_MedRes_102524.csv")


Original number of NID in database: (91886, 84)
Number of USBR in database: (257, 34)
Number of USACE in database: (465, 34)
Number of sites in database: (1066, 34)


In [5]:
# Count how many dams have lat/long as 0
noLoc = NIDs[(NIDs.Latitude == 0) | (NIDs.Longitude == 0)]

print('Number of raw NIDs with no locations:', noLoc.shape)
#noLoc.head()

Number of raw NIDs with no locations: (2, 84)


In [6]:
# Clean NID data to only keep the columns that we need
# FOR 2024 NID FILE: Column headers may change between versions of NID downloads. Check this if re-running with a new NID download

# Rename columns
new_cols = ['Dam_Name','Other_Dam','Former_Name','NID','OtherStructureID','FederalID','Owner_Name','OwnerTypes','PrimaryOwnerType','NumStruct','AssStruct','Designer','NonFedDam','PrimaryPurp', 'Purp','SourceAgency','StateorFedID','Latitude','Longitude','State','County','City','DisttoCity','River','CongressDist','AmInd','SecLoc','StateReg','Juris','Agency','StatePerm','StateInsp','StateEnforce','FedReg','FedOwner','FedFunding','FedDesign','FedConst','FedReg','FedInsp','FedOps','FedOther','SecAg','NRCS','PrimDamType','DamTypes','CoreTypes','Foundation','Dam_Height','HydraulicHeight','StructHeight','NID_Height','NIDHeightCat','Dam_Length','Volume','Year_Compl','YearCompCat','Year_Modif','NID_Storag','Max_Storag','Normal_Sto','Surface_Ar','Drainage_A','Max_Discha','SpillwayType','SpillWidth','NumLocks','LengthLocks','LockWidth','LengthSecondLock','SecondLockWidth','OutletGate','DataUpdated','LastInspection','InspectionFreq','HazardClass','CondAss','CondAssDate','OpStat','OpStatDate','EAPPrep','EAPRev','InundationMap','URL']
NIDs.columns = new_cols


# Filter and rename variables
NIDs = NIDs[['Dam_Name','Other_Dam','NID','OtherStructureID','FederalID','Longitude','Latitude','State','River','Owner_Name','OwnerTypes','Year_Compl','Year_Modif','NID_Height','Dam_Length','NID_Storag','Max_Storag','Normal_Sto','Surface_Ar','Drainage_A','Max_Discha','PrimaryPurp','Purp','PrimDamType','NumLocks','LengthLocks','LockWidth','LengthSecondLock','SecondLockWidth']]
#Strip white spaces from NID IDs
NIDs['NID'] = NIDs['NID'].str.replace(" ", "")

In [7]:
# Assign Short ID to NID
NIDs = pd.merge(NIDs, damnet[['NID','ShortID']],how = 'left', on = 'NID')

# Merge input datasets with cross-reference file to transfer NID
geoDAR = pd.merge(geoDAR,crossref[['NID','GeoDAR_id_v11']],how = 'left',left_on='id_v11',right_on = 'GeoDAR_id_v11')
GDAT = pd.merge(GDAT,crossref[['NID','GDAT_Feature_ID']],how = 'left', left_on = 'Feature_ID', right_on = 'GDAT_Feature_ID')


#### Move and delete NID dams using the spatial edits file if so desired.

In [8]:
# Use spatial edit file to change lat/long of moved dams and remove deleted dams. If you do not wish to modify original NID, skip this.
delete = sites.loc[sites.Deleted == 1]
move = sites.loc[sites.Moved == 1]

NIDs = NIDs[~NIDs['NID'].isin(delete['NID'])] # Drop deleted NIDs

NID_join = pd.merge(NIDs, move, on='NID', how = 'outer') # Add moved NIDs to NID dataframe


# Change the Latitude and Longitude fields of the NIDs you wish to move
NID_join.loc[NID_join.Moved == 1,'Latitude'] = NID_join.Lat 
NID_join.loc[NID_join.Moved == 1, 'Longitude'] = NID_join.Long 

NID_join.Moved = NID_join.Moved.fillna(0)

print('Length to delete:', delete.shape)
print('NID database size:',NID_join.shape) # New database size after incorporating dam attribute file
print('Number of Moved dams in database:', NID_join.loc[NID_join.Moved == 1].shape) 
print('Number of sites:',NID_join.loc[NID_join.IsSite==1].shape)

# Combine duplicate ShortID fields from join
NID_join['ShortID'] = NID_join.ShortID_x
NID_join.loc[NID_join['ShortID'].isna(),'ShortID'] = NID_join.ShortID_y

NID_join = NID_join.drop(columns = ['ShortID_x','ShortID_y'],axis=1)

Length to delete: (211, 34)
NID database size: (91861, 63)
Number of Moved dams in database: (2591, 63)
Number of sites: (1228, 63)


In [9]:
# Replace NaNs with 0
NID_join.NID_Storag = NID_join.NID_Storag.fillna(0)
NID_join.Max_Storag = NID_join.Max_Storag.fillna(0)
NID_join.Normal_Sto = NID_join.Normal_Sto.fillna(0)
NID_join.IsUSBR = NID_join.IsUSBR.fillna(0)
NID_join.IsUSACE = NID_join.IsUSACE.fillna(0)
NID_join.IsSite = NID_join.IsSite.fillna(0)
NID_join.IsRiverMth = NID_join.IsRiverMth.fillna(0)
NID_join.Moved = NID_join.Moved.fillna(0)


#### Remove duplicate NIDs.

In [10]:
# Remove duplicate NIDs. Keep dam with largest reported storage data.
NID_join = NID_join.drop(index=NID_join.loc[NID_join.OtherStructureID.notnull()].index)

# A couple of dams don't have Other Structure ID but are duplicates. Filter those by storage. Should only be 2
# Sort the data by descending max storage
NID_join = NID_join.sort_values('NID_Storag', ascending = False)

# Remove duplicate NIDs, keeping the first value aka the biggest capacity
bool_series = NID_join['NID'].duplicated()
NID_join = NID_join[~bool_series]

NID_join = NID_join.reset_index()

# Print checks
print('Size after removing duplicates and joining to site file:',NID_join.shape) # New database size after removing duplicate NIDs
print('Number of sites in database:',NID_join.loc[(NID_join.IsSite == 1)].shape)
print('Number of Reclamation dams in database:', NID_join.loc[NID_join.IsUSBR == 1].shape)
print('Number of USACE dams in database:', NID_join.loc[NID_join.IsUSACE == 1].shape) 
print('Number of Moved dams in database:', NID_join.loc[NID_join.Moved == 1].shape)
print('Number of rivers in database:', NID_join.loc[NID_join.IsRiverMth==1].shape)

Size after removing duplicates and joining to site file: (91033, 63)
Number of sites in database: (1066, 63)
Number of Reclamation dams in database: (257, 63)
Number of USACE dams in database: (465, 63)
Number of Moved dams in database: (2229, 63)
Number of rivers in database: (143, 63)


#### Locate and populate lock dams.

In [11]:
# Populate IsLock column
# Replace null values with 0
# Assign 1 to any NID field with lock information
NID_join.NumLocks = NID_join.NumLocks.fillna(0)
NID_join.LengthLocks = NID_join.LengthLocks.fillna(0)
NID_join.LockWidth = NID_join.LockWidth.fillna(0)
NID_join.LengthSecondLock = NID_join.LengthSecondLock.fillna(0)
NID_join.SecondLockWidth = NID_join.SecondLockWidth.fillna(0)
NID_join.loc[(NID_join.NumLocks>0) & (NID_join.NumLocks<10),'IsLock'] = 1 
NID_join.loc[(NID_join.LengthLocks>0),'IsLock'] = 1
NID_join.loc[(NID_join.LockWidth>0),'IsLock'] = 1
NID_join.loc[(NID_join.LengthSecondLock>0),'IsLock'] = 1
NID_join.loc[(NID_join.SecondLockWidth>0),'IsLock'] = 1

# Search by name containing word 'Lock'
NID_join['Dam_Name'] = NID_join['Dam_Name'].fillna('') # First fill NA names with empty strings
NID_join.loc[NID_join['Dam_Name'].str.contains('Lock '), 'IsLock'] = 1 

# Set GA01804 and MO20537 to IsLock = 0 because are not locks but have lock in name
NID_join.loc[NID_join['NID'] == 'GA01804', 'IsLock'] = 0
NID_join.loc[NID_join['NID'] == 'MO20537', 'IsLock'] = 0

# Fill null values with 0
NID_join.IsLock = NID_join.IsLock.fillna(0)

# Drop columns we no longer need.
NID_join = NID_join.drop(['NumLocks','LengthLocks','LockWidth','LengthSecondLock','SecondLockWidth','Deleted'],axis=1)


#### Filter dams by name to remove any dam names that contain Spillway, Levee, Sewage, Treatment, Auxiliary, or Remedial.

In [12]:
print('Size before filtering:', NID_join.shape)

# Filter by name: Filter the dams that are NOT moved in the dam attributes file
filters = "Spillway|Levee|Sewage|Treatment|Auxiliary|Remedial"

# Only filter non-sites
NIDs_filtered = NID_join.drop(index=NID_join.loc[NID_join.Dam_Name.str.contains(filters)==True].loc[(NID_join.Moved == 0)].index)

NID_join = NIDs_filtered

print('Size after filtering by name:', NIDs_filtered.shape) # New database size after filtering by name
print('Number of sites in database:', NID_join.loc[NID_join.IsSite == 1].shape) 
print('Number of Reclamation dams in database:', NID_join.loc[NID_join.IsUSBR == 1].shape)
print('Number of USACE dams in database:', NID_join.loc[NID_join.IsUSACE == 1].shape)


Size before filtering: (91033, 57)
Size after filtering by name: (90563, 57)
Number of sites in database: (1066, 57)
Number of Reclamation dams in database: (257, 57)
Number of USACE dams in database: (465, 57)


#### Join remaining supplementary files and modify latitude and longitude appropriately.

In [13]:
## Join to GRanD dams

# Filter so only dams in the United States
GRanD = GRanD[(GRanD['COUNTRY'].str.contains('United States')== True)|(GRanD['SEC_CNTRY'].str.contains('United States')==True)]
GRanD = GRanD[GRanD['ADMIN_UNIT'].str.contains('Alaska') == False]
GRanD = GRanD[GRanD['ADMIN_UNIT'].str.contains('Hawaii') == False]
GRanD = GRanD[GRanD['ADMIN_UNIT'].str.contains('Puerto Rico') == False]
                
# Filter out columns we don't want
GRanD = GRanD.drop(columns = ['RIVER','ALT_RIVER','MAIN_BASIN','SUB_BASIN','NEAR_CITY','ALT_CITY','SEC_ADMIN','COUNTRY','SEC_CNTRY','ALT_YEAR','ALT_HGT_M','DAM_LEN_M','ALT_LEN_M','AREA_SKM','AREA_POLY','AREA_REP','AREA_MAX','AREA_MIN','CAP_MAX','CAP_REP','CAP_MIN','DEPTH_M','DIS_AVG_LS','DOR_PC','ELEV_MASL','CATCH_SKM','CATCH_REP','DATA_INFO','USE_IRRI','USE_ELEC','USE_SUPP','USE_FCON','USE_RECR','USE_NAVI','USE_FISH','USE_PCON','USE_LIVE','USE_OTHR','MAIN_USE','LAKE_CTRL','MULTI_DAMS','TIMELINE','COMMENTS','URL','QUALITY','EDITOR','POLY_SRC'])

print('Size of GRanD:', GRanD.shape)


Size of GRanD: (1901, 23)


In [14]:
# Find matches between GRanD NID and NID NID; outer join should keep GRanD dams that weren't in NID
GRanD_join = pd.merge(NID_join, GRanD, on='NID', how='outer', suffixes = ('_NID','_GRanD'))

# Combine output columns that were split in the join
GRanD_join.loc[GRanD_join.ShortID_GRanD.notnull(), 'ShortID'] = GRanD_join.ShortID_GRanD
GRanD_join.loc[GRanD_join.ShortID.isnull(), 'ShortID'] = GRanD_join.ShortID_NID

# Assign 0 to NaNs
GRanD_join.loc[GRanD_join.IsSite_NID.isna(),'IsSite_NID'] = 0
GRanD_join['IsSite'] = GRanD_join['IsSite_NID']
GRanD_join.loc[GRanD_join.IsUSBR_NID.isna(),'IsUSBR_NID'] = 0
GRanD_join['IsUSBR'] = GRanD_join['IsUSBR_NID']
GRanD_join.loc[GRanD_join.IsUSACE_NID.isna(),'IsUSACE_NID'] = 0
GRanD_join['IsUSACE'] = GRanD_join['IsUSACE_NID']

# Create a GRanD lat/long field that takes the GRanD lat/long preferentially. These fields are called NewX and NewY for our manual placements.
GRanD_join['LAT_GRAND'] = GRanD_join['NewY']
GRanD_join['LONG_GRAND'] = GRanD_join['NewX']

# Set null values to -999
GRanD_join.LAT_GRAND = GRanD_join.LAT_GRAND.fillna(-999)
GRanD_join.LONG_GRAND = GRanD_join.LONG_GRAND.fillna(-999)

# Create a new field for lat/long that takes the GRanD lat/long if not -999 and takes the original lat/long if -999
GRanD_join['LAT_JOIN'] = GRanD_join.LAT_GRAND
GRanD_join['LONG_JOIN'] = GRanD_join.LONG_GRAND
GRanD_join.loc[GRanD_join.LAT_GRAND == -999, 'LAT_JOIN'] = GRanD_join.Latitude
GRanD_join.loc[GRanD_join.LONG_GRAND == -999, 'LONG_JOIN'] = GRanD_join.Longitude

#Drop any GRanD dams that do not have an NID
GRanD_join = GRanD_join[GRanD_join['NID'].str.strip() != ""]

print('Size before joining to GRanD:',NID_join.shape)
print('Size after joining to GRanD:',GRanD_join.shape) # New database size after adding GRanD dams
print('Number of sites in database:',GRanD_join.loc[GRanD_join.IsSite == 1].shape) 
print('Number of Reclamation dams in database:', GRanD_join.loc[GRanD_join.IsUSBR == 1].shape)
print('Number of USACE dams in database:', GRanD_join.loc[GRanD_join.IsUSACE == 1].shape)
print('Number of GRanD in database:', GRanD_join.loc[GRanD_join.IsGRanD == 1].shape) 


Size before joining to GRanD: (90563, 57)
Size after joining to GRanD: (90625, 87)
Number of sites in database: (1066, 87)
Number of Reclamation dams in database: (257, 87)
Number of USACE dams in database: (465, 87)
Number of GRanD in database: (1898, 87)


In [15]:
# Find matches between Removed Dams and NID by NID; outer join should keep Removed dams that weren't in NID. 
# If you do not want to use a removed dams file, skip this step.
GRanD_join = pd.merge(GRanD_join, removed, on='NID', how='outer', suffixes = ('_join','_rem'))

# Create a GRanD lat/long field that takes the lat/long value from the two joins that is not -999 (the maximum)
GRanD_join['LAT_Rem'] = GRanD_join['DamLatitud']
GRanD_join['LONG_Rem'] = GRanD_join['DamLongitu']

# Set null values to -999
GRanD_join.LAT_Rem = GRanD_join.LAT_Rem.fillna(-999)
GRanD_join.LONG_Rem = GRanD_join.LONG_Rem.fillna(-999)

# Make GRanD lat/long null for GRanD and manually placed moved dams. This preferentially keeps their locations.
GRanD_join.loc[GRanD_join.IsGRanD == 1, 'LAT_Rem'] = -999
GRanD_join.loc[GRanD_join.IsGRanD == 1, 'LONG_Rem'] = -999                                                                                    

GRanD_join.loc[GRanD_join.Moved == 1, 'LAT_Rem'] = -999
GRanD_join.loc[GRanD_join.Moved == 1, 'LONG_Rem'] = -999
                           
# Create a new field for lat/long that takes the Removed dam lat/long if not null and takes the original lat/long if null
GRanD_join.loc[GRanD_join.LAT_Rem != -999, 'LAT_JOIN'] = GRanD_join.LAT_Rem
GRanD_join.loc[GRanD_join.LONG_Rem != -999, 'LONG_JOIN'] = GRanD_join.LONG_Rem

# Fix duplicate fields from join
GRanD_join.loc[GRanD_join.Batch_for_rem.isnull() == True,'Batch_for_rem'] = -999 # Set null values to -999
GRanD_join.loc[GRanD_join.Batch_for_rem != -999, 'Batch_for'] = GRanD_join.Batch_for_rem
GRanD_join.loc[GRanD_join.Batch_for_rem == -999, 'Batch_for'] = GRanD_join.Batch_for_join

print('Number of dams in database after joining to removed dams:',GRanD_join.shape) # New database size after adding removed dams


Number of dams in database after joining to removed dams: (90668, 111)


In [16]:
# Join to GeoDAR and add GeoDAR locations where able
NID_geoDAR = pd.merge(GRanD_join, geoDAR[['NID','lat','lon']], on='NID', how='left')
# NID_geoDAR = pd.merge(GRanD_join, geoDAR[['NID','NewX','NewY']], on='NID', how='left')

# Create a GRanD lat/long field that takes the lat/long value from the two joins that is not -999 (the maximum)
NID_geoDAR['LAT_Geo'] = NID_geoDAR['lat']
NID_geoDAR['LONG_Geo'] = NID_geoDAR['lon']
# NID_geoDAR['LAT_Geo'] = NID_geoDAR['NewY_y']
# NID_geoDAR['LONG_Geo'] = NID_geoDAR['NewX_y']

# Set null values to -999
NID_geoDAR.LAT_Geo = NID_geoDAR.LAT_Geo.fillna(-999)
NID_geoDAR.LONG_Geo = NID_geoDAR.LONG_Geo.fillna(-999)

# Make GRanD lat/long null for GRanD and manually placed moved dams. This preferentially keeps their locations
NID_geoDAR.loc[NID_geoDAR.IsGRanD == 1, 'LAT_Geo'] = -999
NID_geoDAR.loc[NID_geoDAR.IsGRanD == 1, 'LONG_Geo'] = -999                                                                                    

NID_geoDAR.loc[NID_geoDAR.Moved == 1, 'LAT_Geo'] = -999
NID_geoDAR.loc[NID_geoDAR.Moved == 1, 'LONG_Geo'] = -999
                                                                                    
# Create a new field for lat/long that takes the GeoDAR lat/long if not null and takes the original lat/long if null
NID_geoDAR.loc[NID_geoDAR.LAT_Rem != -999, 'LAT_JOIN'] = NID_geoDAR.LAT_Geo
NID_geoDAR.loc[NID_geoDAR.LONG_Rem != -999, 'LONG_JOIN'] = NID_geoDAR.LONG_Geo

print('Number of dams in database:', NID_geoDAR.shape)
print('Number of locations from GeoDAR:', len(NID_geoDAR.loc[NID_geoDAR['LAT_Geo']!=-999]))

GRanD_join = NID_geoDAR

Number of dams in database: (90668, 115)
Number of locations from GeoDAR: 4282


In [17]:
# Join to GDAT and get dam height, storage, and year completed data.

GRanD_join = pd.merge(GRanD_join,GDAT[['NID','Year_Fin','Volume_Max','Height']],on='NID',how='left')
GRanD_join.Year_Fin = pd.to_numeric(GRanD_join['Year_Fin'],errors='coerce') #convert string dates to integers from GDAT
GRanD_join.Height = pd.to_numeric(GRanD_join['Height'],errors='coerce') #convert Height to integers from GDAT


#### Filter based on storage

In [18]:
# Assign 0 to IsGRanD non-GRanD dams
GRanD_join.IsGRanD = GRanD_join.IsGRanD.fillna(0)

# Filter based on storage
print('Size before storage filtering:', GRanD_join.shape)

# Convert fields to m3
GRanD_join['NIDStor_m3'] = GRanD_join.NID_Storag*1233.48 #AF to m3
GRanD_join['GRanDCapm3'] = GRanD_join.CAP_MCM * (1e6) # Convert million cubic meters to cubic m
GRanD_join['Volume_Max'] = GRanD_join.Volume_Max * (1e6) # Convert million cubic meters to cubic m

#fill all null storage values in all fields with 0
GRanD_join.NIDStor_m3 = GRanD_join.NIDStor_m3.fillna(0)
GRanD_join.GRanDCapm3 = GRanD_join.GRanDCapm3.fillna(0)
GRanD_join.CapOrig_m3 = GRanD_join.CapOrig_m3.fillna(0)
GRanD_join.Capm3_p = GRanD_join.Capm3_p.fillna(0)
GRanD_join.OrigCap_m3 = GRanD_join.OrigCap_m3.fillna(0)
GRanD_join.CapNew_m3 = GRanD_join.CapNew_m3.fillna(0)
GRanD_join.Volume_Max = GRanD_join.Volume_Max.fillna(0)

# Replace NaNs with 0
GRanD_join.IsUSBR = GRanD_join.IsUSBR.fillna(0)
GRanD_join.IsUSACE = GRanD_join.IsUSACE.fillna(0)
GRanD_join.IsSite = GRanD_join.IsSite.fillna(0)
GRanD_join.IsRiverMth = GRanD_join.IsRiverMth.fillna(0)
GRanD_join.Moved = GRanD_join.Moved.fillna(0)



# Create column that takes the maximum of all of the storage values for a given field. This is NID_Storag in the NID table.
GRanD_join['MaxStor_m3'] = GRanD_join['NIDStor_m3']

# Set GRanD_join storage sources to initially be NID
# The outcome of this is that for each storage value reported, you have a source and a year that storage value represents
GRanD_join['StorSource'] = 'NID'


#Replace any with MaxStor == 0 with GRanD storage, dam attribute file storage, removed dams storage, then GDAT storage
GRanD_join.loc[GRanD_join.MaxStor_m3 == 0, 'StorSource'] = 'GRanD'
GRanD_join.loc[GRanD_join.MaxStor_m3 == 0, 'MaxStor_m3'] = GRanD_join.GRanDCapm3 #GRanD


GRanD_join.loc[GRanD_join.MaxStor_m3 == 0, 'StorSource'] = GRanD_join.Batch_for
GRanD_join.loc[GRanD_join.MaxStor_m3 == 0, 'MaxStor_m3'] = GRanD_join.CapOrig_m3 #dam attribute file and iCold


GRanD_join.loc[GRanD_join.MaxStor_m3 == 0, 'StorSource'] = GRanD_join.Batch_for
GRanD_join.loc[GRanD_join.MaxStor_m3 == 0, 'MaxStor_m3'] = GRanD_join.OrigCap_m3 #Removed dams


GRanD_join.loc[GRanD_join.MaxStor_m3 == 0, 'StorSource'] = 'GDAT'
GRanD_join.loc[GRanD_join.MaxStor_m3 == 0, 'MaxStor_m3'] = GRanD_join.Volume_Max #GDAT


#Any dams with max-storage = 0 that is not a site or federal is removed
GRanD_join = GRanD_join.drop(index=GRanD_join.loc[GRanD_join.MaxStor_m3 == 0].loc[GRanD_join.IsRiverMth==0].index)


print('Size after storage filtering:',GRanD_join.shape)
print('Number of sites in database:', GRanD_join.loc[GRanD_join.IsSite == 1].shape)
print('Number of Reclamation dams in database:', GRanD_join.loc[GRanD_join.IsUSBR == 1].shape)
print('Number of USACE dams in database:', GRanD_join.loc[GRanD_join.IsUSACE == 1].shape)
print('Number of GRanD in database:', GRanD_join.loc[GRanD_join.IsGRanD == 1].shape)

Size before storage filtering: (90668, 118)
Size after storage filtering: (89830, 122)
Number of sites in database: (1066, 122)
Number of Reclamation dams in database: (257, 122)
Number of USACE dams in database: (465, 122)
Number of GRanD in database: (1898, 122)


#### Assign ShortIDs to any dam that doesn't already have one.

In [19]:
# Assign unique ShortID to non-sites

# First, sort by ascending NID
GRanD_join = GRanD_join.sort_values('NID', ascending = True)

# Starting ShortID should be the maximum of the site/GRanD ShortIDs plus 1,000 and rounded to the nearest thousandth
startID = math.floor((GRanD_join.ShortID.max() + 1000)/1000)*1000
ID = startID

# Assign a ShortID to anything that doesn't have one yet       
for index, row in GRanD_join.iterrows():
    if pd.isna(row['ShortID']):  # Check if ShortID is null
        GRanD_join.loc[index, 'ShortID'] = ID
        ID += 1  # Increment ID for next ShortID


In [20]:
# Check for duplicate ShortIDs. If there are duplicates, you have an error in your site input files. This is because either
# you assigned two dams with different NIDs the same ShortID or could have a typo in the NID field.

test = GRanD_join.ShortID

nodup = set(test)

if len(nodup) != len(test):
    print('There are duplicate ShortIDs!')

    newlist = [] # Empty list to hold unique elements from the list.
    duplist = [] # Empty list to hold the duplicate elements from the list.
    for i in test:
        if i not in newlist:
            newlist.append(i)
        else:
            duplist.append(i) # This method catches the first duplicate entries, and appends them to the list.
            
    # The next step is to print the duplicate entries, and the unique entries
    print("List of duplicates", duplist)
else:
    print('There are no duplicate ShortIDs')


There are no duplicate ShortIDs


#### Clean up columns and export as a csv.

In [21]:
# Combine columns where necessary and drop unwanted columns

# Combine NID Dam height and removed dam heights into one field. Only put in removed dam where NID is null
GRanD_join['DamH_ft'] = GRanD_join.NID_Height
GRanD_join.loc[(GRanD_join.DamH_ft.isnull() == True),'DamH_ft'] = GRanD_join.DAmHft #removed dam database
GRanD_join.loc[(GRanD_join.DamH_ft.isnull())==True,'DamH_ft'] = GRanD_join.DAM_HGT_M*3.28 #GRanD
GRanD_join.loc[(GRanD_join.DamH_ft.isnull())==True,'DamH_ft'] = GRanD_join.Height*3.28 #GDAT

# Fill in Dam function from removed dam file everywhere PrimPurp is null
# Then replace the null PrimaryPurp with Purp because some have null PrimaryPurp and non-null Purp.
GRanD_join.loc[(GRanD_join.PrimaryPurp.isnull() == True),'PrimaryPurp'] = GRanD_join.DamFunctio
GRanD_join.loc[(GRanD_join.PrimaryPurp.isnull() == True),'PrimaryPurp'] = GRanD_join.Purp

# Year completed
GRanD_join.loc[GRanD_join['yrc'].notna(),'yrc_source'] = GRanD_join.Batch_for #Removed dams
GRanD_join['yrc'] = GRanD_join['yrc'].fillna(GRanD_join['Year_Compl']) # Anywhere removed dam database is null, change to NID
GRanD_join.loc[(GRanD_join['yrc'].notna()) & (GRanD_join['yrc_source'].isna()),'yrc_source'] = 'NID'
GRanD_join.loc[(GRanD_join.Moved == 1) & (GRanD_join.Batch_for != 'iCOLD'), 'yrc'] = GRanD_join.Year_Compl_site #Anyting else still null fill from dam attributes file
GRanD_join.loc[(GRanD_join.Moved == 1) & (GRanD_join.Batch_for != 'iCOLD'), 'yrc_source'] = GRanD_join.Batch_for
GRanD_join.loc[GRanD_join['yrc'].isna(),'yrc'] = GRanD_join['YEAR'] # Anything else still null fill with GRanD
GRanD_join.loc[(GRanD_join['yrc'].notna()) & (GRanD_join['yrc_source'].isna()),'yrc_source'] = 'GRanD'
GRanD_join.loc[GRanD_join['yrc'].isna(),'yrc'] = GRanD_join['Year_Compl_site']# Anything else still null fill with iCOLD
GRanD_join.loc[(GRanD_join['yrc'].notna()) & (GRanD_join['yrc_source'].isna()),'yrc_source'] = 'iCOLD'
GRanD_join.loc[GRanD_join['yrc'].isna(),'yrc'] = GRanD_join['Year_Fin']# Anything else still null fill with GDAT
GRanD_join.loc[(GRanD_join['yrc'].notna()) & (GRanD_join['yrc_source'].isna()),'yrc_source'] = 'GDAT'

GRanD_join.loc[GRanD_join.yrc.isnull(),'yrc_source'] = np.nan #anywhere with no year completed, fill the source as NaN

# Year removed
GRanD_join['yrr'] = GRanD_join['yrr'].fillna(0) #Deal with inconsistencies in datatypes in fields
GRanD_join['yrr'] = GRanD_join['yrr'].replace(-99,0)
GRanD_join['yrr'] = GRanD_join['yrr'].replace(-999,0)
GRanD_join['yrr'] = GRanD_join.apply(lambda row: row['YrRemoved'] if row['yrr'] == 0 else row['yrr'], axis=1) #attribute file/USACE/USBR

GRanD_join['yrr'] = GRanD_join['yrr'].fillna(0)
GRanD_join['yrr'] = GRanD_join['yrr'].replace(-99,0)
GRanD_join['yrr'] = GRanD_join['yrr'].replace(-999,0)
GRanD_join['yrr'] = GRanD_join.apply(lambda row: row['REM_YEAR'] if row['yrr'] == 0 else row['yrr'], axis=1) #GRanD
GRanD_join['yrr'] = GRanD_join['yrr'].fillna(0)
GRanD_join['yrr'] = GRanD_join['yrr'].replace(-99,0)
GRanD_join['yrr'] = GRanD_join['yrr'].replace(-999,0)

# Fill nans with 0
GRanD_join['IsRiverMth'] = GRanD_join['IsRiverMth'].fillna(0) #River indicator
GRanD_join['delta'] = GRanD_join['delta'].fillna(0) #delta indicator


# Dam name
GRanD_join.loc[GRanD_join.Dam_Name=='','Dam_Name'] = np.nan # Set values we made blank earlier back to nan
GRanD_join.loc[GRanD_join.Dam_Name.isnull()==True, 'Dam_Name'] = GRanD_join.Other_Dam # Replace missing NID names with NID other name first
GRanD_join.loc[GRanD_join.Dam_Name.isnull()==True, 'Dam_Name'] = GRanD_join.Reservoir # Then site
GRanD_join.loc[GRanD_join.Dam_Name.isnull()==True, 'Dam_Name'] = GRanD_join.RES_NAME # Then GRanD
GRanD_join.loc[GRanD_join.Dam_Name.isnull()==True, 'Dam_Name'] = GRanD_join.DamName # Then Removed file


# Drop unwanted columns at this point
GRanD_join = GRanD_join.drop(['index','Other_Dam','OtherStructureID','FederalID','Longitude','Latitude','River','Owner_Name',
                             'Max_Storag','Normal_Sto','Lat','Long','Batch_for_join','ShortID_NID',
                              'IsSite_NID','IsUSBR_NID','IsUSACE_NID','DAM_NAME','ALT_NAME',
                              'ADMIN_UNIT','LONG_DD','LAT_DD','NIDnotes','ShortID_GRanD','HasNHD',
                              'IsSite_GRanD','IsUSBR_GRanD','IsUSACE_GRanD','LAT_GRAND','LONG_GRAND',
                              'CitationID','CitationUR','DamAccessi','DamRiverNa','DamRiver_1','DamLocatio',
                              'DamState_P','DamLatitud','DamLongitu','DamAccurac','DamOwner','Batch_for_rem',
                              'LAT_Rem','LONG_Rem','LAT_Geo','LONG_Geo','NID_Height','DamH_m',
                              'DAmHft','Purp','DamFunctio','Other_Dam','REM_YEAR','RES_NAME',
                              'DamName','DAM_HGT_M','USACE_PROJECT_ID','YEAR','CapNewAF','CapAF_p',
                             'CAP_MCM','method','Year_Fin','Height','Volume_Max','GRanD_ID','SiteIsGRanD','OID__join',
                             'OID__rem','lat','lon','NID_ID'], axis=1)

In [22]:
# Combine Year Completed fields to fill in no Year Completed data into yrc.

# first, re-index
GRanD_join = GRanD_join.reset_index(drop=True)

# If yrc is outside of 1700-2023, make it 0 because is likely wrong.
GRanD_join.loc[(GRanD_join.yrc < 1700) | (GRanD_join.yrc > 2023), 'yrc'] = 0

# If yrc = 0 and Year_Modif from NID ~=0, set yrc to the minimum of Year_Modif
for i in range(len(GRanD_join['Year_Modif'])):
    years = GRanD_join['Year_Modif'][i]
    
    if isinstance(years,float):
        if np.isnan(years):
            years = []
    else:
        years = str(years)
        years = years.split(';')
        years = [int(re.search(r'\d+',year).group()) for year in years]
        
        if len(years) > 0:
            minyr = min(years)
            if GRanD_join['yrc'][i] == 0:
                GRanD_join.loc[i, 'yrc'] = minyr

GRanD_join.loc[GRanD_join.yrc == -99, 'yrc'] = 0
GRanD_join.loc[GRanD_join.yrc.isna(),'yrc'] = 0

GRanD_join = GRanD_join.drop(['Year_Compl','Year_Modif','Year_Compl_site','YrRemoved'],axis=1)

In [23]:
# Convert everything to metric and drop the ft column
GRanD_join['Dam_Len_m'] = GRanD_join.Dam_Length*0.3048
GRanD_join['SA_m2'] = GRanD_join.Surface_Ar*4046.85642
GRanD_join['DA_km2'] = GRanD_join.Drainage_A*2.58998811
GRanD_join['MaxQ_m3s'] = GRanD_join.Max_Discha*0.028316847
GRanD_join['DamH_m'] = GRanD_join.DamH_ft * 0.3048

# Fill in nans in Moved field
GRanD_join['Moved'] = GRanD_join['Moved'].fillna(0)

# Drop columns with imperial units
GRanD_join = GRanD_join.drop(['Dam_Length','Surface_Ar','Drainage_A','Max_Discha','CapOrigAF',
                             'DamNameAlt','elev_ft','DamH_ft','NID_Storag'
                              ],axis=1)
# Rename columns
GRanD_join.rename(columns = {'OrigCap_m3':'OCapm3_Rem','LAT_JOIN':'LAT_FINAL','LONG_JOIN':'LONG_FINAL',
                             'DA_km':'site_DA_km'}, inplace=True)

In [24]:
# Export final files as a csv
GRanD.to_csv(os.path.join(out_folder,'GRanD.csv'))
GRanD_join.to_csv(os.path.join(out_folder,'NID_GRanDjoin.csv'))

In [25]:
print('Size moving to snapping:',GRanD_join.shape)
print('Number of sites in database:', GRanD_join.loc[GRanD_join.IsSite == 1].shape)
print('Number of Reclamation dams in database:', GRanD_join.loc[GRanD_join.IsUSBR == 1].shape)
print('Number of USACE dams in database:', GRanD_join.loc[GRanD_join.IsUSACE == 1].shape)
print('Number of GRanD in database:', GRanD_join.loc[GRanD_join.IsGRanD == 1].shape)

Size moving to snapping: (89830, 45)
Number of sites in database: (1066, 45)
Number of Reclamation dams in database: (257, 45)
Number of USACE dams in database: (465, 45)
Number of GRanD in database: (1898, 45)


# 2. Snap dams to NHDPlus Flowlines.

In [26]:
# Snap dams to NHDPlus HR Flowlines: must be done with arcPy. Ensure that all layers are in the same coordinate system (here we use NAD83).

# First convert the csv to a shapefile
XFieldName = 'LONG_FINAL'
YFieldName = 'LAT_FINAL'
newLayerName = "NID_filtered" # Name of your output shapefile

spatialRef = arcpy.SpatialReference(4269) # Spatial reference WKID for NAD83
csvFilePath = os.path.join(out_folder,'NID_GRanDjoin.csv') # Your filtered dam dataset csv



arcpy.MakeXYEventLayer_management(csvFilePath, XFieldName, YFieldName, newLayerName, spatial_reference=spatialRef)
arcpy.FeatureClassToShapefile_conversion(newLayerName, out_folder)


*The following 2 cells will take multiple hours to run, so plan accordingly.*

In [27]:
# For NHDPlus
NIDFiltered = os.path.join(out_folder,'NID_filtered.shp') #Link to your filtered NID shapefile

NIDlyr = "NIDlyr" #create a layer file
NHDlyr = "NHDlyr"
arcpy.management.MakeFeatureLayer(NIDFiltered,NIDlyr) #convert the feature class to a layer to work from
arcpy.management.MakeFeatureLayer(NHDFlowline,NHDlyr)



##########################################################################################
# Run the near tool to get the new lat/long with near FType558
arcpy.management.SelectLayerByAttribute(NHDlyr,'NEW_SELECTION',"FCODE = 55800")
arcpy.management.SelectLayerByAttribute(NHDlyr,'SUBSET_SELECTION',"DivDASqKM > 0") #only select flowlines with > 0 sq.km.

arcpy.management.SelectLayerByAttribute(NIDlyr,'NEW_SELECTION',"IsSite = 0") #select all non-sites, non-Reclamation, non-USACE, and non-GRanD to snap
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsGRanD = 0")
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsUSBR = 0")
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsUSACE = 0")
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"Moved = 0")

# Print counts
print('Total non-site initial selection:',arcpy.management.GetCount(NIDlyr))

# Near 250 m to FType558 for everything that isn't a site
arcpy.analysis.Near(NIDlyr,NHDlyr,"250 Meters","LOCATION","NO_ANGLE","PLANAR")

# Create a new field and populate it
arcpy.management.AddField(NIDlyr,'NrX_Final',"DOUBLE")
arcpy.management.AddField(NIDlyr,'NrY_Final',"DOUBLE")


arcpy.management.SelectLayerByAttribute(NHDlyr,'CLEAR_SELECTION') #clear the flowline FType 558 Selection before transferring over values

# Transfer values over
arcpy.CalculateField_management(NIDlyr, "NrX_Final","!NEAR_X!", "PYTHON3")
arcpy.CalculateField_management(NIDlyr, "NrY_Final","!NEAR_Y!", "PYTHON3")

arcpy.management.DeleteField(NIDlyr,['NEAR_X','NEAR_Y','NEAR_DIST','NEAR_FID'])

# Print counts
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"NrX_Final <> -1")
print('Total snapped to flowline:', arcpy.management.GetCount(NIDlyr))


#########################################################################################
# Near 500m to all flowlines for any that didn't snap and have MaxStor >= 4000 AF/5,0000,000 m^3
arcpy.management.SelectLayerByAttribute(NHDlyr,'NEW_SELECTION',"FCODE <> 56600") #select NHD that isn't a coastline
arcpy.management.SelectLayerByAttribute(NHDlyr,'SUBSET_SELECTION',"DivDASqKM > 0") #only select flowlines with > 0 sq.km.

arcpy.management.SelectLayerByAttribute(NIDlyr,'NEW_SELECTION',"NrX_Final = -1") #Select any Dams that didn't snap
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsSite = 0") #And any that aren't sites
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsGRanD = 0") #And not GRanD
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsUSBR = 0") #And not Reclamation
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsUSACE = 0") #And not Army Corps
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"Moved = 0")
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"MaxStor_m3>=5e6") #MaxStor >= 4000 AF

# Print counts
print('Total moving to storage >= 5e6 m$^3$ step:', arcpy.management.GetCount(NIDlyr))

arcpy.analysis.Near(NIDlyr,NHDlyr,"500 Meters","LOCATION","NO_ANGLE","PLANAR")

# Transfer values over
arcpy.CalculateField_management(NIDlyr, "NrX_Final","!NEAR_X!", "PYTHON3")
arcpy.CalculateField_management(NIDlyr, "NrY_Final","!NEAR_Y!", "PYTHON3")

arcpy.management.DeleteField(NIDlyr,['NEAR_X','NEAR_Y','NEAR_DIST','NEAR_FID'])

arcpy.management.SelectLayerByAttribute(NHDlyr,'CLEAR_SELECTION') #clear the flowline Selection before transferring over values

# Print counts
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"NrX_Final <> -1")
print('Total snapped to flowline:', arcpy.management.GetCount(NIDlyr))


#######################################################################################
# Near 250m to all flowlines for any that didn't snap and still aren't a site
arcpy.management.SelectLayerByAttribute(NHDlyr,'NEW_SELECTION',"FCODE <> 56600") #select NHD that isn't a coastline
arcpy.management.SelectLayerByAttribute(NHDlyr,'SUBSET_SELECTION',"DivDASqKM > 0") #only select flowlines with > 0 sq.km.

arcpy.management.SelectLayerByAttribute(NIDlyr,'NEW_SELECTION',"NrX_Final = -1") #Select any Dams that didn't snap
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsSite = 0") #And any that aren't sites
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsGRanD = 0") #And any that aren't GRanD
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsUSBR = 0") #And not Reclamation
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"IsUSACE = 0") #And not Army Corps
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"Moved = 0")
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"MaxStor_m3<5e6") #MaxStor < 4000 AF


# Print counts
print('Total moving to storage < 5e6 m$^3$ step:', arcpy.management.GetCount(NIDlyr))

arcpy.analysis.Near(NIDlyr,NHDlyr,"250 Meters","LOCATION","NO_ANGLE","PLANAR")

# Transfer values over
arcpy.CalculateField_management(NIDlyr, "NrX_Final","!NEAR_X!", "PYTHON3")
arcpy.CalculateField_management(NIDlyr, "NrY_Final","!NEAR_Y!", "PYTHON3")

arcpy.management.DeleteField(NIDlyr,['NEAR_X','NEAR_Y','NEAR_DIST','NEAR_FID'])

arcpy.management.SelectLayerByAttribute(NHDlyr,'CLEAR_SELECTION') #clear the flowline Selection before transferring over values

# Print counts
arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"NrX_Final <> -1")
print('Total snapped to flowline:', arcpy.management.GetCount(NIDlyr))


######################################################################################
# Sites to nearest flowline using near 250m b/c should be in the correct place.
arcpy.management.SelectLayerByAttribute(NIDlyr,'CLEAR_SELECTION') #clear the NID selection
arcpy.management.SelectLayerByAttribute(NIDlyr,'NEW_SELECTION',"IsSite = 1") #Select all sites

arcpy.analysis.Near(NIDlyr,NHDlyr,"250 Meters","LOCATION","NO_ANGLE","PLANAR")

# Transfer values over
arcpy.CalculateField_management(NIDlyr, "NrX_Final","!NEAR_X!", "PYTHON3")
arcpy.CalculateField_management(NIDlyr, "NrY_Final","!NEAR_Y!", "PYTHON3")

arcpy.management.DeleteField(NIDlyr,['NEAR_X','NEAR_Y','NEAR_DIST','NEAR_FID'])


arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"NrX_Final <> -1")
print('Total sites snapped to flowline:', arcpy.management.GetCount(NIDlyr))


# GRanD to nearest flowline using near 250m b/c should be in the correct place.
arcpy.management.SelectLayerByAttribute(NIDlyr,'CLEAR_SELECTION') #clear the NID selection
arcpy.management.SelectLayerByAttribute(NIDlyr,'NEW_SELECTION',"IsGRanD = 1") #Select all GRanD

arcpy.analysis.Near(NIDlyr,NHDlyr,"250 Meters","LOCATION","NO_ANGLE","PLANAR")

# Transfer values over
arcpy.CalculateField_management(NIDlyr, "NrX_Final","!NEAR_X!", "PYTHON3")
arcpy.CalculateField_management(NIDlyr, "NrY_Final","!NEAR_Y!", "PYTHON3")

arcpy.management.DeleteField(NIDlyr,['NEAR_X','NEAR_Y','NEAR_DIST','NEAR_FID'])

arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"NrX_Final <> -1")
print('Total GRanD snapped to flowline:', arcpy.management.GetCount(NIDlyr))

# USBR to nearest flowline using near 250m b/c should be in the correct place.
arcpy.management.SelectLayerByAttribute(NIDlyr,'CLEAR_SELECTION') #clear the NID selection
arcpy.management.SelectLayerByAttribute(NIDlyr,'NEW_SELECTION',"IsUSBR = 1") #Select all USBR

arcpy.analysis.Near(NIDlyr,NHDlyr,"250 Meters","LOCATION","NO_ANGLE","PLANAR")

# Transfer values over
arcpy.CalculateField_management(NIDlyr, "NrX_Final","!NEAR_X!", "PYTHON3")
arcpy.CalculateField_management(NIDlyr, "NrY_Final","!NEAR_Y!", "PYTHON3")

arcpy.management.DeleteField(NIDlyr,['NEAR_X','NEAR_Y','NEAR_DIST','NEAR_FID'])

arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"NrX_Final <> -1")
print('Total USBR snapped to flowline:', arcpy.management.GetCount(NIDlyr))


# USACE to nearest flowline using near 250m b/c should be in the correct place.
arcpy.management.SelectLayerByAttribute(NIDlyr,'CLEAR_SELECTION') #clear the NID selection
arcpy.management.SelectLayerByAttribute(NIDlyr,'NEW_SELECTION',"IsUSACE = 1") #Select all USACE

arcpy.analysis.Near(NIDlyr,NHDlyr,"250 Meters","LOCATION","NO_ANGLE","PLANAR")

# Transfer values over
arcpy.CalculateField_management(NIDlyr, "NrX_Final","!NEAR_X!", "PYTHON3")
arcpy.CalculateField_management(NIDlyr, "NrY_Final","!NEAR_Y!", "PYTHON3")

arcpy.management.DeleteField(NIDlyr,['NEAR_X','NEAR_Y','NEAR_DIST','NEAR_FID'])


arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"NrX_Final <> -1")
print('Total USACE snapped to flowline:', arcpy.management.GetCount(NIDlyr))

# Moved dams to nearest flowline using near 250m b/c should be in the correct place.
arcpy.management.SelectLayerByAttribute(NIDlyr,'CLEAR_SELECTION') #clear the NID selection
arcpy.management.SelectLayerByAttribute(NIDlyr,'NEW_SELECTION',"Moved = 1") #Select all dams moved in the NID edit file

arcpy.analysis.Near(NIDlyr,NHDlyr,"250 Meters","LOCATION","NO_ANGLE","PLANAR")

# Transfer values over
arcpy.CalculateField_management(NIDlyr, "NrX_Final","!NEAR_X!", "PYTHON3")
arcpy.CalculateField_management(NIDlyr, "NrY_Final","!NEAR_Y!", "PYTHON3")

arcpy.management.DeleteField(NIDlyr,['NEAR_X','NEAR_Y','NEAR_DIST','NEAR_FID'])


arcpy.management.SelectLayerByAttribute(NIDlyr,'SUBSET_SELECTION',"NrX_Final <> -1")
print('Total moved dams snapped to flowline:', arcpy.management.GetCount(NIDlyr))


# Delete any features that did not snap.
# First select non-snaps
arcpy.management.SelectLayerByAttribute(NIDlyr,'CLEAR_SELECTION')
arcpy.management.SelectLayerByAttribute(NIDlyr,'NEW_SELECTION',"NrX_Final = -1")

print('Total dams that do not snap:', arcpy.management.GetCount(NIDlyr))

# Delete the selected rows:
arcpy.management.DeleteRows(NIDlyr)

print('Total dams in DamNet:', arcpy.management.GetCount(NIDlyr))


# Display XY data using NearX and NearY to move the points onto the NHD Flowlines
arcpy.management.MakeXYEventLayer(NIDlyr, "NrX_Final", "NrY_Final", 'NIDFiltered_snap', spatialRef)

Total non-site initial selection: 86359
Total snapped to flowline: 30961
Total moving to storage >= 5e6 m$^3$ step: 1416
Total snapped to flowline: 814
Total moving to storage < 5e6 m$^3$ step: 53982
Total snapped to flowline: 24930
Total sites snapped to flowline: 1066
Total GRanD snapped to flowline: 1791
Total USBR snapped to flowline: 257
Total USACE snapped to flowline: 451
Total moved dams snapped to flowline: 2201
Total dams that do not snap: 29787
Total dams in DamNet: 60043


### Intersect with the flowline data to extract attributes.

In [28]:
# Intersect file with moved dams to the flowline file:
arcpy.analysis.Intersect(['NIDFiltered_snap',NHDFlowline],os.path.join(out_folder,'NIDFiltered_snap.shp'),"ALL",None,"INPUT")

# Export NID file
dbf = Dbf5(os.path.join(out_folder,'NIDFiltered_snap.dbf'))
df = dbf.to_dataframe()
df.to_csv(os.path.join(out_folder,'NID_filtered_snapped.csv'),index=False)

### Remove dams snapping to duplicate flowlines.

In [29]:
# Remove duplicate flowlines. When you run intersect in ArcGIS, any intersects that happen at the join of two lines gives two
# results in the final table. We need to delete one of these.

NID = pd.read_csv(os.path.join(out_folder,'NID_filtered_snapped.csv'), low_memory=False) # Load data.
# Delete any that snapped to no-drainage area flowlines that aren't sites
NID = NID.drop(index=NID.loc[NID.DivDASqKM == 0].index)

NID_sort = NID.sort_values('ShortID', ascending = True) # Sort by ascending ShortID.


print('Size before duplicates removed:', NID_sort.shape)

# Convert dataframe to dictionaries (struct-like); basically has format column->value.
NID_dict = NID_sort.to_dict(orient = 'records')
dupl_ordered_dict = NID.to_dict(orient='records')

# Initialize empty list to store indices of non-duplicates.
dupind = []

# Identify unique values and their counts.
shortID = [item['ShortID'] for item in dupl_ordered_dict]
uniquevals,ia = np.unique(shortID, return_inverse = True)

# Count the frequency of each index in ia.
bincounts = np.bincount(ia)

# Zero out singles.
singles = uniquevals[bincounts <= 1]
singleidx = [i for i, val in enumerate(shortID) if val in singles]
for idx in singleidx:
    shortID[idx] = 0
    
# Overwrite repeats.
repeats = uniquevals[bincounts > 1]
shortID = np.array([np.where(repeats == val)[0][0] + 1 if val in repeats else val for val in shortID])



Size before duplicates removed: (76196, 189)


In [30]:
skip_it = 0; # Initialize a counter.
# Pull out FCODE and Hydrosequence fields to help with decision tree for removing duplicates.
FCODE = [item['FCODE'] for item in dupl_ordered_dict]
Hydroseq = [item['Hydroseq'] for item in dupl_ordered_dict]


for i in range(len(shortID)):
    if shortID[i] == 0: # If it is not a duplicate, keep it.
        dupind.append(i)
    elif skip_it > 0: # Or if we already dealt with it, update the counter so it gets skipped.
        skip_it -= 1
        continue
    else: # Else the value is a duplicate.
        dup = [idx for idx, val in enumerate(shortID) if val == shortID[i]] # Gives all indices of the duplicates.
        dup1 = dup[0]
        dupskip = dup1 # Keep track of what the first index was because we will change this.
        j = len(dup)
        jskip = j # Same for the length of the duplicates.

        Hydro = Hydroseq[dup1:dup[j-1]+1] # Pull out Hydrosequences as the duplicates.

        kept_indices = [i for i, x in enumerate(Hydro) if x not in [Hydroseq[i] for i in dupind]] # If a dam is already snapped to that flowline, remove the flowlines from the options to choose from.

        dup_test = [dup[i] for i in kept_indices]
                
        if len(dup_test) == 0: # All of the flowline options have already been used, in which case just keep them all. Duplicate snaps are removed later.
            dup_test = dup
        
        dup = dup_test
        
        dup1 = dup[0]
        j = len(dup)
        
        Floc = [index for index, value in enumerate(FCODE[dup1:dup[j-1]+1]) if value == 55800] # Pull out FCODE = 55800 for duplicates (flowlines in reservoirs).
        coast = [index for index, value in enumerate(FCODE[dup1:dup[j-1]+1]) if value == 56600] # Pull out any duplicates that are on a coast flowline.
           
        if len(Floc) == 1: # If only one value is FType 558.
            dupind.append(dup[Floc[0]])
        elif len(Floc) == j: # All of the values are 558, take smallest hydroseq (most downstream).
            Hydro = Hydroseq[dup1:dup[j-1]+1]
            minloc = np.argmin(Hydro)
            dupind.append(dup[minloc])
        elif len(Floc) == 0: # None are FType 558.         
            Hydro = Hydroseq[dup1:dup[j-1]+1]
           
            if len(coast) > 0:
                Hydro = np.delete(Hydro,coast) # Remove coastal values; any dam that snaps to a coast flowline has it's dam order messed up and can route along the coast.
                dup = np.delete(dup,coast)

            minloc = np.argmin(Hydro) # Currently taking minimum of the new hydro.
            dupind.append(dup[minloc])
        else: # Some other number of values is FType 558; still take the most downstream.
            Hydro = [Hydroseq[dup[index]] for index in Floc]
    
            if len(coast) > 0:
                Hydro = np.delete(Hydro,coast)
                dup = np.delete(dup,coast)
                
            minloc = np.argmin(Hydro)
            dupind.append(dup[Floc[minloc]])
            
        if dupskip == i: # If the first index was the current index.
            skip_it = jskip-1 # Skip the next j-1 indices.
        else:
            skip_it = 0

dupltable = pd.DataFrame.from_dict(dupl_ordered_dict)
noduplicates = dupltable.loc[dupind]



print('Size after removing duplicates:',noduplicates.shape) # New database size after snapping to NHD flowlines
print('Number of sites in database:', noduplicates.loc[noduplicates.IsSite == 1].shape)
print('Number of Reclamation dams in database:', noduplicates.loc[noduplicates.IsUSBR == 1].shape)
print('Number of USACE dams in database:', noduplicates.loc[noduplicates.IsUSACE == 1].shape)
print('Number of GRanD in database:', noduplicates.loc[noduplicates.IsGRanD == 1].shape)

Size after removing duplicates: (60033, 189)
Number of sites in database: (1066, 189)
Number of Reclamation dams in database: (257, 189)
Number of USACE dams in database: (451, 189)
Number of GRanD in database: (1790, 189)


In [33]:
noduplicates.head()

Unnamed: 0,Dam_Name,NID,State,OwnerTypes,PrimaryPur,PrimDamTyp,Reservoir,Year_First,Year_Last,Owner,RES_SED_No,CapOrig_m3,CapNew_m3,site_DA_km,IsRiverMth,delta,IsLock,yr_p,Capm3_p,USBRname,GRAND_ID,IsGRanD,NewX,NewY,ShortID,IsSite,IsUSBR,IsUSACE,yrc,yrr,OCapm3_Rem,Batch_for,NIDStor_m3,GRanDCapm3,MaxStor_m3,StorSource,yrc_source,Dam_Len_m,SA_m2,DA_km2,MaxQ_m3s,DamH_m,NrX_Final,NrY_Final,COMID,LENGTHKM,FCODE,Hydroseq,Pathlength,TerminalPa,DnHydroseq,DivDASqKM,SLOPE,QA_MA,VA_MA,QC_MA,VC_MA,QE_MA,VE_MA,Country_ou,WBCOMID
0,RioGrande,MOUTH_RGocean,,,,,RioGrande,0.0,0.0,,,0.0,0.0,0.0,1.0,22.0,0.0,,0.0,,,0.0,,,-9.0,1.0,0.0,0.0,1700.0,0.0,0.0,Rivers,0.0,0.0,0.0,GDAT,Rivers,0.0,0.0,0.0,0.0,0.0,-97.146264,25.956148,626220,0.118,55800,680000035.0,0.0,680000035.0,680000002.0,449182.5912,1e-05,18942.517,-9999.0,17738.014,-9999.0,11938.578,-9999.0,5.0,625912.0
1,Lakeway Subdivision Lake Dam,TX05842,Texas,Private,Recreation,Earth,,,,,,0.0,0.0,,0.0,0.0,0.0,,0.0,,,0.0,,,363053.0,0.0,0.0,0.0,1981.0,0.0,0.0,,150484.56,0.0,150484.56,NID,NID,27.432,72843.41556,0.802896,0.0,2.4384,-97.531512,25.978205,207851,26.132,46006,630010377.0,27.862,630007866.0,630010198.0,255.8484,0.000134,9.802,0.82906,20.606,0.86136,20.606,0.86136,0.0,0.0
3,Rancho Viejo Dam D,TX06232,Texas,Private,Irrigation,Earth,,,,,,0.0,0.0,,0.0,0.0,0.0,,0.0,,,0.0,,,363421.0,0.0,0.0,0.0,0.0,0.0,0.0,,314537.4,0.0,314537.4,NID,,60.96,263045.6673,0.0,0.0,2.4384,-97.553501,25.997315,207873,1.571,55800,630010791.0,57.627,630007866.0,630010579.0,142.4178,0.000751,5.33,-9998.0,12.53,-9998.0,12.53,-9998.0,0.0,207583.0
4,Rancho Viejo Dam C,TX06231,Texas,Private,Irrigation,Earth,,,,,,0.0,0.0,,0.0,0.0,0.0,,0.0,,,0.0,,,363420.0,0.0,0.0,0.0,0.0,0.0,0.0,,370044.0,0.0,370044.0,NID,,60.96,263045.6673,0.0,0.0,2.1336,-97.54014,26.002529,206919,4.629,46006,630011557.0,61.425,630007866.0,630011279.0,117.1035,1e-05,4.376,0.59582,10.667,0.61733,10.667,0.61733,0.0,0.0
5,Rancho Viejo Dam B,TX06230,Texas,Private,Irrigation,Earth,,,,,,0.0,0.0,,0.0,0.0,0.0,,0.0,,,0.0,,,402030.0,0.0,0.0,0.0,0.0,0.0,0.0,,185022.0,0.0,185022.0,NID,,60.96,303514.2315,0.0,0.0,2.1336,-97.538459,26.015948,206919,4.629,46006,630011557.0,61.425,630007866.0,630011279.0,117.1035,1e-05,4.376,0.59582,10.667,0.61733,10.667,0.61733,0.0,0.0


In [44]:

test = pd.merge(noduplicates,sites[['NID','Moved','elev_ft']],on='NID',how='left')


In [47]:
noduplicates = test.copy()
noduplicates['Moved'] = noduplicates.Moved.fillna(0)

In [49]:
noduplicates['elev_m'] = noduplicates['elev_ft']* 0.3048

In [50]:
#Re-order fields and drop new additions we do not want
#Re-order fields and drop new additions we do not want
noduplicates = noduplicates.drop(['FID_NID_fi','Field1','FID_NHDFlo','FDATE','RESOLUTION','GNIS_ID','GNIS_NAME','REACHCODE',
                                 'FLOWDIR','FTYPE','StreamLeve','StreamOrde','StreamCalc','FromNode','ToNode','LevelPathI',
                                 'StartFlag','TerminalFl','DnLevel','UpLevelPat','UpHydroseq','DnLevelPat','DnMinorHyd','DnDrainCou',
                                 'FromMeas','ToMeas','RtnDiv','VPUIn','VPUOut','Tidal','TOTMA','WBAreaType','PathTimeMA',
                                 'HWNodeSqKM','MAXELEVRAW','MINELEVRAW','MAXELEVSMO','MINELEVSMO','HWTYPE','SLOPELENKM',
                                 'QA_01','VA_01','QC_01','VC_01','QE_01','VE_01','QA_02','VA_02','QC_02','VC_02','QE_02','VE_02',
                                 'QA_03','VA_03','QC_03','VC_03','QE_03','VE_03','QA_04','VA_04','QC_04','VC_04','QE_04','VE_04',
                                 'QA_05','VA_05','QC_05','VC_05','QE_05','VE_05','QA_06','VA_06','QC_06','VC_06','QE_06','VE_06',
                                 'QA_07','VA_07','QC_07','VC_07','QE_07','VE_07','QA_08','VA_08','QC_08','VC_08','QE_08','VE_08',
                                 'QA_09','VA_09','QC_09','VC_09','QE_09','VE_09','QA_10','VA_10','QC_10','VC_10','QE_10','VE_10',
                                 'QA_11','VA_11','QC_11','VC_11','QE_11','VE_11','QA_12','VA_12','QC_12','VC_12','QE_12','VE_12',
                                 'LakeFract','SurfArea','RAreaHLoad','RPUID','VPUID','Enabled','Shape_Leng','WBAREACOMI',
                                 'Divergence','ArbolateSu','ELEVFIXED','TotDASqKM','AreaSqKM','LAT_FINAL',
                                 'LONG_FINAL'],axis=1)

noduplicates = noduplicates[['Dam_Name','ShortID', 'NID', 'GRAND_ID', 'IsSite', 'IsUSBR', 'IsUSACE', 'IsGRanD', 'State', 'OwnerTypes', 
        'PrimaryPur', 'PrimDamTyp', 'Reservoir', 'Year_First', 'Year_Last', 'Owner', 'RES_SED_No', 'CapOrig_m3', 
        'CapNew_m3', 'site_DA_km', 'IsRiverMth', 'delta', 'IsLock', 'yr_p','Capm3_p', 'USBRname', 'yrc', 'yrr', 'yrc_source',
        'OCapm3_Rem', 'Batch_for', 'NIDStor_m3', 'GRanDCapm3', 'MaxStor_m3', 'StorSource', 'Dam_Len_m', 
        'SA_m2', 'DA_km2', 'MaxQ_m3s', 'DamH_m', 'elev_m', 'NrX_Final', 'NrY_Final', 'COMID', 'LENGTHKM', 'FCODE', 'Hydroseq', 
        'Pathlength', 'DnHydroseq', 'DivDASqKM', 'SLOPE', 'QA_MA', 'VA_MA', 'QC_MA', 'VC_MA', 'QE_MA', 'VE_MA', 'Country_ou', 
        'WBCOMID','TerminalPa','Moved']]

#convert cfs to cms and ft/s to m/s
noduplicates['QA_MA'] = noduplicates['QA_MA'] * 0.0283168
noduplicates['QC_MA'] = noduplicates['QC_MA'] * 0.0283168
noduplicates['QE_MA'] = noduplicates['QE_MA'] * 0.0283168
noduplicates['VA_MA'] = noduplicates['VA_MA'] * 0.3048
noduplicates['VC_MA'] = noduplicates['VC_MA'] * 0.3048
noduplicates['VE_MA'] = noduplicates['VE_MA'] * 0.3048


KeyError: "['FID_NID_fi', 'Field1', 'FID_NHDFlo', 'FDATE', 'RESOLUTION', 'GNIS_ID', 'GNIS_NAME', 'REACHCODE', 'FLOWDIR', 'FTYPE', 'StreamLeve', 'StreamOrde', 'StreamCalc', 'FromNode', 'ToNode', 'LevelPathI', 'StartFlag', 'TerminalFl', 'DnLevel', 'UpLevelPat', 'UpHydroseq', 'DnLevelPat', 'DnMinorHyd', 'DnDrainCou', 'FromMeas', 'ToMeas', 'RtnDiv', 'VPUIn', 'VPUOut', 'Tidal', 'TOTMA', 'WBAreaType', 'PathTimeMA', 'HWNodeSqKM', 'MAXELEVRAW', 'MINELEVRAW', 'MAXELEVSMO', 'MINELEVSMO', 'HWTYPE', 'SLOPELENKM', 'QA_01', 'VA_01', 'QC_01', 'VC_01', 'QE_01', 'VE_01', 'QA_02', 'VA_02', 'QC_02', 'VC_02', 'QE_02', 'VE_02', 'QA_03', 'VA_03', 'QC_03', 'VC_03', 'QE_03', 'VE_03', 'QA_04', 'VA_04', 'QC_04', 'VC_04', 'QE_04', 'VE_04', 'QA_05', 'VA_05', 'QC_05', 'VC_05', 'QE_05', 'VE_05', 'QA_06', 'VA_06', 'QC_06', 'VC_06', 'QE_06', 'VE_06', 'QA_07', 'VA_07', 'QC_07', 'VC_07', 'QE_07', 'VE_07', 'QA_08', 'VA_08', 'QC_08', 'VC_08', 'QE_08', 'VE_08', 'QA_09', 'VA_09', 'QC_09', 'VC_09', 'QE_09', 'VE_09', 'QA_10', 'VA_10', 'QC_10', 'VC_10', 'QE_10', 'VE_10', 'QA_11', 'VA_11', 'QC_11', 'VC_11', 'QE_11', 'VE_11', 'QA_12', 'VA_12', 'QC_12', 'VC_12', 'QE_12', 'VE_12', 'LakeFract', 'SurfArea', 'RAreaHLoad', 'RPUID', 'VPUID', 'Enabled', 'Shape_Leng', 'WBAREACOMI', 'Divergence', 'ArbolateSu', 'ELEVFIXED', 'TotDASqKM', 'AreaSqKM', 'LAT_FINAL', 'LONG_FINAL'] not found in axis"

In [51]:
noduplicates = noduplicates[['Dam_Name','ShortID', 'NID', 'GRAND_ID', 'IsSite', 'IsUSBR', 'IsUSACE', 'IsGRanD', 'State', 'OwnerTypes', 
        'PrimaryPur', 'PrimDamTyp', 'Reservoir', 'Year_First', 'Year_Last', 'Owner', 'RES_SED_No', 'CapOrig_m3', 
        'CapNew_m3', 'site_DA_km', 'IsRiverMth', 'delta', 'IsLock', 'yr_p','Capm3_p', 'USBRname', 'yrc', 'yrr', 'yrc_source',
        'OCapm3_Rem', 'Batch_for', 'NIDStor_m3', 'GRanDCapm3', 'MaxStor_m3', 'StorSource', 'Dam_Len_m', 
        'SA_m2', 'DA_km2', 'MaxQ_m3s', 'DamH_m', 'elev_m', 'NrX_Final', 'NrY_Final', 'COMID', 'LENGTHKM', 'FCODE', 'Hydroseq', 
        'Pathlength', 'DnHydroseq', 'DivDASqKM', 'SLOPE', 'QA_MA', 'VA_MA', 'QC_MA', 'VC_MA', 'QE_MA', 'VE_MA', 'Country_ou', 
        'WBCOMID','TerminalPa','Moved']]

#convert cfs to cms and ft/s to m/s
noduplicates['QA_MA'] = noduplicates['QA_MA'] * 0.0283168
noduplicates['QC_MA'] = noduplicates['QC_MA'] * 0.0283168
noduplicates['QE_MA'] = noduplicates['QE_MA'] * 0.0283168
noduplicates['VA_MA'] = noduplicates['VA_MA'] * 0.3048
noduplicates['VC_MA'] = noduplicates['VC_MA'] * 0.3048
noduplicates['VE_MA'] = noduplicates['VE_MA'] * 0.3048

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  noduplicates['QA_MA'] = noduplicates['QA_MA'] * 0.0283168
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  noduplicates['QC_MA'] = noduplicates['QC_MA'] * 0.0283168
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  noduplicates['QE_MA'] = noduplicates['QE_MA'] * 0.0283168
A value is trying to be set on 

In [52]:
#D50 Data
D50 = pd.read_csv(r"E:\ResSed\MediumResolution_DamLinkages\Manuscript\FinalInputFiles\NHDPlus_MediumResolution_D50.csv",header=0)

NID_D50 = pd.merge(noduplicates, D50, on='COMID', how='left')
NID_D50 = NID_D50.drop(['OID_','StreamOrde','TotDASqKM'],axis=1)

print('Size after joining D50:',NID_D50.shape)

NID_D50.to_csv(os.path.join(out_folder,'NID_filtered_snapped_nodupl_D50.csv'),index = False)

Size after joining D50: (60033, 62)


In [None]:
# damnet = pd.read_csv(r"E:\ResSed\MediumResolution_DamLinkages\Manuscript\Outputs_Nov\NIDsMappedInBasins_MedRes_110424.csv")
# sites = pd.read_csv(r"E:\ResSed\MediumResolution_DamLinkages\Manuscript\FinalInputFiles\sites.csv")
# test = pd.merge(damnet,sites[['ShortID','elev_ft','Year_First','Year_Last','RES_SED_No','Owner','CapOrig_m3','CapNew_m3','DA_km','yr_p','Capm3_p','USBRname']],how='left',on='ShortID')

# test = test.rename(columns={'DA_km':'site_DA_km'})
# test['elev_m'] = test['elev_ft']*.3048
# test = test.drop(['elev_ft'],axis=1)

In [None]:
# locks = pd.read_csv(r"T:\Jobs\DO\_NonFeature\WaterSMART\2020-PRG-ReservoirSedimentationEquations\DATA\GIS\LockDamsStorage_export102924.csv")

# test = pd.merge(test,locks[['NID','PermStorag']],how='left',on='NID')

# test.to_csv(r'E:\ResSed\MediumResolution_DamLinkages\Manuscript\Outputs_Nov\NIDsMappedInBasins_MedRes_110424_locks.csv')