In [3]:
# import packages
import pandas as pd
import numpy as np

# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None

In [7]:
# Read the Reclamation (USBR), USACE, iColdCap and site tables from disk, then
# drop unused columns and rename headers so the tables share column names.
USBR = pd.read_csv("E:/ResSed/MediumResolution_DamLinkages/Reclamation/ReclamationPrediction.csv")
USACE = pd.read_csv("E:/ResSed/MediumResolution_DamLinkages/USACE/USACEPrediction.csv")
icold = pd.read_csv("E:/ResSed/MediumResolution_DamLinkages/iColdCap.csv")
sites = pd.read_csv('E:/ResSed/MediumResolution_DamLinkages/NewSites2023/AllSites24_snapMedRes_RvrMth_Oct2024.csv')
# Earlier site file, kept for reference:
# sites2 = pd.read_csv('D:/ResSed/MediumResolution_DamLinkages/NewSites2023/AllSites23_snapMedRes_RvrMth_June2024.csv')

# USBR: rename only (the Lat/Long drop below is intentionally disabled).
# USBR = USBR.drop(['Lat','Long'],axis = 1)
USBR = USBR.rename(columns={
    'yrc': 'Year_Compl_site',
    'yrr': 'YrRemoved',
    'GID': 'GRanD_ID',
    'SnapX': 'Long',
    'SnapY': 'Lat',
    'IsGRanD': 'SiteIsGRanD',
    'NIDname': 'Reservoir',
    'CapM3_p': 'Capm3_p',
    'SID': 'ShortID',
})

# Sites: remove LockDam, then align the two headers that differ from the federal tables.
sites = (
    sites
    .drop(columns=['LockDam'])
    .rename(columns={'Year_Compl': 'Year_Compl_site', 'SiteIsGRD': 'SiteIsGRanD'})
)

# USACE: remove the raw coordinate/navigation columns, then rename.
# NOTE(review): this map uses 'IsGranD' while the USBR map uses 'IsGRanD'.
# rename() silently skips keys that are not present, so confirm that each
# spelling matches the header in its CSV.
USACE = (
    USACE
    .drop(columns=['LATITUDE', 'LONGITUDE', 'NAVIGATION'])
    .rename(columns={
        'PROJECT_ID': 'USACE_PROJECT_ID',
        'yrc': 'Year_Compl_site',
        'yrr': 'YrRemoved',
        'GID': 'GRanD_ID',
        'SnapX': 'Long',
        'SnapY': 'Lat',
        'IsGranD': 'SiteIsGRanD',
        'NIDname': 'Reservoir',
        'NID_ID': 'NID',
        'SID': 'ShortID',
    })
)


In [8]:
# Combine the site list with the USBR, USACE and iCOLD tables into `sites_fed`.

def merge_coalesce(left, right, key):
    """Outer-join `left` and `right` on `key` and collapse the columns they share.

    Parameters
    ----------
    left, right : pandas.DataFrame
        Tables to combine. Both must contain the column `key`.
    key : str
        Name of the join column.

    Returns
    -------
    pandas.DataFrame
        One row per key found in either table (outer join). For every
        non-key column present in both inputs, a single column is kept under
        the original name: the value from `left` where it is non-null,
        otherwise the value from `right`. Column order is `left`'s columns
        followed by the columns that exist only in `right`.
    """
    # Only columns that really exist in both inputs receive merge suffixes, so
    # work from that list instead of guessing from names ending in '_x'/'_y'
    # (a column whose real name ends that way must not be touched).
    shared = [col for col in left.columns if col in right.columns and col != key]

    merged = pd.merge(left, right, on=key, how='outer', suffixes=('_x', '_y'))

    for col in shared:
        # Prefer the left-hand value; fall back to the right-hand one when null.
        merged[col + '_x'] = merged[col + '_x'].combine_first(merged[col + '_y'])

    merged = merged.drop(columns=[col + '_y' for col in shared])
    return merged.rename(columns={col + '_x': col for col in shared})


# Match sites to USBR; the outer join keeps ShortIDs that are not in the site list.
sites_fed = merge_coalesce(sites, USBR, 'ShortID')

# Match the combined table to USACE, again keeping unmatched ShortIDs.
sites_fed = merge_coalesce(sites_fed, USACE, 'ShortID')

# Match the combined table to iCOLD on NID, keeping unmatched rows from both sides.
# NOTE(review): pandas matches null join keys to each other. If both
# `sites_fed` and `icold` contain rows with a missing NID, this outer join will
# pair every such row with every other one - confirm against the data.
sites_fed = merge_coalesce(sites_fed, icold, 'NID')

# sites_fed.head()


In [9]:
# Clean the combined table: fill missing flag values, normalise sentinel values,
# label rows that come only from iCOLD, and report how many rows each source has.
#
# Columns are addressed with [] rather than attribute access throughout:
# `df.name = value` only updates a column that already exists; for a new name
# pandas sets a plain Python attribute and the data never reaches the table.

# Replace NaN with 0 in these columns.
for col in ['IsSite', 'IsUSBR', 'IsUSACE', 'delta', 'IsRiverMth']:
    sites_fed[col] = sites_fed[col].fillna(0)

# IsLock takes its values from IsWasLock (NaN -> 0). Bracket assignment creates
# the column if it is not already present, so the values survive the drop of
# IsWasLock below.
sites_fed['IsLock'] = sites_fed['IsWasLock'].fillna(0)

# Non-numeric elevation entries become NaN.
sites_fed['elev_ft'] = pd.to_numeric(sites_fed['elev_ft'], errors='coerce')

# Replace the -999 placeholder with NaN.
for col in ['elev_ft', 'Capm3_p', 'CapAF_p']:
    sites_fed[col] = sites_fed[col].replace(-999, np.nan)

# Rows that are neither a site, USBR nor USACE are labelled as iCOLD.
only_icold = (
    (sites_fed['IsSite'] == 0)
    & (sites_fed['IsUSBR'] == 0)
    & (sites_fed['IsUSACE'] == 0)
)
sites_fed.loc[only_icold, 'Batch_for'] = 'iCOLD'

sites_fed = sites_fed.drop(columns=['GIDnum', 'IsWasLock', 'Field'])

# Each line prints the (rows, columns) shape of the matching subset.
print('Number of USBR in database:', sites_fed.loc[sites_fed['IsUSBR'] == 1].shape)
print('Number of USACE in database:', sites_fed.loc[sites_fed['IsUSACE'] == 1].shape)
print('Number of sites in database:', sites_fed.loc[sites_fed['IsSite'] == 1].shape)

Number of USBR in database: (257, 31)
Number of USACE in database: (465, 31)
Number of sites in database: (1066, 31)


In [10]:
# Write the combined table to disk as CSV. No `index` argument is passed, so
# pandas' default applies and the row index is written as the first column.
sites_fed.to_csv(path_or_buf='E:/ResSed/MediumResolution_DamLinkages/sites_fed.csv')

# Optional exports of the individual input tables (disabled):
# sites.to_csv('D:/ResSed/MediumResolution_DamLinkages/NewSites2023/AllSites23_snapMedRes_RvrMth_July2024.csv')
# USACE.to_csv('D:/ResSed/MediumResolution_DamLinkages/USACE/USACE_Prediction_AAH.csv')
# USBR.to_csv('D:/ResSed/MediumResolution_DamLinkages/Reclamation/Reclamation_Prediction_AAH.csv')