# Create WaDE Uploader csv Files for Water Right Data
- Last Updated: 07/21/2025
- Purpose: To create necessary processed WaDE Uploader csv files for water right information.

In [1]:
# Needed Libraries / Modules
import sys
import os
import numpy as np
import pandas as pd

## Custom Libraries
sys.path.append("../../5_CustomFunctions/MappingFunctions")
import CreateWaterSourcesFile
import CreateSitesFile
import CreateAllocationsAmounts_factsFile
import RemoveUnusedRecordsFile
import CreatePODSiteToPOUSiteRelationshipsFile
import JoinOverlayToSiteFile
import AddCountyHUC8HUC12File

## Input Data (make changes here)
 - create and add variables specific to the project here

In [2]:
# ---- Working directory ----
# Relative paths used further down (e.g. mainInputFile) resolve against this folder.
workingDirString = "G:/Shared drives/WaDE Data/WaDE Data Folder/Washington/WaterAllocation"  # edit to point at the project folder
os.chdir(workingDirString)
print('The working Directory is:', workingDirString)

# ---- Inputs ----
varST = "WA"  # state (or source organization) abbreviation
varUUIDType = "wr"  # abbreviation of the UUID data type
mainInputFile = "RawinputData/Pwr_Main.zip"  # zipped, already-processed input csv

The working Directory is: G:/Shared drives/WaDE Data/WaDE Data Folder/Washington/WaterAllocation


In [3]:
# ---- Load the zipped main input csv ----
# Missing values are swapped for empty strings immediately after parsing.
# NOTE(review): the saved output shows pandas emitting a warning on this
# statement; if it is a DtypeWarning (mixed-type columns), consider passing an
# explicit dtype= mapping or low_memory=False to read_csv -- TODO confirm.
df = (
    pd.read_csv(mainInputFile, compression="zip")
    .replace(np.nan, "")
)
totalMemoryMB = df.memory_usage(deep=True).sum() / 1024**2
print(f"Total memory: {totalMemoryMB:.2f} MB")

  df = pd.read_csv(mainInputFile, compression='zip').replace(np.nan, "")


Total memory: 1022.98 MB


In [4]:
# ---- Optimize data types of mainInputFile csv file ----
def optimize_dataframe(df, category_threshold=0.5):
    """Reduce a DataFrame's memory footprint by narrowing column dtypes.

    The frame is modified in place (columns are reassigned on ``df`` itself)
    and the same object is returned, so ``df = optimize_dataframe(df)`` works.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be narrowed.
    category_threshold : float, default 0.5
        An object column is converted to ``category`` when its ratio of
        distinct values to rows is strictly below this value.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with narrowed dtypes.
    """
    # Integer columns: use an unsigned type when no value is negative,
    # otherwise the smallest signed type that fits.
    for col in df.select_dtypes(include='int'):
        if (df[col] >= 0).all():
            df[col] = pd.to_numeric(df[col], downcast='unsigned')
        else:
            df[col] = pd.to_numeric(df[col], downcast='integer')

    # NOTE(review): downcast='float' may narrow float64 to float32 (about 7
    # significant digits) -- confirm that is acceptable for any coordinate or
    # amount columns stored as floats.
    for col in df.select_dtypes(include='float'):
        df[col] = pd.to_numeric(df[col], downcast='float')

    # A zero-row frame has no meaningful unique/total ratio; skipping the
    # category pass avoids a ZeroDivisionError.
    num_total_values = len(df)
    if num_total_values == 0:
        return df

    # Object columns dominated by repeated values are cheaper as categoricals.
    for col in df.select_dtypes(include='object'):
        num_unique_values = df[col].nunique()
        if num_unique_values / num_total_values < category_threshold:
            df[col] = df[col].astype('category')

    return df

# Narrow the dtypes, then report the new footprint so it can be compared
# with the figure printed right after loading.
df = optimize_dataframe(df)
optimizedMemoryMB = df.memory_usage(deep=True).sum() / 1024**2
print(f"Total memory: {optimizedMemoryMB:.2f} MB")

Total memory: 135.07 MB


## Create Water Right Input Files

In [5]:
# ---- Method (methods.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [6]:
# ---- Variable (variables.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [7]:
# ---- Organization (organizations.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [8]:
# ---- Water Sources (watersources.csv) ----
# Print the step banner, then pass the working directory, state code,
# UUID type and the loaded dataframe to the project's water-source helper.
print(
    "Creating Water Sources input csv...",
    "############################################################################",
    sep="\n",
)
CreateWaterSourcesFile.CreateWaterSourcesInputFunction(
    workingDirString, varST, varUUIDType, df
)

Creating Water Sources input csv...
############################################################################
Setting inputs...
Populating dataframe...
Geometry
GNISFeatureNameCV
WaterQualityIndicatorCV
WaterSourceName
WaterSourceNativeID
WaterSourceTypeCV
Adding Data Assessment UUID
Resetting Index
GroupBy outdf duplicates based on key fields...


  outdf = outdf.groupby('WaterSourceNativeID').agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem != ""])).replace(np.nan, "").reset_index()


Error checking each field. Purging bad inputs.
Length of outdf DataFrame:  4
Length of dfpurge DataFrame:  0
Assign WaterSourceUUID
Cleaning export for correct data types...
Exporting dataframe...
Done


In [9]:
# ---- Sites (sites.csv) ----
# Print the step banner, then pass the working directory, state code,
# UUID type and the loaded dataframe to the project's sites helper.
print(
    "Creating Sites input csv...",
    "############################################################################",
    sep="\n",
)
CreateSitesFile.CreateSitesInputFunction(
    workingDirString, varST, varUUIDType, df
)

Creating Sites input csv...
############################################################################
Setting inputs...
Populating dataframe...
WaterSourceUUIDs
OverlayUUIDs
CoordinateAccuracy
CoordinateMethodCV
County
EPSGCodeCV
Geometry
GNISCodeCV
HUC12
HUC8
Latitude
Longitude
NHDNetworkStatusCV
NHDProductCV
PODorPOUSite
SiteName
SiteNativeID
SitePoint
SiteTypeCV
StateCV
USGSSiteID
Adding Data Assessment UUID
Resetting Index
GroupBy outdf duplicates based on key fields...
Error checking each field. Purging bad inputs.


  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)


Length of outdf DataFrame:  285205
Length of dfpurge DataFrame:  29
Assign SiteUUID
Cleaning export for correct data types...
Exporting dataframe...
... 29  records removed.
Done


In [10]:
# ---- Allocations Amounts (waterallocations.csv) ----
# Run after the Sites step: the logged output shows this helper reading
# ProcessedInputData/sites.csv.
print(
    "Creating AllocationsAmounts_facts input csv...",
    "############################################################################",
    sep="\n",
)
CreateAllocationsAmounts_factsFile.CreateAllocationsAmounts_factsInputFunction(
    workingDirString, varST, varUUIDType, df
)

Creating AllocationsAmounts_facts input csv...
############################################################################
Setting inputs...


  dfs = pd.read_csv("ProcessedInputData/sites.csv").replace(np.nan, "")


Populating dataframe outdf...
MethodUUID
OrganizationUUID
SiteUUID
VariableSpecificUUID
AllocationApplicationDate
AllocationAssociatedConsumptiveUseSiteIDs
AllocationAssociatedWithdrawalSiteIDs
AllocationBasisCV
AllocationChangeApplicationIndicator
AllocationCommunityWaterSupplySystem
AllocationCropDutyAmount
AllocationExpirationDate
AllocationFlow_CFS
AllocationLegalStatusCV
AllocationNativeID
AllocationOwner
AllocationPriorityDate
AllocationSDWISIdentifierCV
AllocationTimeframeEnd
AllocationTimeframeStart
AllocationTypeCV
AllocationVolume_AF
BeneficialUseCategory
CommunityWaterSupplySystem
CropTypeCV
CustomerTypeCV
DataPublicationDate
DataPublicationDOI
ExemptOfVolumeFlowPriority
GeneratedPowerCapacityMW
IrrigatedAcreage
IrrigationMethodCV
LegacyAllocationIDs
OwnerClassificationCV
PopulationServed
PowerType
PrimaryBeneficialUseCategory
WaterAllocationNativeURL
Adding Data Assessment UUID
Resetting Index
GroupBy outdf duplicates based on key fields...
Solving WaDE 2.0 upload issues
Er

  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask

Length of outdf DataFrame:  86867
Length of dfpurge DataFrame:  73261
Assign AllocationUUID
Cleaning export for correct data types...
Exporting dataframe...
... 73261  records removed.
Done


In [11]:
# ---- Remove unused records ----
# Only the working directory is passed; per the logged output the helper
# re-reads the csv files produced by the earlier steps and re-exports them.
print(
    "Remove unused Water Sources and Sites records not found within AllocationsAmounts_facts input csv...",
    "############################################################################",
    sep="\n",
)
RemoveUnusedRecordsFile.RemoveUnusedAllocationsAmountRecordsFileFunction(
    workingDirString
)

Remove unused Water Sources and Sites records not found within AllocationsAmounts_facts input csv...
############################################################################
Reading input csv...


  dfs = pd.read_csv("ProcessedInputData/sites.csv").replace(np.nan, "")


Length of dfs before removing sites:  285205
Length of dfs after removing sites:  151781
Length of dfws before removing water sources:  4
Length of dfws after removing water sources:  4
no sa data to work for these wrs
Export Files - watersource.csv, watersource_missing.csv, sites.csv, sites_missing.csv, waterallocations.csv, sitespecificamounts.csv
Done


In [12]:
# ---- Create POD and POU joins (podsitetopousiterelationships.csv) ----
# Only the working directory is passed; the helper reads its own input csv
# (see "Reading input csv..." in the logged output).
print(
    "Creating Pod site -to- Pou site relationships input csv...",
    "############################################################################",
    sep="\n",
)
CreatePODSiteToPOUSiteRelationshipsFile.PODToPOUWaterRightRelationshipsFunction(
    workingDirString
)

Creating Pod site -to- Pou site relationships input csv...
############################################################################
Reading input csv...
Populating dataframe...
Exporting dataframe outdf to csv...
Done


In [13]:
# ---- Join OverlayUUIDs for Sites (Sites.csv update) ----
# Run after sites.csv exists: the logged output shows the helper reading
# ProcessedInputData/sites.csv and checking for an Overlays directory.
print(
    "Joining OverlayUUIDs to Sites input csv if exists...",
    "############################################################################",
    sep="\n",
)
JoinOverlayToSiteFile.JoinOverlayToSiteFunction(
    workingDirString
)

Joining OverlayUUIDs to Sites input csv if exists...
############################################################################
Checking for available Overlays data / project...
- Overlays directory exists
Reading input csv(s)...


  dfs = pd.read_csv('ProcessedInputData/sites.csv')


- Reporting Unit Type(s) in ru: ['Watershed Administrative Units', 'Water Resource Inventory Areas']
For Reporting Unit Type = "Watershed Administrative Units"...
- Water Source Type(s) for Watershed Administrative Units: ['Surface Water']...
-- Extracting Water Source Type "Surface Water" from sites.csv
-- Selecting sites within reporting unit polygon
-- Setting OverlayUUIDs in sites.csv.
-- Concatenate updated sites.csv to existing file
For Reporting Unit Type = "Water Resource Inventory Areas"...
- Water Source Type(s) for Water Resource Inventory Areas: ['Surface Water']...
-- Extracting Water Source Type "Surface Water" from sites.csv
Done


In [14]:
# ---- Add missing County, HUC8, HUC12 information for Sites (Sites.csv update) ----
# Run after sites.csv exists: the logged output shows the helper reading
# ProcessedInputData/sites.csv before checking County, HUC8 and HUC12.
# The banner text below had misspellings ("misinsg", "mising"); corrected here.
print("Add missing County, HUC8, HUC12 information if missing...")
print("############################################################################")
AddCountyHUC8HUC12File.AddCountyHUC8HUC12Function(workingDirString)

Add misinsg County, HUC8, HUC12 information if mising...
############################################################################
Checking for available shapefiles data / project...
Reading input files(s)...


  dfs = pd.read_csv('ProcessedInputData/sites.csv')


Checking for missing County information....
Checking for missing HUC8 information....
Checking for missing HUC12 information....
Done
