# Create WaDE Uploader csv Files for Overlay Data
- Date Updated: 10/29/2024
- Purpose: To create WaDE data input files for overlay information.

In [1]:
# Needed Libraries / Modules
import sys
import os
import numpy as np
import pandas as pd

## Custom Libraries
sys.path.append("../../5_CustomFunctions/MappingFunctions")
import CreateReportingUnitsFile
import CreateOverlaysFile
import CreateOverlayReportingUnits_factFile
import RemoveUnusedRecordsFile

## Input Data
 - create and add variables specific to this project here

In [2]:
# ---- working directory ----
# The process working directory is switched to this folder, so the relative
# path in mainInputFile below is resolved against it when the file is read.
workingDirString = "G:/Shared drives/WaDE Data/WaDE Data Folder/Arizona/Overlays" # set working directory folder string here
os.chdir(workingDirString)
# Plain string literal: there is nothing to interpolate, so no f-prefix is needed.
print('The working Directory is:', workingDirString)

# ---- Inputs ----
# These values are passed to the Create*File helper functions in later cells.
varST = "AZ" # source organization or state abbreviation
varUUIDType = "ov" # UUID data type abbreviation
mainInputFile = "RawinputData/Pov_Main.zip" # use processed zip file

The working Directory is: G:/Shared drives/WaDE Data/WaDE Data Folder/Arizona/Overlays


In [3]:
# ---- Read in mainInputFile csv file ----
# Load the zipped csv into a dataframe, then report the deep memory footprint
# (object payloads included) of the freshly loaded frame in megabytes.
df = pd.read_csv(mainInputFile, compression='zip')
rawMemoryMB = df.memory_usage(deep=True).sum() / 1024**2
print(f"Total memory: {rawMemoryMB:.2f} MB")

Total memory: 0.01 MB


In [4]:
# ---- Optimize data types of mainInputFile csv file ----
def optimize_dataframe(df):
    """Reduce a DataFrame's memory footprint by narrowing column dtypes.

    Conversions applied, in this order:
      * integer columns: downcast to the smallest unsigned integer type when
        every value is >= 0, otherwise to the smallest signed integer type;
      * float columns: downcast with ``pd.to_numeric(..., downcast='float')``;
      * object columns: converted to ``category`` when fewer than half of the
        rows hold distinct values (``nunique / row count < 0.5``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to optimize. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenient reassignment.
    """
    for col in df.select_dtypes(include='int'):
        if (df[col] >= 0).all():
            df[col] = pd.to_numeric(df[col], downcast='unsigned')
        else:
            df[col] = pd.to_numeric(df[col], downcast='integer')

    for col in df.select_dtypes(include='float'):
        df[col] = pd.to_numeric(df[col], downcast='float')

    # The row count is the same for every column, so compute it once.
    num_total_values = len(df)
    # An empty frame has no cardinality ratio to evaluate; skipping the loop
    # avoids a ZeroDivisionError (0 unique values / 0 rows).
    if num_total_values:
        for col in df.select_dtypes(include='object'):
            num_unique_values = df[col].nunique()
            if num_unique_values / num_total_values < 0.5:
                df[col] = df[col].astype('category')

    return df

# Narrow the column dtypes, then report the deep memory footprint again so it
# can be compared with the figure printed right after loading.
df = optimize_dataframe(df)
optimizedMemoryMB = df.memory_usage(deep=True).sum() / 1024**2
print(f"Total memory: {optimizedMemoryMB:.2f} MB")

Total memory: 0.00 MB


## Create Overlay Input Files

In [5]:
# ---- Date (date.csv)----
# Simple info, create by hand and save in ProcessedInputData folder.

In [6]:
# ---- Organization (organizations.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [7]:
# ---- Reporting Unit Areas (reportingunits.csv) ----
# Prints a step banner, then passes the working directory, the state
# abbreviation, the UUID type abbreviation and the dtype-optimized input
# dataframe to the project helper.
# Per the recorded run log, the helper populates the reporting-unit fields,
# purges bad inputs, assigns ReportingUnitUUID and exports the dataframe.
# NOTE(review): the export location is decided inside the helper — presumably
# under workingDirString; confirm against CreateReportingUnitsFile.
print("Creating Reporting Unit input csv...")
print("############################################################################")
CreateReportingUnitsFile.CreateReportingUnitInputFunction(workingDirString, varST, varUUIDType, df)

Creating Reporting Unit input csv...
############################################################################
Setting inputs...
Populating dataframe...
EPSGCodeCV
ReportingUnitName
ReportingUnitNativeID
ReportingUnitProductVersion
ReportingUnitTypeCV
ReportingUnitUpdateDate
StateCV
Geometry
Adding Data Assessment UUID
Resetting Index
GroupBy outdf duplicates based on key fields...
Error checking each field. Purging bad inputs.
Length of outdf DataFrame:  8
Length of dfpurge DataFrame:  0
Assign ReportingUnitUUID
Exporting dataframe...
Done


In [8]:
# ---- Overlays (overlays.csv) ----
# Prints a step banner, then passes the working directory, the state
# abbreviation, the UUID type abbreviation and the dtype-optimized input
# dataframe to the project helper.
# Per the recorded run log, the helper populates the overlay fields, joins
# duplicates on key fields, purges bad inputs, assigns OverlayUUID and exports.
# NOTE(review): the recorded run kept 8 rows and moved 120 rows to dfpurge —
# confirm that this purge volume is expected for this dataset.
print("Creating Overlays input csv...")
print("############################################################################")
CreateOverlaysFile.CreateOverlaysInputFunction(workingDirString, varST, varUUIDType, df)

Creating Overlays input csv...
############################################################################
Setting inputs...
Populating dataframe outdf...
OversightAgency
OverlayDescription
OverlayName
OverlayNativeID
OverlayStatusCV
Statute
StatuteLink
StatutoryEffectiveDate
StatutoryEndDate
OverlayTypeCV
WaterSourceTypeCV
Adding Data Assessment UUID
Resetting Index
Joining outdf duplicates based on key fields...
Solving WaDE 2.0 upload issues
Error checking each field. Purging bad inputs.
Length of outdf DataFrame:  8
Length of dfpurge DataFrame:  120
Assign OverlayUUID
Exporting dataframe...
... 120  records removed.


  outdf = outdf.groupby(groupbyList).agg(lambda x: ','.join([str(elem) for elem in (list(set(x))) if elem!=''])).replace(np.nan, "").reset_index()
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)
  dfy = pd.concat([dfy, mask]).reset_index(drop=True)


Done


In [9]:
# ---- Overlay Reporting Units (overlayreportingunits.csv) ----
# Prints a step banner, then calls the project helper. Unlike the two
# preceding Create* calls, this one receives only the working directory and
# the input dataframe (no state abbreviation or UUID type).
# Per the recorded run log, the helper populates DataPublicationDate,
# OrganizationUUID, OverlayUUID and ReportingUnitUUID, purges bad inputs and
# exports the dataframe.
# NOTE(review): presumably relies on the files produced by the earlier
# reporting-unit and overlay cells — confirm against the helper's source.
print("Creating Overlay Reporting Units input csv...")
print("############################################################################")
CreateOverlayReportingUnits_factFile.CreateOverlayReportingUnitsInputFunction(workingDirString, df)

Creating Overlay Reporting Units input csv...
############################################################################
Setting inputs...
Populating dataframe outdf...
DataPublicationDate
OrganizationUUID
OverlayUUID
ReportingUnitUUID
Resetting Index
Solving WaDE 2.0 upload issues
Error checking each field. Purging bad inputs.
Length of outdf DataFrame:  8
Length of dfpurge DataFrame:  0
Exporting dataframe...
Done


In [10]:
# ---- Remove unused records ----
# Prints a step banner, then calls the project clean-up helper with only the
# working directory; the helper does not receive the in-memory dataframe.
# Per the recorded run log, it reads the input csv files, reports row counts
# before and after removal, and exports reportingunits.csv,
# overlayreportingunits.csv, overlays.csv and the matching *_missing.csv files.
print("Remove unused Reporting Unit records not found within Overlays input csv...")
print("############################################################################")
RemoveUnusedRecordsFile.RemoveUnusedOverlaysRecordsFileFunction(workingDirString)

Remove unused Reporting Unit records not found within Overlays input csv...
############################################################################
Reading input csv...
Length of dfru before removing reporting units:  8
Length of dfru after removing reporting units:  8
Length of dfrru before removing reporting units:  8
Length of dfrru after removing reporting units:  8
Export Files - reportingunits.csv, reportingunits_missing.csv, overlayreportingunits.csv, overlayreportingunits_missing.csv, overlays.csv
Done
