# Create WaDE Uploader csv Files for Water Supply Site Time Series data for WaDE
- Last Updated: 01/19/2024
- Purpose: To create necessary processed WaDE Uploader csv files for reservoir and observation site information.

In [None]:
# Needed Libraries / Modules
import os
import sys

import pandas as pd

## Custom Libraries
sys.path.append("../../5_CustomFunctions/MappingFunctions")
import CreateWaterSourcesFile
import CreateSitesFile
import CreateSiteSpecificAmounts_factFile
import RemoveUnusedRecordsFile

## Input Data (make changes here)
 - Create and add variables specific to this project here.

In [None]:
# ---- working directory ----
# Project folder for this run. The process changes into it, and the same string
# is handed to the file-creation functions further down, so edit it to match
# the project being processed.
workingDirString = "G:/Shared drives/WaDE Data/WaDE Data Folder/California/WaterSupply_SiteSpecific"  # set working directory folder string here
os.chdir(workingDirString)  # fails fast (FileNotFoundError) if the folder does not exist
print(f'The working Directory is: {workingDirString}')

# ---- Inputs ----
# Project-specific settings consumed by the cells below; change them per project.
varST = "CA"  # source organization or state abbreviation
varUUIDType = "wsss"  # UUID data type abbreviation
mainInputFile = "Pwsss_caMain.zip"  # use processed zip file

In [None]:
# ---- Load the zipped mainInputFile csv into a DataFrame ----
df = pd.read_csv(mainInputFile, compression='zip')

# Report the in-memory size (deep=True also counts the contents of object columns).
rawBytes = df.memory_usage(deep=True).sum()
print(f"Total memory: {rawBytes / 1024**2:.2f} MB")

In [None]:
# ---- Optimize data types of mainInputFile csv file ----
def optimize_dataframe(df):
    """Reduce a DataFrame's memory use by narrowing its column dtypes.

    The frame is modified in place and also returned:

    * integer columns are downcast to the smallest unsigned type when every
      value is non-negative, otherwise to the smallest signed type;
    * float columns are downcast via ``pd.to_numeric(..., downcast='float')``;
    * object columns whose number of unique values is less than half the
      number of rows are converted to ``category``.

    Args:
        df: pandas DataFrame to optimise (mutated in place).

    Returns:
        The same DataFrame object that was passed in.
    """
    for col in df.select_dtypes(include='int'):
        # Unsigned types give more headroom per byte, but only fit non-negative data.
        target = 'unsigned' if (df[col] >= 0).all() else 'integer'
        df[col] = pd.to_numeric(df[col], downcast=target)

    for col in df.select_dtypes(include='float'):
        df[col] = pd.to_numeric(df[col], downcast='float')

    num_total_values = len(df)
    # With zero rows the unique-value ratio is undefined (division by zero),
    # so object columns are left untouched in that case.
    if num_total_values:
        for col in df.select_dtypes(include='object'):
            if df[col].nunique() / num_total_values < 0.5:
                df[col] = df[col].astype('category')

    return df

# Narrow the column dtypes of the loaded frame, then report its new in-memory size.
df = optimize_dataframe(df)
optimizedBytes = df.memory_usage(deep=True).sum()
print(f"Total memory: {optimizedBytes / 1024**2:.2f} MB")

## Create CSV Input Files

In [None]:
# ---- Method (methods.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [None]:
# ---- Variable (variables.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [None]:
# ---- Organization (organizations.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [None]:
# ---- Water Sources (watersources.csv) ----
# Announce the step, then hand the working directory, organization abbreviation,
# UUID type and main DataFrame to the project's water-sources helper.
print(
    "Creating Water Sources input csv...",
    "############################################################################",
    sep="\n",
)
CreateWaterSourcesFile.CreateWaterSourcesInputFunction(
    workingDirString,
    varST,
    varUUIDType,
    df,
)

In [None]:
# ---- Sites (sites.csv) ----
# Announce the step, then hand the working directory, organization abbreviation,
# UUID type and main DataFrame to the project's sites helper.
print(
    "Creating Sites input csv...",
    "############################################################################",
    sep="\n",
)
CreateSitesFile.CreateSitesInputFunction(
    workingDirString,
    varST,
    varUUIDType,
    df,
)

In [None]:
# ---- Site Specific Amounts (sitespecificamounts.csv) ----
# Announce the step, then hand the working directory and main DataFrame to the
# project's site-specific-amounts helper.
print(
    "Creating Site Specific Amounts input csv...",
    "############################################################################",
    sep="\n",
)
CreateSiteSpecificAmounts_factFile.CreateSiteSpecificAmounts_factsInputFunction(
    workingDirString,
    df,
)

In [None]:
# ---- Remove unused records ----
# Announce the step, then run the project's clean-up helper against the working directory.
print(
    "Remove unused Water Sources and Sites records not found within sitespecificamountsinput.csv...",
    "############################################################################",
    sep="\n",
)
RemoveUnusedRecordsFile.RemoveUnusedSiteSpecificAmountsRecordsFileFunction(
    workingDirString,
)