# Create WaDE Uploader csv Files for Aggregated Area Timeseries Data
- Purpose: To create necessary processed WaDE Uploader csv files for aggregate area time series.

In [None]:
# Needed Libraries / Modules
import os
import sys

import pandas as pd

## Custom Libraries
sys.path.append("../../5_CustomFunctions/MappingFunctions")
import CreateWaterSourcesFile
import CreateReportingUnitsFile
import CreateAggregatedAmountsFile
import RemoveUnusedRecordsFile

## Input Data (make changes here)
 - create and add variables specific to this project here

In [None]:
# ---- working directory ----
# Project-specific value: change this path to the project folder before running.
# The assignment previously had its value commented out, which left the line
# without a right-hand side (a SyntaxError).
workingDirString = "G:/Shared drives/WaDE Data/WaDE Data Folder/Arizona/AggregatedAmounts"  # set working directory folder string here
os.chdir(workingDirString)  # relative paths used later in the notebook resolve against this folder
print(f"The working Directory is: {workingDirString}")

# ---- Inputs ----
# Project-specific values: change these for the organization being processed.
# varST and varUUIDType previously had their values commented out, which left
# the assignments without a right-hand side (a SyntaxError).
varST = "AZ"  # source organization or state abbreviation
varUUIDType = "ag"  # UUID data type abbreviation
mainInputFile = "RawinputData/Pag_Main.zip"  # use processed zip file

In [None]:
# ---- Load the zipped mainInputFile csv into a dataframe ----
df = pd.read_csv(mainInputFile, compression='zip')

# Report the deep memory footprint (object contents included) of the raw frame.
rawMemoryBytes = df.memory_usage(deep=True).sum()
print(f"Total memory: {rawMemoryBytes / 1024**2:.2f} MB")

In [None]:
# ---- Optimize data types of mainInputFile csv file ----
def optimize_dataframe(df):
    """Reduce a dataframe's memory use by narrowing column dtypes.

    Integer columns are downcast to the smallest unsigned type when every
    value is non-negative, otherwise to the smallest signed type. Float
    columns are downcast with ``downcast='float'``. Object columns whose
    ratio of distinct values to rows is below 0.5 are converted to
    ``category``.

    Args:
        df: The pandas DataFrame to optimize. It is modified in place.

    Returns:
        The same DataFrame object, with narrowed column dtypes.
    """
    for col in df.select_dtypes(include='int'):
        # Unsigned types are only safe when the column holds no negatives.
        downcast_kind = 'unsigned' if (df[col] >= 0).all() else 'integer'
        df[col] = pd.to_numeric(df[col], downcast=downcast_kind)

    for col in df.select_dtypes(include='float'):
        # NOTE(review): float downcasting can narrow to float32 and reduce
        # precision -- confirm this is acceptable for the amount columns.
        df[col] = pd.to_numeric(df[col], downcast='float')

    num_total_values = len(df)
    # A zero-row frame has no uniqueness ratio to compute; skipping avoids
    # dividing by zero and leaves object columns untouched.
    if num_total_values:
        for col in df.select_dtypes(include='object'):
            if df[col].nunique() / num_total_values < 0.5:
                df[col] = df[col].astype('category')

    return df

# Narrow the column dtypes, then report the deep memory footprint again for comparison.
df = optimize_dataframe(df)
optimizedMemoryBytes = df.memory_usage(deep=True).sum()
print(f"Total memory: {optimizedMemoryBytes / 1024**2:.2f} MB")

## Create Aggregated Area Timeseries Input Files

In [None]:
# ---- Method (methods.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [None]:
# ---- Variable (variables.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [None]:
# ---- Organization (organizations.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [None]:
# ---- Water Sources (watersources.csv) ----
# Print the step banner in one call, then hand off to the custom mapping function.
print(
    "Creating Water Sources input csv...",
    "############################################################################",
    sep="\n",
)
CreateWaterSourcesFile.CreateWaterSourcesInputFunction(
    workingDirString,
    varST,
    varUUIDType,
    df,
)

In [None]:
# ---- Reporting Unit (reportingunits.csv) ----
# Print the step banner in one call, then hand off to the custom mapping function.
print(
    "Creating Reporting Unit input csv...",
    "############################################################################",
    sep="\n",
)
CreateReportingUnitsFile.CreateReportingUnitInputFunction(
    workingDirString,
    varST,
    varUUIDType,
    df,
)

In [None]:
# ---- Aggregated Amounts (aggregatedamounts.csv) ----
# Print the step banner in one call, then hand off to the custom mapping function.
print(
    "Creating aggregatedamounts input csv...",
    "############################################################################",
    sep="\n",
)
CreateAggregatedAmountsFile.CreateAggregatedAmountsInputFunction(
    workingDirString,
    df,
)

In [None]:
# ---- Remove unused records ----
# Print the step banner in one call, then hand off to the custom clean-up function.
print(
    "Remove unused Water Sources and Reporting Unit records not found within Aggregated Amounts input csv...",
    "############################################################################",
    sep="\n",
)
RemoveUnusedRecordsFile.RemoveUnusedAggregatedAmountRecordsFileFunction(
    workingDirString,
)