# Create WaDE Uploader csv Files for Overlay Data
- Date Update: 10/29/2024
- Purpose: To create WaDE data input files for overlay information.

In [None]:
# Needed Libraries / Modules
import os
import sys

import pandas as pd

## Custom Libraries
sys.path.append("../../5_CustomFunctions/MappingFunctions")
import CreateReportingUnitsFile
import CreateOverlaysFile
import CreateOverlayReportingUnits_factFile
import RemoveUnusedRecordsFile

## Input Data
 - create and add variables specific to this project here

In [None]:
# ---- working directory ----
# Every relative path used below (e.g. mainInputFile) is resolved against this
# folder, because os.chdir switches the process working directory to it.
workingDirString = "G:/Shared drives/WaDE Data/WaDE Data Folder/Arizona/Overlays" # set working directory folder string here
os.chdir(workingDirString)
print(f'The working Directory is:', workingDirString)

# ---- Inputs ----
# These names are passed to the Create*/Remove* helper calls further down.
varST = "AZ" # source organization or state abbreviation
varUUIDType = "ov" # UUID data type abbreviation
mainInputFile = "RawinputData/Pov_Main.zip" # use processed zip file

In [None]:
# ---- Read in mainInputFile csv file ----
# mainInputFile is a zipped csv; compression='zip' has pandas decompress it on read.
df = pd.read_csv(mainInputFile, compression='zip')
# Report the in-memory size of the frame in MB (bytes / 1024**2); deep=True also
# counts the contents of object columns rather than only their pointers.
print(f"Total memory: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

In [None]:
# ---- Optimize data types of mainInputFile csv file ----
def optimize_dataframe(df):
    for col in df.select_dtypes(include='int'):
        if (df[col] >= 0).all():
            df[col] = pd.to_numeric(df[col], downcast='unsigned')
        else:
            df[col] = pd.to_numeric(df[col], downcast='integer')

    for col in df.select_dtypes(include='float'):
        df[col] = pd.to_numeric(df[col], downcast='float')

    for col in df.select_dtypes(include='object'):
        num_unique_values = df[col].nunique()
        num_total_values = len(df[col])
        if num_unique_values / num_total_values < 0.5:
            df[col] = df[col].astype('category')

    return df

# Narrow the column dtypes of the loaded frame, then print its memory use again
# so it can be compared with the figure printed right after the csv was read.
df = optimize_dataframe(df)
print(f"Total memory: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

## Create Overlay Input Files

In [None]:
# ---- Date (date.csv)----
# Simple info, create by hand and save in ProcessedInputData folder.

In [None]:
# ---- Organization (organizations.csv) ----
# Simple info, create by hand and save in ProcessedInputData folder.

In [None]:
# ---- Reporting Unit Areas (reportingunits.csv) ----
# Calls the helper from the CreateReportingUnitsFile module (imported from the
# MappingFunctions folder) with the working directory, state abbreviation,
# UUID type and the loaded DataFrame.
# NOTE(review): presumably the helper writes reportingunits.csv under the working
# directory -- its implementation is not visible here; confirm in that module.
print("Creating Reporting Unit input csv...")
print("############################################################################")
CreateReportingUnitsFile.CreateReportingUnitInputFunction(workingDirString, varST, varUUIDType, df)

In [None]:
# ---- Overlays (overlays.csv) ----
# Calls the helper from the CreateOverlaysFile module with the same four
# arguments as the reporting-unit step: working directory, state abbreviation,
# UUID type and the loaded DataFrame.
# NOTE(review): presumably the helper writes overlays.csv -- confirm in that module.
print("Creating Overlays input csv...")
print("############################################################################")
CreateOverlaysFile.CreateOverlaysInputFunction(workingDirString, varST, varUUIDType, df)

In [None]:
# ---- Overlay Reporting Units (overlayreportingunits.csv) ----
# Calls the helper from the CreateOverlayReportingUnits_factFile module; unlike
# the two steps above it takes only the working directory and the DataFrame.
# NOTE(review): presumably the helper writes overlayreportingunits.csv -- confirm
# in that module.
print("Creating Overlay Reporting Units input csv...")
print("############################################################################")
CreateOverlayReportingUnits_factFile.CreateOverlayReportingUnitsInputFunction(workingDirString, df)

In [None]:
# ---- Remove unused records ----
# Calls the cleanup helper from the RemoveUnusedRecordsFile module. It receives
# only the working directory, not the DataFrame.
# NOTE(review): presumably it works from the csv files produced by the steps
# above, so it should run after them -- confirm in that module.
print("Remove unused Reporting Unit records not found within Overlays input csv...")
print("############################################################################")
RemoveUnusedRecordsFile.RemoveUnusedOverlaysRecordsFileFunction(workingDirString)