# Pre-processing Colorado Allocation data for WaDEQA upload.
Date Updated: 04/13/2022
Purpose:  To pre-process the Colorado data into one master file for simple DataFrame creation and extraction

Notes:
asdf

In [None]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [None]:
# Working Directory
# The process working directory is switched to this folder, so the relative
# file names used below (the input CSV here and the CSV written at the end of
# the notebook) resolve against it.
workingDir = "G:/Shared drives/WaDE Data/Colorado/WaterAllocation/RawInputData"
os.chdir(workingDir)

# Input File
# Read with pandas' default parsing options into the DataFrame `df`, which
# every later cell mutates by adding or overwriting columns.
fileInput = "DWR_Water_Right_-_Net_Amounts_input.csv"
df = pd.read_csv(fileInput)
df.head(2)  # preview the first two rows

In [None]:
#Update datatype of Priority Date to fit WaDE 2.0 structure
# The source column is named 'Appropriation Date'; it is first parsed into
# pandas datetime values.
df['Appropriation Date'] = pd.to_datetime(df['Appropriation Date'])
# Format as an mm/dd/YYYY string and parse again: the string carries no
# time-of-day, so the result is date-only datetime values.
df['Appropriation Date'] = pd.to_datetime(df["Appropriation Date"].dt.strftime('%m/%d/%Y'))
df.head(2)  # preview the first two rows

In [None]:
#Creating Beneficial Use.
#Need to split CO abbreviation strings into a workable format.

# Lookup table from each single-character decreed-use code to the
# beneficial-use name written to the output.
BenUseDict = {
    "0": "Storage",
    "1": "Irrigation",
    "2": "Municipal",
    "3": "Commercial",
    "4": "Industrial",
    "5": "Recreation",
    "6": "Fishery",
    "7": "Fire",
    "8": "Domestic",
    "9": "Stock",
    "A": "Augmentation",
    "B": "Export from Basin",
    "C": "Cumulative Accretion to River",
    "D": "Cumulative Depletion from River",
    "E": "Evaporative",
    "F": "Federal Reserved",
    "G": "Geothermal",
    "H": "Household Use Only",
    "K": "Snow Making",
    "M": "Minimum Streamflow",
    "N": "Net Effect on River",
    "P": "Power Generation",
    "Q": "Other",
    "R": "Recharge",
    "S": "Export from State",
    "T": "Transmountain Export",
    "W": "Wildlife",
    "X": "All Beneficial Uses",
}


def retrieveBenUse(colrowValue):
    """Translate a string of one-character use codes into beneficial-use names.

    The value is converted to ``str`` and stripped; each remaining character
    is looked up in ``BenUseDict`` and the names are joined with ", " in
    their original order.

    Args:
        colrowValue: The raw cell value holding the concatenated codes.

    Returns:
        The comma-separated names, or "Unspecified" when the value is null,
        empty or whitespace-only, or when any character is not a key of
        ``BenUseDict``.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"

    codes = str(colrowValue).strip()
    if not codes:
        # Covers "" and whitespace-only cells, which would otherwise produce
        # an empty output string instead of the "Unspecified" placeholder.
        return "Unspecified"

    try:
        return ", ".join(BenUseDict[code] for code in codes)
    except KeyError:
        # One unknown code invalidates the whole entry.
        return "Unspecified"

# Row by row, pass the 'Decreed Uses' value to retrieveBenUse and store the
# result in the new 'in_WaDEBenUse' column.
df['in_WaDEBenUse'] = df.apply(lambda row: retrieveBenUse(row['Decreed Uses']), axis=1)
df.head(2)  # preview the first two rows

In [None]:
#Determining WaterSourceTypeCV

def determineWaterSourceTypeCV(colrowValue):
    """Classify a water-source label as groundwater or surface water.

    Args:
        colrowValue: The raw water-source cell value.

    Returns:
        "Unspecified" for null, empty or whitespace-only values,
        "Groundwater" when the text contains the marker "GROUNDWATER:",
        and "Surface Water" for any other text.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"

    label = str(colrowValue).strip()
    if not label:
        # A blank cell carries no source information, so it must not fall
        # through to the "Surface Water" default.
        return "Unspecified"

    return "Groundwater" if "GROUNDWATER:" in label else "Surface Water"

# Row by row, pass the 'Water Source' value to determineWaterSourceTypeCV and
# store the result in the new 'in_WaterSourceTypeCV' column.
df['in_WaterSourceTypeCV'] = df.apply(lambda row: determineWaterSourceTypeCV(row['Water Source']), axis=1)
df.head(2)  # preview the first two rows

In [None]:
#Determining WaterSourceName
#Want to remove the "GROUNDWATER: " from the name.

def determineWaterSourceName(colrowValue):
    """Return the water-source name with the "GROUNDWATER:" marker removed.

    The value is converted to ``str`` and stripped in every branch, so
    surface-water and groundwater names are cleaned the same way.

    Args:
        colrowValue: The raw water-source cell value.

    Returns:
        The cleaned name, or "Unspecified" when the value is null, blank,
        or consists of nothing but the marker.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"

    name = str(colrowValue).strip()
    if "GROUNDWATER:" in name:
        # Drop the marker together with its usual trailing space first, then
        # any occurrence that was written without the space.
        name = name.replace("GROUNDWATER: ", "").replace("GROUNDWATER:", "").strip()

    # An empty remainder means there is no usable name.
    return name or "Unspecified"

# Row by row, pass the 'Water Source' value to determineWaterSourceName and
# store the result in the new 'in_WaterSourceName' column.
df['in_WaterSourceName'] = df.apply(lambda row: determineWaterSourceName(row['Water Source']), axis=1)
df.head(2)  # preview the first two rows

In [None]:
#Filling in blank spots of GNIS ID for use of WaterSourceNativeID

def fillGNISID(colrowValue):
    """Return the GNIS ID as a stripped string, filling blanks with a placeholder.

    Args:
        colrowValue: The raw 'GNIS ID' cell value.

    Returns:
        ``str(colrowValue).strip()``, or "Unspecified" when the value is
        null, empty or whitespace-only.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"
    # `or` catches both "" and whitespace-only input, which strip to "".
    return str(colrowValue).strip() or "Unspecified"

# Overwrite 'GNIS ID' in place with the output of fillGNISID for each row.
df['GNIS ID'] = df.apply(lambda row: fillGNISID(row['GNIS ID']), axis=1)
df.head(2)  # preview the first two rows

In [None]:
# Allocation_CFS
# If Net Absolute != 0 and Net Conditional != 0, then return 0
# Elif Decreed Units = "C" and Net Absolute != 0, then return Net Absolute
# Elif Decreed Units = "C" and Net Conditional != 0, then return Net Conditional
# Else return 0

# For creating Allocation_CFS
def assignAllocation_CFS(valA, valB, valC):
    """Pick the flow amount for a record whose decreed unit is "C".

    Args:
        valA: Decreed-units code; compared to "C" after ``str`` and ``strip``.
        valB: Net absolute amount.
        valC: Net conditional amount.

    Returns:
        0 when the unit is not "C", when both amounts are non-zero, or when
        both are zero/missing; otherwise whichever single amount is non-zero.
        A missing (null/NaN) amount is treated as 0.
    """
    unit = str(valA).strip()
    # NaN compares unequal to 0, so without this a missing amount would count
    # as a real non-zero amount and either mask the other one or be returned.
    absolute = 0 if pd.isnull(valB) else valB
    conditional = 0 if pd.isnull(valC) else valC

    if unit != "C":
        return 0
    if absolute != 0 and conditional != 0:
        return 0
    if absolute != 0:
        return absolute
    if conditional != 0:
        return conditional
    return 0

# Row by row, pass 'Decreed Units', 'Net Absolute' and 'Net Conditional' to
# assignAllocation_CFS and store the result in 'in_AllocationFlow_CFS'.
df['in_AllocationFlow_CFS'] = df.apply(lambda row: assignAllocation_CFS(row["Decreed Units"], row["Net Absolute"], row["Net Conditional"]), axis=1)
df.head(2)  # preview the first two rows

In [None]:
# AllocationVolume_AF
# If Net Absolute != 0 and Net Conditional != 0, then return 0
# Elif Decreed Units = "A" and Net Absolute != 0, then return Net Absolute
# Elif Decreed Units = "A" and Net Conditional != 0, then return Net Conditional
# Else return 0

# For creating AllocationVolume_AF
def assignAllocationVolume_AF(valA, valB, valC):
    """Pick the volume amount for a record whose decreed unit is "A".

    Args:
        valA: Decreed-units code; compared to "A" after ``str`` and ``strip``.
        valB: Net absolute amount.
        valC: Net conditional amount.

    Returns:
        0 when the unit is not "A", when both amounts are non-zero, or when
        both are zero/missing; otherwise whichever single amount is non-zero.
        A missing (null/NaN) amount is treated as 0.
    """
    unit = str(valA).strip()
    # NaN compares unequal to 0, so without this a missing amount would count
    # as a real non-zero amount and either mask the other one or be returned.
    absolute = 0 if pd.isnull(valB) else valB
    conditional = 0 if pd.isnull(valC) else valC

    if unit != "A":
        return 0
    if absolute != 0 and conditional != 0:
        return 0
    if absolute != 0:
        return absolute
    if conditional != 0:
        return conditional
    return 0

# Row by row, pass 'Decreed Units', 'Net Absolute' and 'Net Conditional' to
# assignAllocationVolume_AF and store the result in 'in_AllocationVolume_AF'.
df['in_AllocationVolume_AF'] = df.apply(lambda row: assignAllocationVolume_AF(row["Decreed Units"], row["Net Absolute"], row["Net Conditional"]), axis=1)
df.head(2)  # preview the first two rows

In [None]:
# For creating AllocationLegalStatusCV
# If Net Absolute = 0 and Net Conditional = 0, then Conditional Absolute
# Elif Net Absolute = 0 and Net Conditional != 0, then Conditional
# Else, Absolute

def assignAllocationLegalStatusCV(valA, valB):
    """Derive the legal-status label from the two net amounts.

    Args:
        valA: Net absolute amount.
        valB: Net conditional amount.

    Returns:
        "Conditional Absolute" when both amounts are zero/missing,
        "Conditional" when only the absolute amount is zero/missing,
        and "Absolute" otherwise.
    """
    # A missing (null/NaN) amount counts as zero; NaN == 0 is False, so
    # without this a missing absolute amount would be labelled "Absolute".
    no_absolute = bool(pd.isnull(valA)) or valA == 0
    no_conditional = bool(pd.isnull(valB)) or valB == 0

    if no_absolute and no_conditional:
        return "Conditional Absolute"
    if no_absolute:
        return "Conditional"
    return "Absolute"

# Row by row, pass 'Net Absolute' and 'Net Conditional' to
# assignAllocationLegalStatusCV and store the result in
# 'in_AllocationLegalStatusCV'.
df['in_AllocationLegalStatusCV'] = df.apply(lambda row: assignAllocationLegalStatusCV(row['Net Absolute'], row['Net Conditional']), axis=1)
df.head(2)  # preview the first two rows

In [None]:
# Need a unique identifier for WaDE AllocationNativeID.  Combine **Admin No**, **Order No**, **Decreed Units**, & **WDID** into a single string entry.

# For creating AllocationNativeID
def assignAllocationNativeID(colrowValueA, colrowValueB, colrowValueC, colrowValueD):
    """Build a hyphen-delimited identifier from four values.

    Each argument is converted with ``str`` and the pieces are joined with
    "-" in argument order, e.g. ``(1, 2, "C", 3)`` gives ``"1-2-C-3"``.
    """
    pieces = (colrowValueA, colrowValueB, colrowValueC, colrowValueD)
    return "-".join(str(piece) for piece in pieces)

# Row by row, join 'Admin No', 'Order No', 'Decreed Units' and 'WDID' via
# assignAllocationNativeID and store the result in 'in_AllocationNativeID'.
df['in_AllocationNativeID'] = df.apply(lambda row: assignAllocationNativeID(row['Admin No'], row['Order No'], row['Decreed Units'], row['WDID']), axis=1)
df.head(2)  # preview the first two rows

## Export Outputs

In [None]:
# Print the dtype of every column. Inside this context pandas' row and column
# display limits are set to None so the listing is not truncated; the previous
# option values are restored when the block exits.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(df.dtypes)

In [None]:
#Exporting to Finished File
# The relative file name resolves against the current working directory;
# index=False leaves the DataFrame index out of the CSV.
df.to_csv('P_ColoradoMaster.csv', index=False)  # The output