# Pre-processing Oregon Allocation data for WaDEQA upload.
Date Updated: 11/03/2020
Purpose:  To pre-process the Oregon data into one master file for simple DataFrame creation and extraction

Useful Links to Data:

- Data Available (use 'Statewide Water Right Spatial Data with Metadata'): https://www.oregon.gov/OWRD/access_Data/Pages/Data.aspx

- POD metadata: https://arcgis.wrd.state.or.us/data/wr_pod_metadata.pdf

- POU metadata: https://arcgis.wrd.state.or.us/data/wr_pou_metadata.pdf

In [None]:
# Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
from pyproj import Transformer, transform
# Module-level coordinate transformer built from the integer codes 2992 (source) and 4326 (target).
# NOTE(review): presumably EPSG:2992 Oregon Lambert (feet) -> EPSG:4326 WGS84 lat/long; confirm against the POD metadata.
transformer = Transformer.from_proj(2992, 4326)
pd.set_option('display.max_columns', 999)  # Show up to 999 columns when a DataFrame is displayed in the notebook

In [None]:
# Working Directory
# Every relative file name used below (input CSVs and the output CSV) resolves against this folder.
# NOTE(review): hard-coded, machine-specific path; update it before running on another machine.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Oregon/WaterAllocation/RawInputData"
os.chdir(workingDir)

In [None]:
# Column layout shared by the POD and POU output DataFrames (order matters for the export).
columnsList = [
    # water source
    "in_WaterSourceName", "in_WaterSourceTypeCV",
    # site
    "in_Latitude", "in_Longitude", "in_PODorPOUSite",
    "in_SiteName", "in_SiteNativeID", "in_SiteTypeCV",
    # allocation
    "in_AllocationFlow_CFS", "in_AllocationVolume_AF", "in_AllocationOwner",
    "in_AllocationTimeframeEnd", "in_AllocationTimeframeStart", "in_IrrigatedAcreage",
    # fields copied straight from the source data
    "snp_id", "priority_date", "claim_char", "use_code_description", "wris_link",
]

## Point of Diversion Data

In [None]:
# Dataframe creation
# Load the raw POD export; the file name is relative to the working directory set above.
Podfile = "ORwr_v_pod_public_input.csv"  # contains PoD info
df = pd.read_csv(Podfile, encoding = "ISO-8859-1")
print(len(df))  # row count as a quick sanity check
df.head(3)  # preview of the first rows in the notebook

In [None]:
# For creating WaterSourceName
def assignWaterSourceName(colrowValue):
    """Return the trimmed water source name, or "Unspecified" when it is missing.

    A value counts as missing when it is null (None/NaN) or contains nothing
    but whitespace. Non-string values are converted with str() before trimming
    so an unexpected numeric cell does not raise.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"
    cleaned = str(colrowValue).strip()
    # A whitespace-only cell is treated like an empty one.
    return cleaned if cleaned else "Unspecified"

# Only the 'source' column is needed, so map it directly rather than building a
# full row Series per record with DataFrame.apply(axis=1).
df['in_WaterSourceName'] = df['source'].apply(assignWaterSourceName)

In [None]:
# For creating WaterSourceTypeCV
# Source wr_type code -> water source type label.
WSTypeDict = {
    "ST": "Storage",
    "GW": "Groundwater",
    "SW": "Surface Water"}


def assignWaterSourceTypeCV(colrowValue):
    """Translate a wr_type code into its water source type label.

    Returns "Unspecified" for null values, empty strings, and any code that is
    not a key of WSTypeDict. The value is converted with str() and trimmed
    before the lookup, so non-string input falls through to "Unspecified"
    instead of raising.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"
    return WSTypeDict.get(str(colrowValue).strip(), "Unspecified")

# Only the 'wr_type' column is needed, so map it directly rather than building a
# full row Series per record with DataFrame.apply(axis=1).
df['in_WaterSourceTypeCV'] = df['wr_type'].apply(assignWaterSourceTypeCV)

In [None]:
# For converting projection latitude.
def assignLat(colrowValueLat, colrowValueLong):
    """Return the first value of the pair produced by the module-level ``transformer``.

    NOTE(review): despite the parameter names, the call site in this file
    passes POINT_X as ``colrowValueLat`` and POINT_Y as ``colrowValueLong``.
    """
    firstOut, _secondOut = transformer.transform(colrowValueLat, colrowValueLong)
    return firstOut

# For converting projection longitude.
def assignLong(colrowValueLat, colrowValueLong):
    """Return the second value of the pair produced by the module-level ``transformer``.

    NOTE(review): despite the parameter names, the call site in this file
    passes POINT_X as ``colrowValueLat`` and POINT_Y as ``colrowValueLong``.
    """
    _firstOut, secondOut = transformer.transform(colrowValueLat, colrowValueLong)
    return secondOut

# Project all points in one vectorized call. The previous row-wise version ran the
# transformer twice per record (once for each output column).
# Argument order (POINT_X, POINT_Y) and output order (first -> latitude,
# second -> longitude) are the same as in assignLat / assignLong.
projectedLat, projectedLong = transformer.transform(df['POINT_X'].to_numpy(), df['POINT_Y'].to_numpy())
df['in_Latitude'] = projectedLat
df['in_Longitude'] = projectedLong

In [None]:
# For creating Site Name
def assignSiteName(colrowValueA, colrowValueB):
    """Build a site name of the form "<A>_<B>" from two identifier values.

    Returns "Unspecified" when both values are blank, where blank means null
    (None/NaN) or an empty string. The two values are checked independently,
    so a mix of '' and NaN is also recognised as blank instead of producing
    "_nan". Otherwise each value is converted with str(), trimmed, and joined
    with an underscore.
    """
    blankA = pd.isnull(colrowValueA) or colrowValueA == ''
    blankB = pd.isnull(colrowValueB) or colrowValueB == ''
    if blankA and blankB:
        return "Unspecified"
    return str(colrowValueA).strip() + "_" + str(colrowValueB).strip()

# Walk the two needed columns in lockstep rather than building a full row Series
# per record with DataFrame.apply(axis=1).
df['in_SiteName'] = [assignSiteName(snpId, podNbr) for snpId, podNbr in zip(df['snp_id'], df['pod_nbr'])]

In [None]:
# For creating SiteTypeCV
# Source source_type code -> site type label.
STCVDict = {
    "LK": "lake",
    "DR": "drain",
    "SP": "spring",
    "ST": "stream",
    "SL": "slough",
    "WW": "waste water",
    "WE": "well",
    "WR": "winter runoff",
    "SM": "sump",
    "PD": "pond",
    "RS": "reservoir",
    "DT": "ditch",
    "SE": "sewage effluent",
    "CN": "canal"}


def assignSiteTypeCV(colrowValue):
    """Translate a source_type code into its site type label.

    Returns "Unspecified" for null values, empty strings, and any code that is
    not a key of STCVDict. The value is converted with str() and trimmed before
    the lookup, so non-string input falls through to "Unspecified" instead of
    raising.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"
    return STCVDict.get(str(colrowValue).strip(), "Unspecified")

# Only the 'source_type' column is needed, so map it directly rather than building
# a full row Series per record with DataFrame.apply(axis=1).
df['in_SiteTypeCV'] = df['source_type'].apply(assignSiteTypeCV)

In [None]:
# Changing datatype of used date fields.
# First pass parses the raw values; anything that cannot be parsed becomes NaT (errors='coerce').
df['priority_date'] = pd.to_datetime(df['priority_date'], errors = 'coerce')
# Second pass formats the dates as MM/DD/YYYY text and parses that text again,
# which discards any time-of-day component the first pass may have kept.
df['priority_date'] = pd.to_datetime(df["priority_date"].dt.strftime('%m/%d/%Y'))

In [None]:
# Creating Ownername.
# Concatenating first and last name of individual.
# Determining if company is available, split string.
# combine together for output.

# first & last name function
def assignownerName(colrowValue1, colrowValue2):
    """Combine a first and a last name into one string.

    Each value is skipped when it is null (None/NaN), empty, or whitespace
    only. The remaining values are trimmed and joined with a single space, so
    the result is '' when both are missing and the lone name when only one is
    present. The trimmed values (not the raw inputs) are joined, so padding in
    the source cells no longer leaks into the combined name.
    """
    nameParts = []
    for rawValue in (colrowValue1, colrowValue2):
        if pd.isnull(rawValue):
            continue
        trimmed = str(rawValue).strip()  # remove whitespace chars
        if trimmed:
            nameParts.append(trimmed)
    return " ".join(nameParts)


# Business name and Concatenate
def assignownerNameORCompany(buisName, fName, lName):
    """Build the owner string from a company name and a person's first/last name.

    The person name comes from assignownerName(fName, lName). The company name
    is cleaned as described inline. The result is "<person>, <company>" when
    both exist, whichever one exists otherwise, and "Unspecified" when neither
    does.
    """
    personName = assignownerName(fName, lName)

    # Clean the company name entry.
    companyName = ""
    if not (buisName == "" or pd.isnull(buisName)):
        rawCompany = str(buisName).strip()
        if ";" in rawCompany:
            # Each ';'-separated entry has its ','-separated pieces reversed and
            # glued back together, and the entries are then joined with ','.
            # NOTE(review): the pieces are glued with no separator and are not
            # trimmed (e.g. "A, B" becomes " BA") - confirm this is intended.
            entries = ["".join(reversed(entry.split(","))) for entry in rawCompany.split(";")]
            companyName = ",".join(entries)
        elif "," in rawCompany:
            # NOTE(review): only the first two ','-separated pieces are kept.
            pieces = rawCompany.split(",")
            companyName = str(pieces[0]).strip() + "," + str(pieces[1]).strip()
        else:
            companyName = rawCompany

    # Concatenate the person and company parts.
    if personName and companyName:
        combined = personName + ", " + companyName
    else:
        combined = personName or companyName or "Unspecified"

    return combined.strip()

# Walk the three needed columns in lockstep rather than building a full row Series
# per record with DataFrame.apply(axis=1).
df['in_AllocationOwner'] = [
    assignownerNameORCompany(company, first, last)
    for company, first, last in zip(df['name_company'], df['name_first'], df['name_last'])]

In [None]:
#Determining AllocationTimeframe Start & End time for each site.

def formatDateString(inString1, inString2):
    """Format two numeric-like values as "<int1>/<int2>" (used for month/day).

    Both values are converted with int(), so 3.0 and "3" both become "3".
    Returns '' when either value cannot be converted (None, NaN, infinity,
    or non-numeric text).
    """
    try:
        return str(int(inString1)) + '/' + str(int(inString2))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None and similar; ValueError: NaN or non-numeric text;
        # OverflowError: infinity.
        return ''

# Walk the month/day columns in lockstep rather than building a full row Series
# per record with DataFrame.apply(axis=1).
df['in_AllocationTimeframeStart'] = [
    formatDateString(month, day) for month, day in zip(df['begin_month'], df['begin_day'])]
df['in_AllocationTimeframeEnd'] = [
    formatDateString(month, day) for month, day in zip(df['end_month'], df['end_day'])]

In [None]:
#Fixing Beneficial Uses PRIMARY_PURPOSE

def fixBenUse(val):
    """Rewrite the known "... AND ..." beneficial-use labels as comma-separated lists.

    The input is converted with str() and trimmed first. Labels that are not
    one of the three known combinations are returned trimmed but otherwise
    unchanged.
    """
    replacements = {
        "IRRIGATION, LIVESTOCK AND DOMESTIC": "IRRIGATION, LIVESTOCK, DOMESTIC",
        "IRRIGATION AND LIVESTOCK": "IRRIGATION, LIVESTOCK",
        "LIVESTOCK AND WILDLIFE": "LIVESTOCK, WILDLIFE",
    }
    label = str(val).strip()
    return replacements.get(label, label)

# Only the 'use_code_description' column is needed, so map it directly rather than
# building a full row Series per record with DataFrame.apply(axis=1).
df['use_code_description'] = df['use_code_description'].apply(fixBenUse)

In [None]:
# Creating the output Dataframe for PODs.
# Start from an empty frame that has the shared output columns (columnsList) and the
# same index as df, then fill each column from the pre-processed POD data.

dfPOD = pd.DataFrame(columns=columnsList, index=df.index)

# Water Source
dfPOD["in_WaterSourceName"] = df['in_WaterSourceName']
dfPOD['in_WaterSourceTypeCV'] = df['in_WaterSourceTypeCV']

# Site
dfPOD["in_Latitude"] = df['in_Latitude']
dfPOD["in_Longitude"] = df['in_Longitude']
dfPOD["in_PODorPOUSite"] = "POD"  # constant marker: every row in this frame is a point of diversion
dfPOD["in_SiteName"] = df['in_SiteName']
dfPOD["in_SiteNativeID"] = df['pod_location_id'].astype(str)
dfPOD["in_SiteTypeCV"] = df['in_SiteTypeCV']

# Allocation
dfPOD["in_AllocationFlow_CFS"] = df['rate_cfs'].astype(float)
# NOTE(review): the key below ends with a trailing space and is not listed in
# columnsList, so it is added as an extra column - confirm the intended name.
dfPOD["in_AllocationCropDutyAmount "] = df['duty'].astype(float)
dfPOD['in_AllocationOwner'] = df['in_AllocationOwner']
dfPOD["in_AllocationTimeframeEnd"] = df['in_AllocationTimeframeEnd']
dfPOD["in_AllocationTimeframeStart"] = df['in_AllocationTimeframeStart'] 
dfPOD["in_AllocationVolume_AF"] = df['acre_feet'].astype(float)
dfPOD["in_IrrigatedAcreage"] = ""  # left blank for POD rows

# Shared Elements
dfPOD['snp_id'] = df['snp_id']  #for AllocationNativeID
dfPOD['priority_date'] = df['priority_date']  #for AllocationPriorityDate
dfPOD['claim_char'] = df['claim_char']  #for AllocationTypeCV
dfPOD['use_code_description'] = df['use_code_description']  #for BeneficialUseCategory
dfPOD['wris_link'] = df['wris_link']  #for WaterAllocationNativeURL

print(len(dfPOD))  # row count as a quick sanity check
dfPOD

## Place of Use Data

In [None]:
# Dataframe creation
# Load the raw POU export; the file name is relative to the working directory set above.
# NOTE: this rebinds df, so from here on df holds the POU records instead of the POD records.
dfPOUfile = "ORwr_v_pou_public_input.csv"  # contains POU info
df = pd.read_csv(dfPOUfile, encoding = "ISO-8859-1")
print(len(df))  # row count as a quick sanity check
df.head(3)  # preview of the first rows in the notebook

In [None]:
# For creating WaterSourceTypeCV
# Source wr_type code -> water source type label.
WSTypeDict = {
    "ST": "Storage",
    "GW": "Groundwater",
    "SW": "Surface Water"}


def assignWaterSourceTypeCV(colrowValue):
    """Translate a wr_type code into its water source type label.

    Returns "Unspecified" for null values, empty strings, and any code that is
    not a key of WSTypeDict. The value is converted with str() and trimmed
    before the lookup, so non-string input falls through to "Unspecified"
    instead of raising.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"
    return WSTypeDict.get(str(colrowValue).strip(), "Unspecified")

# Only the 'wr_type' column is needed, so map it directly rather than building a
# full row Series per record with DataFrame.apply(axis=1).
df['in_WaterSourceTypeCV'] = df['wr_type'].apply(assignWaterSourceTypeCV)

In [None]:
# Changing datatype of used date fields.
# First pass parses the raw values; anything that cannot be parsed becomes NaT (errors='coerce').
df['priority_date'] = pd.to_datetime(df['priority_date'], errors = 'coerce')
# Second pass formats the dates as MM/DD/YYYY text and parses that text again,
# which discards any time-of-day component the first pass may have kept.
df['priority_date'] = pd.to_datetime(df["priority_date"].dt.strftime('%m/%d/%Y'))

In [None]:
# Creating Ownername.
# Concatenating first and last name of individual.
# Determining if company is available, split string.
# combine together for output.

# first & last name function
def assignownerName(colrowValue1, colrowValue2):
    """Combine a first and a last name into one string.

    Each value is skipped when it is null (None/NaN), empty, or whitespace
    only. The remaining values are trimmed and joined with a single space, so
    the result is '' when both are missing and the lone name when only one is
    present. The trimmed values (not the raw inputs) are joined, so padding in
    the source cells no longer leaks into the combined name.
    """
    nameParts = []
    for rawValue in (colrowValue1, colrowValue2):
        if pd.isnull(rawValue):
            continue
        trimmed = str(rawValue).strip()  # remove whitespace chars
        if trimmed:
            nameParts.append(trimmed)
    return " ".join(nameParts)


# Business name and Concatenate
def assignownerNameORCompany(buisName, fName, lName):
    """Build the owner string from a company name and a person's first/last name.

    The person name comes from assignownerName(fName, lName). The company name
    is cleaned as described inline. The result is "<person>, <company>" when
    both exist, whichever one exists otherwise, and "Unspecified" when neither
    does.
    """
    personName = assignownerName(fName, lName)

    # Clean the company name entry.
    companyName = ""
    if not (buisName == "" or pd.isnull(buisName)):
        rawCompany = str(buisName).strip()
        if ";" in rawCompany:
            # Each ';'-separated entry has its ','-separated pieces reversed and
            # glued back together, and the entries are then joined with ','.
            # NOTE(review): the pieces are glued with no separator and are not
            # trimmed (e.g. "A, B" becomes " BA") - confirm this is intended.
            entries = ["".join(reversed(entry.split(","))) for entry in rawCompany.split(";")]
            companyName = ",".join(entries)
        elif "," in rawCompany:
            # NOTE(review): only the first two ','-separated pieces are kept.
            pieces = rawCompany.split(",")
            companyName = str(pieces[0]).strip() + "," + str(pieces[1]).strip()
        else:
            companyName = rawCompany

    # Concatenate the person and company parts.
    if personName and companyName:
        combined = personName + ", " + companyName
    else:
        combined = personName or companyName or "Unspecified"

    return combined.strip()

# Walk the three needed columns in lockstep rather than building a full row Series
# per record with DataFrame.apply(axis=1).
df['in_AllocationOwner'] = [
    assignownerNameORCompany(company, first, last)
    for company, first, last in zip(df['name_company'], df['name_first'], df['name_last'])]

In [None]:
#Fixing Beneficial Uses PRIMARY_PURPOSE

def fixBenUse(val):
    """Rewrite the known "... AND ..." beneficial-use labels as comma-separated lists.

    The input is converted with str() and trimmed first. Labels that are not
    one of the three known combinations are returned trimmed but otherwise
    unchanged.
    """
    replacements = {
        "IRRIGATION, LIVESTOCK AND DOMESTIC": "IRRIGATION, LIVESTOCK, DOMESTIC",
        "IRRIGATION AND LIVESTOCK": "IRRIGATION, LIVESTOCK",
        "LIVESTOCK AND WILDLIFE": "LIVESTOCK, WILDLIFE",
    }
    label = str(val).strip()
    return replacements.get(label, label)

# Only the 'use_code_description' column is needed, so map it directly rather than
# building a full row Series per record with DataFrame.apply(axis=1).
df['use_code_description'] = df['use_code_description'].apply(fixBenUse)

In [None]:
# Creating the output Dataframe for POUs.
# Start from an empty frame that has the shared output columns (columnsList) and the
# same index as df, then fill each column from the pre-processed POU data. Fields
# that this section does not derive for POU rows are set to "Unspecified" or left blank.

dfPOU = pd.DataFrame(columns=columnsList, index=df.index)

# Water Source
dfPOU["in_WaterSourceName"] = "Unspecified"
dfPOU['in_WaterSourceTypeCV'] = df['in_WaterSourceTypeCV']

# Site
dfPOU["in_Latitude"] = df['Latitude']
dfPOU["in_Longitude"] = df['Longitude']
dfPOU["in_PODorPOUSite"] = "POU"  # constant marker: every row in this frame is a place of use
dfPOU["in_SiteName"] = "Unspecified"
dfPOU["in_SiteNativeID"] = df['pou_use_id'].astype(str)
dfPOU["in_SiteTypeCV"] = "Unspecified"

# Allocation
dfPOU["in_AllocationFlow_CFS"] = ""
# Fixed: this used to assign to dfPOD, which blanked the duty values already
# stored for the POD rows. The key keeps its trailing space so that it lines up
# with the matching dfPOD column when the two frames are concatenated.
dfPOU["in_AllocationCropDutyAmount "] = ""
dfPOU['in_AllocationOwner'] = df['in_AllocationOwner']
dfPOU["in_AllocationTimeframeEnd"] = ""
dfPOU["in_AllocationTimeframeStart"] = ""
dfPOU["in_AllocationVolume_AF"] = ""
dfPOU["in_IrrigatedAcreage"] = df['wris_acres'].astype(float)

# Shared Elements
dfPOU['snp_id'] = df['snp_id']  #for AllocationNativeID
dfPOU['priority_date'] = df['priority_date']  #for AllocationPriorityDate
dfPOU['claim_char'] = df['claim_char']  #for AllocationTypeCV
dfPOU['use_code_description'] = df['use_code_description']  #for BeneficialUseCategory
dfPOU['wris_link'] = df['wris_link']  #for WaterAllocationNativeURL

print(len(dfPOU))  # row count as a quick sanity check
dfPOU

## Concatenate POD and POU data

In [None]:
# Concatenate
# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it the
# POD and POU frames each contribute their own 0-based labels, leaving duplicate
# index values in dfout. The exported CSV is unaffected (it is written with index=False).
frames = [dfPOD, dfPOU]
dfout = pd.concat(frames, ignore_index=True)
print(len(dfout))
dfout.head(3)

## Custom WaDE Elements due to missing info

In [None]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the custom water source ID: the prefix "WaDEOR_WS" followed by str(colrowValue)."""
    return f"WaDEOR_WS{colrowValue!s}"

# Collect the distinct (water source name, water source type) pairs found in dfout.
dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = dfout['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = dfout['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

# Number the distinct pairs 1..N in row order and turn each number into an ID string.
# dftemp reuses the index of dfWaterSourceNativeID so the resulting IDs line up row for row.
dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
# Build a (name, type) -> native ID lookup once. The previous version filtered the
# whole dfWaterSourceNativeID frame for every row of dfout.
# setdefault keeps the first ID seen for a pair, matching the old "first match" rule.
# NOTE: this is a snapshot; rebuild it if dfWaterSourceNativeID changes afterwards.
waterSourceNativeIDLookup = {}
for wsName, wsType, wsNativeID in zip(dfWaterSourceNativeID['in_WaterSourceName'],
                                      dfWaterSourceNativeID['in_WaterSourceTypeCV'],
                                      dfWaterSourceNativeID['in_WaterSourceNativeID']):
    waterSourceNativeIDLookup.setdefault((wsName, wsType), wsNativeID)


def retrieveWaterSourceNativeID(A, B):
    """Return the custom native ID for water source name A and type B.

    Returns '' when both values are empty strings, when either value is null
    (a null never matched any row in the old filter-based lookup either), or
    when the pair is not in the lookup.
    """
    if (A == '' and B == '') or pd.isnull(A) or pd.isnull(B):
        return ''
    return waterSourceNativeIDLookup.get((A, B), '')


dfout['in_WaterSourceNativeID'] = [
    retrieveWaterSourceNativeID(wsName, wsType)
    for wsName, wsType in zip(dfout['in_WaterSourceName'], dfout['in_WaterSourceTypeCV'])]
dfout

## The Output

In [None]:
# Print the final row count and display the combined DataFrame for a last visual check.
print(len(dfout))
dfout

In [None]:
# Exporting to Finished File
# The relative file name resolves against the working directory set at the top of the notebook.
# index=False leaves the DataFrame index out of the CSV.
dfout.to_csv('P_OregonMaster.csv', index=False) 