# Pre-processing Oregon Allocation data for WaDEQA upload.
Date Updated: 11/03/2020
Purpose:  To pre-process the Oregon data into one master file for simple DataFrame creation and extraction

Useful Links to Data:

- Data Avalaible (use 'Statewide Water Right Spatial Data with Metadata'): https://www.oregon.gov/OWRD/access_Data/Pages/Data.aspx

- POD metadata: https://arcgis.wrd.state.or.us/data/wr_pod_metadata.pdfPOD

- POU metadata: https://arcgis.wrd.state.or.us/data/wr_pou_metadata.pdfPOD

In [1]:
# Needed Libararies
import os
import numpy as np
import pandas as pd
import geopandas as gpd # the library that lets us read in shapefiles
from datetime import datetime
from pyproj import Transformer, transform
transformer = Transformer.from_proj(2992, 4326)
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Working Directory
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Oregon/WaterAllocation/RawInputData"
os.chdir(workingDir)

In [3]:
columnsList = [
    "in_WaterSourceName",
    "in_WaterSourceTypeCV",
    "in_Latitude",
    "in_Longitude",
    "in_PODorPOUSite",
    "in_SiteName",
    "in_SiteNativeID",
    "in_SiteTypeCV",
    "in_AllocationFlow_CFS",
    "in_AllocationVolume_AF",
    "in_AllocationOwner",
    "in_AllocationTimeframeEnd",
    "in_AllocationTimeframeStart",
    "in_IrrigatedAcreage",
    "snp_id",
    "priority_date",
    "claim_char",
    "use_code_description",
    "wris_link"]

## Point of Diversoin Data

In [4]:
# Dataframe creation
Podfile = "ORwr_v_pod_public_input.csv"  # contains PoD info
df = pd.read_csv(Podfile, encoding = "ISO-8859-1")
print(len(df))
df.head(3)

189822


  df = pd.read_csv(Podfile, encoding = "ISO-8859-1")


Unnamed: 0,OBJECTID,snp_id,app_nbr,pod_display,permit_nbr,acre_feet,acre_feet_est,agency,app_char,begin_day,begin_month,cert_nbr,claim_char,claim_nbr,decree_title,duty,end_day,end_month,feature_quality_code,last_updt_date,max_rate_acre_feet,max_rate_cfs,name_company,name_first,name_last,permit_char,pod_char,pod_display_short,pod_location_id,pod_nbr,pod_use_id,POINT_X,POINT_Y,priority_date,rate_cfs,rate_cfs_est,rec_creation_date,remarks,snp_id.1,source,source_type,stream_name,streamcode,supplemental,technician_initials,transfer_nbr,tributary_to,use_category,use_code,use_code_description,Unnamed: 50,wr_type,wris_link
0,1,21755,11987.0,Permit: G 10961 * MI,10961.0,,0,OWRD,G,1.0,1.0,,,,,,31.0,12.0,,6/1/1996 0:00,,0.04,FORMOSA EXPLORATION INC.,,,G,,G 10961,6909,1,26859,539412.5499,416705.7999,11/21/1989 0:00,0.04,0,6/1/1996 0:00,0 G 10961 1,21755,FORMOSA 1 ADIT ...,WE,UNN STR > MIDDLE CR,1.6125e+19,0,MIGRT,,CANYON CREEK,0,MI,MINING,,GW,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
1,2,21755,11987.0,Permit: G 10961 * MI,10961.0,,0,OWRD,G,1.0,1.0,,,,,,31.0,12.0,,6/1/1996 0:00,,0.005,FORMOSA EXPLORATION INC.,,,G,,G 10961,6910,2,26860,539232.9167,416251.9918,11/21/1989 0:00,0.005,0,6/1/1996 0:00,0 G 10961 2,21755,SILVER BUTTE 1 ADIT ...,WE,UNN STR > MIDDLE CR,1.6125e+19,0,MIGRT,,CANYON CREEK,0,MI,MINING,,GW,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
2,3,23327,13684.0,Permit: G 12684 * MI,12684.0,,0,OWRD,G,1.0,1.0,,,,,,31.0,12.0,,6/1/2001 0:00,,0.22,DUTCH MINING LLC,,,G,,G 12684,9355,1,29682,470932.7999,355915.4583,5/6/1994 0:00,0.11,1,6/1/2001 0:00,0 G 12684 1,23327,A WELL ...,WE,DRAIN CR > WHISKY CR,15168000000000.0,0,MIGRT,,ROGUE RIVER,0,MI,MINING,,GW,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...


In [5]:
# For creating WaterSourceName
def assignWaterSourceName(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = "Unspecified"
    else:
        outList = colrowValue.strip()
    return outList

df['in_WaterSourceName'] = df.apply(lambda row: assignWaterSourceName(row['source']), axis=1)

In [6]:
# For creating WaterSourceTypeCV
WSTypeDict = {
    "ST": "Storage",
    "GW": "Groundwater",
    "SW": "Surface Water"}
def assignWaterSourceTypeCV(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = "Unspecified"
    else:
        String1 = colrowValue.strip()
        try:
            outList = WSTypeDict[String1]
        except:
            outList = "Unspecified"

    return outList

df['in_WaterSourceTypeCV'] = df.apply(lambda row: assignWaterSourceTypeCV(row['wr_type']), axis=1)

In [7]:
# For converting projection latitude.
def assignLat(colrowValueLat, colrowValueLong):
    lat, long = transformer.transform(colrowValueLat, colrowValueLong)
    return lat

# For converting projection longitude.
def assignLong(colrowValueLat, colrowValueLong):
    lat, long = transformer.transform(colrowValueLat, colrowValueLong)
    return long

df['in_Latitude'] = df.apply(lambda row: assignLat(row['POINT_X'], row['POINT_Y']), axis=1)
df['in_Longitude'] = df.apply(lambda row: assignLong(row['POINT_X'], row['POINT_Y']), axis=1)

In [8]:
# For creating Site Name
def assignSiteName(colrowValueA, colrowValueB):
    if (colrowValueA == '' and colrowValueB == '') or (pd.isnull(colrowValueA) and pd.isnull(colrowValueB)):
        outList = "Unspecified"
    else:
        A = str(colrowValueA).strip()
        B = str(colrowValueB).strip()
        outList = A + "_" + B
    return outList

df['in_SiteName'] = df.apply(lambda row: assignSiteName(row['snp_id'], row['pod_nbr']), axis=1)

In [9]:
# For creating SiteTypeCV
STCVDict = {
"LK" : "lake",
"DR" : "drain",
"SP" : "spring",
"ST" : "stream",
"SL" : "slough",
"WW" : "waste water",
"WE" : "well",
"WR" : "winter runoff",
"SM" : "sump",
"PD" : "pond",
"RS" : "reservoir",
"DT" : "ditch",
"SE" : "sewage effluent",
"CN" : "canal"}
def assignSiteTypeCV(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = "Unspecified"
    else:
        String1 = colrowValue.strip()
        try:
            outList = STCVDict[String1]
        except:
            outList = "Unspecified"
    return outList

df['in_SiteTypeCV'] = df.apply(lambda row: assignSiteTypeCV(row['source_type']), axis=1)

In [10]:
# Changing datatype of used date fields. 
df['priority_date'] = pd.to_datetime(df['priority_date'], errors = 'coerce')
df['priority_date'] = pd.to_datetime(df["priority_date"].dt.strftime('%m/%d/%Y'))

In [11]:
# Creating Ownername.
# Concatenating first and last name of individual.
# Determining if company is available, split string.
# combine together for output.

# first & last name funciton
def assignownerName(colrowValue1, colrowValue2):
    if colrowValue1 == '' or pd.isnull(colrowValue1):
        outList1 = ''
    else:
        outList1 = colrowValue1.strip()  # remove whitespace chars
    if colrowValue2 == '' or pd.isnull(colrowValue2):
        outList2 = ''
    else:
        outList2 = colrowValue2.strip()  # remove whitespace chars

    if outList1 == '' and outList2 == '':
        outList = ''
    elif outList1 == '':
        outList = outList2
    elif outList2 == '':
        outList = outList1
    else:
        outList = " ".join(map(str, [colrowValue1, colrowValue2]))
    return outList


# Business name and Concatenate
def assignownerNameORCompany(buisName, fName, lName):
    
    # Concatenating First and Last name together.
    frilasName = assignownerName(fName, lName)
    
    # Clearn Company Name Entry
    if buisName == "" or pd.isnull(buisName):
        outBuisString = ""
    else:
        buisName = str(buisName).strip()
        if ";" in buisName:
            xList = buisName.split(";")
            for index, item in enumerate(xList):
                if "," in item:
                    list1 = item.split(",")
                    list1.reverse()
                    xList[index] = "".join(list1)
                else:
                    xList[index] = item
            outBuisString = ",".join(xList)
        elif "," in buisName:
            xList = buisName.split(",")
            outBuisString = str(xList[0]).strip() + "," + str(xList[1]).strip()
        else:
            outBuisString = buisName
    
    #Concatenating together, create outString
    if frilasName == ""  or pd.isnull(frilasName):
        if outBuisString == ""  or pd.isnull(outBuisString):
            outString = "Unspecified"
        else:
            outString = outBuisString
    else:
        if outBuisString == ""  or pd.isnull(outBuisString):
            outString = frilasName
        else:
            outString = frilasName + ", " + outBuisString
        
    outString = outString.strip()
    return outString

df['in_AllocationOwner'] = df.apply(lambda row: assignownerNameORCompany(row['name_company'], row['name_first'], row['name_last']), axis=1)

In [12]:
#Determining AllocationTimeframe Start & End time for each site.

def formatDateString(inString1, inString2):
    #print(inString)
    try:
        valndf = str(int(inString1)).strip() + '/' + str(int(inString2)).strip()
    except:
        valndf = ''

    return valndf;

df['in_AllocationTimeframeStart'] = df.apply(lambda row: formatDateString(row['begin_month'], row['begin_day']), axis=1)
df['in_AllocationTimeframeEnd'] = df.apply(lambda row: formatDateString(row['end_month'], row['end_day']), axis=1)

In [13]:
#Fixing Beneficial Uses PRIMARY_PURPOSE

def fixBenUse(val):
    val = str(val).strip()
    if val == "IRRIGATION, LIVESTOCK AND DOMESTIC":
        outString = "IRRIGATION, LIVESTOCK, DOMESTIC"
    elif val == "IRRIGATION AND LIVESTOCK":
        outString = "IRRIGATION, LIVESTOCK"
    elif val == "LIVESTOCK AND WILDLIFE":
        outString = "LIVESTOCK, WILDLIFE"
    else:
        outString = val
    return outString

df['use_code_description'] = df.apply(lambda row: fixBenUse(row['use_code_description']), axis=1)

In [14]:
# Creating the output Dataframe for PODs.

dfPOD = pd.DataFrame(columns=columnsList, index=df.index)

# Water Source
dfPOD["in_WaterSourceName"] = df['in_WaterSourceName']
dfPOD['in_WaterSourceTypeCV'] = df['in_WaterSourceTypeCV']

# Site
dfPOD["in_Latitude"] = df['in_Latitude']
dfPOD["in_Longitude"] = df['in_Longitude']
dfPOD["in_PODorPOUSite"] = "POD"
dfPOD["in_SiteName"] = df['in_SiteName']
dfPOD["in_SiteNativeID"] = "POD" + df['pod_location_id'].astype(str)
dfPOD["in_SiteTypeCV"] = df['in_SiteTypeCV']

# Allocation
dfPOD["in_AllocationFlow_CFS"] = df['rate_cfs'].astype(float)
dfPOD["in_AllocationCropDutyAmount "] = df['duty'].astype(float)
dfPOD['in_AllocationOwner'] = df['in_AllocationOwner']
dfPOD["in_AllocationTimeframeEnd"] = df['in_AllocationTimeframeEnd']
dfPOD["in_AllocationTimeframeStart"] = df['in_AllocationTimeframeStart'] 
dfPOD["in_AllocationVolume_AF"] = df['acre_feet'].astype(float)
dfPOD["in_IrrigatedAcreage"] = ""

# Shared Elements
dfPOD['snp_id'] = df['snp_id']  #for AllocationNativeID
dfPOD['priority_date'] = df['priority_date']  #for AllocationPriorityDate
dfPOD['claim_char'] = df['claim_char']  #for AllocationTypeCV
dfPOD['use_code_description'] = df['use_code_description']  #for BeneficialUseCategory
dfPOD['wris_link'] = df['wris_link']  #for WaterAllocationNativeURL

print(len(dfPOD))
dfPOD

189822


Unnamed: 0,in_WaterSourceName,in_WaterSourceTypeCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationOwner,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_IrrigatedAcreage,snp_id,priority_date,claim_char,use_code_description,wris_link,in_AllocationCropDutyAmount
0,FORMOSA 1 ADIT,Groundwater,42.855813,-123.382877,POD,21755_1,POD6909,well,0.040,,FORMOSA EXPLORATION INC.,12/31,1/1,,21755,1989-11-21,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,
1,SILVER BUTTE 1 ADIT,Groundwater,42.854551,-123.383487,POD,21755_2,POD6910,well,0.005,,FORMOSA EXPLORATION INC.,12/31,1/1,,21755,1989-11-21,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,
2,A WELL,Groundwater,42.682269,-123.629420,POD,23327_1,POD9355,well,0.110,,DUTCH MINING LLC,12/31,1/1,,23327,1994-05-06,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,
3,A WELL,Groundwater,44.301041,-121.654631,POD,23390_1,POD9480,well,0.670,,"JEFF STEYAERT, KNIFE RIVER CORP.",12/31,1/1,,23390,1995-01-18,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,
4,A WELL,Groundwater,44.301041,-121.654631,POD,23390_1,POD9480,well,0.110,,"JEFF STEYAERT, KNIFE RIVER CORP.",12/31,1/1,,23390,1995-01-18,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189817,RUNOFF,Storage,45.692059,-123.079106,POD,202390_1,POD344799,winter runoff,,1.5375,ERIC ANTHONY URSTADT,12/31,12/1,,202390,2017-09-05,,WILDLIFE,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,
189818,RUNOFF,Storage,45.692059,-123.079106,POD,202390_1,POD344799,winter runoff,,1.5375,ERIC ANTHONY URSTADT,4/30,1/1,,202390,2017-09-05,,WILDLIFE,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,
189819,RUNOFF,Storage,45.692059,-123.079106,POD,202390_1,POD344799,winter runoff,,4.1000,ERIC ANTHONY URSTADT,11/30,5/1,,202390,2017-09-05,,WILDLIFE,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,
189820,A WELL,Storage,45.307916,-122.462393,POD,33683_2,POD34240,well,,0.2187,ALTON MADDOX,6/30,1/1,,33683,2000-06-10,,WILDLIFE,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,


## Place of Use Data

In [15]:
# Dataframe creation
dfPOUfile = "ORwr_v_pou_public_input.csv"  # contains POU info
df = pd.read_csv(dfPOUfile, encoding = "ISO-8859-1")
print(len(df))
df.head(3)

109576


  df = pd.read_csv(dfPOUfile, encoding = "ISO-8859-1")


Unnamed: 0,OID_,snp_id,agency,app_char,app_nbr,cert_nbr,claim_char,claim_nbr,decree_title,delta_size,feature_quality_code,last_updt_date,Latitude,Longitude,name_company,name_first,name_last,permit_char,permit_nbr,pou_display,pou_display_short,pou_use_id,priority_date,rec_creation_date,remarks,Shape_Area,Shape_Length,supplemental,technician_initials,transfer_nbr,use_category,use_code,use_code_description,wr_type,wris_acres,wris_link
0,1,5135,OWRD,P,81441.0,,,,,,30.0,7/22/2005 8:02,43.736859,-118.364476,DASH W BAR RANCH,TERRY,WILLIAMS,,,App: P 81441 * LV,P 81441,4124,8/7/1996 0:00,6/30/2005 0:00,PLACED USING DRG,2.91985e-06,0.008945,0,MW,,8,LV,LIVESTOCK,ST,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
1,2,6333,OWRD,P,82980.0,,,,,,30.0,4/11/2016 10:48,42.441488,-123.041443,,SOFIA,PARKER,,,App: P 82980 * ST,P 82980,5886,1/7/1997 0:00,4/11/2016 0:00,PLACED USING 2014 IMAGERY,3.5859e-07,0.004559,0,BRW,,M,ST,STORAGE,ST,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
2,3,17008,OWRD,,,,,,East Mud Creek,,,12/1/1997 0:00,45.992923,-118.440767,,DONALD,RENCKEN,,,Inchoate: T 4213 CF (REG) * IR,T 4213,7696,12/31/1892 0:00:00,12/1/1997 0:00,0 CD 31,4.0931e-06,0.008191,0,MIGRT,T 4213,3,IR,IRRIGATION,SW,10.0,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...


In [16]:
# For creating WaterSourceTypeCV
WSTypeDict = {
    "ST": "Storage",
    "GW": "Groundwater",
    "SW": "Surface Water"}
def assignWaterSourceTypeCV(colrowValue):
    if colrowValue == "" or pd.isnull(colrowValue):
        outList = "Unspecified"
    else:
        String1 = colrowValue.strip()
        try:
            outList = WSTypeDict[String1]
        except:
            outList = "Unspecified"

    return outList

df['in_WaterSourceTypeCV'] = df.apply(lambda row: assignWaterSourceTypeCV(row['wr_type']), axis=1)

In [17]:
# Changing datatype of used date fields. 
df['priority_date'] = pd.to_datetime(df['priority_date'], errors = 'coerce')
df['priority_date'] = pd.to_datetime(df["priority_date"].dt.strftime('%m/%d/%Y'))

In [18]:
# Creating Ownername.
# Concatenating first and last name of individual.
# Determining if company is available, split string.
# combine together for output.

# first & last name funciton
def assignownerName(colrowValue1, colrowValue2):
    if colrowValue1 == '' or pd.isnull(colrowValue1):
        outList1 = ''
    else:
        outList1 = colrowValue1.strip()  # remove whitespace chars
    if colrowValue2 == '' or pd.isnull(colrowValue2):
        outList2 = ''
    else:
        outList2 = colrowValue2.strip()  # remove whitespace chars

    if outList1 == '' and outList2 == '':
        outList = ''
    elif outList1 == '':
        outList = outList2
    elif outList2 == '':
        outList = outList1
    else:
        outList = " ".join(map(str, [colrowValue1, colrowValue2]))
    return outList


# Business name and Concatenate
def assignownerNameORCompany(buisName, fName, lName):
    
    # Concatenating First and Last name together.
    frilasName = assignownerName(fName, lName)
    
    # Clearn Company Name Entry
    if buisName == "" or pd.isnull(buisName):
        outBuisString = ""
    else:
        buisName = str(buisName).strip()
        if ";" in buisName:
            xList = buisName.split(";")
            for index, item in enumerate(xList):
                if "," in item:
                    list1 = item.split(",")
                    list1.reverse()
                    xList[index] = "".join(list1)
                else:
                    xList[index] = item
            outBuisString = ",".join(xList)
        elif "," in buisName:
            xList = buisName.split(",")
            outBuisString = str(xList[0]).strip() + "," + str(xList[1]).strip()
        else:
            outBuisString = buisName
    
    #Concatenating together, create outString
    if frilasName == ""  or pd.isnull(frilasName):
        if outBuisString == ""  or pd.isnull(outBuisString):
            outString = "Unspecified"
        else:
            outString = outBuisString
    else:
        if outBuisString == ""  or pd.isnull(outBuisString):
            outString = frilasName
        else:
            outString = frilasName + ", " + outBuisString
        
    outString = outString.strip()
    return outString

df['in_AllocationOwner'] = df.apply(lambda row: assignownerNameORCompany(row['name_company'], row['name_first'], row['name_last']), axis=1)

In [19]:
#Fixing Beneficial Uses PRIMARY_PURPOSE

def fixBenUse(val):
    val = str(val).strip()
    if val == "IRRIGATION, LIVESTOCK AND DOMESTIC":
        outString = "IRRIGATION, LIVESTOCK, DOMESTIC"
    elif val == "IRRIGATION AND LIVESTOCK":
        outString = "IRRIGATION, LIVESTOCK"
    elif val == "LIVESTOCK AND WILDLIFE":
        outString = "LIVESTOCK, WILDLIFE"
    else:
        outString = val
    return outString

df['use_code_description'] = df.apply(lambda row: fixBenUse(row['use_code_description']), axis=1)

In [20]:
# Creating the output Dataframe for PODs.

dfPOU = pd.DataFrame(columns=columnsList, index=df.index)

# Water Source
dfPOU["in_WaterSourceName"] = "Unspecified"
dfPOU['in_WaterSourceTypeCV'] = df['in_WaterSourceTypeCV']

# Site
dfPOU["in_Latitude"] = df['Latitude']
dfPOU["in_Longitude"] = df['Longitude']
dfPOU["in_PODorPOUSite"] = "POU"
dfPOU["in_SiteName"] = "Unspecified"
dfPOU["in_SiteNativeID"] = "POU" + df['pou_use_id'].astype(str)
dfPOU["in_SiteTypeCV"] = "Unspecified"

# Allocation
dfPOU["in_AllocationFlow_CFS"] = ""
dfPOD["in_AllocationCropDutyAmount "] = ""
dfPOU['in_AllocationOwner'] =  df['in_AllocationOwner']
dfPOU["in_AllocationTimeframeEnd"] = ""
dfPOU["in_AllocationTimeframeStart"] = ""
dfPOU["in_AllocationVolume_AF"] = ""
dfPOU["in_IrrigatedAcreage"] = df['wris_acres'].astype(float)

# Shared Elements
dfPOU['snp_id'] = df['snp_id']  #for AllocationNativeID
dfPOU['priority_date'] = df['priority_date']  #for AllocationPriorityDate
dfPOU['claim_char'] = df['claim_char']  #for AllocationTypeCV
dfPOU['use_code_description'] = df['use_code_description']  #for BeneficialUseCategory
dfPOU['wris_link'] = df['wris_link']  #for WaterAllocationNativeURL

print(len(dfPOU))
dfPOU

109576


Unnamed: 0,in_WaterSourceName,in_WaterSourceTypeCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationOwner,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_IrrigatedAcreage,snp_id,priority_date,claim_char,use_code_description,wris_link
0,Unspecified,Storage,43.736859,-118.364476,POU,Unspecified,POU4124,Unspecified,,,"TERRY WILLIAMS, DASH W BAR RANCH",,,,5135,1996-08-07,,LIVESTOCK,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
1,Unspecified,Storage,42.441488,-123.041443,POU,Unspecified,POU5886,Unspecified,,,SOFIA PARKER,,,,6333,1997-01-07,,STORAGE,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
2,Unspecified,Surface Water,45.992923,-118.440767,POU,Unspecified,POU7696,Unspecified,,,DONALD RENCKEN,,,10.00,17008,1892-12-31,,IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
3,Unspecified,Surface Water,45.993822,-118.439625,POU,Unspecified,POU7697,Unspecified,,,DONALD RENCKEN,,,,17008,1892-12-31,,LIVESTOCK,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
4,Unspecified,Surface Water,44.235307,-121.551262,POU,Unspecified,POU7713,Unspecified,,,ROY M RUNCO,,,38.50,17022,1900-12-31,,IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109571,Unspecified,Groundwater,42.000615,-121.519010,POU,Unspecified,POU269184,Unspecified,,,MLM PROPERTY LLC,,,28.50,208328,2021-04-05,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
109572,Unspecified,Groundwater,42.098688,-121.705149,POU,Unspecified,POU269187,Unspecified,,,KUCERA ENTERPRISES,,,26.00,208330,2021-04-05,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
109573,Unspecified,Groundwater,42.054974,-121.656443,POU,Unspecified,POU269188,Unspecified,,,DAVID HAMEL,,,362.20,208331,2021-04-05,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...
109574,Unspecified,Groundwater,42.192755,-121.927985,POU,Unspecified,POU269194,Unspecified,,,HUNTER MOYLES,,,144.07,208337,2021-04-12,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...


## Concatenate POD and POU data

In [21]:
# Concatenate
frames = [dfPOD, dfPOU]
dfout = pd.concat(frames)
print(len(dfout))
dfout.head(3)

299398


Unnamed: 0,in_WaterSourceName,in_WaterSourceTypeCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationOwner,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_IrrigatedAcreage,snp_id,priority_date,claim_char,use_code_description,wris_link,in_AllocationCropDutyAmount
0,FORMOSA 1 ADIT,Groundwater,42.855813,-123.382877,POD,21755_1,POD6909,well,0.04,,FORMOSA EXPLORATION INC.,12/31,1/1,,21755,1989-11-21,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,
1,SILVER BUTTE 1 ADIT,Groundwater,42.854551,-123.383487,POD,21755_2,POD6910,well,0.005,,FORMOSA EXPLORATION INC.,12/31,1/1,,21755,1989-11-21,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,
2,A WELL,Groundwater,42.682269,-123.62942,POD,23327_1,POD9355,well,0.11,,DUTCH MINING LLC,12/31,1/1,,23327,1994-05-06,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,


## Custom WaDE Elements due to missing info

In [22]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDEOR_WS" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceName'] = dfout['in_WaterSourceName']
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = dfout['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        ml = dfWaterSourceNativeID.loc[(dfWaterSourceNativeID['in_WaterSourceName'] == A) & 
                                       (dfWaterSourceNativeID['in_WaterSourceTypeCV'] == B), 'in_WaterSourceNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

dfout['in_WaterSourceNativeID'] = dfout.apply(lambda row: retrieveWaterSourceNativeID( row['in_WaterSourceName'], row['in_WaterSourceTypeCV']), axis=1)
dfout

Unnamed: 0,in_WaterSourceName,in_WaterSourceTypeCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationOwner,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_IrrigatedAcreage,snp_id,priority_date,claim_char,use_code_description,wris_link,in_AllocationCropDutyAmount,in_WaterSourceNativeID
0,FORMOSA 1 ADIT,Groundwater,42.855813,-123.382877,POD,21755_1,POD6909,well,0.04,,FORMOSA EXPLORATION INC.,12/31,1/1,,21755,1989-11-21,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS1
1,SILVER BUTTE 1 ADIT,Groundwater,42.854551,-123.383487,POD,21755_2,POD6910,well,0.005,,FORMOSA EXPLORATION INC.,12/31,1/1,,21755,1989-11-21,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS2
2,A WELL,Groundwater,42.682269,-123.629420,POD,23327_1,POD9355,well,0.11,,DUTCH MINING LLC,12/31,1/1,,23327,1994-05-06,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS3
3,A WELL,Groundwater,44.301041,-121.654631,POD,23390_1,POD9480,well,0.67,,"JEFF STEYAERT, KNIFE RIVER CORP.",12/31,1/1,,23390,1995-01-18,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS3
4,A WELL,Groundwater,44.301041,-121.654631,POD,23390_1,POD9480,well,0.11,,"JEFF STEYAERT, KNIFE RIVER CORP.",12/31,1/1,,23390,1995-01-18,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109571,Unspecified,Groundwater,42.000615,-121.519010,POU,Unspecified,POU269184,Unspecified,,,MLM PROPERTY LLC,,,28.5,208328,2021-04-05,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS16875
109572,Unspecified,Groundwater,42.098688,-121.705149,POU,Unspecified,POU269187,Unspecified,,,KUCERA ENTERPRISES,,,26.0,208330,2021-04-05,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS16875
109573,Unspecified,Groundwater,42.054974,-121.656443,POU,Unspecified,POU269188,Unspecified,,,DAVID HAMEL,,,362.2,208331,2021-04-05,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS16875
109574,Unspecified,Groundwater,42.192755,-121.927985,POU,Unspecified,POU269194,Unspecified,,,HUNTER MOYLES,,,144.07,208337,2021-04-12,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS16875


## Shapefile Data
- For attaching gemetry to csv inputs.

In [23]:
# PoU Shapefile Data
# Shapefile input
ShapeFileInput = gpd.read_file('shapefile/OR_PoU.shp')
dfPoUshapetemp = pd.DataFrame(ShapeFileInput)
dfPoUshapetemp.head(3)

Unnamed: 0,pou_displa,pou_disp_1,wris_link,snp_id,pou_use_id,app_char,app_nbr,permit_cha,permit_nbr,cert_nbr,claim_char,claim_nbr,decree_tit,transfer_n,wr_type,name_last,name_first,name_compa,use_code,use_catego,use_code_d,priority_d,supplement,wris_acres,technician,agency,rec_creati,last_updt_,feature_qu,delta_size,remarks,Shape_Leng,Shape_Area,geometry
0,App: P 81441 * LV,P 81441,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,5135,4124,P,81441,,0,0,,0,,,ST,WILLIAMS,TERRY,DASH W BAR RANCH,LV,8,LIVESTOCK,1996-08-07,0,0.0,MW,OWRD,2005-06-30,2005-07-22,30,0.0,PLACED USING DRG,2654.809276,281203.13083,"POLYGON ((-118.36564 43.73765, -118.36567 43.7..."
1,App: P 82980 * ST,P 82980,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,6333,5886,P,82980,,0,0,,0,,,ST,PARKER,SOFIA,,ST,M,STORAGE,1997-01-07,0,0.0,BRW,OWRD,2016-04-11,2016-04-11,30,0.0,PLACED USING 2014 IMAGERY,1437.654166,35294.340067,"POLYGON ((-123.04147 42.44192, -123.04144 42.4..."
2,Inchoate: T 4213 CF (REG) * IR,T 4213,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,17008,7696,,0,,0,0,,0,East Mud Creek,T 4213,SW,RENCKEN,DONALD,,IR,3,IRRIGATION,1892-12-31,0,10.0,MIGRT,OWRD,1997-12-01,1997-12-01,0,0.0,0 CD 31,2426.48477,379563.116664,"POLYGON ((-118.43940 45.99213, -118.44205 45.9..."


In [24]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['pou_use_id'].astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head(3)

Unnamed: 0,in_SiteNativeID,geometry
0,POU4124,"POLYGON ((-118.36564 43.73765, -118.36567 43.7..."
1,POU5886,"POLYGON ((-123.04147 42.44192, -123.04144 42.4..."
2,POU7696,"POLYGON ((-118.43940 45.99213, -118.44205 45.9..."


## The Output

In [25]:
print(len(dfout))
dfout

299398


Unnamed: 0,in_WaterSourceName,in_WaterSourceTypeCV,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SiteTypeCV,in_AllocationFlow_CFS,in_AllocationVolume_AF,in_AllocationOwner,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_IrrigatedAcreage,snp_id,priority_date,claim_char,use_code_description,wris_link,in_AllocationCropDutyAmount,in_WaterSourceNativeID
0,FORMOSA 1 ADIT,Groundwater,42.855813,-123.382877,POD,21755_1,POD6909,well,0.04,,FORMOSA EXPLORATION INC.,12/31,1/1,,21755,1989-11-21,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS1
1,SILVER BUTTE 1 ADIT,Groundwater,42.854551,-123.383487,POD,21755_2,POD6910,well,0.005,,FORMOSA EXPLORATION INC.,12/31,1/1,,21755,1989-11-21,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS2
2,A WELL,Groundwater,42.682269,-123.629420,POD,23327_1,POD9355,well,0.11,,DUTCH MINING LLC,12/31,1/1,,23327,1994-05-06,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS3
3,A WELL,Groundwater,44.301041,-121.654631,POD,23390_1,POD9480,well,0.67,,"JEFF STEYAERT, KNIFE RIVER CORP.",12/31,1/1,,23390,1995-01-18,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS3
4,A WELL,Groundwater,44.301041,-121.654631,POD,23390_1,POD9480,well,0.11,,"JEFF STEYAERT, KNIFE RIVER CORP.",12/31,1/1,,23390,1995-01-18,,MINING,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109571,Unspecified,Groundwater,42.000615,-121.519010,POU,Unspecified,POU269184,Unspecified,,,MLM PROPERTY LLC,,,28.5,208328,2021-04-05,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS16875
109572,Unspecified,Groundwater,42.098688,-121.705149,POU,Unspecified,POU269187,Unspecified,,,KUCERA ENTERPRISES,,,26.0,208330,2021-04-05,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS16875
109573,Unspecified,Groundwater,42.054974,-121.656443,POU,Unspecified,POU269188,Unspecified,,,DAVID HAMEL,,,362.2,208331,2021-04-05,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS16875
109574,Unspecified,Groundwater,42.192755,-121.927985,POU,Unspecified,POU269194,Unspecified,,,HUNTER MOYLES,,,144.07,208337,2021-04-12,,SUPPLEMENTAL IRRIGATION,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,WaDEOR_WS16875


In [26]:
#Exporting to Finished File
dfout.to_csv('P_OregonMaster.csv', index=False)
dfPoUshape.to_csv('P_OregonGeometry.csv', index=False) # The output geometry.