# Pre-processing Oregon Allocation data for WaDE upload.

Purpose:  To pre-process the Oregon data into one master file for simple DataFrame creation and extraction

Useful Links to Data:

- Data Available (use 'Statewide Water Right Spatial Data with Metadata'): https://www.oregon.gov/OWRD/access_Data/Pages/Data.aspx

- POD metadata: https://arcgis.wrd.state.or.us/data/wr_pod_metadata.pdf

- POU metadata: https://arcgis.wrd.state.or.us/data/wr_pou_metadata.pdf

In [1]:
# Needed Libraries

# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# API retrieval
import requests
import json

# Cleanup
import re
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook
pd.set_option('display.float_format', lambda x: '%.5f' % x) # suppress scientific notation in Pandas

In [2]:
# Working Directory
workingDir = "G:/Shared drives/WaDE Data/Oregon/WaterAllocation/RawInputData"
os.chdir(workingDir)

## Point of Diversion Data

In [3]:
# Import POD (point of diversion) data.
# Reads the zipped shapefile into a GeoDataFrame, blanks out NaN values and the
# literal text "nan,nan", then drops the geometry column so only attribute
# columns remain.
inputFile = 'shapefile/wr_v_pod_public.zip'
dfinPOD = gpd.read_file(inputFile).replace(np.nan, "").replace("nan,nan", "") # GeoDataFrame read
dfinPOD = dfinPOD.drop(['geometry'], axis=1)

# WaDE UUID tracker for data assessment.
# When the column is absent, build one id per row from the row index ("orD0",
# "orD1", ...) and save a zipped CSV copy in the working directory (note: not
# inside the 'shapefile/' folder the input was read from).
if 'WaDEUUID' not in dfinPOD:
    dfinPOD['WaDEUUID'] = "orD" + dfinPOD.index.astype(str)
    dfinPOD.to_csv('wr_v_pod_public.zip', compression=dict(method='zip', archive_name='wr_v_pod_public.csv'), index=False)

# NOTE(review): this runs after WaDEUUID was added; if the index-based ids are
# unique per row, no row can be an exact duplicate here -- confirm the intent.
dfinPOD = dfinPOD.drop_duplicates().reset_index(drop=True)
print(len(dfinPOD))
dfinPOD.head()

195986


Unnamed: 0,pod_displa,pod_disp_1,wris_link,snp_id,pod_locati,pod_use_id,app_char,app_nbr,permit_cha,permit_nbr,cert_nbr,claim_char,claim_nbr,decree_tit,transfer_n,wr_type,name_last,name_first,name_compa,pod_nbr,pod_char,source_typ,use_code,use_catego,use_code_d,priority_d,duty,rate_cfs,rate_cfs_e,max_rate_c,acre_feet,acre_feet_,max_rate_a,source,tributary_,streamcode,stream_nam,supplement,begin_mont,begin_day,end_month,end_day,technician,agency,rec_creati,last_updt_,feature_qu,remarks,Latitude,Longitude,WaDEUUID
0,Permit: G 10961 * MI,G 10961,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,21755,6909,26859,G,11987,G,10961,0,,0,,,GW,,,FORMOSA EXPLORATION INC.,1,,WE,MI,0,MINING,1989-11-21,0.0,0.04,0,0.04,0.0,0,0.0,FORMOSA 1 ADIT,CANYON CREEK,16125009000400430220,UNN STR > MIDDLE CR,0,1,1,12,31,MIGRT,OWRD,1996-06-01,1996-06-01,0,0 G 10961 1,42.85582,-123.3829,orD0
1,Permit: G 10961 * MI,G 10961,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,21755,6910,26860,G,11987,G,10961,0,,0,,,GW,,,FORMOSA EXPLORATION INC.,2,,WE,MI,0,MINING,1989-11-21,0.0,0.005,0,0.005,0.0,0,0.0,SILVER BUTTE 1 ADIT,CANYON CREEK,16125009000400430220,UNN STR > MIDDLE CR,0,1,1,12,31,MIGRT,OWRD,1996-06-01,1996-06-01,0,0 G 10961 2,42.85455,-123.38351,orD1
2,Permit: G 12684 * MI,G 12684,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,23327,9355,29682,G,13684,G,12684,0,,0,,,GW,,,BENTON MINES INC.,1,,WE,MI,0,MINING,1994-05-06,0.0,0.11,1,0.22,0.0,0,0.0,A WELL,ROGUE RIVER,15168010500040,DRAIN CR > WHISKY CR,0,1,1,12,31,MIGRT,OWRD,2001-06-01,2001-06-01,0,0 G 12684 1,42.68227,-123.62944,orD2
3,Permit: G 12750 * MI,G 12750,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,23390,9480,29835,G,13944,G,12750,0,,0,,,GW,STEYAERT,JEFF,KNIFE RIVER CORP.,1,,WE,MI,0,MINING,1995-01-18,0.0,0.67,0,0.67,0.0,0,0.0,A WELL,TROUT CREEK BASIN,5198012200060050,TROUT CR > INDIAN FORD CR,0,1,1,12,31,MIGRT,OWRD,2001-05-01,2001-05-01,0,0 G 12750 1,44.30105,-121.65465,orD3
4,Permit: G 12750 * MI,G 12750,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,23390,9480,29836,G,13944,G,12750,0,,0,,,GW,STEYAERT,JEFF,KNIFE RIVER CORP.,1,,WE,MI,0,MINING,1995-01-18,0.0,0.11,0,0.11,0.0,0,0.0,A WELL,TROUT CREEK BASIN,5198012200060050,TROUT CR > INDIAN FORD CR,0,1,1,12,31,MIGRT,OWRD,2001-05-01,2001-05-01,0,0 G 12750 1,44.30105,-121.65465,orD4


In [4]:
# For creating SiteTypeCV: maps the code found in `source_typ` to a
# human-readable site type label.
STCVDict = {
    "LK": "lake",
    "DR": "drain",
    "SP": "spring",
    "ST": "stream",
    "SL": "slough",
    "WW": "waste water",
    "WE": "well",
    "WR": "winter runoff",
    "SM": "sump",
    "PD": "pond",
    "RS": "reservoir",
    "DT": "ditch",
    "SE": "sewage effluent",
    "CN": "canal",
}


def assignSiteTypeCV(colrowValue):
    """Return the site type label for a source-type code.

    Parameters
    ----------
    colrowValue : scalar
        One cell value; surrounding whitespace is ignored.

    Returns
    -------
    str
        The matching label from ``STCVDict``, or ``""`` when the value is
        missing, blank, or not a known code.
    """
    if pd.isnull(colrowValue):
        return ""
    # str() keeps non-string cells (e.g. numbers) from raising on .strip();
    # unknown codes fall back to "" instead of relying on a bare except.
    return STCVDict.get(str(colrowValue).strip(), "")

dfinPOD['in_SiteTypeCV'] = dfinPOD.apply(lambda row: assignSiteTypeCV(row['source_typ']), axis=1)
dfinPOD['in_SiteTypeCV'].unique()

array(['well', 'stream', 'spring', 'reservoir', 'sump', 'winter runoff',
       'lake', 'waste water', 'drain', 'pond', 'ditch', 'canal',
       'sewage effluent', 'slough'], dtype=object)

In [5]:
# Creating Ownername.
# Concatenating first and last name of individual.
# Determining if company is available, split string.
# combine together for output.

import re

# Characters removed from the combined owner string before title-casing.
# Raw string: the previous non-raw pattern contained the invalid escape "\)".
_OWNER_PUNCTUATION = re.compile(r"[$@&.;,/)(-]")


def _cleanNamePart(value):
    """Return *value* as a stripped string, or '' when it is missing."""
    if pd.isnull(value):
        return ""
    return str(value).strip()


# first & last name function
def assignownerName(colrowValue1, colrowValue2):
    """Join two name parts with a single space.

    Each part is stripped first; a missing or blank part is skipped, so the
    result never carries leading, trailing, or doubled spaces.

    Parameters
    ----------
    colrowValue1, colrowValue2 : scalar
        The two name parts, in output order.

    Returns
    -------
    str
        The joined name, or ``""`` when both parts are empty.
    """
    parts = (_cleanNamePart(colrowValue1), _cleanNamePart(colrowValue2))
    return " ".join(part for part in parts if part)


# Business name and Concatenate
def assignownerNameORCompany(buisName, fName, lName):
    """Build one title-cased owner string from person and company fields.

    Parameters
    ----------
    buisName : scalar
        Company field. ";" separates several owners; within a ";"-separated
        entry, comma-separated pieces are reversed ("LAST, FIRST" becomes
        "FIRST LAST"). Without ";" the comma-separated pieces keep their order.
    fName, lName : scalar
        Person name parts, joined as "<fName> <lName>".

    Returns
    -------
    str
        Person name followed by company name, with the punctuation characters
        ``$ @ & . ; , / ) ( -`` removed, whitespace collapsed to single
        spaces, and ``str.title()`` applied. ``""`` when every input is empty.
    """
    # Concatenating first and last name together.
    personName = assignownerName(fName, lName)

    # Clean company name entry. Pieces are re-joined with spaces so that
    # removing the commas afterwards cannot glue two words together.
    companyText = _cleanNamePart(buisName)
    owners = []
    if ";" in companyText:
        for entry in companyText.split(";"):
            pieces = [piece.strip() for piece in entry.split(",") if piece.strip()]
            pieces.reverse()
            if pieces:
                owners.append(" ".join(pieces))
    elif companyText:
        pieces = [piece.strip() for piece in companyText.split(",") if piece.strip()]
        if pieces:
            owners.append(" ".join(pieces))
    companyName = ", ".join(owners)

    # Concatenating together, create output string.
    combined = ", ".join(text for text in (personName, companyName) if text)
    combined = _OWNER_PUNCTUATION.sub("", combined)
    # split()/join collapses every run of whitespace, not just one double space.
    return " ".join(combined.split()).title()

dfinPOD['in_AllocationOwner'] = dfinPOD.apply(lambda row: assignownerNameORCompany(row['name_compa'], row['name_first'], row['name_last']), axis=1)
dfinPOD['in_AllocationOwner'].unique()

  outString = re.sub("[$@&.;,/\)(-]", "", outString).replace("  ", " ").title().strip()


array(['Formosa Exploration Inc', 'Benton Mines Inc',
       'Jeff Steyaert Knife River Corp', ...,
       'Paul Knopp Northwest Floriculture', 'Pacific Farms Co Llc',
       'Mildred Hicks'], dtype=object)

In [6]:
#Determining AllocationTimeframe Start & End time for each site.

def formatDateString(inString1, inString2):
    """Format two integer-like values as ``"<first>/<second>"``.

    Parameters
    ----------
    inString1, inString2 : scalar
        Values accepted by ``int()`` (e.g. ``12``, ``"12"``, ``12.0``).

    Returns
    -------
    str
        For example ``"12/31"``; ``""`` when either value cannot be converted
        to an integer (blank string, ``None``, NaN, infinity, non-numeric text).
    """
    try:
        return f"{int(inString1)}/{int(inString2)}"
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "no date"; anything else
        # should surface instead of being swallowed by a bare except.
        return ''

dfinPOD['in_AllocationTimeframeStart'] = dfinPOD.apply(lambda row: formatDateString(row['begin_mont'], row['begin_day']), axis=1)
dfinPOD['in_AllocationTimeframeEnd'] = dfinPOD.apply(lambda row: formatDateString(row['end_month'], row['end_day']), axis=1)

In [7]:
# Create the output POD dataframe.
# Each `in_*` column is either copied from dfinPOD, set to a constant, or left
# as an empty string placeholder.
df = pd.DataFrame()

# Data Assessment UUID (assigned first, so it supplies the row index that the
# constant columns below are broadcast over)
df['WaDEUUID'] = dfinPOD['WaDEUUID']

# Method Info
df['in_MethodUUID'] = "ORwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "ORwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "ORwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOD['source']
df['in_WaterSourceNativeID'] = ""
df['in_WaterSourceTypeCV'] = dfinPOD['wr_type']  # raw code here; translated to a label in a later cell

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""  # already set under WaterSource Info; re-assigning the same value changes nothing
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOD['Latitude']
df['in_Longitude'] = dfinPOD['Longitude']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POD"
df['in_SiteName'] = ""
# "POD" + integer id; blank or missing ids are replaced by 0 first, so they become "POD0".
df['in_SiteNativeID'] = "POD" + dfinPOD['pod_locati'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = dfinPOD['in_SiteTypeCV']
df['in_StateCV'] = "OR"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = dfinPOD['duty']
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfinPOD['rate_cfs']
df['in_AllocationLegalStatusCV'] = ""
# Integer-formatted string id; this exact text is the join key used when POU rows are merged with outPOD.
df['in_AllocationNativeID'] =  dfinPOD['snp_id'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()
df['in_AllocationOwner'] = dfinPOD['in_AllocationOwner']
df['in_AllocationPriorityDate'] = dfinPOD['priority_d']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = dfinPOD['in_AllocationTimeframeEnd']
df['in_AllocationTimeframeStart'] = dfinPOD['in_AllocationTimeframeStart']
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfinPOD['acre_feet']
df['in_BeneficialUseCategory'] = dfinPOD['use_code_d']
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = ""
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOD['wris_link']  #for WaterAllocationNativeURL

# Copy, drop exact duplicate rows, renumber the index, and blank out NaN values.
outPOD = df.copy()
outPOD = outPOD.drop_duplicates().reset_index(drop=True).replace(np.nan, '')
print(len(outPOD))
outPOD.head()

195986


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,orD0,ORwr_M1,ORwr_V1,ORwr_O1,,,,FORMOSA 1 ADIT,,GW,,,,4326,,,,42.85582,-123.3829,,,POD,,POD6909,,well,OR,,,,,,,,0.0,,0.04,,21755,Formosa Exploration Inc,1989-11-21,,12/31,1/1,,0.0,MINING,,,,,,0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
1,orD1,ORwr_M1,ORwr_V1,ORwr_O1,,,,SILVER BUTTE 1 ADIT,,GW,,,,4326,,,,42.85455,-123.38351,,,POD,,POD6910,,well,OR,,,,,,,,0.0,,0.005,,21755,Formosa Exploration Inc,1989-11-21,,12/31,1/1,,0.0,MINING,,,,,,0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
2,orD2,ORwr_M1,ORwr_V1,ORwr_O1,,,,A WELL,,GW,,,,4326,,,,42.68227,-123.62944,,,POD,,POD9355,,well,OR,,,,,,,,0.0,,0.11,,23327,Benton Mines Inc,1994-05-06,,12/31,1/1,,0.0,MINING,,,,,,0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
3,orD3,ORwr_M1,ORwr_V1,ORwr_O1,,,,A WELL,,GW,,,,4326,,,,44.30105,-121.65465,,,POD,,POD9480,,well,OR,,,,,,,,0.0,,0.67,,23390,Jeff Steyaert Knife River Corp,1995-01-18,,12/31,1/1,,0.0,MINING,,,,,,0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
4,orD4,ORwr_M1,ORwr_V1,ORwr_O1,,,,A WELL,,GW,,,,4326,,,,44.30105,-121.65465,,,POD,,POD9480,,well,OR,,,,,,,,0.0,,0.11,,23390,Jeff Steyaert Knife River Corp,1995-01-18,,12/31,1/1,,0.0,MINING,,,,,,0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...


## Place of Use Data

In [8]:
# Input File - POU (place of use) shapefile data.
# Reads the zipped shapefile into a GeoDataFrame, blanks out NaN values and the
# literal text "nan,nan", then drops the geometry column.
pouInput = 'shapefile/wr_v_pou_public.zip'
dfinPOU = gpd.read_file(pouInput).replace(np.nan, "").replace("nan,nan", "") # GeoDataFrame read
dfinPOU = dfinPOU.drop(['geometry'], axis=1)

# WaDE UUID tracker for data assessment.
# When the column is absent, build one id per row from the row index ("orU0",
# "orU1", ...) and export the dataframe as a zipped CSV in the working directory.
if 'WaDEUUID' not in dfinPOU:
    dfinPOU['WaDEUUID'] = "orU" + dfinPOU.index.astype(str)
    dfinPOU.to_csv('wr_v_pou_public.zip', compression=dict(method='zip', archive_name='wr_v_pou_public.csv'), index=False)

print(len(dfinPOU))
dfinPOU.head(1)

110623


Unnamed: 0,pou_displa,pou_disp_1,wris_link,snp_id,pou_use_id,app_char,app_nbr,permit_cha,permit_nbr,cert_nbr,claim_char,claim_nbr,decree_tit,transfer_n,wr_type,name_last,name_first,name_compa,use_code,use_catego,use_code_d,priority_d,supplement,wris_acres,technician,agency,rec_creati,last_updt_,feature_qu,delta_size,remarks,cent_Longi,cent_Latit,Shape_Leng,Shape_Area,WaDEUUID
0,App: P 74762 * LV,P 74762,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,2306,1075,P,74762,,0,0,,0,,,ST,GARREN,ROY,,LV,8,LIVESTOCK,1994-10-13,0,0.0,DAM,OWRD,2022-02-02,2022-02-02,30,0.0,PLACED USING 2018 IMAGERY,-123.36539,43.07279,0.00324,0.0,orU0


In [9]:
# POU data is missing key inputs, will combine with POD data to fill in missing gaps.
# Normalise the join key exactly like outPOD['in_AllocationNativeID'] was built
# (blank/NaN -> 0 -> int -> str). Without the int step a float-typed snp_id
# would render as "2306.0" and silently match nothing on the POD side.
dfinPOU['snp_id'] = dfinPOU['snp_id'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()  #for AllocationNativeID

# Left join: every POU row is kept and is repeated once for each POD row that
# shares its snp_id, so the row count grows. Shared column name WaDEUUID is
# suffixed to WaDEUUID_x (POU) and WaDEUUID_y (POD).
dfinPOU = pd.merge(dfinPOU, outPOD, left_on='snp_id', right_on='in_AllocationNativeID', how='left')
print(len(dfinPOU))
dfinPOU.head()

321972


Unnamed: 0,pou_displa,pou_disp_1,wris_link,snp_id,pou_use_id,app_char,app_nbr,permit_cha,permit_nbr,cert_nbr,claim_char,claim_nbr,decree_tit,transfer_n,wr_type,name_last,name_first,name_compa,use_code,use_catego,use_code_d,priority_d,supplement,wris_acres,technician,agency,rec_creati,last_updt_,feature_qu,delta_size,remarks,cent_Longi,cent_Latit,Shape_Leng,Shape_Area,WaDEUUID_x,WaDEUUID_y,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,App: P 74762 * LV,P 74762,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,2306,1075,P,74762,,0,0,,0,,,ST,GARREN,ROY,,LV,8,LIVESTOCK,1994-10-13,0,0.0,DAM,OWRD,2022-02-02,2022-02-02,30,0.0,PLACED USING 2018 IMAGERY,-123.36539,43.07279,0.00324,0.0,orU0,orD121465,ORwr_M1,ORwr_V1,ORwr_O1,,,,RUNOFF,,ST,,,,4326.0,,,,43.07269,-123.36562,,,POD,,POD17850,,winter runoff,OR,,,,,,,,0.0,,0.0,,2306,Roy Garren,1994-10-13,,12/31,1/1,,6.9,LIVESTOCK,,,,,,0.0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
1,App: P 77106 * LV,P 77106,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,2813,1590,P,77106,,0,0,,0,,,ST,HOPKINS,MILDRED,JERRY BARRY TRUST,LV,8,LIVESTOCK,1994-12-30,0,0.0,DAM,OWRD,2024-05-31,2024-05-31,15,0.0,ADDED BY POLY-POINT TOOL,-122.31927,42.03179,0.00062,0.0,orU1,orD121466,ORwr_M1,ORwr_V1,ORwr_O1,,,,RUNOFF,,ST,,,,4326.0,,,,42.03179,-122.31927,,,POD,,POD19610,,winter runoff,OR,,,,,,,,0.0,,0.0,,2813,Mildred Hopkins Jerry Barry Trust,1994-12-30,,12/31,1/1,,8.83,LIVESTOCK,,,,,,0.0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
2,App: P 81441 * LV,P 81441,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,5135,4124,P,81441,,0,0,,0,,,ST,WILLIAMS,TERRY,DASH W BAR RANCH,LV,8,LIVESTOCK,1996-08-07,0,0.0,MW,OWRD,2005-06-30,2005-07-22,30,0.0,PLACED USING DRG,-118.36448,43.73686,0.00894,0.0,orU2,orD121467,ORwr_M1,ORwr_V1,ORwr_O1,,,,RUNOFF,,ST,,,,4326.0,,,,43.73574,-118.36362,,,POD,,POD26615,,winter runoff,OR,,,,,,,,0.0,,0.0,,5135,Terry Williams Dash W Bar Ranch,1996-08-07,,12/31,1/1,,2.4,LIVESTOCK,,,,,,0.0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
3,App: P 81441 * LV,P 81441,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,5135,4124,P,81441,,0,0,,0,,,ST,WILLIAMS,TERRY,DASH W BAR RANCH,LV,8,LIVESTOCK,1996-08-07,0,0.0,MW,OWRD,2005-06-30,2005-07-22,30,0.0,PLACED USING DRG,-118.36448,43.73686,0.00894,0.0,orU2,orD185371,ORwr_M1,ORwr_V1,ORwr_O1,,,,RUNOFF,,ST,,,,4326.0,,,,43.73574,-118.36362,,,POD,,POD26615,,winter runoff,OR,,,,,,,,0.0,,0.0,,5135,Terry Williams Dash W Bar Ranch,1996-08-07,,12/31,1/1,,2.4,WILDLIFE,,,,,,0.0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
4,App: P 82570 * FP,P 82570,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,5923,5276,P,82570,,0,0,,0,,,ST,,,U.S. BUREAU OF LAND MANAGEMENT,FP,M,FIRE PROTECTION,1997-01-28,0,0.0,CKP,OWRD,2024-03-18,2024-05-06,30,0.0,PLACED USING 2022 IMAGERY,-122.65769,42.29338,0.04222,0.0,orU3,orD121468,ORwr_M1,ORwr_V1,ORwr_O1,,,,ARMSTRONG GULCH,,ST,,,,4326.0,,,,42.18482,-122.96264,,,POD,,POD28218,,stream,OR,,,,,,,,0.0,,0.0,,5923,Us Bureau Of Land Management,1997-01-28,,12/31,1/1,,0.005,LIVESTOCK,,,,,,0.0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...


In [10]:
# # check for duplicate SiteNativeIDs
# dfinPOU['pou_use_id'] = np.where(dfinPOU['pou_use_id'].duplicated(keep=False),
#                                  dfinPOU['pou_use_id'].astype(str).str.cat(dfinPOU.groupby('pou_use_id').cumcount().add(1).astype(str), sep='_'),
#                                  dfinPOU['pou_use_id'])

# print(dfinPOU['pou_use_id'].nunique())
# dfinPOU['pou_use_id'].unique()

In [11]:
# Create the output POU dataframe.
# dfinPOU here is the POU table already left-merged with outPOD, so columns
# marked "from POD" are the merged-in `in_*` values and are NaN for POU rows
# that had no matching POD record.
df = pd.DataFrame()

# Data Assessment UUID (the POU-side id; WaDEUUID_y would be the POD-side id)
df['WaDEUUID'] = dfinPOU['WaDEUUID_x']

# Method Info
df['in_MethodUUID'] = "ORwr_M1"

# Variable Info
df['in_VariableSpecificUUID'] = "ORwr_V1"

# Organization Info
df['in_OrganizationUUID'] = "ORwr_O1"

# WaterSource Info
df['in_Geometry'] = ""
df['in_GNISFeatureNameCV'] = ""
df['in_WaterQualityIndicatorCV'] = ""
df['in_WaterSourceName'] = dfinPOU['in_WaterSourceName'] # from POD
df['in_WaterSourceNativeID'] = ""
df['in_WaterSourceTypeCV'] = dfinPOU['in_WaterSourceTypeCV'] # from POD

# Site Info
df['in_CoordinateAccuracy'] = ""
df['in_CoordinateMethodCV'] = ""
df['in_County'] = ""
df['in_EPSGCodeCV'] = 4326
df['in_Geometry'] = ""  # already set under WaterSource Info; re-assigning the same value changes nothing
df['in_GNISCodeCV'] = ""
df['in_HUC12'] = ""
df['in_HUC8'] = ""
df['in_Latitude'] = dfinPOU['cent_Latit']
df['in_Longitude'] = dfinPOU['cent_Longi']
df['in_NHDNetworkStatusCV'] = ""
df['in_NHDProductCV'] = ""
df['in_PODorPOUSite'] = "POU"
df['in_SiteName'] = ""
# "POU" + integer id; blank or missing ids are replaced by 0 first, so they become "POU0".
df['in_SiteNativeID'] = "POU" + dfinPOU['pou_use_id'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()
df['in_SitePoint'] = ""
df['in_SiteTypeCV'] = ""
df['in_StateCV'] = "OR"
df['in_USGSSiteID'] = ""

# AllocationAmount Info
df['in_AllocationApplicationDate'] = ""
df['in_AllocationAssociatedConsumptiveUseSiteIDs'] = ""
df['in_AllocationAssociatedWithdrawalSiteIDs'] = ""
df['in_AllocationBasisCV'] = ""
df['in_AllocationChangeApplicationIndicator'] = ""
df['in_AllocationCommunityWaterSupplySystem'] = ""
df['in_AllocationCropDutyAmount'] = dfinPOU['in_AllocationCropDutyAmount'] # from POD
df['in_AllocationExpirationDate'] = ""
df['in_AllocationFlow_CFS'] = dfinPOU['in_AllocationFlow_CFS'] # From POD
df['in_AllocationLegalStatusCV'] = ""
df['in_AllocationNativeID'] =  dfinPOU['snp_id'].replace("", 0).fillna(0).astype(int).astype(str).str.strip()
df['in_AllocationOwner'] = dfinPOU['in_AllocationOwner'] # from POD
df['in_AllocationPriorityDate'] = dfinPOU['priority_d']
df['in_AllocationSDWISIdentifierCV'] = ""
df['in_AllocationTimeframeEnd'] = dfinPOU['in_AllocationTimeframeEnd'] # from POD
df['in_AllocationTimeframeStart'] = dfinPOU['in_AllocationTimeframeStart'] # from POD
df['in_AllocationTypeCV'] = ""
df['in_AllocationVolume_AF'] = dfinPOU['in_AllocationVolume_AF']  # from POD
df['in_BeneficialUseCategory'] = dfinPOU['use_code_d']  # POU's own use description, not the POD one
df['in_CommunityWaterSupplySystem'] = ""
df['in_CropTypeCV'] = ""
df['in_CustomerTypeCV'] = ""
df['in_DataPublicationDate'] = ""
df['in_DataPublicationDOI'] = ""
df['in_ExemptOfVolumeFlowPriority'] = 0
df['in_GeneratedPowerCapacityMW'] = ""
df['in_IrrigatedAcreage'] = dfinPOU['wris_acres']
df['in_IrrigationMethodCV'] = ""
df['in_LegacyAllocationIDs'] = ""
df['in_OwnerClassificationCV'] = ""
df['in_PopulationServed'] = ""
df['in_PowerType'] = ""
df['in_PrimaryBeneficialUseCategory'] = ""
df['in_SDWISIdentifierCV'] = ""
df['in_WaterAllocationNativeURL'] = dfinPOU['in_WaterAllocationNativeURL']  # from POD, for WaterAllocationNativeURL

# Copy, drop exact duplicate rows, renumber the index, and blank out NaN values.
# Rows that differ only in a POD-derived value stay separate, so one WaDEUUID
# can appear on more than one output row.
outPOU = df.copy()
outPOU = outPOU.drop_duplicates().reset_index(drop=True).replace(np.nan, '')
print(len(outPOU))
outPOU.head()

220655


Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,orU0,ORwr_M1,ORwr_V1,ORwr_O1,,,,RUNOFF,,ST,,,,4326,,,,43.07279,-123.36539,,,POU,,POU1075,,,OR,,,,,,,,0.0,,0.0,,2306,Roy Garren,1994-10-13,,12/31,1/1,,6.9,LIVESTOCK,,,,,,0,,0.0,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
1,orU1,ORwr_M1,ORwr_V1,ORwr_O1,,,,RUNOFF,,ST,,,,4326,,,,42.03179,-122.31927,,,POU,,POU1590,,,OR,,,,,,,,0.0,,0.0,,2813,Mildred Hopkins Jerry Barry Trust,1994-12-30,,12/31,1/1,,8.83,LIVESTOCK,,,,,,0,,0.0,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
2,orU2,ORwr_M1,ORwr_V1,ORwr_O1,,,,RUNOFF,,ST,,,,4326,,,,43.73686,-118.36448,,,POU,,POU4124,,,OR,,,,,,,,0.0,,0.0,,5135,Terry Williams Dash W Bar Ranch,1996-08-07,,12/31,1/1,,2.4,LIVESTOCK,,,,,,0,,0.0,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
3,orU3,ORwr_M1,ORwr_V1,ORwr_O1,,,,ARMSTRONG GULCH,,ST,,,,4326,,,,42.29338,-122.65769,,,POU,,POU5276,,,OR,,,,,,,,0.0,,0.0,,5923,Us Bureau Of Land Management,1997-01-28,,12/31,1/1,,0.005,FIRE PROTECTION,,,,,,0,,0.0,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
4,orU3,ORwr_M1,ORwr_V1,ORwr_O1,,,,ARMSTRONG GULCH,,ST,,,,4326,,,,42.29338,-122.65769,,,POU,,POU5276,,,OR,,,,,,,,0.0,,0.0,,5923,Us Bureau Of Land Management,1997-01-28,,12/31,1/1,,0.021,FIRE PROTECTION,,,,,,0,,0.0,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...


## Concatenate POD and POU data

In [12]:
# Stack the POD and POU output tables, then drop exact duplicate rows,
# renumber the index, and blank out NaN values.
frames = [outPOD, outPOU]
outdf = (
    pd.concat(frames)
    .drop_duplicates()
    .reset_index(drop=True)
    .replace(np.nan, "")
)
print(len(outdf))

416641


## Custom WaDE Elements due to missing info

In [13]:
# For creating WaterSourceTypeCV: maps the raw type code to its label.
WSTypeDict = {
    "ST": "Storage",
    "GW": "Groundwater",
    "SW": "Surface Water",
}


def assignWaterSourceTypeCV(colrowValue):
    """Return the water source type label for a raw type code.

    Parameters
    ----------
    colrowValue : scalar
        One cell value; it is converted with ``str()`` and stripped.

    Returns
    -------
    str
        The matching label from ``WSTypeDict``, or ``"WaDE Blank"`` for
        anything else (blank text, missing values, unknown codes).
    """
    # A missing value stringifies to text such as "nan" or "None", which is
    # not a key, so the default covers blanks, nulls and unknown codes alike.
    return WSTypeDict.get(str(colrowValue).strip(), "WaDE Blank")

outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: assignWaterSourceTypeCV(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Groundwater', 'Surface Water', 'Storage', 'WaDE Blank'],
      dtype=object)

In [14]:
#Fixing Beneficial Uses PRIMARY_PURPOSE

# Combined-use labels that are rewritten from "A AND B" wording to a
# comma-separated list. Only these exact labels are changed.
_benUseRewrites = {
    "IRRIGATION, LIVESTOCK AND DOMESTIC": "IRRIGATION, LIVESTOCK, DOMESTIC",
    "IRRIGATION AND LIVESTOCK": "IRRIGATION, LIVESTOCK",
    "LIVESTOCK AND WILDLIFE": "LIVESTOCK, WILDLIFE",
    "DOMESTIC AND LIVESTOCK": "DOMESTIC, LIVESTOCK",
    "IRRIGATION AND DOMESTIC": "IRRIGATION, DOMESTIC",
    "HUMAN CONSUMPTION AND LIVESTOCK": "HUMAN CONSUMPTION, LIVESTOCK",
}


def fixBenUse(val):
    """Return the beneficial-use label with known "AND" combinations split.

    The value is converted with ``str()`` and stripped. If the result is one
    of the labels in ``_benUseRewrites`` its comma-separated form is returned;
    any other text is returned stripped but otherwise unchanged.
    """
    label = str(val).strip()
    return _benUseRewrites.get(label, label)

outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: fixBenUse(row['in_BeneficialUseCategory']), axis=1)
for x in outdf['in_BeneficialUseCategory'].sort_values().unique():
    print(f'"' + x + '",')

"AESTHETICS",
"AGRICULTURE USES",
"AIR CONDITIONING OR HEATING",
"ANADROMOUS AND RESIDENT FISH HABITAT (INSTREAM)",
"AQUACULTURE",
"AQUATIC LIFE (INSTREAM)",
"CAMPSITE",
"COMMERCIAL USES",
"CRANBERRY",
"DAIRY BARN USES",
"DOMESTIC",
"DOMESTIC EXPANDED",
"DOMESTIC INCLUDING LAWN AND GARDEN",
"DOMESTIC, LIVESTOCK",
"FIRE PROTECTION",
"FIRE PROTECTION (INSTREAM)",
"FISH AND WILDLIFE",
"FISH CULTURE",
"FISHERY ENHANCEMENT (INSTREAM)",
"FLOW AUGMENTATION",
"FOREST MANAGEMENT",
"FROST PROTECTION",
"GEO-THERMAL (HEATING & COOLING)",
"GEO-THERMAL(ENERGY PRODUCTION)",
"GREENHOUSE",
"GROUND WATER RECHARGE",
"GROUNDWATER AQUIFER STORAGE & RECOVERY",
"GROUP DOMESTIC",
"HARVESTING OF CRANBERRIES",
"HUMAN CONSUMPTION",
"HUMAN CONSUMPTION, LIVESTOCK",
"INDUSTRIAL/MANUFACTURING USES",
"IRRIGATION",
"IRRIGATION OF CRANBERRIES",
"IRRIGATION, DOMESTIC",
"IRRIGATION, LIVESTOCK",
"IRRIGATION, LIVESTOCK, DOMESTIC",
"LABORATORY",
"LIVESTOCK",
"LIVESTOCK, WILDLIFE",
"LOG DECK SPRINKLING",
"MINING",
"MULTIPLE 

In [15]:
# Clean owner name up
def cleanOwnerDataFunc(Val):
    """Strip punctuation from a text value and title-case it.

    Parameters
    ----------
    Val : scalar
        One cell value.

    Returns
    -------
    str
        The text with the characters ``$ @ & . ; / ) ( -`` removed (commas are
        kept), ``str.title()`` applied, and outer whitespace stripped. Missing
        values (``None``, NaN) give ``""`` rather than the literal text
        "None"/"Nan".
    """
    if pd.isnull(Val):
        return ""
    # Raw string: the previous non-raw pattern contained the invalid escape
    # "\)" and triggered a SyntaxWarning. The character set is unchanged.
    return re.sub(r"[$@&.;/)(-]", "", str(Val)).title().strip()

  Val = re.sub("[$@&.;/\)(-]", "", Val).title().strip()


In [16]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: cleanOwnerDataFunc(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Formosa Exploration Inc', 'Benton Mines Inc',
       'Jeff Steyaert Knife River Corp', ...,
       'Paul Knopp Northwest Floriculture', 'Pacific Farms Co Llc',
       'Mildred Hicks'], dtype=object)

In [17]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: cleanOwnerDataFunc(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['Formosa 1 Adit', 'Silver Butte 1 Adit', 'A Well', ...,
       'Drainage Swale', 'Little Sagehen Creek', 'Bonita Meadows Creek'],
      dtype=object)

In [18]:
outdf['in_SiteName'] = outdf.apply(lambda row: cleanOwnerDataFunc(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array([''], dtype=object)

In [19]:
# Ensure Empty String

def ensureEmptyString(val):
    """Return *val* as stripped text, mapping "no value" to an empty string.

    Parameters
    ----------
    val : scalar
        One cell value.

    Returns
    -------
    str
        ``""`` when the value is missing (``None``, NaN), blank or
        whitespace-only, or the literal text "nan"; otherwise ``str(val)``
        with outer whitespace stripped.
    """
    # Test for a missing value before str(): afterwards it is ordinary text
    # ("None", "nan") and a null check can no longer succeed.
    if not isinstance(val, str) and pd.isnull(val):
        return ""
    text = str(val).strip()
    return "" if text in ("", "nan") else text

In [20]:
outdf['in_WaterSourceName'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceName']), axis=1)
outdf['in_WaterSourceName'].unique()

array(['Formosa 1 Adit', 'Silver Butte 1 Adit', 'A Well', ...,
       'Drainage Swale', 'Little Sagehen Creek', 'Bonita Meadows Creek'],
      dtype=object)

In [21]:
outdf['in_WaterSourceTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_WaterSourceTypeCV']), axis=1)
outdf['in_WaterSourceTypeCV'].unique()

array(['Groundwater', 'Surface Water', 'Storage', 'WaDE Blank'],
      dtype=object)

In [22]:
outdf['in_SiteName'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteName']), axis=1)
outdf['in_SiteName'].unique()

array([''], dtype=object)

In [23]:
outdf['in_SiteTypeCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_SiteTypeCV']), axis=1)
outdf['in_SiteTypeCV'].unique()

array(['well', 'stream', 'spring', 'reservoir', 'sump', 'winter runoff',
       'lake', 'waste water', 'drain', 'pond', 'ditch', 'canal',
       'sewage effluent', 'slough', ''], dtype=object)

In [24]:
outdf['in_AllocationLegalStatusCV'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationLegalStatusCV']), axis=1)
outdf['in_AllocationLegalStatusCV'].unique()

array([''], dtype=object)

In [25]:
outdf['in_AllocationOwner'] = outdf.apply(lambda row: ensureEmptyString(row['in_AllocationOwner']), axis=1)
outdf['in_AllocationOwner'].unique()

array(['Formosa Exploration Inc', 'Benton Mines Inc',
       'Jeff Steyaert Knife River Corp', ...,
       'Paul Knopp Northwest Floriculture', 'Pacific Farms Co Llc',
       'Mildred Hicks'], dtype=object)

In [26]:
outdf['in_BeneficialUseCategory'] = outdf.apply(lambda row: ensureEmptyString(row['in_BeneficialUseCategory']), axis=1)
outdf['in_BeneficialUseCategory'].unique()

array(['MINING', 'NURSERY USES', 'TEMPERATURE CONTROL',
       'SUPPLEMENTAL FLOOD HARVESTING', 'CRANBERRY', 'FROST PROTECTION',
       'HARVESTING OF CRANBERRIES', 'GREENHOUSE', 'AGRICULTURE USES',
       'USE IN A MINT STILL', 'DAIRY BARN USES', 'DOMESTIC',
       'DOMESTIC INCLUDING LAWN AND GARDEN', 'DOMESTIC EXPANDED',
       'GROUP DOMESTIC', 'USE WITHIN A SCHOOL', 'DOMESTIC, LIVESTOCK',
       'HUMAN CONSUMPTION', 'RESTROOM', 'HUMAN CONSUMPTION, LIVESTOCK',
       'IRRIGATION', 'IRRIGATION, LIVESTOCK, DOMESTIC',
       'PRIMARY AND SUPPLEMENTAL IRRIGATION', 'SUPPLEMENTAL IRRIGATION',
       'IRRIGATION, DOMESTIC', 'IRRIGATION OF CRANBERRIES',
       'IRRIGATION, LIVESTOCK', 'PRACTICABLY IRRIGABLE ACREAGE',
       'INDUSTRIAL/MANUFACTURING USES', 'COMMERCIAL USES',
       'GEO-THERMAL (HEATING & COOLING)', 'SAWMILL',
       'LOG DECK SPRINKLING', 'SHOP', 'LABORATORY', 'RECREATION',
       'CAMPSITE', 'SWIMMING', 'RAM', 'POWER DEVELOPMENT',
       'GEO-THERMAL(ENERGY PRODUCTION)',

In [27]:
# in_Latitude
outdf['in_Latitude'] = pd.to_numeric(outdf['in_Latitude'], errors='coerce').fillna("")
outdf['in_Latitude'].unique()

array([42.85581554, 42.85455396, 42.68227127, ..., 45.05922444,
       45.09928477, 45.41008825])

In [28]:
# in_Longitude
outdf['in_Longitude'] = pd.to_numeric(outdf['in_Longitude'], errors='coerce').fillna("")
outdf['in_Longitude'].unique()

array([-123.38289638, -123.38350641, -123.62943907, ..., -123.38643959,
       -123.05294764, -122.96200842])

In [29]:
# Update datatype of Priority Date to fit WaDE 2.0 structure.
# Values that cannot be parsed are coerced to NaT. normalize() then resets the
# time of day to midnight, which yields the same date-only timestamps as
# formatting to '%m/%d/%Y' text and parsing that text back, without the string
# round trip.
priorityDates = pd.to_datetime(outdf['in_AllocationPriorityDate'], errors='coerce')
outdf['in_AllocationPriorityDate'] = priorityDates.dt.normalize()
outdf['in_AllocationPriorityDate'].unique()

<DatetimeArray>
['1989-11-21 00:00:00', '1994-05-06 00:00:00', '1995-01-18 00:00:00',
 '1992-09-03 00:00:00', '1997-11-14 00:00:00', '1998-03-19 00:00:00',
 '2001-01-09 00:00:00', '1934-08-13 00:00:00', '1934-10-05 00:00:00',
 '1962-09-17 00:00:00',
 ...
 '1986-01-05 00:00:00', '1958-01-18 00:00:00', '1970-01-17 00:00:00',
 '1976-05-09 00:00:00', '1989-02-04 00:00:00', '1991-06-08 00:00:00',
 '1981-08-28 00:00:00', '1992-05-13 00:00:00', '1894-07-31 00:00:00',
 '1926-06-15 00:00:00']
Length: 22662, dtype: datetime64[ns]

In [30]:
# Fixing in_AllocationFlow_CFS datatype
flowCFS = pd.to_numeric(outdf['in_AllocationFlow_CFS'], errors='coerce')  # non-numeric entries become NaN
outdf['in_AllocationFlow_CFS'] = flowCFS.replace(0, "").fillna("")  # zero and NaN are both written out as ""
outdf['in_AllocationFlow_CFS'].unique()

array([0.04, 0.005, 0.11, ..., 10.52, 0.1581, 0.1626], dtype=object)

In [31]:
# Fixing in_AllocationVolume_AF datatype
volumeAF = pd.to_numeric(outdf['in_AllocationVolume_AF'], errors='coerce')  # non-numeric entries become NaN
outdf['in_AllocationVolume_AF'] = volumeAF.replace(0, "").fillna("")  # zero and NaN are both written out as ""
outdf['in_AllocationVolume_AF'].unique()

array(['', 40.0, 3.0, ..., 157.0, 3.825, 3.925], dtype=object)

In [32]:
# Fixing in_AllocationCropDutyAmount datatype
cropDutyAmount = pd.to_numeric(outdf['in_AllocationCropDutyAmount'], errors='coerce')  # non-numeric entries become NaN
outdf['in_AllocationCropDutyAmount'] = cropDutyAmount.replace(0, "").fillna("")  # zero and NaN are both written out as ""
outdf['in_AllocationCropDutyAmount'].unique()

array(['', 2.5, 5.0, 3.0, 1.0, 4.5, 0.05, 0.15, 4.0, 3.4, 3.52, 1.5, 3.5,
       3.29, 1.6, 8.0, 3.2, 1.375, 0.855, 0.244, 1.008, 0.164, 0.8, 1.9,
       0.51, 0.0025, 2.0, 7.5, 6.0, 3.9, 4.6, 4.4, 4.8, 7.0, 0.17, 0.03,
       3.6, 0.63, 1.2, 2.6, 4.9, 15.0, 42.0, 42.5, 22.5, 0.75, 4.2, 6.7,
       0.5, 1.3, 2.23, 30.0, 10.0, 3.58, 3.25, 0.3, 0.9, 0.7, 3.75, 1.97,
       2.4, 2.625, 3.06, 4.3, 2.19, 1.88, 2.27, 1.83, 3.1, 2.9, 0.1, 1.84,
       3.42, 2.34, 0.2, 0.6, 2.15, 2.75, 5.25, 2.1, 1.75, 3.84, 3.21,
       0.25, 72.0, 71.0, 32.5, 0.96, 0.71, 0.73, 1.35, 1.74, 1.32, 3.73,
       3.38, 1.875, 0.39, 25.0, 1.53, 1.95, 1.43, 2.93, 2.3, 0.06, 0.733,
       0.928], dtype=object)

In [33]:
# Fixing in_IrrigatedAcreage datatype
irrigatedAcreage = pd.to_numeric(outdf['in_IrrigatedAcreage'], errors='coerce')  # non-numeric entries become NaN
outdf['in_IrrigatedAcreage'] = irrigatedAcreage.replace(0, "").fillna("")  # zero and NaN are both written out as ""
outdf['in_IrrigatedAcreage'].unique()

array(['', 10.0, 38.5, ..., 542.5, 37.55, 188.9], dtype=object)

In [34]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the given value, converted to text, with the "wadeID" prefix."""
    return "wadeID" + str(colrowValue)

# One row per distinct (water source name, water source type) pair found in outdf;
# drop_duplicates keeps the first occurrence and the original index labels.
dfWaterSourceNativeID = outdf[['in_WaterSourceName', 'in_WaterSourceTypeCV']].drop_duplicates()

# Number the distinct pairs 1..N in order of first appearance, then turn each number
# into an ID. The counter column is mapped directly rather than applying a lambda
# row-by-row over a one-column frame.
dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp["Count"].map(assignWaterSourceNativeID)

# Lookup key: name and type concatenated as text with no separator.
dfWaterSourceNativeID['linkKey'] = dfWaterSourceNativeID['in_WaterSourceName'].astype(str) + dfWaterSourceNativeID['in_WaterSourceTypeCV'].astype(str)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
# Map each linkKey (as text) to its native ID; a repeated key keeps the last ID seen.
WaterSourceNativeIDdict = dict(zip(dfWaterSourceNativeID['linkKey'].astype(str), dfWaterSourceNativeID['in_WaterSourceNativeID']))
def retrieveWaterSourceNativeID(A, B):
    """Look up the WaDE custom water source native ID for a pair of values.

    A and B are converted to text, stripped of surrounding whitespace, and
    concatenated (A first) to form the key looked up in WaterSourceNativeIDdict.

    Returns the stored ID, or '' when both values are empty strings, both are
    null, or the key is not in the dictionary.
    """
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        return ''

    # NOTE(review): the key is stripped here, but the dictionary keys are built from
    # the un-stripped columns, so a value with padding would never match. Confirm
    # the inputs are already trimmed upstream.
    linkKey = str(A).strip() + str(B).strip()

    # A missing key is the only expected failure, so use a default instead of a
    # bare except that would also hide unrelated errors.
    return WaterSourceNativeIDdict.get(linkKey, '')

# Look up the native ID for each record from its water source name and type.
# Only these two columns are needed, so they are iterated in lockstep instead of
# applying a lambda row-by-row over the whole frame (which builds a row Series
# per record). The resulting list is assigned positionally, one ID per row.
outdf['in_WaterSourceNativeID'] = [
    retrieveWaterSourceNativeID(sourceName, sourceType)
    for sourceName, sourceType in zip(outdf['in_WaterSourceName'], outdf['in_WaterSourceTypeCV'])
]
outdf['in_WaterSourceNativeID'].unique()

array(['wadeID1', 'wadeID2', 'wadeID3', ..., 'wadeID16511', 'wadeID16512',
       'wadeID16513'], dtype=object)

## Shapefile Data
- For attaching geometry to csv inputs.

In [35]:
# PoU Shapefile Data
# Read the zipped place-of-use shapefile (path is relative to the working directory).
pouShapefilePath = 'shapefile/wr_v_pou_public.zip'
dfPoUshapetemp = gpd.read_file(pouShapefilePath)
dfPoUshapetemp.head(3)

Unnamed: 0,pou_displa,pou_disp_1,wris_link,snp_id,pou_use_id,app_char,app_nbr,permit_cha,permit_nbr,cert_nbr,claim_char,claim_nbr,decree_tit,transfer_n,wr_type,name_last,name_first,name_compa,use_code,use_catego,use_code_d,priority_d,supplement,wris_acres,technician,agency,rec_creati,last_updt_,feature_qu,delta_size,remarks,cent_Longi,cent_Latit,Shape_Leng,Shape_Area,geometry
0,App: P 74762 * LV,P 74762,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,2306,1075,P,74762,,0,0,,0,,,ST,GARREN,ROY,,LV,8,LIVESTOCK,1994-10-13,0,0.0,DAM,OWRD,2022-02-02,2022-02-02,30,0.0,PLACED USING 2018 IMAGERY,-123.36539,43.07279,0.00324,0.0,"POLYGON ((-123.36603 43.07302, -123.36573 43.0..."
1,App: P 77106 * LV,P 77106,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,2813,1590,P,77106,,0,0,,0,,,ST,HOPKINS,MILDRED,JERRY BARRY TRUST,LV,8,LIVESTOCK,1994-12-30,0,0.0,DAM,OWRD,2024-05-31,2024-05-31,15,0.0,ADDED BY POLY-POINT TOOL,-122.31927,42.03179,0.00062,0.0,"POLYGON ((-122.31938 42.03179, -122.31927 42.0..."
2,App: P 81441 * LV,P 81441,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...,5135,4124,P,81441,,0,0,,0,,,ST,WILLIAMS,TERRY,DASH W BAR RANCH,LV,8,LIVESTOCK,1996-08-07,0,0.0,MW,OWRD,2005-06-30,2005-07-22,30,0.0,PLACED USING DRG,-118.36448,43.73686,0.00894,0.0,"POLYGON ((-118.36564 43.73765, -118.36556 43.7..."


In [36]:
# # check for duplicate SiteNativeIDs
# dfPoUshapetemp['pou_use_id'] = np.where(dfPoUshapetemp['pou_use_id'].duplicated(keep=False),
#                                         dfPoUshapetemp['pou_use_id'].astype(str).str.cat(dfPoUshapetemp.groupby('pou_use_id').cumcount().add(1).astype(str), sep='_'),
#                                         dfPoUshapetemp['pou_use_id'])

# print(dfPoUshapetemp['pou_use_id'].nunique())
# dfPoUshapetemp['pou_use_id'].unique()

In [37]:
# Two-column table linking each PoU site native ID to its geometry.
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)

# Blank or missing pou_use_id values become 0 before the "POU" prefix is added.
pouUseIDs = dfPoUshapetemp['pou_use_id'].replace("", 0).fillna(0)
dfPoUshape['in_SiteNativeID'] = "POU" + pouUseIDs.astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']

# Drop fully identical rows, keeping the first occurrence and the original index.
dfPoUshape = dfPoUshape.drop_duplicates()
dfPoUshape.head(3)

Unnamed: 0,in_SiteNativeID,geometry
0,POU1075,"POLYGON ((-123.36603 43.07302, -123.36573 43.0..."
1,POU1590,"POLYGON ((-122.31938 42.03179, -122.31927 42.0..."
2,POU4124,"POLYGON ((-118.36564 43.73765, -118.36556 43.7..."


## The Output

In [38]:
# Summarize the output dataframe: column names, non-null counts, and dtypes.
outdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416641 entries, 0 to 416640
Data columns (total 63 columns):
 #   Column                                        Non-Null Count   Dtype         
---  ------                                        --------------   -----         
 0   WaDEUUID                                      416641 non-null  object        
 1   in_MethodUUID                                 416641 non-null  object        
 2   in_VariableSpecificUUID                       416641 non-null  object        
 3   in_OrganizationUUID                           416641 non-null  object        
 4   in_Geometry                                   416641 non-null  object        
 5   in_GNISFeatureNameCV                          416641 non-null  object        
 6   in_WaterQualityIndicatorCV                    416641 non-null  object        
 7   in_WaterSourceName                            416641 non-null  object        
 8   in_WaterSourceNativeID                        416641 n

In [39]:
# Display the output dataframe for visual inspection.
outdf

Unnamed: 0,WaDEUUID,in_MethodUUID,in_VariableSpecificUUID,in_OrganizationUUID,in_Geometry,in_GNISFeatureNameCV,in_WaterQualityIndicatorCV,in_WaterSourceName,in_WaterSourceNativeID,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_CoordinateMethodCV,in_County,in_EPSGCodeCV,in_GNISCodeCV,in_HUC12,in_HUC8,in_Latitude,in_Longitude,in_NHDNetworkStatusCV,in_NHDProductCV,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_SitePoint,in_SiteTypeCV,in_StateCV,in_USGSSiteID,in_AllocationApplicationDate,in_AllocationAssociatedConsumptiveUseSiteIDs,in_AllocationAssociatedWithdrawalSiteIDs,in_AllocationBasisCV,in_AllocationChangeApplicationIndicator,in_AllocationCommunityWaterSupplySystem,in_AllocationCropDutyAmount,in_AllocationExpirationDate,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationSDWISIdentifierCV,in_AllocationTimeframeEnd,in_AllocationTimeframeStart,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_CommunityWaterSupplySystem,in_CropTypeCV,in_CustomerTypeCV,in_DataPublicationDate,in_DataPublicationDOI,in_ExemptOfVolumeFlowPriority,in_GeneratedPowerCapacityMW,in_IrrigatedAcreage,in_IrrigationMethodCV,in_LegacyAllocationIDs,in_OwnerClassificationCV,in_PopulationServed,in_PowerType,in_PrimaryBeneficialUseCategory,in_SDWISIdentifierCV,in_WaterAllocationNativeURL
0,orD0,ORwr_M1,ORwr_V1,ORwr_O1,,,,Formosa 1 Adit,wadeID1,Groundwater,,,,4326,,,,42.85582,-123.38290,,,POD,,POD6909,,well,OR,,,,,,,,,,0.04000,,21755,Formosa Exploration Inc,1989-11-21,,12/31,1/1,,,MINING,,,,,,0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
1,orD1,ORwr_M1,ORwr_V1,ORwr_O1,,,,Silver Butte 1 Adit,wadeID2,Groundwater,,,,4326,,,,42.85455,-123.38351,,,POD,,POD6910,,well,OR,,,,,,,,,,0.00500,,21755,Formosa Exploration Inc,1989-11-21,,12/31,1/1,,,MINING,,,,,,0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
2,orD2,ORwr_M1,ORwr_V1,ORwr_O1,,,,A Well,wadeID3,Groundwater,,,,4326,,,,42.68227,-123.62944,,,POD,,POD9355,,well,OR,,,,,,,,,,0.11000,,23327,Benton Mines Inc,1994-05-06,,12/31,1/1,,,MINING,,,,,,0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
3,orD3,ORwr_M1,ORwr_V1,ORwr_O1,,,,A Well,wadeID3,Groundwater,,,,4326,,,,44.30105,-121.65465,,,POD,,POD9480,,well,OR,,,,,,,,,,0.67000,,23390,Jeff Steyaert Knife River Corp,1995-01-18,,12/31,1/1,,,MINING,,,,,,0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
4,orD4,ORwr_M1,ORwr_V1,ORwr_O1,,,,A Well,wadeID3,Groundwater,,,,4326,,,,44.30105,-121.65465,,,POD,,POD9480,,well,OR,,,,,,,,,,0.11000,,23390,Jeff Steyaert Knife River Corp,1995-01-18,,12/31,1/1,,,MINING,,,,,,0,,,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
416636,orU110620,ORwr_M1,ORwr_V1,ORwr_O1,,,,A Reservoir,wadeID23,Surface Water,,,,4326,,,,45.05922,-123.38644,,,POU,,POU293943,,,OR,,,,,,,,2.50000,,,,225969,Crowe Farms Llc,2021-05-10,,9/30,4/1,,2.50000,SUPPLEMENTAL IRRIGATION,,,,,,0,,15.40000,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
416637,orU110620,ORwr_M1,ORwr_V1,ORwr_O1,,,,A Reservoir,wadeID23,Surface Water,,,,4326,,,,45.05922,-123.38644,,,POU,,POU293943,,,OR,,,,,,,,2.50000,,,,225969,Crowe Farms Llc,2021-05-10,,9/30,4/1,,10.25000,SUPPLEMENTAL IRRIGATION,,,,,,0,,15.40000,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
416638,orU110621,ORwr_M1,ORwr_V1,ORwr_O1,,,,Willamette River,wadeID379,Surface Water,,,,4326,,,,45.09928,-123.05295,,,POU,,POU295164,,,OR,,,,,,,,2.50000,,0.46000,,227094,Carlton Plants Nursery Co Llc,1963-02-07,,12/31,1/1,,,SUPPLEMENTAL IRRIGATION,,,,,,0,,36.90000,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...
416639,orU110622,ORwr_M1,ORwr_V1,ORwr_O1,,,,A Well,wadeID3,Groundwater,,,,4326,,,,45.41009,-122.96201,,,POU,,POU295192,,,OR,,,,,,,,5.00000,,0.22260,,227123,Midway Equities Llc,1991-07-05,,12/31,1/1,,,NURSERY USES,,,,,,0,,80.70000,,,,,,,,https://apps.wrd.state.or.us/apps/wr/wrinfo/wr...


In [40]:
# Export the output dataframe
# Each frame is written as a single CSV stored inside a zip archive.
mainZipOptions = dict(method='zip', archive_name='Pwr_orMain.csv')
outdf.to_csv('Pwr_orMain.zip', compression=mainZipOptions, index=False)  # The output, save as a zip

geometryZipOptions = dict(method='zip', archive_name='P_Geometry.csv')
dfPoUshape.to_csv('P_Geometry.zip', compression=geometryZipOptions, index=False)  # The output geometry.