# Preprocessing Washington Allocation data for WaDEQA upload.
- Date Updated: 04/01/2020
- Purpose:  To preprocess the Washington data into one master file for simple DataFrame creation and extraction

Useful Links to Data:
- The Data - Geographic Water Information System (GWIS)Data from the WA stat: https://fortress.wa.gov/ecy/gispublic/DataDownload/wr/GWIS_Data/
- Data dictionary - https://fortress.wa.gov/ecy/gispublic/DataDownload/wr/GWIS_Data/GWIS_Data_Dictionary/
- Public website   - https://ecology.wa.gov/Water-Shorelines/Water-supply/Water-rights

In [1]:
# Needed Libararies
import os
import numpy as np
import pandas as pd
import geopandas as gpd # the library that lets us read in shapefiles
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

from pyproj import Transformer, transform
transformer = Transformer.from_proj(2927, 4326)  # A trick to drastically optimize the Transformer of pyproj.
# Washignton projection = EPSG:2927. WGS84 projection used by WaDE 2.0 = epsg:4326.

# Working Directory
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Washington/WaterAllocation/RawInputData"
os.chdir(workingDir)

## POD Data

In [2]:
# Input Files
d_pointFile = "D_PointTable.csv"  # contains PoD info
D_Point_WR_DocFile = "D_Point_WR_Doc.csv"  #Bridge table
Person_Plus_EXTRACT_FromWRTSnotGWISFile = "Person_Plus_EXTRACT_FromWRTSnotGWIS.csv"  #Contains water use and owner info

In [3]:
# Dataframe creation
df_1 = pd.read_csv(d_pointFile, encoding = "ISO-8859-1") #Input
df_2 = pd.read_csv(D_Point_WR_DocFile, encoding = "ISO-8859-1") #Input
df_3 = pd.read_csv(Person_Plus_EXTRACT_FromWRTSnotGWISFile, encoding = "ISO-8859-1") #Input

  df_1 = pd.read_csv(d_pointFile, encoding = "ISO-8859-1") #Input
  df_2 = pd.read_csv(D_Point_WR_DocFile, encoding = "ISO-8859-1") #Input


In [4]:
# Merging dataframes into one, using left-join.
df = pd.DataFrame()
df = pd.merge(df_1, df_2, left_on='D_Point_ID', right_on='D_Point_ID', how='left') # Joinning PoD data
df = pd.merge(df,   df_3, left_on='WR_Doc_ID', right_on='WR_Doc_ID', how='left') # Joinning PoD data

df = df.drop_duplicates().replace(np.nan, "").reset_index(drop=True)
print(len(df))
df

198075


Unnamed: 0,OBJECTID_x,D_Point_ID,D_Point_Ty,Location_C,Assoc_FL,Misc_CD,Position_W,Active_DT_x,Inactive_D,Update_TD_x,Update_Use,Comment_DS,Created_TD_x,Created_Us,POINT_X,POINT_Y,OBJECTID_y,D_Point_WR_Doc_ID,WR_Doc_NR,WR_Doc_ID,Active_DT_y,Inactive_DT,Update_TD_y,Update_User_ID,Created_TD_y,Created_User_ID,ï»¿OID_,OBJECTID_1,WaRecId,WaRecId_1,WaRecPhaseId,PartyRoleTypeCode,PersonLastOrOrganizationNM,PersonFirstNM,PersonMINM,PersonAddressLine1AD,PersonAddressLine2AD,PersonAddressLine3AD,PersonAddressCityAD,PersonAddressStateCode,PersonAddressZipCodeAD,WaRecRCWClassTypeCode,EcologyRegionCode,WaRecPrimaryNumber,PriorityDate,WaRecProcessStatusTypeCode,WaRecClaimTypeCode,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes
0,1,200801.0,WL,U,N,,S,,,3/28/2013 9:58:00,"ECY\DKRO461""""",,,,1665873.058973,454923.236269,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2,200889.0,MW,G,Y,,S,,,,,,,,1816741.203102,456744.055134,24110.0,,CG4-GWC5458-A,4271597.0,,,,,,,138472.0,254546.0,4271597.0,4271597.0,129960.0,Attorney,Mackie,Sandy,,Perkins Coie LLP,1201 3rd Ave Ste 4800,,Seattle,WA,98101-3099,groundwater,CRO,CG4-GWC5458-A,4/27/2006 0:00:00,Active,,ChangeROE,PA Exam,1500.0,345.26,283.0,GPM,IR
2,2,200889.0,MW,G,Y,,S,,,,,,,,1816741.203102,456744.055134,24110.0,,CG4-GWC5458-A,4271597.0,,,,,,,138473.0,274371.0,4271597.0,4271597.0,129960.0,Primary,Ste Michelle Wine Estates Ltd (Paterson),,,PO Box 231,,,Paterson,WA,99345-0231,groundwater,CRO,CG4-GWC5458-A,4/27/2006 0:00:00,Active,,ChangeROE,PA Exam,1500.0,345.26,283.0,GPM,IR
3,2,200889.0,MW,G,Y,,S,,,,,,,,1816741.203102,456744.055134,25501.0,,GWC05458-A,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2,200889.0,MW,G,Y,,S,,,,,,,,1816741.203102,456744.055134,93460.0,,G4-30287,2085743.0,,,2009-02-23T10:11:37.000,"""ECY\DKRO461""",,,63477.0,174572.0,2085743.0,2085743.0,58478.0,Primary,Stimson Lane Limited,,,,14111 NE 145th,PO Box 1976,Woodinville,WA,98072-1976,groundwater,CRO,G4-30287,6/6/1990 0:00:00,Active,,NewApp,AppAccepted,2150.0,,175.0,GPM,IR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198070,155876,581518.0,WL,UA,Y,,BLMPLS,,,2/4/2020 14:06:36,CROE461,,2/4/2020 14:06:36,CROE461,963015.372827,702531.560277,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
198071,155877,305327.0,WL,UA,Y,,S,,,2/5/2020 8:46:29,LYHA461,,2/5/2020 8:46:29,LYHA461,1980930.272033,353897.600066,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
198072,155878,305328.0,WL,UA,Y,,S,,,2/5/2020 9:20:00,LYHA461,,2/5/2020 9:20:00,LYHA461,1811329.188991,562390.751649,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
198073,155879,581519.0,WL,UX,Y,,P20070221,,,2/5/2020 13:54:39,CROE461,,2/5/2020 13:54:39,CROE461,976789.133994,696086.326449,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [5]:
# For converting projection latitude.
def assignLat(colrowValueLat, colrowValueLong):
    if colrowValueLat == '' or pd.isnull(colrowValueLat):
        lat = ""
    else:
        lat, long = transformer.transform(colrowValueLat, colrowValueLong)
    return lat

# For converting projection longitude.
def assignLong(colrowValueLat, colrowValueLong):
    if colrowValueLong == '' or pd.isnull(colrowValueLong):
        long = ""
    else:
        lat, long = transformer.transform(colrowValueLat, colrowValueLong)
    return long

df['in_Latitude'] = df.apply(lambda row: assignLat(row['POINT_X'], row['POINT_Y']), axis=1)
df['in_Longitude'] = df.apply(lambda row: assignLong(row['POINT_X'], row['POINT_Y']), axis=1)

In [6]:
#Changing datatype of used date fields. 
df['PriorityDate'] = pd.to_datetime(df['PriorityDate'], errors = 'coerce')
df['PriorityDate'] = pd.to_datetime(df["PriorityDate"].dt.strftime('%m/%d/%Y'))

In [7]:
def assignOwner(valueFirst, valueMid, valueLast):
    #--- First Name ---
    if valueFirst == "" or pd.isnull(valueFirst):
        FirstName = ""
    else:
        FirstName = str(valueFirst).strip()
        
    #--- Midile Initial ---
    if valueMid == "" or pd.isnull(valueMid):
        MidName = ""
    else:
        MidName = str(valueMid).strip()
    
    #--- Last Name ---
    if valueLast == "" or pd.isnull(valueLast):
        LastName = ""
    else:
        LastName = str(valueLast).strip()

    if LastName == "":
        outlist = LastName + FirstName + MidName
    else:
        outlist = LastName + ", " + FirstName + " "+ MidName
        
    outlist = outlist.strip()
    return outlist


df['Owner'] = df.apply(lambda row: assignOwner(row['PersonFirstNM'],
                                               row['PersonMINM'],
                                               row['PersonLastOrOrganizationNM']), axis=1)

In [8]:
#Manually filling in empty class code with ‘Unspecified’ value.
def assignWaRecRCWClassTypeCode(colValue):
    if colValue == "" or pd.isnull(colValue):
        outlist = "Unspecified"
    else:
        outlist = colValue.strip()
    return outlist


df['WaRecRCWClassTypeCode'] = df.apply(lambda row: assignWaRecRCWClassTypeCode(row['WaRecRCWClassTypeCode']), axis=1)

In [9]:
# For creating AllocationAmount
def assignAllocationAmount(colrowValueIQ, colrowValueUC):
    if colrowValueIQ == '' or pd.isnull(colrowValueIQ):
        outVal = ""
    elif colrowValueIQ <= 0 or pd.isnull(colrowValueIQ):
        outVal = 0
    else:
        MultiFactor = 1.0
        gpmcfsUnit = colrowValueUC.strip()
        if gpmcfsUnit == 'GPM':
            MultiFactor = 0.00222800926
        elif gpmcfsUnit == 'GPD':
            MultiFactor = 1.0 / 646317.0
        try:
            outVal = MultiFactor * colrowValueIQ
        except:
            outVal = colrowValueIQ
    return outVal

df['in_AllocationFlow_CFS'] =  df.apply(lambda row: assignAllocationAmount(row['InstantaneousQuantity'], row['InstantaneousUnitCode']), axis=1)

In [10]:
# Create output dataframe for ground water
columnslist = [   
    ### Water Source Info ###
    "in_WaterSourceTypeCV",
    
    ### Site Info ###
    "in_CoordinateAccuracy",
    "in_Latitude",
    "in_Longitude",
    "in_SiteTypeCV",
    "in_PODorPOUSite",
    
    ### AllocationAmount_fact Info ###
    "in_AllocationFlow_CFS",
     "in_AllocationLegalStatusCV",
    "in_AllocationNativeID",
    "in_AllocationOwner",
    "in_AllocationPriorityDate",
    "in_AllocationTypeCV",
    "in_AllocationVolume_AF",
    "in_BeneficialUseCategory",
    "in_IrrigatedAcreage"]

dfPOD = pd.DataFrame(columns=columnslist, index=df.index)

In [11]:
#############################################################################################
#WaterSource
dfPOD['in_WaterSourceTypeCV'] = df['WaRecRCWClassTypeCode']
                                    
#Site
dfPOD['in_CoordinateAccuracy'] = df['Location_C']
dfPOD['in_Latitude'] = df['in_Latitude']
dfPOD['in_Longitude'] = df['in_Longitude']
dfPOD['in_SiteNativeID'] = "POD" + df['D_Point_ID'].astype(str)
dfPOD['in_SiteTypeCV'] = df['D_Point_Ty']
dfPOD['in_PODorPOUSite'] = "POD"

#AllocationAmount_fact
dfPOD['in_AllocationFlow_CFS'] = df['in_AllocationFlow_CFS']
dfPOD['in_AllocationLegalStatusCV'] = df['WaRecProcessStatusTypeCode'].astype(str)
dfPOD['in_AllocationNativeID'] = df['WR_Doc_ID'].astype(str)
dfPOD['in_AllocationOwner'] = df['Owner'].astype(str)
dfPOD['in_AllocationPriorityDate'] = df['PriorityDate']
dfPOD['in_AllocationTypeCV'] = df['WaRecPhaseTypeCode']
dfPOD['in_AllocationVolume_AF'] = df['AnnualVolumeQuantity']
dfPOD['in_BeneficialUseCategory'] = df['PurposeOfUseTypeCodes'].astype(str)
dfPOD['in_IrrigatedAcreage'] = df['IrrigatedAreaQuantity']

dfPOD = dfPOD.drop_duplicates().replace(np.nan, "").reset_index(drop=True)
print(len(dfPOD))
dfPOD

191811


Unnamed: 0,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_Latitude,in_Longitude,in_SiteTypeCV,in_PODorPOUSite,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_IrrigatedAcreage,in_SiteNativeID
0,Unspecified,U,46.580802,-120.398762,WL,POD,,,,,NaT,,,,,POD200801.0
1,groundwater,G,46.583691,-119.798724,MW,POD,3.342014,Active,4271597.0,"Mackie, Sandy",2006-04-27,ChangeROE,345.26,IR,283.0,POD200889.0
2,groundwater,G,46.583691,-119.798724,MW,POD,3.342014,Active,4271597.0,"Ste Michelle Wine Estates Ltd (Paterson),",2006-04-27,ChangeROE,345.26,IR,283.0,POD200889.0
3,Unspecified,G,46.583691,-119.798724,MW,POD,,,,,NaT,,,,,POD200889.0
4,groundwater,G,46.583691,-119.798724,MW,POD,4.79022,Active,2085743.0,"Stimson Lane Limited,",1990-06-06,NewApp,,IR,175.0,POD200889.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191806,Unspecified,UA,47.227667,-123.226948,WL,POD,,,,,NaT,,,,,POD581518.0
191807,Unspecified,UA,46.295838,-119.152795,WL,POD,,,,,NaT,,,,,POD305327.0
191808,Unspecified,UA,46.873502,-119.816607,WL,POD,,,,,NaT,,,,,POD305328.0
191809,Unspecified,UX,47.211299,-123.170665,WL,POD,,,,,NaT,,,,,POD581519.0


## POU Data

In [12]:
# Input Files
pouInput = "WA_POU_Input.csv"   # contains POU info
Person_Plus_EXTRACT_FromWRTSnotGWISFile = "Person_Plus_EXTRACT_FromWRTSnotGWIS.csv"  # Contains water use and owner info

In [13]:
# Dataframe creation
# df_1 = pd.read_csv(pouInput, encoding = "ISO-8859-1")
# df_3 = pd.read_csv(Person_Plus_EXTRACT_FromWRTSnotGWISFile, encoding = "ISO-8859-1")
df_1 = pd.read_csv(pouInput)
df_3 = pd.read_csv(Person_Plus_EXTRACT_FromWRTSnotGWISFile)

  df_1 = pd.read_csv(pouInput)


In [14]:
# Merging dataframes into one, using left-join.
df = pd.DataFrame()
df = pd.merge(df_1, df_3, left_on='WR_DOC_ID', right_on='WR_Doc_ID', how='left')

df = df.drop_duplicates().replace(np.nan, "").reset_index(drop=True)
print(len(df))
df

177340


Unnamed: 0,OID__x,WR_DOC_ID,WR_Doc_POU_ID,Fill_CD,WR_Doc_NR,WR_Doc_Type_CD,Quality_CD,Misc_CD,Position_With_CD,Active_DT,Inactive_DT,Update_TD,Update_User_ID,Comment_DS,Created_TD,Created_User_ID,Shape_Length,Shape_Area,Latitude,Longitude,OID__y,OBJECTID_1,WaRecId,WaRecId_1,WR_Doc_ID,WaRecPhaseId,PartyRoleTypeCode,PersonLastOrOrganizationNM,PersonFirstNM,PersonMINM,PersonAddressLine1AD,PersonAddressLine2AD,PersonAddressLine3AD,PersonAddressCityAD,PersonAddressStateCode,PersonAddressZipCodeAD,WaRecRCWClassTypeCode,EcologyRegionCode,WaRecPrimaryNumber,PriorityDate,WaRecProcessStatusTypeCode,WaRecClaimTypeCode,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes
0,1,2084118.0,,7.0,GWC01066-D,CE,G,RECHECKED\WWT,S,,,1/23/2009 11:18:38,"ECY\DKRO461""""",,,,32011.934922,3.540485e+07,46.591212,-119.735369,58522.0,84235.0,2084118.0,2084118.0,2084118.0,59585.0,Primary,USARMY Corps Engineers,,,,,,,,,groundwater,CRO,G4-*01105SWRIS,4/1/1927 0:00:00,Active,,Certificate,,1375.0,800.0,200.0,GPM,DG IR
1,2,2084120.0,,14.0,GWC01067-D,CE,G,,S,,,5/27/2011 12:41:50,"ECY\ATRO461""""",,,,21354.797895,1.962663e+07,46.582825,-119.761836,48022.0,195133.0,2084120.0,2084120.0,2084120.0,59258.0,Primary,USARMY Corps Engineers,,,,,,,,,groundwater,CRO,G4-*01106SWRIS,4/1/1927 0:00:00,Active,,Certificate,,1700.0,1760.0,440.0,GPM,DG IR
2,3,2084124.0,,39.0,GWC01070-D,CE,G,,S,,,5/27/2011 12:41:50,"ECY\ATRO461""""",,,,5267.284590,1.733865e+06,46.586282,-119.778452,63619.0,64101.0,2084124.0,2084124.0,2084124.0,59262.0,Primary,USARMY Corps Engineers,,,,,,,,,groundwater,CRO,G4-*01109SWRIS,4/1/1927 0:00:00,Active,,Certificate,,150.0,160.0,40.0,GPM,DG IR
3,4,2084121.0,,4.0,GWC01068-D,CE,G,,S,,,5/27/2011 12:41:50,"ECY\ATRO461""""",,,,15886.739890,1.414678e+07,46.580765,-119.781034,57223.0,66971.0,2084121.0,2084121.0,2084121.0,59259.0,Primary,USARMY Corps Engineers,,,,,,,,,groundwater,CRO,G4-*01107SWRIS,4/1/1927 0:00:00,Active,,Certificate,,800.0,320.0,80.0,GPM,DG IR
4,5,2084122.0,,47.0,GWC01069-D,CE,G,,S,,,5/27/2011 12:41:50,"ECY\ATRO461""""",,,,10499.592038,6.884824e+06,46.573518,-119.765162,63169.0,130619.0,2084122.0,2084122.0,2084122.0,59260.0,Primary,USARMY Corps Engineers,,,,,,,,,groundwater,CRO,G4-*01108SWRIS,4/1/1927 0:00:00,Active,,Certificate,,400.0,320.0,80.0,GPM,DG IR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177335,156367,6803048.0,,1.0,G4-36099,AP,G,,P20200120,,,11/3/2020 11:46:55,KWIN461,,11/3/2020 11:46:55,KWIN461,3854.102341,2.388147e+05,47.166769,-121.053191,260112.0,302814.0,6803048.0,6803048.0,6803048.0,327392.0,Agent,Aspect Consulting - Price,,,1106 N 35th Ave,,,Yakima,WA,98902,groundwater,CRO,G4-36099,5/22/2020 0:00:00,Active,,NewApp,AppAccepted,,0.414,0.011,,DS IR
177336,156368,6803092.0,,1.0,G4-36102,AP,G,WBN-UK-Y,P20200120,,,11/9/2020 11:56:43,KWIN461,,11/9/2020 11:51:05,KWIN461,2069.872243,1.861377e+05,47.16921,-121.080172,249351.0,302905.0,6803092.0,6803092.0,6803092.0,327442.0,Agent,Aspect Consulting - Price,,,1106 N 35th Ave,,,Yakima,WA,98902,groundwater,CRO,G4-36102,6/5/2020 0:00:00,Active,,NewApp,AppReceived,,0.414,0.011,,DS IR
177337,156368,6803092.0,,1.0,G4-36102,AP,G,WBN-UK-Y,P20200120,,,11/9/2020 11:56:43,KWIN461,,11/9/2020 11:51:05,KWIN461,2069.872243,1.861377e+05,47.16921,-121.080172,249352.0,302904.0,6803092.0,6803092.0,6803092.0,327442.0,Co-Primary,Richardson,Monica,,6405 229th Ave SE,,,Issaquah,WA,98029,groundwater,CRO,G4-36102,6/5/2020 0:00:00,Active,,NewApp,AppReceived,,0.414,0.011,,DS IR
177338,156368,6803092.0,,1.0,G4-36102,AP,G,WBN-UK-Y,P20200120,,,11/9/2020 11:56:43,KWIN461,,11/9/2020 11:51:05,KWIN461,2069.872243,1.861377e+05,47.16921,-121.080172,251088.0,302903.0,6803092.0,6803092.0,6803092.0,327442.0,Primary,Richardson,Thomas,,6405 229th Ave SE,,,Issaquah,WA,98029,groundwater,CRO,G4-36102,6/5/2020 0:00:00,Active,,NewApp,AppReceived,,0.414,0.011,,DS IR


In [15]:
#Changing datatype of used date fields. 
df['PriorityDate'] = pd.to_datetime(df['PriorityDate'], errors = 'coerce')
df['PriorityDate'] = pd.to_datetime(df["PriorityDate"].dt.strftime('%m/%d/%Y'))

In [16]:
def assignOwner(valueFirst, valueMid, valueLast):
    #--- First Name ---
    if valueFirst == "" or pd.isnull(valueFirst):
        FirstName = ""
    else:
        FirstName = str(valueFirst).strip()
        
    #--- Midile Initial ---
    if valueMid == "" or pd.isnull(valueMid):
        MidName = ""
    else:
        MidName = str(valueMid).strip()
    
    #--- Last Name ---
    if valueLast == "" or pd.isnull(valueLast):
        LastName = ""
    else:
        LastName = str(valueLast).strip()

    if LastName == "":
        outlist = LastName + FirstName + MidName
    else:
        outlist = LastName + ", " + FirstName + " "+ MidName
        
    outlist = outlist.strip()
    return outlist


df['Owner'] = df.apply(lambda row: assignOwner(row['PersonFirstNM'],
                                               row['PersonMINM'],
                                               row['PersonLastOrOrganizationNM']), axis=1)

In [17]:
#Manually filling in empty class code with ‘unknown’ value.
def assignWaRecRCWClassTypeCode(colValue):
    if colValue == "" or pd.isnull(colValue):
        outlist = "Unspecified"
    else:
        outlist = colValue.strip()
    return outlist


df['WaRecRCWClassTypeCode'] = df.apply(lambda row: assignWaRecRCWClassTypeCode(row['WaRecRCWClassTypeCode']), axis=1)

In [18]:
# For creating AllocationAmount
def assignAllocationAmount(colrowValueIQ, colrowValueUC):
    if colrowValueIQ == '' or pd.isnull(colrowValueIQ):
        outVal = ""
    elif colrowValueIQ <= 0 or pd.isnull(colrowValueIQ):
        outVal = 0
    else:
        MultiFactor = 1.0
        gpmcfsUnit = colrowValueUC.strip()
        if gpmcfsUnit == 'GPM':
            MultiFactor = 0.00222800926
        elif gpmcfsUnit == 'GPD':
            MultiFactor = 1.0 / 646317.0
        try:
            outVal = MultiFactor * colrowValueIQ
        except:
            outVal = colrowValueIQ
    return outVal

df['in_AllocationFlow_CFS'] =  df.apply(lambda row: assignAllocationAmount(row['InstantaneousQuantity'], row['InstantaneousUnitCode']), axis=1)

In [19]:
# Creating WaDE Custom site native ID for easy site identificaiion
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDEWA_S" + string1
    return outstring

dfSiteNativeID = pd.DataFrame()
dfSiteNativeID['in_Latitude'] = df['Latitude']
dfSiteNativeID['in_Longitude'] = df['Longitude']
dfSiteNativeID = dfSiteNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfSiteNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfSiteNativeID['in_SiteNativeID'] = dftemp.apply(lambda row: assignSiteUUID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveSiteNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        ml = dfSiteNativeID.loc[(dfSiteNativeID['in_Latitude'] == A) & 
                                (dfSiteNativeID['in_Longitude'] == B), 'in_SiteNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

df['in_SiteNativeID'] = df.apply(lambda row: retrieveSiteNativeID( row['Latitude'], row['Longitude']), axis=1)
df.head(2)

Unnamed: 0,OID__x,WR_DOC_ID,WR_Doc_POU_ID,Fill_CD,WR_Doc_NR,WR_Doc_Type_CD,Quality_CD,Misc_CD,Position_With_CD,Active_DT,Inactive_DT,Update_TD,Update_User_ID,Comment_DS,Created_TD,Created_User_ID,Shape_Length,Shape_Area,Latitude,Longitude,OID__y,OBJECTID_1,WaRecId,WaRecId_1,WR_Doc_ID,WaRecPhaseId,PartyRoleTypeCode,PersonLastOrOrganizationNM,PersonFirstNM,PersonMINM,PersonAddressLine1AD,PersonAddressLine2AD,PersonAddressLine3AD,PersonAddressCityAD,PersonAddressStateCode,PersonAddressZipCodeAD,WaRecRCWClassTypeCode,EcologyRegionCode,WaRecPrimaryNumber,PriorityDate,WaRecProcessStatusTypeCode,WaRecClaimTypeCode,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,Owner,in_AllocationFlow_CFS,in_SiteNativeID
0,1,2084118.0,,7.0,GWC01066-D,CE,G,RECHECKED\WWT,S,,,1/23/2009 11:18:38,"ECY\DKRO461""""",,,,32011.934922,35404850.0,46.591212,-119.735369,58522.0,84235.0,2084118.0,2084118.0,2084118.0,59585.0,Primary,USARMY Corps Engineers,,,,,,,,,groundwater,CRO,G4-*01105SWRIS,1927-04-01,Active,,Certificate,,1375.0,800.0,200.0,GPM,DG IR,"USARMY Corps Engineers,",3.063513,WaDEWA_S1
1,2,2084120.0,,14.0,GWC01067-D,CE,G,,S,,,5/27/2011 12:41:50,"ECY\ATRO461""""",,,,21354.797895,19626630.0,46.582825,-119.761836,48022.0,195133.0,2084120.0,2084120.0,2084120.0,59258.0,Primary,USARMY Corps Engineers,,,,,,,,,groundwater,CRO,G4-*01106SWRIS,1927-04-01,Active,,Certificate,,1700.0,1760.0,440.0,GPM,DG IR,"USARMY Corps Engineers,",3.787616,WaDEWA_S2


In [20]:
# Create output dataframe for ground water
columnslist = [   
    ### Water Source Info ###
    "in_WaterSourceName",
    "in_WaterSourceTypeCV",
    
    ### Site Info ###
    "in_CoordinateAccuracy",
    "in_Latitude",
    "in_Longitude",
    "in_SiteTypeCV",
    "in_PODorPOUSite",
    
    ### AllocationAmount_fact Info ###
    "in_AllocationFlow_CFS",
     "in_AllocationLegalStatusCV",
    "in_AllocationNativeID",
    "in_AllocationOwner",
    "in_AllocationPriorityDate",
    "in_AllocationTypeCV",
    "in_AllocationVolume_AF",
    "in_BeneficialUseCategory",
    "in_IrrigatedAcreage"]

dfPOU = pd.DataFrame(columns=columnslist, index=df.index)

In [21]:
#############################################################################################
#WaterSource
dfPOU['in_WaterSourceTypeCV'] = df['WaRecRCWClassTypeCode']
                                    
#Site
dfPOU['in_CoordinateAccuracy'] = ""
dfPOU['in_Latitude'] = df['Latitude']
dfPOU['in_Longitude'] = df['Longitude']
dfPOU['in_SiteNativeID'] = "POU" + df['in_SiteNativeID'].astype(str)
dfPOU['in_SiteTypeCV'] = "Unspecified"
dfPOU['in_PODorPOUSite'] = "POU"

#AllocationAmount_fact
dfPOU['in_AllocationFlow_CFS'] = df['in_AllocationFlow_CFS']
dfPOU['in_AllocationLegalStatusCV'] = df['WaRecProcessStatusTypeCode'].astype(str)
dfPOU['in_AllocationNativeID'] = df['WR_Doc_ID'].astype(str)
dfPOU['in_AllocationOwner'] = df['Owner'].astype(str)
dfPOU['in_AllocationPriorityDate'] = df['PriorityDate']
dfPOU['in_AllocationTypeCV'] = df['WaRecPhaseTypeCode']
dfPOU['in_AllocationVolume_AF'] = df['AnnualVolumeQuantity']
dfPOU['in_BeneficialUseCategory'] = df['PurposeOfUseTypeCodes'].astype(str)
dfPOU['in_IrrigatedAcreage'] = df['IrrigatedAreaQuantity']

dfPOU = dfPOU.drop_duplicates().replace(np.nan, "").reset_index(drop=True)
print(len(dfPOU))
dfPOU

176572


Unnamed: 0,in_WaterSourceName,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_Latitude,in_Longitude,in_SiteTypeCV,in_PODorPOUSite,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_IrrigatedAcreage,in_SiteNativeID
0,,groundwater,,46.591212,-119.735369,Unspecified,POU,3.063513,Active,2084118.0,"USARMY Corps Engineers,",1927-04-01,Certificate,800.0,DG IR,200.0,POUWaDEWA_S1
1,,groundwater,,46.582825,-119.761836,Unspecified,POU,3.787616,Active,2084120.0,"USARMY Corps Engineers,",1927-04-01,Certificate,1760.0,DG IR,440.0,POUWaDEWA_S2
2,,groundwater,,46.586282,-119.778452,Unspecified,POU,0.334201,Active,2084124.0,"USARMY Corps Engineers,",1927-04-01,Certificate,160.0,DG IR,40.0,POUWaDEWA_S3
3,,groundwater,,46.580765,-119.781034,Unspecified,POU,1.782407,Active,2084121.0,"USARMY Corps Engineers,",1927-04-01,Certificate,320.0,DG IR,80.0,POUWaDEWA_S4
4,,groundwater,,46.573518,-119.765162,Unspecified,POU,0.891204,Active,2084122.0,"USARMY Corps Engineers,",1927-04-01,Certificate,320.0,DG IR,80.0,POUWaDEWA_S5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176567,,groundwater,,47.166769,-121.053191,Unspecified,POU,,Active,6803048.0,"Aspect Consulting - Price,",2020-05-22,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136199
176568,,groundwater,,47.16921,-121.080172,Unspecified,POU,,Active,6803092.0,"Aspect Consulting - Price,",2020-06-05,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136200
176569,,groundwater,,47.16921,-121.080172,Unspecified,POU,,Active,6803092.0,"Richardson, Monica",2020-06-05,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136200
176570,,groundwater,,47.16921,-121.080172,Unspecified,POU,,Active,6803092.0,"Richardson, Thomas",2020-06-05,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136200


## Concatenate POD & POU

In [22]:
# Concatenate
frames = [dfPOD, dfPOU]
dfout = pd.concat(frames)
dfout = dfout.replace(np.nan, "").drop_duplicates()
dfout = dfout.replace("nan", "").drop_duplicates()

print(len(dfout))
dfout

368383


Unnamed: 0,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_Latitude,in_Longitude,in_SiteTypeCV,in_PODorPOUSite,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_IrrigatedAcreage,in_SiteNativeID,in_WaterSourceName
0,Unspecified,U,46.580802,-120.398762,WL,POD,,,,,NaT,,,,,POD200801.0,
1,groundwater,G,46.583691,-119.798724,MW,POD,3.342014,Active,4271597.0,"Mackie, Sandy",2006-04-27,ChangeROE,345.26,IR,283.0,POD200889.0,
2,groundwater,G,46.583691,-119.798724,MW,POD,3.342014,Active,4271597.0,"Ste Michelle Wine Estates Ltd (Paterson),",2006-04-27,ChangeROE,345.26,IR,283.0,POD200889.0,
3,Unspecified,G,46.583691,-119.798724,MW,POD,,,,,NaT,,,,,POD200889.0,
4,groundwater,G,46.583691,-119.798724,MW,POD,4.79022,Active,2085743.0,"Stimson Lane Limited,",1990-06-06,NewApp,,IR,175.0,POD200889.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176567,groundwater,,47.166769,-121.053191,Unspecified,POU,,Active,6803048.0,"Aspect Consulting - Price,",2020-05-22,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136199,
176568,groundwater,,47.16921,-121.080172,Unspecified,POU,,Active,6803092.0,"Aspect Consulting - Price,",2020-06-05,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136200,
176569,groundwater,,47.16921,-121.080172,Unspecified,POU,,Active,6803092.0,"Richardson, Monica",2020-06-05,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136200,
176570,groundwater,,47.16921,-121.080172,Unspecified,POU,,Active,6803092.0,"Richardson, Thomas",2020-06-05,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136200,


In [23]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDEWA_WS" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = dfout['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        ml = dfWaterSourceNativeID.loc[(dfWaterSourceNativeID['in_WaterSourceTypeCV'] == A), 'in_WaterSourceNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

dfout['in_WaterSourceNativeID'] = dfout.apply(lambda row: retrieveWaterSourceNativeID(row['in_WaterSourceTypeCV']), axis=1)
dfout

Unnamed: 0,in_WaterSourceTypeCV,in_CoordinateAccuracy,in_Latitude,in_Longitude,in_SiteTypeCV,in_PODorPOUSite,in_AllocationFlow_CFS,in_AllocationLegalStatusCV,in_AllocationNativeID,in_AllocationOwner,in_AllocationPriorityDate,in_AllocationTypeCV,in_AllocationVolume_AF,in_BeneficialUseCategory,in_IrrigatedAcreage,in_SiteNativeID,in_WaterSourceName,in_WaterSourceNativeID
0,Unspecified,U,46.580802,-120.398762,WL,POD,,,,,NaT,,,,,POD200801.0,,WaDEWA_WS1
1,groundwater,G,46.583691,-119.798724,MW,POD,3.342014,Active,4271597.0,"Mackie, Sandy",2006-04-27,ChangeROE,345.26,IR,283.0,POD200889.0,,WaDEWA_WS2
2,groundwater,G,46.583691,-119.798724,MW,POD,3.342014,Active,4271597.0,"Ste Michelle Wine Estates Ltd (Paterson),",2006-04-27,ChangeROE,345.26,IR,283.0,POD200889.0,,WaDEWA_WS2
3,Unspecified,G,46.583691,-119.798724,MW,POD,,,,,NaT,,,,,POD200889.0,,WaDEWA_WS1
4,groundwater,G,46.583691,-119.798724,MW,POD,4.79022,Active,2085743.0,"Stimson Lane Limited,",1990-06-06,NewApp,,IR,175.0,POD200889.0,,WaDEWA_WS2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176567,groundwater,,47.166769,-121.053191,Unspecified,POU,,Active,6803048.0,"Aspect Consulting - Price,",2020-05-22,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136199,,WaDEWA_WS2
176568,groundwater,,47.16921,-121.080172,Unspecified,POU,,Active,6803092.0,"Aspect Consulting - Price,",2020-06-05,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136200,,WaDEWA_WS2
176569,groundwater,,47.16921,-121.080172,Unspecified,POU,,Active,6803092.0,"Richardson, Monica",2020-06-05,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136200,,WaDEWA_WS2
176570,groundwater,,47.16921,-121.080172,Unspecified,POU,,Active,6803092.0,"Richardson, Thomas",2020-06-05,NewApp,0.414,DS IR,0.011,POUWaDEWA_S136200,,WaDEWA_WS2


## Shapefile Data
- For attaching gemetry to csv inputs.

In [32]:
# PoU Shapefile Data
# Shapefile input
ShapeFileInput = gpd.read_file('shapefile/WA_PoU.shp')
dfPoUshapetemp = pd.DataFrame(ShapeFileInput)
dfPoUshapetemp.head(3)

Unnamed: 0,WR_DOC_ID,WR_Doc_POU,Fill_CD,WR_Doc_NR,WR_Doc_Typ,Quality_CD,Misc_CD,Position_W,Active_DT,Inactive_D,Update_TD,Update_Use,Comment_DS,Created_TD,Created_Us,Shape_Leng,Shape_Area,Latitude,Longitude,geometry
0,2084118,0,7,GWC01066-D,CE,G,RECHECKED\WWT,S,,,2009-01-23,"""ECY\DKRO461""",,,,32011.934922,35404850.0,46.591212,-119.735369,"POLYGON ((-119.74933 46.58447, -119.74899 46.5..."
1,2084120,0,14,GWC01067-D,CE,G,,S,,,2011-05-27,"""ECY\ATRO461""",,,,21354.797895,19626630.0,46.582825,-119.761836,"POLYGON ((-119.74933 46.58447, -119.74966 46.5..."
2,2084124,0,39,GWC01070-D,CE,G,,S,,,2011-05-27,"""ECY\ATRO461""",,,,5267.28459,1733865.0,46.586282,-119.778452,"POLYGON ((-119.77580 46.58451, -119.78105 46.5..."


In [33]:
# Creating WaDE Custom site native ID for easy site identificaiion
# ----------------------------------------------------------------------------------------------------

# Create temp SiteNativeID dataframe of unique site.
def assignSiteUUID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDEWA_S" + string1
    return outstring

dfSiteNativeID = pd.DataFrame()
dfSiteNativeID['in_Latitude'] = dfPoUshapetemp['Latitude']
dfSiteNativeID['in_Longitude'] = dfPoUshapetemp['Longitude']
dfSiteNativeID = dfSiteNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfSiteNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfSiteNativeID['in_SiteNativeID'] = dftemp.apply(lambda row: assignSiteUUID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom site native ID
def retrieveSiteNativeID(A, B):
    if (A == '' and B == '') or (pd.isnull(A) and pd.isnull(B)):
        outList = ''
    else:
        ml = dfSiteNativeID.loc[(dfSiteNativeID['in_Latitude'] == A) & 
                                (dfSiteNativeID['in_Longitude'] == B), 'in_SiteNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

dfPoUshapetemp['in_SiteNativeID'] = dfPoUshapetemp.apply(lambda row: retrieveSiteNativeID( row['Latitude'], row['Longitude']), axis=1)
dfPoUshapetemp.head(2)

Unnamed: 0,WR_DOC_ID,WR_Doc_POU,Fill_CD,WR_Doc_NR,WR_Doc_Typ,Quality_CD,Misc_CD,Position_W,Active_DT,Inactive_D,Update_TD,Update_Use,Comment_DS,Created_TD,Created_Us,Shape_Leng,Shape_Area,Latitude,Longitude,geometry,in_SiteNativeID
0,2084118,0,7,GWC01066-D,CE,G,RECHECKED\WWT,S,,,2009-01-23,"""ECY\DKRO461""",,,,32011.934922,35404850.0,46.591212,-119.735369,"POLYGON ((-119.74933 46.58447, -119.74899 46.5...",WaDEWA_S1
1,2084120,0,14,GWC01067-D,CE,G,,S,,,2011-05-27,"""ECY\ATRO461""",,,,21354.797895,19626630.0,46.582825,-119.761836,"POLYGON ((-119.74933 46.58447, -119.74966 46.5...",WaDEWA_S2


In [34]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = "POU" + dfPoUshapetemp['in_SiteNativeID'].astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head(3)

Unnamed: 0,in_SiteNativeID,geometry
0,POUWaDEWA_S1,"POLYGON ((-119.74933 46.58447, -119.74899 46.5..."
1,POUWaDEWA_S2,"POLYGON ((-119.74933 46.58447, -119.74966 46.5..."
2,POUWaDEWA_S3,"POLYGON ((-119.77580 46.58451, -119.78105 46.5..."


## Export Outputs

In [35]:
#Exporting to Finished File
dfout.to_csv('P_WashingtonMaster.csv', index=False)  # The output
dfPoUshape.to_csv('P_WashingtonGeometry.csv', index=False) # The output geometry.