In [1]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import os
from datetime import datetime
from dateutil.parser import parse

In [2]:
# working directory
# All input/output file names below are resolved relative to this folder.
# The chdir is guarded so the cell can be re-run: once the kernel is already
# inside the folder, a second relative chdir would look for
# ./ProcessedInputData/ProcessedInputData and raise FileNotFoundError.
working_dir = "./ProcessedInputData"
if os.path.basename(os.getcwd()) != os.path.basename(os.path.normpath(working_dir)):
    os.chdir(working_dir)

In [3]:
# Input files (all read with pandas.read_csv further down; paths are relative
# to the working directory set above).
# fileInput1: main water-right extract, loaded into df100.
fileInput1 = "Person_Plus_EXTRACT_FromWRTSnotGWIS.csv" 
# fileInput2: table relating WR_Doc_ID to D_Point_ID, loaded into df200.
fileInput2 = "D_Point_WR_Doc.csv"
# water sources lookup, loaded into df400
inp_wtrsrs="watersources.csv"
# sites lookup, loaded into df500
inp_sitdim = 'sites.csv'

# Output: water allocations file name.
out_alloc = "waterallocations.csv"    # output file name

In [4]:
######## WaDE columns

# The following fields are named differently in the table here (edited by DPL) and on the schema website:
#http://schema.westernstateswater.org/tables/Input_AllocationAmounts_fact.html
"""
BeneficialUseCategory, PrimaryUseCategory, AllocationTimeframeStart, AllocationTimeframeEnd, " "
BeneficialUseCategoryCV, PrimaryUseCategoryCV, TimeframeStartDate,	TimeframeEndDate,	Geometry	
"""
# Column names of the output table; used below to build the empty target
# DataFrame (outdf100). The first five entries are the UUIDs that link to the
# dimension tables:
# OrganizationUUID, SiteUUID, VariableSpecificUUID, WaterSourceUUID, MethodUUID
columns = ["OrganizationUUID", "SiteUUID", "VariableSpecificUUID", "WaterSourceUUID", "MethodUUID", "PrimaryUseCategory",
           "BeneficialUseCategory", "AllocationNativeID", "AllocationTypeCV", "AllocationOwner",
           "AllocationApplicationDate", "AllocationPriorityDate", "AllocationLegalStatusCV", "AllocationCropDutyAmount",
           "AllocationExpirationDate",
           "AllocationChangeApplicationIndicator", "LegacyAllocationIDs", "AllocationBasisCV", "AllocationTimeframeStart",
           "AllocationTimeframeEnd", "AllocationAmount", "AllocationMaximum", "PopulationServed", "PowerGeneratedGWh",
           "IrrigatedAcreage", "AllocationCommunityWaterSupplySystem", "AllocationSDWISIdentifierCV",
           "AllocationAssociatedWithdrawalSiteIDs", "AllocationAssociatedConsumptiveUseSiteIDs", "WaterAllocationNativeURL",
           "CustomerTypeCV", "IrrigationMethodCV", "CropTypeCV", "CommunityWaterSupplySystem", "DataPublicationDate",
           "DataPublicationDOI"]

# Placeholder: per-column data types could be listed here, but none are
# specified because they have not been needed.
dtypesx = [''] # not referenced by the cells shown in this notebook section

In [5]:
### Target DataFrame
# Empty frame (no rows) that carries the output column names; the cells below
# fill it from df100 and with constant values.

# TODO: no dtypes are set here, so each column takes the dtype of whatever is
# assigned to it later (original note: "assumes dtypes inferred from CO file").
outdf100=pd.DataFrame(columns=columns)

In [6]:
print("Reading inputs...")

# Every input file is decoded with the same codec
# (the original author noted "utf-8" as a possible alternative).
csv_encoding = "ISO-8859-1"

# Main water-right extract; the row count is printed as a quick sanity check.
df100 = pd.read_csv(fileInput1, encoding=csv_encoding)
print(df100.shape[0])

# Sites lookup.
df500 = pd.read_csv(inp_sitdim, encoding=csv_encoding)

# Water sources lookup, reduced to one row per (name, type) pair. The
# de-duplication becomes unnecessary once the lookup file itself is clean.
df400 = (
    pd.read_csv(inp_wtrsrs, encoding=csv_encoding)
    .drop_duplicates(subset=['WaterSourceName', 'WaterSourceTypeCV'])
)
df400

Reading inputs...
489112


Unnamed: 0,WaterSourceUUID,WaterSourceNativeID,WaterSourceName,WaterSourceTypeCV,WaterQualityIndicatorCV,GNISFeatureNameCV,Geometry
0,WA_1,1,Unspecified,surfaceWater,Fresh,,
1,WA_2,2,Unspecified,groundwater,Fresh,,
2,WA_3,3,Unspecified,reservoir,Fresh,,
3,WA_4,4,Unspecified,Unknown,Fresh,,


In [7]:
print("Join to Sites native ID source...")

# WR_Doc_ID -> D_Point_ID pairs; only these two columns are loaded and exact
# repeats are removed before joining.
input_cols = ['D_Point_ID', 'WR_Doc_ID']
df200 = (
    pd.read_csv(fileInput2, encoding="ISO-8859-1", usecols=input_cols)
    .drop_duplicates()
)

# Left join keeps every df100 row; rows with no matching WR_Doc_ID in df200 get
# an empty D_Point_ID, and a document with several points yields several rows.
# NOTE: this cell reassigns df100, so running it twice without re-reading the
# inputs merges a second time (pandas then suffixes the clashing D_Point_ID
# columns with _x/_y).
df100 = df100.merge(df200, how='left', left_on='WR_Doc_ID', right_on='WR_Doc_ID')

# For quick tests, truncate here, e.g. df100 = df100.head(10000)

df100

Join to Sites native ID source...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,WaRecProcessStatusTypeCode,WaRecClaimTypeCode,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
1,0,0,2,6809900,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
2,0,0,3,6811534,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
3,0,0,4,6813072,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
4,0,0,5,6814107,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
5,0,0,6,6815804,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
6,0,0,7,6817474,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
7,0,0,8,6818766,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
8,0,0,9,6821228,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
9,0,0,10,6823023,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,


In [8]:
# Keep the first row for each D_Point_ID, then renumber the rows from 0.
# NOTE(review): the key is D_Point_ID alone, so all rows without a point ID
# collapse into one row, and water rights that share a point keep only the
# first one encountered - confirm this is the intended de-duplication key.
print("Dropping duplicates...")

df100 = df100.drop_duplicates(subset=['D_Point_ID']).reset_index(drop=True)

print(df100.shape[0])

df100

Dropping duplicates...
124637


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,WaRecProcessStatusTypeCode,WaRecClaimTypeCode,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.000,,,CFS,FS,
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,Active,LongForm,Claim,,20.000,6.00,0.50,GPM,IR DG,209971.0
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,Active,ShortForm,Claim,,,,,,DG,630570.0
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,Active,,Certificate,,300.000,36.00,,GPM,DM,644800.0
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,Active,,Certificate,,125.000,18.00,,GPM,DM,384055.0
5,0,0,131,2534872,2137984,2137984,2137984,7491,Primary,Shannon,...,Active,,Certificate,Final,2000.000,625.00,250.00,GPM,IR,330069.0
6,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,Active,,Certificate,,60.000,29.50,11.00,GPM,DM CI IR,658137.0
7,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,Active,,Certificate,,60.000,29.50,11.00,GPM,DM CI IR,658138.0
8,0,0,141,2436694,2143864,2143864,2143864,1594,Primary,Evans,...,Active,,Certificate,,0.022,2.00,0.50,CFS,DS IR,398327.0
9,0,0,144,2368440,2067337,2067337,2067337,79023,Primary,PICARD,...,Active,ShortForm,Claim,,,,,,DG,254088.0


In [9]:
print("Adding SiteUUID...")

def assignSiteID(colrowValue, df500):
    """Look up the WaDE site UUID(s) for one native site ID.

    Parameters
    ----------
    colrowValue : native site ID to search for (blank or null means "no site").
    df500 : sites lookup frame with 'SiteNativeID' and 'WaDESiteUUID' columns.

    Returns
    -------
    str
        The 'WaDESiteUUID' values of every row whose 'SiteNativeID' equals
        colrowValue, joined with ',' in row order; '' when the input is
        blank/null or nothing matches.
    """
    # No ID to look up.
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''

    matches = df500.loc[df500['SiteNativeID'] == colrowValue, 'WaDESiteUUID']
    if matches.empty:
        return ''
    return ','.join(map(str, matches))

# Start from a blank SiteUUID column, then fill it by looking up each row's
# D_Point_ID in the sites table.
df100 = df100.assign(SiteUUID='')
df100['SiteUUID'] = df100['D_Point_ID'].apply(lambda pointID: assignSiteID(pointID, df500))

df100

Adding SiteUUID...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,WaRecClaimTypeCode,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,,Certificate,,12.000,,,CFS,FS,,
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,LongForm,Claim,,20.000,6.00,0.50,GPM,IR DG,209971.0,WA_2195
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,ShortForm,Claim,,,,,,DG,630570.0,WA_20397
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,,Certificate,,300.000,36.00,,GPM,DM,644800.0,WA_46066
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,,Certificate,,125.000,18.00,,GPM,DM,384055.0,WA_108331
5,0,0,131,2534872,2137984,2137984,2137984,7491,Primary,Shannon,...,,Certificate,Final,2000.000,625.00,250.00,GPM,IR,330069.0,WA_30223
6,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,,Certificate,,60.000,29.50,11.00,GPM,DM CI IR,658137.0,WA_97966
7,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,,Certificate,,60.000,29.50,11.00,GPM,DM CI IR,658138.0,WA_97967
8,0,0,141,2436694,2143864,2143864,2143864,1594,Primary,Evans,...,,Certificate,,0.022,2.00,0.50,CFS,DS IR,398327.0,WA_43482
9,0,0,144,2368440,2067337,2067337,2067337,79023,Primary,PICARD,...,ShortForm,Claim,,,,,,DG,254088.0,WA_10071


In [10]:
print("Water sources...")

# The water source name is not known ("Unspecified" in the lookup), so the
# match is made on the source type only.
def assignWaterSourceID(colrowValue, df400):
    """Return the WaterSourceUUID for a water source type.

    Parameters
    ----------
    colrowValue : source type to match against 'WaterSourceTypeCV'; a blank or
        null value is looked up as 'Unknown'.
    df400 : water sources lookup frame with 'WaterSourceTypeCV' and
        'WaterSourceUUID' columns.

    Returns
    -------
    The 'WaterSourceUUID' of the first matching row, or '' when no row matches.
    """
    isBlank = colrowValue == '' or pd.isnull(colrowValue)
    sourceType = 'Unknown' if isBlank else colrowValue

    hits = df400.loc[df400['WaterSourceTypeCV'] == sourceType, 'WaterSourceUUID']
    return '' if hits.empty else hits.iloc[0]

# Add a blank WaterSourceUUID column and turn every NaN in the frame into ''
# (the later cells test for '' rather than NaN).
df100 = df100.assign(WaterSourceUUID='').replace(np.nan, '')

# Resolve each row's WaRecRCWClassTypeCode against the water sources lookup.
df100['WaterSourceUUID'] = df100['WaRecRCWClassTypeCode'].apply(
    lambda sourceType: assignWaterSourceID(sourceType, df400)
)

df100

Water sources...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID,WaterSourceUUID
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Certificate,,12,,,CFS,FS,,,WA_1
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,Claim,,20,6,0.5,GPM,IR DG,209971,WA_2195,WA_2
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,Claim,,,,,,DG,630570,WA_20397,WA_2
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,Certificate,,300,36,,GPM,DM,644800,WA_46066,WA_2
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,Certificate,,125,18,,GPM,DM,384055,WA_108331,WA_2
5,0,0,131,2534872,2137984,2137984,2137984,7491,Primary,Shannon,...,Certificate,Final,2000,625,250,GPM,IR,330069,WA_30223,WA_2
6,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,Certificate,,60,29.5,11,GPM,DM CI IR,658137,WA_97966,WA_2
7,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,Certificate,,60,29.5,11,GPM,DM CI IR,658138,WA_97967,WA_2
8,0,0,141,2436694,2143864,2143864,2143864,1594,Primary,Evans,...,Certificate,,0.022,2,0.5,CFS,DS IR,398327,WA_43482,WA_1
9,0,0,144,2368440,2067337,2067337,2067337,79023,Primary,PICARD,...,Claim,,,,,,DG,254088,WA_10071,WA_2


In [15]:
print("Beneficial use categories dictionary and function...")

# Purpose-of-use code -> beneficial use label.
BenUseDictWA = {
    "508-14":"508-14",
    "AI":"Agricultural Irrigation",
    "CI":"Commercial & indust",
    "CM":"Commercial",
    "CO":"Cooling for indust proces",
    "DC":"Dust Control",
    "DG":"Domestic general",
    "DM":"Domestic multiple",
    "DS":"Domestic single",
    "DY":"Dairy",
    "EN":"Environmental quality",
    "FP":"Frost protection",
    "FR":"Fire protection",
    "FS":"Fish propagation",
    "GP":"Groundwater Preservation",
    "HE":"Heat Exchange",
    "HP":"Heat protection for crops",
    "HW":"Highway",
    "IFlow":"Instream Flow",
    "II":"Individual Irrigation",
    "IR":"Irrigation",
    "IT":"Municipal inter-tie system",
    "IU":"Irrigation Unknown",
    "MI":"Mining",
    "MT":"Mitigation",
    "MU":"Municipal",
    "NR":"No Purpose Identified",
    "OT":"Other",
    "PO":"Power",
    "PR":"Parks and Recreation",
    "RE":"Recreation - beautification",
    "RW":"Railway",
    "SA":"Stream augmentation",
    "SR":"Storage",
    "ST":"Stock water",
    "TS":"Test Well",
    "TW-P":"Trust water, Permanent",
    "TW-T":"Trust water, Temporary",
    "WL":"Wildlife refuge"
}

# Get BenUse based on the field "PurposeOfUseTypeCodes"
def assignBenUseDictWA(colrowValue):
    """Translate whitespace-separated purpose-of-use codes into labels.

    Parameters
    ----------
    colrowValue : str or null
        Codes separated by whitespace, e.g. "DM CI IR". Matching against
        BenUseDictWA is case-sensitive.

    Returns
    -------
    str
        The BenUseDictWA labels joined with ", " in input order. Returns ''
        for blank/null input, and also when ANY code is missing from
        BenUseDictWA (the whole value is blanked, not just the unknown code).
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''

    # split() with no argument already ignores leading/trailing whitespace.
    codes = colrowValue.split()
    try:
        return ", ".join(BenUseDictWA[code] for code in codes)
    except KeyError:
        # Only an unknown code is expected here; anything else should surface
        # instead of being silently turned into an empty string.
        return ''

Beneficial use categories dictionary and function...


In [16]:
print("Beneficial uses...")

# Blank column first, then one translated label string per row.
df100 = df100.assign(BeneficialUseCategory='')
df100['BeneficialUseCategory'] = df100['PurposeOfUseTypeCodes'].apply(
    lambda useCodes: assignBenUseDictWA(useCodes)
)
df100

Beneficial uses...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID,WaterSourceUUID,BeneficialUseCategory
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,,12,,,CFS,FS,,,WA_1,Fish propagation
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,,20,6,0.5,GPM,IR DG,209971,WA_2195,WA_2,"Irrigation, Domestic general"
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,,,,,,DG,630570,WA_20397,WA_2,Domestic general
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,,300,36,,GPM,DM,644800,WA_46066,WA_2,Domestic multiple
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,,125,18,,GPM,DM,384055,WA_108331,WA_2,Domestic multiple
5,0,0,131,2534872,2137984,2137984,2137984,7491,Primary,Shannon,...,Final,2000,625,250,GPM,IR,330069,WA_30223,WA_2,Irrigation
6,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,,60,29.5,11,GPM,DM CI IR,658137,WA_97966,WA_2,"Domestic multiple, Commercial & indust, Irriga..."
7,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,,60,29.5,11,GPM,DM CI IR,658138,WA_97967,WA_2,"Domestic multiple, Commercial & indust, Irriga..."
8,0,0,141,2436694,2143864,2143864,2143864,1594,Primary,Evans,...,,0.022,2,0.5,CFS,DS IR,398327,WA_43482,WA_1,"Domestic single, Irrigation"
9,0,0,144,2368440,2067337,2067337,2067337,79023,Primary,PICARD,...,,,,,,DG,254088,WA_10071,WA_2,Domestic general


In [19]:
print("AllocationOwner...")

def assignownerName(colrowValue1, colrowValue2):
    """Build the owner name from a last/organization name and a first name.

    Parameters
    ----------
    colrowValue1 : last name or organization name (may be blank or null).
    colrowValue2 : first name (may be blank or null).

    Returns
    -------
    str
        Both parts joined as "last, first" when both are present, the single
        non-empty part when only one is present, and '' when neither is.
        Surrounding whitespace is removed from each part in every case.
    """
    def _clean(value):
        # Blank and null collapse to ''; everything else is trimmed.
        if value == '' or pd.isnull(value):
            return ''
        return str(value).strip()

    # Join the trimmed parts (the untrimmed inputs used to leak into the
    # two-part result), skipping any part that is empty after trimming.
    parts = [part for part in (_clean(colrowValue1), _clean(colrowValue2)) if part != '']
    return ", ".join(parts)

# Blank column first, then combine the two name columns row by row.
df100 = df100.assign(AllocationOwner='')
df100['AllocationOwner'] = [
    assignownerName(lastOrOrg, firstName)
    for lastOrOrg, firstName in zip(df100['PersonLastOrOrganizationNM'], df100['PersonFirstNM'])
]
df100

AllocationOwner...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID,WaterSourceUUID,BeneficialUseCategory,AllocationOwner
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,12,,,CFS,FS,,,WA_1,Fish propagation,WA Fish & Wildlife Dept - CRO
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,20,6,0.5,GPM,IR DG,209971,WA_2195,WA_2,"Irrigation, Domestic general","LARSON, ARNOLD V."
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,,,,,DG,630570,WA_20397,WA_2,Domestic general,W. MARD & S. MILLER
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,300,36,,GPM,DM,644800,WA_46066,WA_2,Domestic multiple,HODIN G M
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,125,18,,GPM,DM,384055,WA_108331,WA_2,Domestic multiple,"Leone, Chester"
5,0,0,131,2534872,2137984,2137984,2137984,7491,Primary,Shannon,...,2000,625,250,GPM,IR,330069,WA_30223,WA_2,Irrigation,"Shannon, C"
6,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,60,29.5,11,GPM,DM CI IR,658137,WA_97966,WA_2,"Domestic multiple, Commercial & indust, Irriga...",Whitman Cnty
7,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,60,29.5,11,GPM,DM CI IR,658138,WA_97967,WA_2,"Domestic multiple, Commercial & indust, Irriga...",Whitman Cnty
8,0,0,141,2436694,2143864,2143864,2143864,1594,Primary,Evans,...,0.022,2,0.5,CFS,DS IR,398327,WA_43482,WA_1,"Domestic single, Irrigation","Evans, Steve"
9,0,0,144,2368440,2067337,2067337,2067337,79023,Primary,PICARD,...,,,,,DG,254088,WA_10071,WA_2,Domestic general,"PICARD, CORA E."


In [20]:
print("Allocation priority date...")

# input format 1973-12-07T00:00:00.000
def formatDateString(inString):
    """Convert a 'YYYY-MM-DDT00:00:00.000' timestamp string to 'MM/DD/YYYY'.

    Parameters
    ----------
    inString : str or null
        Timestamp text. The time part must be exactly midnight
        ('T00:00:00.000'); any other text does not match the format.

    Returns
    -------
    str
        The reformatted date, or '' when the input is blank, null, not a
        string, or does not match the expected format.
    """
    try:
        if inString == '' or pd.isnull(inString):
            return ''
        parsed = datetime.strptime(inString, '%Y-%m-%dT00:00:00.000')
        return parsed.strftime('%m/%d/%Y')
    except (ValueError, TypeError):
        # ValueError: text does not match the format; TypeError: input is not
        # a string. Other exceptions are no longer swallowed.
        return ''

# Blank column first, then reformat each PriorityDate value.
df100 = df100.assign(AllocationPriorityDate='')
df100['AllocationPriorityDate'] = df100['PriorityDate'].apply(
    lambda rawDate: formatDateString(rawDate)
)

df100

Allocation priority date...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID,WaterSourceUUID,BeneficialUseCategory,AllocationOwner,AllocationPriorityDate
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,,,CFS,FS,,,WA_1,Fish propagation,WA Fish & Wildlife Dept - CRO,12/07/1973
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,6,0.5,GPM,IR DG,209971,WA_2195,WA_2,"Irrigation, Domestic general","LARSON, ARNOLD V.",07/01/1937
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,,,,DG,630570,WA_20397,WA_2,Domestic general,W. MARD & S. MILLER,
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,36,,GPM,DM,644800,WA_46066,WA_2,Domestic multiple,HODIN G M,07/06/1954
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,18,,GPM,DM,384055,WA_108331,WA_2,Domestic multiple,"Leone, Chester",12/05/1974
5,0,0,131,2534872,2137984,2137984,2137984,7491,Primary,Shannon,...,625,250,GPM,IR,330069,WA_30223,WA_2,Irrigation,"Shannon, C",06/17/1977
6,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,29.5,11,GPM,DM CI IR,658137,WA_97966,WA_2,"Domestic multiple, Commercial & indust, Irriga...",Whitman Cnty,03/20/1981
7,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,29.5,11,GPM,DM CI IR,658138,WA_97967,WA_2,"Domestic multiple, Commercial & indust, Irriga...",Whitman Cnty,03/20/1981
8,0,0,141,2436694,2143864,2143864,2143864,1594,Primary,Evans,...,2,0.5,CFS,DS IR,398327,WA_43482,WA_1,"Domestic single, Irrigation","Evans, Steve",04/03/1986
9,0,0,144,2368440,2067337,2067337,2067337,79023,Primary,PICARD,...,,,,DG,254088,WA_10071,WA_2,Domestic general,"PICARD, CORA E.",


In [33]:
print("AllocationAmount...")

# Check the unit from "InstantaneousUnitCode":
# leave CFS as is, and convert GPM / GPD to CFS for uniformity.
def allocAmountUnits(colrowValue1, colrowValue2):
    """Scale an instantaneous quantity to CFS according to its unit code.

    Parameters
    ----------
    colrowValue1 : quantity to convert (normally a float; may be NaN).
    colrowValue2 : unit code; 'GPM' and 'GPD' are converted, and any other
        value (including blank, NaN or None) leaves the quantity unscaled.

    Returns
    -------
    The quantity multiplied by the unit's factor. A quantity that cannot be
    multiplied by a float (e.g. '' or None) is returned unchanged.
    """
    # Multipliers that turn the listed unit into CFS.
    cfsPerUnit = {
        'GPM': 0.00222800926,
        'GPD': 1.0 / 646317.0,
    }

    # A missing unit arrives as NaN/None unless blanks were filled beforehand;
    # treat it like an empty code instead of failing on .strip().
    unitCode = colrowValue2.strip() if isinstance(colrowValue2, str) else ''
    factor = cfsPerUnit.get(unitCode, 1.0)

    try:
        return factor * colrowValue1
    except TypeError:
        # Non-numeric quantity: pass it through untouched.
        return colrowValue1


df100 = df100.assign(AllocationAmount='')

# The quantity column may hold '' placeholders; convert it back to numbers
# (blanks become NaN) before scaling.
df100['InstantaneousQuantity'] = pd.to_numeric(df100['InstantaneousQuantity'])

# Scale every quantity to CFS using the unit code found on the same row.
df100['AllocationAmount'] = [
    allocAmountUnits(quantity, unitCode)
    for quantity, unitCode in zip(df100['InstantaneousQuantity'], df100['InstantaneousUnitCode'])
]

df100

AllocationAmount...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID,WaterSourceUUID,BeneficialUseCategory,AllocationOwner,AllocationPriorityDate,AllocationAmount
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,,CFS,FS,,,WA_1,Fish propagation,WA Fish & Wildlife Dept - CRO,12/07/1973,12.000000
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,0.5,GPM,IR DG,209971,WA_2195,WA_2,"Irrigation, Domestic general","LARSON, ARNOLD V.",07/01/1937,0.044560
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,,,DG,630570,WA_20397,WA_2,Domestic general,W. MARD & S. MILLER,,
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,,GPM,DM,644800,WA_46066,WA_2,Domestic multiple,HODIN G M,07/06/1954,0.668403
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,,GPM,DM,384055,WA_108331,WA_2,Domestic multiple,"Leone, Chester",12/05/1974,0.278501
5,0,0,131,2534872,2137984,2137984,2137984,7491,Primary,Shannon,...,250,GPM,IR,330069,WA_30223,WA_2,Irrigation,"Shannon, C",06/17/1977,4.456019
6,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,11,GPM,DM CI IR,658137,WA_97966,WA_2,"Domestic multiple, Commercial & indust, Irriga...",Whitman Cnty,03/20/1981,0.133681
7,0,0,140,2538391,2137733,2137733,2137733,8896,Primary,Whitman Cnty,...,11,GPM,DM CI IR,658138,WA_97967,WA_2,"Domestic multiple, Commercial & indust, Irriga...",Whitman Cnty,03/20/1981,0.133681
8,0,0,141,2436694,2143864,2143864,2143864,1594,Primary,Evans,...,0.5,CFS,DS IR,398327,WA_43482,WA_1,"Domestic single, Irrigation","Evans, Steve",04/03/1986,0.022000
9,0,0,144,2368440,2067337,2067337,2067337,79023,Primary,PICARD,...,,,DG,254088,WA_10071,WA_2,Domestic general,"PICARD, CORA E.",,


In [36]:
print("Copying all columns...")

# (destination column in outdf100, source column in df100), kept side by side
# so the two lists cannot drift out of step.
# Destinations still unmapped: AllocationApplicationDate, AllocationCropDutyAmount,
# AllocationExpirationDate, TimeframeStart, TimeframeEnd, WaterAllocationNativeURL.
columnPairs = [
    ("SiteUUID", "SiteUUID"),
    ("WaterSourceUUID", "WaterSourceUUID"),
    ("AllocationNativeID", "WR_Doc_ID"),
    ("AllocationLegalStatusCV", "WaRecProcessStatusTypeCode"),
    ("BeneficialUseCategory", "BeneficialUseCategory"),
    ("AllocationOwner", "AllocationOwner"),
    ("AllocationTypeCV", "WaRecPhaseTypeCode"),
    ("AllocationPriorityDate", "AllocationPriorityDate"),
    ("AllocationAmount", "AllocationAmount"),
    ("AllocationMaximum", "AnnualVolumeQuantity"),
    ("IrrigatedAcreage", "IrrigatedAreaQuantity"),
]
destCols = [destination for destination, _ in columnPairs]
srsCols = [source for _, source in columnPairs]

# Columns are matched by position: destCols[i] receives df100[srsCols[i]].
outdf100[destCols] = df100[srsCols]

outdf100

Copying all columns...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,,,,WA_1,,,Fish propagation,2132706,Certificate,WA Fish & Wildlife Dept - CRO,...,,,,,,,,,,
1,,WA_2195,,WA_2,,,"Irrigation, Domestic general",2066186,Claim,"LARSON, ARNOLD V.",...,,,,,,,,,,
2,,WA_20397,,WA_2,,,Domestic general,2065649,Claim,W. MARD & S. MILLER,...,,,,,,,,,,
3,,WA_46066,,WA_2,,,Domestic multiple,2141830,Certificate,HODIN G M,...,,,,,,,,,,
4,,WA_108331,,WA_2,,,Domestic multiple,2138520,Certificate,"Leone, Chester",...,,,,,,,,,,
5,,WA_30223,,WA_2,,,Irrigation,2137984,Certificate,"Shannon, C",...,,,,,,,,,,
6,,WA_97966,,WA_2,,,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,,
7,,WA_97967,,WA_2,,,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,,
8,,WA_43482,,WA_1,,,"Domestic single, Irrigation",2143864,Certificate,"Evans, Steve",...,,,,,,,,,,
9,,WA_10071,,WA_2,,,Domestic general,2067337,Claim,"PICARD, CORA E.",...,,,,,,,,,,


In [38]:
# Hard-coded values: the same constant is written to every row.
print("Hard coded...")

# Bracket assignment is used throughout. Attribute assignment
# (outdf100.Name = value) only updates a column that already exists; for any
# other name it silently creates a Python attribute and no column.
outdf100["OrganizationUUID"] = "WSDE"
outdf100["VariableSpecificUUID"] = "WSDE Allocation all"
outdf100["MethodUUID"] = "WSDE-Water Rights"
outdf100["AllocationBasisCV"] = "Unknown"

# check this later
# NOTE(review): every row is labelled "Irrigation", whatever its
# BeneficialUseCategory says - confirm this is intended.
outdf100["PrimaryUseCategory"] = "Irrigation"

# These were previously set as outdf100.TimeframeStart / outdf100.TimeframeEnd,
# names that are not columns of the output table, so the
# AllocationTimeframe* columns were never filled.
outdf100["AllocationTimeframeStart"] = "01/01"
outdf100["AllocationTimeframeEnd"] = "12/31"

# Date on which this notebook is run.
outdf100["DataPublicationDate"] = datetime.now().strftime('%m/%d/%Y')

outdf100

Hard coded...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,WSDE,,WSDE Allocation all,WA_1,WSDE-Water Rights,Irrigation,Fish propagation,2132706,Certificate,WA Fish & Wildlife Dept - CRO,...,,,,,,,,,12/19/2019,
1,WSDE,WA_2195,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Irrigation, Domestic general",2066186,Claim,"LARSON, ARNOLD V.",...,,,,,,,,,12/19/2019,
2,WSDE,WA_20397,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic general,2065649,Claim,W. MARD & S. MILLER,...,,,,,,,,,12/19/2019,
3,WSDE,WA_46066,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2141830,Certificate,HODIN G M,...,,,,,,,,,12/19/2019,
4,WSDE,WA_108331,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2138520,Certificate,"Leone, Chester",...,,,,,,,,,12/19/2019,
5,WSDE,WA_30223,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Irrigation,2137984,Certificate,"Shannon, C",...,,,,,,,,,12/19/2019,
6,WSDE,WA_97966,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
7,WSDE,WA_97967,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
8,WSDE,WA_43482,WSDE Allocation all,WA_1,WSDE-Water Rights,Irrigation,"Domestic single, Irrigation",2143864,Certificate,"Evans, Steve",...,,,,,,,,,12/19/2019,
9,WSDE,WA_10071,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic general,2067337,Claim,"PICARD, CORA E.",...,,,,,,,,,12/19/2019,


In [40]:
print("Droping null allocations...")
# Rows where both AllocationAmount and AllocationMaximum are empty are saved to
# waterallocations_missing.csv and removed from the output.

# NaN becomes '' so the equality tests below catch missing values.
outdf100 = outdf100.replace(np.nan, '')

amountMissing = outdf100["AllocationAmount"] == ''
maximumMissing = outdf100["AllocationMaximum"] == ''
outdf100purge = outdf100.loc[amountMissing & maximumMissing]

if outdf100purge.shape[0] > 0:
    outdf100purge.to_csv('waterallocations_missing.csv')
    dropIndex = outdf100purge.index
    outdf100 = outdf100.drop(dropIndex).reset_index(drop=True)

outdf100

Droping null allocations...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,WSDE,,WSDE Allocation all,WA_1,WSDE-Water Rights,Irrigation,Fish propagation,2132706,Certificate,WA Fish & Wildlife Dept - CRO,...,,,,,,,,,12/19/2019,
1,WSDE,WA_2195,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Irrigation, Domestic general",2066186,Claim,"LARSON, ARNOLD V.",...,,,,,,,,,12/19/2019,
2,WSDE,WA_46066,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2141830,Certificate,HODIN G M,...,,,,,,,,,12/19/2019,
3,WSDE,WA_108331,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2138520,Certificate,"Leone, Chester",...,,,,,,,,,12/19/2019,
4,WSDE,WA_30223,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Irrigation,2137984,Certificate,"Shannon, C",...,,,,,,,,,12/19/2019,
5,WSDE,WA_97966,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
6,WSDE,WA_97967,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
7,WSDE,WA_43482,WSDE Allocation all,WA_1,WSDE-Water Rights,Irrigation,"Domestic single, Irrigation",2143864,Certificate,"Evans, Steve",...,,,,,,,,,12/19/2019,
8,WSDE,WA_126597,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic general,2065515,Claim,"BEAUCHENE, PAUL J.",...,,,,,,,,,12/19/2019,
9,WSDE,WA_27651,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2143446,Permit,Pelican Point Water Co,...,,,,,,,,,12/19/2019,


In [41]:
print("Droping null SiteUUIDs...")
# Remove every row whose SiteUUID is blank, then renumber the rows.
outdf100nullID = outdf100.loc[outdf100["SiteUUID"] == '']
if outdf100nullID.shape[0] > 0:
    dropIndex = outdf100nullID.index
    outdf100 = outdf100.drop(dropIndex).reset_index(drop=True)

outdf100

Droping null SiteUUIDs...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,WSDE,WA_2195,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Irrigation, Domestic general",2066186,Claim,"LARSON, ARNOLD V.",...,,,,,,,,,12/19/2019,
1,WSDE,WA_46066,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2141830,Certificate,HODIN G M,...,,,,,,,,,12/19/2019,
2,WSDE,WA_108331,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2138520,Certificate,"Leone, Chester",...,,,,,,,,,12/19/2019,
3,WSDE,WA_30223,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Irrigation,2137984,Certificate,"Shannon, C",...,,,,,,,,,12/19/2019,
4,WSDE,WA_97966,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
5,WSDE,WA_97967,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
6,WSDE,WA_43482,WSDE Allocation all,WA_1,WSDE-Water Rights,Irrigation,"Domestic single, Irrigation",2143864,Certificate,"Evans, Steve",...,,,,,,,,,12/19/2019,
7,WSDE,WA_126597,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic general,2065515,Claim,"BEAUCHENE, PAUL J.",...,,,,,,,,,12/19/2019,
8,WSDE,WA_27651,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2143446,Permit,Pelican Point Water Co,...,,,,,,,,,12/19/2019,
9,WSDE,WA_27650,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2143446,Permit,Pelican Point Water Co,...,,,,,,,,,12/19/2019,


In [43]:
print("Droping null Priority date...")
# Remove every row whose AllocationPriorityDate is blank, then renumber the
# rows. The removed rows stay available in outdf100nullPR.
outdf100nullPR = outdf100.loc[outdf100["AllocationPriorityDate"] == '']
if outdf100nullPR.shape[0] > 0:
    dropIndex = outdf100nullPR.index
    outdf100 = outdf100.drop(dropIndex).reset_index(drop=True)

outdf100

Droping null Priority date...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,WSDE,WA_2195,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Irrigation, Domestic general",2066186,Claim,"LARSON, ARNOLD V.",...,,,,,,,,,12/19/2019,
1,WSDE,WA_46066,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2141830,Certificate,HODIN G M,...,,,,,,,,,12/19/2019,
2,WSDE,WA_108331,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2138520,Certificate,"Leone, Chester",...,,,,,,,,,12/19/2019,
3,WSDE,WA_30223,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Irrigation,2137984,Certificate,"Shannon, C",...,,,,,,,,,12/19/2019,
4,WSDE,WA_97966,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
5,WSDE,WA_97967,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
6,WSDE,WA_43482,WSDE Allocation all,WA_1,WSDE-Water Rights,Irrigation,"Domestic single, Irrigation",2143864,Certificate,"Evans, Steve",...,,,,,,,,,12/19/2019,
7,WSDE,WA_126597,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic general,2065515,Claim,"BEAUCHENE, PAUL J.",...,,,,,,,,,12/19/2019,
8,WSDE,WA_27651,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2143446,Permit,Pelican Point Water Co,...,,,,,,,,,12/19/2019,
9,WSDE,WA_27650,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2143446,Permit,Pelican Point Water Co,...,,,,,,,,,12/19/2019,


In [45]:
print("Droping null WaterSourceUUID ...")
# A water source id counts as missing when it is an empty string or a pandas
# null (NaN/None). Comparing against '' alone lets real nulls through.
waterSourceId = outdf100["WaterSourceUUID"]
nullWaterSourceMask = waterSourceId.isna() | (waterSourceId == '')
# NOTE: the rejected rows are stored under the same name the priority-date
# cell uses (outdf100nullPR), so this overwrites that earlier frame.
outdf100nullPR = outdf100.loc[nullWaterSourceMask]
if nullWaterSourceMask.any():
    # Filter by position (boolean mask) rather than by index label, so rows
    # that merely share a label with a rejected row are not removed too.
    outdf100 = outdf100.loc[~nullWaterSourceMask].reset_index(drop=True)
outdf100

Droping null WaterSourceUUID ...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,WSDE,WA_2195,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Irrigation, Domestic general",2066186,Claim,"LARSON, ARNOLD V.",...,,,,,,,,,12/19/2019,
1,WSDE,WA_46066,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2141830,Certificate,HODIN G M,...,,,,,,,,,12/19/2019,
2,WSDE,WA_108331,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2138520,Certificate,"Leone, Chester",...,,,,,,,,,12/19/2019,
3,WSDE,WA_30223,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Irrigation,2137984,Certificate,"Shannon, C",...,,,,,,,,,12/19/2019,
4,WSDE,WA_97966,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
5,WSDE,WA_97967,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
6,WSDE,WA_43482,WSDE Allocation all,WA_1,WSDE-Water Rights,Irrigation,"Domestic single, Irrigation",2143864,Certificate,"Evans, Steve",...,,,,,,,,,12/19/2019,
7,WSDE,WA_126597,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic general,2065515,Claim,"BEAUCHENE, PAUL J.",...,,,,,,,,,12/19/2019,
8,WSDE,WA_27651,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2143446,Permit,Pelican Point Water Co,...,,,,,,,,,12/19/2019,
9,WSDE,WA_27650,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2143446,Permit,Pelican Point Water Co,...,,,,,,,,,12/19/2019,


In [46]:
print("Droping duplicates...")
# Flag every row that repeats an earlier row across all columns; the first
# occurrence of each row is not flagged and is therefore kept.
duplicateRowMask = outdf100.duplicated()
outdf100Duplicated = outdf100.loc[duplicateRowMask]
if duplicateRowMask.any():
    # Save the rejected rows (index included) so they can be reviewed later.
    outdf100Duplicated.to_csv("waterallocations_duplicaterows.csv")  # index=False,
    # Build a new frame instead of mutating in place, so any other reference
    # to the previous frame is left untouched.
    outdf100 = outdf100.loc[~duplicateRowMask].reset_index(drop=True)

outdf100

Droping duplicates...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,WSDE,WA_2195,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Irrigation, Domestic general",2066186,Claim,"LARSON, ARNOLD V.",...,,,,,,,,,12/19/2019,
1,WSDE,WA_46066,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2141830,Certificate,HODIN G M,...,,,,,,,,,12/19/2019,
2,WSDE,WA_108331,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2138520,Certificate,"Leone, Chester",...,,,,,,,,,12/19/2019,
3,WSDE,WA_30223,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Irrigation,2137984,Certificate,"Shannon, C",...,,,,,,,,,12/19/2019,
4,WSDE,WA_97966,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
5,WSDE,WA_97967,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,"Domestic multiple, Commercial & indust, Irriga...",2137733,Certificate,Whitman Cnty,...,,,,,,,,,12/19/2019,
6,WSDE,WA_43482,WSDE Allocation all,WA_1,WSDE-Water Rights,Irrigation,"Domestic single, Irrigation",2143864,Certificate,"Evans, Steve",...,,,,,,,,,12/19/2019,
7,WSDE,WA_126597,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic general,2065515,Claim,"BEAUCHENE, PAUL J.",...,,,,,,,,,12/19/2019,
8,WSDE,WA_27651,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2143446,Permit,Pelican Point Water Co,...,,,,,,,,,12/19/2019,
9,WSDE,WA_27650,WSDE Allocation all,WA_2,WSDE-Water Rights,Irrigation,Domestic multiple,2143446,Permit,Pelican Point Water Co,...,,,,,,,,,12/19/2019,


In [47]:
print("Checking required is not null...")
# Columns that must hold a value in every row. SiteUUID is not part of the
# check (it is commented out of the list).
requiredCols = ["OrganizationUUID", "VariableSpecificUUID", "WaterSourceUUID",
                "MethodUUID", "AllocationPriorityDate"] #SiteUUID
# A required cell is missing when it is an empty string or a pandas null
# (NaN/None/NaT); a row is reported when any of its required cells is missing.
requiredValues = outdf100[requiredCols]
missingRequiredMask = (requiredValues.isna() | (requiredValues == '')).any(axis=1)
outdf100_nullMand = outdf100.loc[missingRequiredMask]
if missingRequiredMask.any():
    # Offending rows are only reported (index included); outdf100 is unchanged.
    outdf100_nullMand.to_csv('waterallocations_mandatoryFieldMissing.csv')  # index=False,
#ToDO: purge these cells if there is any missing? #For now left to be inspected
#outdf100_nullMand

Checking required is not null...


In [48]:
# Save the cleaned allocation table to the configured output path as UTF-8,
# leaving the DataFrame index out of the file.
print("Writing outputs...")
outdf100.to_csv(out_alloc, encoding="utf-8", index=False)

print("Done Water Allocation")

Writing outputs...
Done Water Allocation


### Do not run the following with the rest of the code  (it is for inspection)

In [None]:
##### Do not run the following with the rest of the code  (it is for inspection)
print("Long site ids...")

# input: previously written allocation table to screen for long site ids
in_alloc = "waterallocations_long.csv"
outdf100 = pd.read_csv(in_alloc, encoding = "ISO-8859-1") #, or alternatively encoding = "utf-8"
print (len(outdf100.index))

# Rows whose SiteUUID text is longer than this many characters are split off.
maxSiteIdLength = 500
# Measure SiteUUID as text. Blank CSV cells are read back as NaN (a float) and
# purely numeric ids as numbers; calling len() on either raises TypeError, so
# nulls are treated as empty text and everything else is converted to str.
siteIdLength = outdf100['SiteUUID'].fillna('').astype(str).str.len()
longSiteIdMask = siteIdLength > maxSiteIdLength
outdf100Long = outdf100.loc[longSiteIdMask]
if longSiteIdMask.any():
    print("There are rows with too long siteids")
    # Save the rejected rows (index included) so they can be reviewed.
    outdf100Long.to_csv("waterallocations_longsiteid.csv")  # index=False,
    outdf100 = outdf100.loc[~longSiteIdMask].reset_index(drop=True)
#outdf100

outdf100Long

print("Writing outputs...")
#write out
#output: water allocation
out_alloc = "waterallocations.csv"    #output
outdf100.to_csv(out_alloc, index=False, encoding = "utf-8")

print("Done Water Allocation")