In [1]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import os
from datetime import datetime
from dateutil.parser import parse

In [2]:
# working directory
# All input/output paths used by this notebook are relative to this directory.
# The default is the original author's local folder; set the
# WADE_WA_WORKING_DIR environment variable to run the notebook on another
# machine without editing this cell.
working_dir = os.environ.get(
    "WADE_WA_WORKING_DIR",
    "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Washington/WaterAllocation",
)
os.chdir(working_dir)

In [3]:
# Input files
# fileInput1 is the main table (loaded into df100); fileInput2 supplies the
# D_Point_ID <-> WR_Doc_ID pairs that are left-joined onto it.
# All paths are relative to the working directory set with os.chdir().
fileInput1 = "RawInputData/Person_Plus_EXTRACT_FromWRTSnotGWIS.csv" 
fileInput2 = "RawInputData/D_Point_WR_Doc.csv"

# water sources look up
# Provides WaterSourceUUID per WaterSourceTypeCV (loaded into df400).
inp_wtrsrs="ProcessedInputData/watersources.csv"

# sites look up
# Provides SiteUUID per SiteNativeID (loaded into df500).
inp_sitdim = 'ProcessedInputData/sites.csv'

#output: water allocation
# NOTE(review): not referenced in the visible part of the notebook; presumably
# the final outdf100 is written here at the end -- confirm.
out_alloc = "ProcessedInputData/waterallocations.csv"

In [4]:
######## WaDE columns
# Ordered column names of the output water-allocation table. The list is used
# to create the empty target frame (pd.DataFrame(columns=columns)); columns
# that are never filled later stay empty in the output.
columns = [
    "MethodUUID",
    "OrganizationUUID",
    "SiteUUID",
    "VariableSpecificUUID",
    "WaterSourceUUID",
    "AllocationAmount",
    "AllocationApplicationDate",
    "AllocationAssociatedConsumptiveUseSiteIDs",
    "AllocationAssociatedWithdrawalSiteIDs",
    "AllocationBasisCV",
    "AllocationChangeApplicationIndicator",
    "AllocationCommunityWaterSupplySystem",
    "AllocationCropDutyAmount",
    "AllocationExpirationDate",
    "AllocationLegalStatusCV",
    "AllocationMaximum",
    "AllocationNativeID",
    "AllocationOwner",
    "AllocationPriorityDate",
    "AllocationSDWISIdentifierCV",
    "AllocationTimeframeEnd",
    "AllocationTimeframeStart",
    "AllocationTypeCV",
    "BeneficialUseCategory",
    "CommunityWaterSupplySystem",
    "CropTypeCV",
    "CustomerTypeCV",
    "DataPublicationDate",
    "DataPublicationDOI",
    "GeneratedPowerCapacityMW",
    "IrrigatedAcreage",
    "IrrigationMethodCV",
    "LegacyAllocationIDs",
    "PopulationServed",
    "PowerType",
    "PrimaryUseCategory",
    "WaterAllocationNativeURL"]

In [5]:
### target dataFrame
# Empty frame (no rows yet) whose columns are the WaDE column names in
# `columns`; it is filled from df100 further down in the notebook.

outdf100=pd.DataFrame(columns=columns)

In [6]:
# Load the main input table and the two lookup tables.
print("Reading inputs...")

# Main table; ISO-8859-1 is used for every CSV read in this notebook.
df100 = pd.read_csv(fileInput1, encoding="ISO-8859-1")
print(len(df100))

# Sites lookup (SiteNativeID -> SiteUUID).
df500 = pd.read_csv(inp_sitdim, encoding="ISO-8859-1")

# Water sources lookup. Rows repeating the same (name, type) pair are dropped;
# this becomes unnecessary once the water sources table itself is de-duplicated.
df400 = (
    pd.read_csv(inp_wtrsrs, encoding="ISO-8859-1")
    .drop_duplicates(subset=['WaterSourceName', 'WaterSourceTypeCV'])
)
df400

Reading inputs...
489112


Unnamed: 0,WaterSourceUUID,WaterSourceNativeID,WaterSourceName,WaterSourceTypeCV,WaterQualityIndicatorCV,GNISFeatureNameCV,Geometry
0,WA_1,1,Unspecified,surfaceWater,Fresh,,
1,WA_2,2,Unspecified,groundwater,Fresh,,
2,WA_3,3,Unspecified,reservoir,Fresh,,
3,WA_4,4,Unspecified,Unknown,Fresh,,


In [7]:
print("Join to Sites native ID source...")

# Only the two linking columns are read from the D_Point / WR_Doc table,
# and exact repeated pairs are removed before joining.
input_cols = ['D_Point_ID', 'WR_Doc_ID']
df200 = pd.read_csv(fileInput2, encoding="ISO-8859-1", usecols=input_cols).drop_duplicates()

# Left join: every df100 row is kept; rows without a match get NaN in
# D_Point_ID, and a WR_Doc_ID with several D_Point_IDs yields several rows.
# Note this cell is not idempotent: re-running it joins the column a second time.
df100 = df100.merge(df200, how='left', left_on='WR_Doc_ID', right_on='WR_Doc_ID')

df100

Join to Sites native ID source...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,WaRecProcessStatusTypeCode,WaRecClaimTypeCode,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
1,0,0,2,6809900,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
2,0,0,3,6811534,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
3,0,0,4,6813072,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
4,0,0,5,6814107,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
562442,0,0,489108,6987142,2283068,2283068,2283068,130323,Former Primary,Northwest Farm Credit Services FLCA*,...,Inactive,,Permit,Superseded,1324.0,1041.0,257.0,GPM,IR,
562443,0,0,489109,6991296,2283068,2283068,2283068,130323,Primary,CBO Selah LLC,...,Inactive,,Permit,Superseded,1324.0,1041.0,257.0,GPM,IR,
562444,0,0,489110,6994728,2283068,2283068,2283068,130323,Former Primary,Northwest Farm Credit Services FLCA*,...,Inactive,,Permit,Superseded,1324.0,1041.0,257.0,GPM,IR,
562445,0,0,489111,6998881,2283068,2283068,2283068,130323,Primary,CBO Selah LLC,...,Inactive,,Permit,Superseded,1324.0,1041.0,257.0,GPM,IR,


In [8]:
# Keep the first row seen for each distinct D_Point_ID and renumber the index.
# NOTE(review): the stated intent is "unique water rights that may have multiple
# sites", but the subset is D_Point_ID alone, so all rows with a missing
# D_Point_ID collapse into one row and different water rights sharing a point
# are reduced to the first one -- confirm this is intended.
print("Dropping duplicates...")

df100 = df100.drop_duplicates(subset=['D_Point_ID']).reset_index(drop=True)

print(len(df100))

df100

Dropping duplicates...
124637


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,WaRecProcessStatusTypeCode,WaRecClaimTypeCode,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Active,,Certificate,,12.0,,,CFS,FS,
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,Active,LongForm,Claim,,20.0,6.0,0.5,GPM,IR DG,209971.0
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,Active,ShortForm,Claim,,,,,,DG,630570.0
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,Active,,Certificate,,300.0,36.0,,GPM,DM,644800.0
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,Active,,Certificate,,125.0,18.0,,GPM,DM,384055.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124632,0,0,488950,2594706,2256313,2256313,2256313,158582,Primary,SCHEEL,...,Active,ShortForm,Claim,,,,,,DG,515280.0
124633,0,0,488951,2388775,2162208,2162208,2162208,252338,Primary,BERNHARDT,...,Active,ShortForm,Claim,,,,,,ST DG IR,284603.0
124634,0,0,488953,2362296,2164480,2164480,2164480,252540,Primary,MAC KAY,...,Active,ShortForm,Claim,,,,,,DG IR ST,275424.0
124635,0,0,488980,2365722,2186470,2186470,2186470,227904,Primary,CAMPBELL,...,Active,LongForm,Claim,,,,,,ST,292917.0


In [9]:
print("Adding SiteUUID...")

def assignSiteID(colrowValue, df500):
    """Return the SiteUUID(s) whose SiteNativeID equals ``colrowValue``.

    Parameters
    ----------
    colrowValue : native site identifier to look up; '' or a null value
        (None / NaN) means "no site".
    df500 : DataFrame with 'SiteNativeID' and 'SiteUUID' columns.

    Returns
    -------
    str
        All matching SiteUUID values joined with ',' (in table order), or ''
        when the input is blank/null or nothing matches.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    matches = df500.loc[df500['SiteNativeID'] == colrowValue, 'SiteUUID']
    # Joining an empty selection already yields '', so no separate branch is needed.
    return ','.join(map(str, matches))

# Add a SiteUUID column: each row's D_Point_ID is looked up in the sites
# table (df500); rows without a D_Point_ID or without a match get ''.
df100 = df100.assign(
    SiteUUID=df100['D_Point_ID'].apply(lambda nativeID: assignSiteID(nativeID, df500))
)

df100

Adding SiteUUID...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,WaRecClaimTypeCode,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,,Certificate,,12.0,,,CFS,FS,,
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,LongForm,Claim,,20.0,6.0,0.5,GPM,IR DG,209971.0,WA_2195
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,ShortForm,Claim,,,,,,DG,630570.0,WA_20397
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,,Certificate,,300.0,36.0,,GPM,DM,644800.0,WA_46066
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,,Certificate,,125.0,18.0,,GPM,DM,384055.0,WA_108331
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124632,0,0,488950,2594706,2256313,2256313,2256313,158582,Primary,SCHEEL,...,ShortForm,Claim,,,,,,DG,515280.0,WA_75638
124633,0,0,488951,2388775,2162208,2162208,2162208,252338,Primary,BERNHARDT,...,ShortForm,Claim,,,,,,ST DG IR,284603.0,WA_120360
124634,0,0,488953,2362296,2164480,2164480,2164480,252540,Primary,MAC KAY,...,ShortForm,Claim,,,,,,DG IR ST,275424.0,WA_61651
124635,0,0,488980,2365722,2186470,2186470,2186470,227904,Primary,CAMPBELL,...,LongForm,Claim,,,,,,ST,292917.0,WA_141606


In [10]:
print("Water sources...")

# water source name is "unspecified" as it is not known
def assignWaterSourceID(colrowValue, df400):
    """Return the WaterSourceUUID of the first row whose type equals ``colrowValue``.

    Parameters
    ----------
    colrowValue : water source type to look up; '' or a null value is
        treated as the type 'Unknown'.
    df400 : DataFrame with 'WaterSourceTypeCV' and 'WaterSourceUUID' columns.

    Returns
    -------
    The first matching WaterSourceUUID, or '' when no row has that type.
    """
    isBlank = colrowValue == '' or pd.isnull(colrowValue)
    sourceType = 'Unknown' if isBlank else colrowValue
    candidates = df400.loc[df400['WaterSourceTypeCV'] == sourceType, 'WaterSourceUUID']
    if candidates.empty:
        return ''
    return candidates.iloc[0]

# Replace every NaN in the frame with '' (this affects all columns, so the
# later cells see blank strings instead of NaN).
df100 = df100.replace(np.nan, '')

# Add a WaterSourceUUID column by matching WaRecRCWClassTypeCode against the
# WaterSourceTypeCV values of the water sources table (df400).
df100 = df100.assign(
    WaterSourceUUID=df100['WaRecRCWClassTypeCode'].apply(
        lambda sourceType: assignWaterSourceID(sourceType, df400)
    )
)

df100

Water sources...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,WaRecPhaseTypeCode,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID,WaterSourceUUID
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,Certificate,,12,,,CFS,FS,,,WA_1
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,Claim,,20,6,0.5,GPM,IR DG,209971,WA_2195,WA_2
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,Claim,,,,,,DG,630570,WA_20397,WA_2
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,Certificate,,300,36,,GPM,DM,644800,WA_46066,WA_2
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,Certificate,,125,18,,GPM,DM,384055,WA_108331,WA_2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124632,0,0,488950,2594706,2256313,2256313,2256313,158582,Primary,SCHEEL,...,Claim,,,,,,DG,515280,WA_75638,WA_2
124633,0,0,488951,2388775,2162208,2162208,2162208,252338,Primary,BERNHARDT,...,Claim,,,,,,ST DG IR,284603,WA_120360,WA_2
124634,0,0,488953,2362296,2164480,2164480,2164480,252540,Primary,MAC KAY,...,Claim,,,,,,DG IR ST,275424,WA_61651,WA_2
124635,0,0,488980,2365722,2186470,2186470,2186470,227904,Primary,CAMPBELL,...,Claim,,,,,,ST,292917,WA_141606,WA_1


In [11]:
print("Beneficial use categories dictionary and function...")

# Purpose-of-use code -> beneficial use name.
BenUseDictWA = {
    "508-14":"508-14",
    "AI":"Agricultural Irrigation",
    "CI":"Commercial & indust",
    "CM":"Commercial",
    "CO":"Cooling for indust proces",
    "DC":"Dust Control",
    "DG":"Domestic general",
    "DM":"Domestic multiple",
    "DS":"Domestic single",
    "DY":"Dairy",
    "EN":"Environmental quality",
    "FP":"Frost protection",
    "FR":"Fire protection",
    "FS":"Fish propagation",
    "GP":"Groundwater Preservation",
    "HE":"Heat Exchange",
    "HP":"Heat protection for crops",
    "HW":"Highway",
    "IFlow":"Instream Flow",
    "II":"Individual Irrigation",
    "IR":"Irrigation",
    "IT":"Municipal inter-tie system",
    "IU":"Irrigation Unknown",
    "MI":"Mining",
    "MT":"Mitigation",
    "MU":"Municipal",
    "NR":"No Purpose Identified",
    "OT":"Other",
    "PO":"Power",
    "PR":"Parks and Recreation",
    "RE":"Recreation - beautification",
    "RW":"Railway",
    "SA":"Stream augmentation",
    "SR":"Storage",
    "ST":"Stock water",
    "TS":"Test Well",
    "TW-P":"Trust water, Permanent",
    "TW-T":"Trust water, Temporary",
    "WL":"Wildlife refuge"
}

# Get BenUse based on the field "PurposeOfUseTypeCodes"
def assignBenUseDictWA(colrowValue):
    """Translate whitespace-separated purpose-of-use codes into names.

    Parameters
    ----------
    colrowValue : str or null
        Codes separated by whitespace, e.g. "IR DG". Lookup is case-sensitive
        and uses the keys of BenUseDictWA exactly as written.

    Returns
    -------
    str
        The names joined with ", " in the order the codes appear. Returns ''
        when the input is blank/null or when ANY code is not in BenUseDictWA
        (the whole value is blanked, not just the unknown code).
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    # split() with no argument already ignores leading/trailing whitespace.
    benUseCodes = colrowValue.split()
    try:
        return ", ".join(BenUseDictWA[code] for code in benUseCodes)
    except KeyError:
        # Only an unknown code is treated as "no beneficial use"; any other
        # error is a real problem and is allowed to propagate.
        return ''

Beneficial use categories dictionary and function...


In [12]:
print("Beneficial uses...")

# Add a BeneficialUseCategory column by translating each row's
# PurposeOfUseTypeCodes value with assignBenUseDictWA.
df100 = df100.assign(
    BeneficialUseCategory=df100['PurposeOfUseTypeCodes'].apply(assignBenUseDictWA)
)
df100

Beneficial uses...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,WaRecPhaseStageTypeCode,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID,WaterSourceUUID,BeneficialUseCategory
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,,12,,,CFS,FS,,,WA_1,Fish propagation
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,,20,6,0.5,GPM,IR DG,209971,WA_2195,WA_2,"Irrigation, Domestic general"
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,,,,,,DG,630570,WA_20397,WA_2,Domestic general
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,,300,36,,GPM,DM,644800,WA_46066,WA_2,Domestic multiple
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,,125,18,,GPM,DM,384055,WA_108331,WA_2,Domestic multiple
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124632,0,0,488950,2594706,2256313,2256313,2256313,158582,Primary,SCHEEL,...,,,,,,DG,515280,WA_75638,WA_2,Domestic general
124633,0,0,488951,2388775,2162208,2162208,2162208,252338,Primary,BERNHARDT,...,,,,,,ST DG IR,284603,WA_120360,WA_2,"Stock water, Domestic general, Irrigation"
124634,0,0,488953,2362296,2164480,2164480,2164480,252540,Primary,MAC KAY,...,,,,,,DG IR ST,275424,WA_61651,WA_2,"Domestic general, Irrigation, Stock water"
124635,0,0,488980,2365722,2186470,2186470,2186470,227904,Primary,CAMPBELL,...,,,,,,ST,292917,WA_141606,WA_1,Stock water


In [13]:
print("AllocationOwner...")

def assignownerName(colrowValue1, colrowValue2):
    """Combine two name fields into one owner string.

    Parameters
    ----------
    colrowValue1 : first name part (the caller passes the last/organization name).
    colrowValue2 : second name part (the caller passes the first name).
        Either may be '' or null (None / NaN). Non-string values are
        converted with str() before trimming.

    Returns
    -------
    str
        The non-blank parts, each stripped of surrounding whitespace, joined
        with ", ". Returns '' when both parts are blank or null.
    """
    nameParts = []
    for rawValue in (colrowValue1, colrowValue2):
        if rawValue == '' or pd.isnull(rawValue):
            continue
        cleaned = str(rawValue).strip()
        # A whitespace-only value counts as blank.
        if cleaned:
            nameParts.append(cleaned)
    # Join the *stripped* parts so padded inputs do not leak stray spaces
    # into the combined name.
    return ", ".join(nameParts)

# Add an AllocationOwner column built from the last/organization name and
# the first name of each row.
df100 = df100.assign(
    AllocationOwner=[
        assignownerName(lastOrOrgName, firstName)
        for lastOrOrgName, firstName in zip(df100['PersonLastOrOrganizationNM'],
                                            df100['PersonFirstNM'])
    ]
)
df100

AllocationOwner...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,InstantaneousQuantity,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID,WaterSourceUUID,BeneficialUseCategory,AllocationOwner
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,12,,,CFS,FS,,,WA_1,Fish propagation,WA Fish & Wildlife Dept - CRO
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,20,6,0.5,GPM,IR DG,209971,WA_2195,WA_2,"Irrigation, Domestic general","LARSON, ARNOLD V."
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,,,,,DG,630570,WA_20397,WA_2,Domestic general,W. MARD & S. MILLER
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,300,36,,GPM,DM,644800,WA_46066,WA_2,Domestic multiple,HODIN G M
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,125,18,,GPM,DM,384055,WA_108331,WA_2,Domestic multiple,"Leone, Chester"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124632,0,0,488950,2594706,2256313,2256313,2256313,158582,Primary,SCHEEL,...,,,,,DG,515280,WA_75638,WA_2,Domestic general,"SCHEEL, NORMAN W."
124633,0,0,488951,2388775,2162208,2162208,2162208,252338,Primary,BERNHARDT,...,,,,,ST DG IR,284603,WA_120360,WA_2,"Stock water, Domestic general, Irrigation","BERNHARDT, RONALD W"
124634,0,0,488953,2362296,2164480,2164480,2164480,252540,Primary,MAC KAY,...,,,,,DG IR ST,275424,WA_61651,WA_2,"Domestic general, Irrigation, Stock water","MAC KAY, DONALD G"
124635,0,0,488980,2365722,2186470,2186470,2186470,227904,Primary,CAMPBELL,...,,,,,ST,292917,WA_141606,WA_1,Stock water,"CAMPBELL, ROBERT H."


In [14]:
print("Allocation priority date...")

# input format 1973-12-07T00:00:00.000
def formatDateString(inString):
    """Convert an ISO-style timestamp string to 'MM/DD/YYYY'.

    Parameters
    ----------
    inString : str or null
        Timestamp such as '1973-12-07T00:00:00.000'. The time-of-day part is
        parsed but discarded, so a non-midnight timestamp still yields its
        date instead of being blanked.

    Returns
    -------
    str
        The date formatted as '%m/%d/%Y', or '' when the input is blank,
        null, not a string, or not in the expected format.
    """
    try:
        if inString == '' or pd.isnull(inString):
            return ''
        # %f accepts 1-6 fractional digits, so '.000' matches.
        parsedStamp = datetime.strptime(inString, '%Y-%m-%dT%H:%M:%S.%f')
        return parsedStamp.strftime('%m/%d/%Y')
    except (TypeError, ValueError):
        # TypeError: non-string input; ValueError: unparsable text.
        return ''

# Add an AllocationPriorityDate column: PriorityDate reformatted to
# MM/DD/YYYY, or '' where it is missing or cannot be parsed.
df100 = df100.assign(
    AllocationPriorityDate=df100['PriorityDate'].apply(formatDateString)
)

df100

Allocation priority date...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,AnnualVolumeQuantity,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID,WaterSourceUUID,BeneficialUseCategory,AllocationOwner,AllocationPriorityDate
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,,,CFS,FS,,,WA_1,Fish propagation,WA Fish & Wildlife Dept - CRO,12/07/1973
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,6,0.5,GPM,IR DG,209971,WA_2195,WA_2,"Irrigation, Domestic general","LARSON, ARNOLD V.",07/01/1937
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,,,,DG,630570,WA_20397,WA_2,Domestic general,W. MARD & S. MILLER,
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,36,,GPM,DM,644800,WA_46066,WA_2,Domestic multiple,HODIN G M,07/06/1954
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,18,,GPM,DM,384055,WA_108331,WA_2,Domestic multiple,"Leone, Chester",12/05/1974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124632,0,0,488950,2594706,2256313,2256313,2256313,158582,Primary,SCHEEL,...,,,,DG,515280,WA_75638,WA_2,Domestic general,"SCHEEL, NORMAN W.",
124633,0,0,488951,2388775,2162208,2162208,2162208,252338,Primary,BERNHARDT,...,,,,ST DG IR,284603,WA_120360,WA_2,"Stock water, Domestic general, Irrigation","BERNHARDT, RONALD W",
124634,0,0,488953,2362296,2164480,2164480,2164480,252540,Primary,MAC KAY,...,,,,DG IR ST,275424,WA_61651,WA_2,"Domestic general, Irrigation, Stock water","MAC KAY, DONALD G",
124635,0,0,488980,2365722,2186470,2186470,2186470,227904,Primary,CAMPBELL,...,,,,ST,292917,WA_141606,WA_1,Stock water,"CAMPBELL, ROBERT H.",


In [15]:
print("AllocationAmount...")

# Check unit from “InstantaneousUnitCode” 
# and leave CFS as is, 
# and convert GPM to CFS for uniformity
def allocAmountUnits(colrowValue1, colrowValue2):
    """Scale an instantaneous quantity to CFS according to its unit code.

    Parameters
    ----------
    colrowValue1 : quantity to convert (normally a float).
    colrowValue2 : unit code; 'GPM' and 'GPD' are converted, surrounding
        whitespace is ignored. Any other code -- including 'CFS', '' and
        missing values (None / NaN) -- leaves the quantity unscaled.

    Returns
    -------
    The quantity multiplied by the unit's factor, or ``colrowValue1``
    unchanged when it cannot be multiplied (e.g. it is '' or None).
    """
    # Multipliers that turn one unit of flow into cubic feet per second.
    cfsPerUnit = {
        'GPM': 0.00222800926,     # gallons per minute
        'GPD': 1.0 / 646317.0,    # gallons per day
    }
    # A missing unit arrives as NaN/None rather than a string; treat it as
    # "no conversion" instead of failing on .strip().
    unitCode = colrowValue2.strip() if isinstance(colrowValue2, str) else ''
    MultiFactor = cfsPerUnit.get(unitCode, 1.0)
    try:
        return MultiFactor * colrowValue1
    except TypeError:
        # Non-numeric quantity (e.g. a blank string): pass it through as is.
        return colrowValue1


# InstantaneousQuantity must be numeric before scaling (blank strings become NaN).
df100['InstantaneousQuantity'] = pd.to_numeric(df100['InstantaneousQuantity'])

# Add an AllocationAmount column: the instantaneous quantity scaled
# according to its InstantaneousUnitCode.
df100 = df100.assign(
    AllocationAmount=[
        allocAmountUnits(quantity, unitCode)
        for quantity, unitCode in zip(df100['InstantaneousQuantity'],
                                      df100['InstantaneousUnitCode'])
    ]
)

df100

AllocationAmount...


Unnamed: 0,X,Y,OBJECTID,OBJECTID_1,WaRecID,WaRecId_1,WR_Doc_ID,WaRecPhaseId,WaRecPhasePartyRoleTypeCode,PersonLastOrOrganizationNM,...,IrrigatedAreaQuantity,InstantaneousUnitCode,PurposeOfUseTypeCodes,D_Point_ID,SiteUUID,WaterSourceUUID,BeneficialUseCategory,AllocationOwner,AllocationPriorityDate,AllocationAmount
0,0,0,1,6666660,2132706,2132706,2132706,9971,Primary,WA Fish & Wildlife Dept - CRO,...,,CFS,FS,,,WA_1,Fish propagation,WA Fish & Wildlife Dept - CRO,12/07/1973,12.000000
1,0,0,103,2553418,2066186,2066186,2066186,77477,Primary,LARSON,...,0.5,GPM,IR DG,209971,WA_2195,WA_2,"Irrigation, Domestic general","LARSON, ARNOLD V.",07/01/1937,0.044560
2,0,0,105,2499326,2065649,2065649,2065649,78847,Primary,W. MARD & S. MILLER,...,,,DG,630570,WA_20397,WA_2,Domestic general,W. MARD & S. MILLER,,
3,0,0,116,2570635,2141830,2141830,2141830,1040,Primary,HODIN G M,...,,GPM,DM,644800,WA_46066,WA_2,Domestic multiple,HODIN G M,07/06/1954,0.668403
4,0,0,117,2358664,2138520,2138520,2138520,6084,Primary,Leone,...,,GPM,DM,384055,WA_108331,WA_2,Domestic multiple,"Leone, Chester",12/05/1974,0.278501
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124632,0,0,488950,2594706,2256313,2256313,2256313,158582,Primary,SCHEEL,...,,,DG,515280,WA_75638,WA_2,Domestic general,"SCHEEL, NORMAN W.",,
124633,0,0,488951,2388775,2162208,2162208,2162208,252338,Primary,BERNHARDT,...,,,ST DG IR,284603,WA_120360,WA_2,"Stock water, Domestic general, Irrigation","BERNHARDT, RONALD W",,
124634,0,0,488953,2362296,2164480,2164480,2164480,252540,Primary,MAC KAY,...,,,DG IR ST,275424,WA_61651,WA_2,"Domestic general, Irrigation, Stock water","MAC KAY, DONALD G",,
124635,0,0,488980,2365722,2186470,2186470,2186470,227904,Primary,CAMPBELL,...,,,ST,292917,WA_141606,WA_1,Stock water,"CAMPBELL, ROBERT H.",,


In [16]:
print("Copying all columns...")
#
# destCols and srsCols are parallel lists: the i-th source column of df100 is
# copied into the i-th destination column of outdf100, so both lists must keep
# the same length and order.
# Renamed pairs: WR_Doc_ID -> AllocationNativeID,
# WaRecProcessStatusTypeCode -> AllocationLegalStatusCV,
# WaRecPhaseTypeCode -> AllocationTypeCV,
# AnnualVolumeQuantity -> AllocationMaximum,
# IrrigatedAreaQuantity -> IrrigatedAcreage. The remaining pairs keep their name.
destCols=["SiteUUID", "WaterSourceUUID", 
          "AllocationNativeID", "AllocationLegalStatusCV", 
          "BeneficialUseCategory", 
          "AllocationOwner", 
          "AllocationTypeCV", 
          "AllocationPriorityDate",
          "AllocationAmount", 
          "AllocationMaximum", 
          "IrrigatedAcreage"]

srsCols=["SiteUUID", "WaterSourceUUID", 
          "WR_Doc_ID", "WaRecProcessStatusTypeCode",
          "BeneficialUseCategory", 
          "AllocationOwner",
          "WaRecPhaseTypeCode", 
          "AllocationPriorityDate", 
          "AllocationAmount",
          "AnnualVolumeQuantity",
          "IrrigatedAreaQuantity"]

# Fill the (so far empty) target frame; destination columns not listed above
# are left without values.
outdf100[destCols] = df100[srsCols]

outdf100

Copying all columns...


Unnamed: 0,MethodUUID,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,AllocationAmount,AllocationApplicationDate,AllocationAssociatedConsumptiveUseSiteIDs,AllocationAssociatedWithdrawalSiteIDs,AllocationBasisCV,...,DataPublicationDate,DataPublicationDOI,GeneratedPowerCapacityMW,IrrigatedAcreage,IrrigationMethodCV,LegacyAllocationIDs,PopulationServed,PowerType,PrimaryUseCategory,WaterAllocationNativeURL
0,,,,,WA_1,12.000000,,,,,...,,,,,,,,,,
1,,,WA_2195,,WA_2,0.044560,,,,,...,,,,0.5,,,,,,
2,,,WA_20397,,WA_2,,,,,,...,,,,,,,,,,
3,,,WA_46066,,WA_2,0.668403,,,,,...,,,,,,,,,,
4,,,WA_108331,,WA_2,0.278501,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124632,,,WA_75638,,WA_2,,,,,,...,,,,,,,,,,
124633,,,WA_120360,,WA_2,,,,,,...,,,,,,,,,,
124634,,,WA_61651,,WA_2,,,,,,...,,,,,,,,,,
124635,,,WA_141606,,WA_1,,,,,,...,,,,,,,,,,


In [17]:
# Constant values applied to every row of the output table.
print("Hard coded...")

outdf100["OrganizationUUID"] = "WSDE"
outdf100["VariableSpecificUUID"] = "WSDE_Allocation All"
outdf100["MethodUUID"] = "WSDE_Water Rights"
outdf100["AllocationBasisCV"] = "Unknown"
# NOTE(review): every row is labelled "Irrigation", whatever its
# BeneficialUseCategory is -- confirm this is intended.
outdf100["PrimaryUseCategory"] = "Irrigation"
outdf100["AllocationTimeframeStart"] = "01/01"
outdf100["AllocationTimeframeEnd"] = "12/31"

# Publication date is the date this cell is run, formatted MM/DD/YYYY.
outdf100["DataPublicationDate"] = datetime.now().strftime('%m/%d/%Y')

outdf100

Hard coded...


Unnamed: 0,MethodUUID,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,AllocationAmount,AllocationApplicationDate,AllocationAssociatedConsumptiveUseSiteIDs,AllocationAssociatedWithdrawalSiteIDs,AllocationBasisCV,...,DataPublicationDate,DataPublicationDOI,GeneratedPowerCapacityMW,IrrigatedAcreage,IrrigationMethodCV,LegacyAllocationIDs,PopulationServed,PowerType,PrimaryUseCategory,WaterAllocationNativeURL
0,WSDE_Water Rights,WSDE,,WSDE_Allocation All,WA_1,12.000000,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
1,WSDE_Water Rights,WSDE,WA_2195,WSDE_Allocation All,WA_2,0.044560,,,,Unknown,...,03/09/2020,,,0.5,,,,,Irrigation,
2,WSDE_Water Rights,WSDE,WA_20397,WSDE_Allocation All,WA_2,,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
3,WSDE_Water Rights,WSDE,WA_46066,WSDE_Allocation All,WA_2,0.668403,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
4,WSDE_Water Rights,WSDE,WA_108331,WSDE_Allocation All,WA_2,0.278501,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124632,WSDE_Water Rights,WSDE,WA_75638,WSDE_Allocation All,WA_2,,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
124633,WSDE_Water Rights,WSDE,WA_120360,WSDE_Allocation All,WA_2,,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
124634,WSDE_Water Rights,WSDE,WA_61651,WSDE_Allocation All,WA_2,,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
124635,WSDE_Water Rights,WSDE,WA_141606,WSDE_Allocation All,WA_1,,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,


In [18]:
print("Droping null allocations...")
# Rows where both AllocationAmount and AllocationMaximum are blank are saved
# to waterallocations_missing.csv and then removed from the output table.

# Replace NaN with blank strings so that "missing" can be tested with == ''.
outdf100 = outdf100.replace(np.nan, '')
missingBothMask = (outdf100["AllocationAmount"] == '') & (outdf100["AllocationMaximum"] == '')
outdf100purge = outdf100.loc[missingBothMask]
if len(outdf100purge) > 0:
    outdf100purge.to_csv('waterallocations_missing.csv')
    dropIndex = outdf100purge.index
    outdf100 = outdf100.drop(dropIndex).reset_index(drop=True)

outdf100

Droping null allocations...


Unnamed: 0,MethodUUID,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,AllocationAmount,AllocationApplicationDate,AllocationAssociatedConsumptiveUseSiteIDs,AllocationAssociatedWithdrawalSiteIDs,AllocationBasisCV,...,DataPublicationDate,DataPublicationDOI,GeneratedPowerCapacityMW,IrrigatedAcreage,IrrigationMethodCV,LegacyAllocationIDs,PopulationServed,PowerType,PrimaryUseCategory,WaterAllocationNativeURL
0,WSDE_Water Rights,WSDE,,WSDE_Allocation All,WA_1,12,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
1,WSDE_Water Rights,WSDE,WA_2195,WSDE_Allocation All,WA_2,0.0445602,,,,Unknown,...,03/09/2020,,,0.5,,,,,Irrigation,
2,WSDE_Water Rights,WSDE,WA_46066,WSDE_Allocation All,WA_2,0.668403,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
3,WSDE_Water Rights,WSDE,WA_108331,WSDE_Allocation All,WA_2,0.278501,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
4,WSDE_Water Rights,WSDE,WA_30223,WSDE_Allocation All,WA_2,4.45602,,,,Unknown,...,03/09/2020,,,250,,,,,Irrigation,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61550,WSDE_Water Rights,WSDE,WA_56095,WSDE_Allocation All,WA_1,0.25,,,,Unknown,...,03/09/2020,,,25,,,,,Irrigation,
61551,WSDE_Water Rights,WSDE,WA_78680,WSDE_Allocation All,WA_1,0.01,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
61552,WSDE_Water Rights,WSDE,WA_98563,WSDE_Allocation All,WA_2,0.891204,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
61553,WSDE_Water Rights,WSDE,WA_63019,WSDE_Allocation All,WA_1,0.075,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,


In [19]:
print("Droping null SiteUUIDs...")

# Remove rows whose SiteUUID is blank (no matching site was found), then
# renumber the index.
outdf100nullID = outdf100.loc[outdf100["SiteUUID"] == '']
if len(outdf100nullID) > 0:
    dropIndex = outdf100nullID.index
    outdf100 = outdf100.drop(dropIndex).reset_index(drop=True)

outdf100

Droping null SiteUUIDs...


Unnamed: 0,MethodUUID,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,AllocationAmount,AllocationApplicationDate,AllocationAssociatedConsumptiveUseSiteIDs,AllocationAssociatedWithdrawalSiteIDs,AllocationBasisCV,...,DataPublicationDate,DataPublicationDOI,GeneratedPowerCapacityMW,IrrigatedAcreage,IrrigationMethodCV,LegacyAllocationIDs,PopulationServed,PowerType,PrimaryUseCategory,WaterAllocationNativeURL
0,WSDE_Water Rights,WSDE,WA_2195,WSDE_Allocation All,WA_2,0.0445602,,,,Unknown,...,03/09/2020,,,0.5,,,,,Irrigation,
1,WSDE_Water Rights,WSDE,WA_46066,WSDE_Allocation All,WA_2,0.668403,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
2,WSDE_Water Rights,WSDE,WA_108331,WSDE_Allocation All,WA_2,0.278501,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
3,WSDE_Water Rights,WSDE,WA_30223,WSDE_Allocation All,WA_2,4.45602,,,,Unknown,...,03/09/2020,,,250,,,,,Irrigation,
4,WSDE_Water Rights,WSDE,WA_97966,WSDE_Allocation All,WA_2,0.133681,,,,Unknown,...,03/09/2020,,,11,,,,,Irrigation,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61381,WSDE_Water Rights,WSDE,WA_56095,WSDE_Allocation All,WA_1,0.25,,,,Unknown,...,03/09/2020,,,25,,,,,Irrigation,
61382,WSDE_Water Rights,WSDE,WA_78680,WSDE_Allocation All,WA_1,0.01,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
61383,WSDE_Water Rights,WSDE,WA_98563,WSDE_Allocation All,WA_2,0.891204,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
61384,WSDE_Water Rights,WSDE,WA_63019,WSDE_Allocation All,WA_1,0.075,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,


In [20]:
print("Droping null Priority date...")

# Remove rows whose AllocationPriorityDate is blank (missing or unparsable
# source date), then renumber the index.
outdf100nullPR = outdf100.loc[outdf100["AllocationPriorityDate"] == '']
if len(outdf100nullPR) > 0:
    dropIndex = outdf100nullPR.index
    outdf100 = outdf100.drop(dropIndex).reset_index(drop=True)

outdf100

Droping null Priority date...


Unnamed: 0,MethodUUID,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,AllocationAmount,AllocationApplicationDate,AllocationAssociatedConsumptiveUseSiteIDs,AllocationAssociatedWithdrawalSiteIDs,AllocationBasisCV,...,DataPublicationDate,DataPublicationDOI,GeneratedPowerCapacityMW,IrrigatedAcreage,IrrigationMethodCV,LegacyAllocationIDs,PopulationServed,PowerType,PrimaryUseCategory,WaterAllocationNativeURL
0,WSDE_Water Rights,WSDE,WA_2195,WSDE_Allocation All,WA_2,0.0445602,,,,Unknown,...,03/09/2020,,,0.5,,,,,Irrigation,
1,WSDE_Water Rights,WSDE,WA_46066,WSDE_Allocation All,WA_2,0.668403,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
2,WSDE_Water Rights,WSDE,WA_108331,WSDE_Allocation All,WA_2,0.278501,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
3,WSDE_Water Rights,WSDE,WA_30223,WSDE_Allocation All,WA_2,4.45602,,,,Unknown,...,03/09/2020,,,250,,,,,Irrigation,
4,WSDE_Water Rights,WSDE,WA_97966,WSDE_Allocation All,WA_2,0.133681,,,,Unknown,...,03/09/2020,,,11,,,,,Irrigation,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58226,WSDE_Water Rights,WSDE,WA_56095,WSDE_Allocation All,WA_1,0.25,,,,Unknown,...,03/09/2020,,,25,,,,,Irrigation,
58227,WSDE_Water Rights,WSDE,WA_78680,WSDE_Allocation All,WA_1,0.01,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
58228,WSDE_Water Rights,WSDE,WA_98563,WSDE_Allocation All,WA_2,0.891204,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
58229,WSDE_Water Rights,WSDE,WA_63019,WSDE_Allocation All,WA_1,0.075,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,


In [21]:
print("Droping null WaterSourceUUID ...")

# Rows whose WaterSourceUUID is the empty string.
# Note: the variable name outdf100nullPR is reused from the priority-date
# step; from this cell on it holds the empty-WaterSourceUUID rows instead.
outdf100nullPR = outdf100[outdf100["WaterSourceUUID"].eq('')]

if len(outdf100nullPR):
    # Remove exactly those rows, then renumber the index from 0.
    dropIndex = outdf100nullPR.index
    outdf100 = outdf100.drop(dropIndex).reset_index(drop=True)
outdf100

Droping null WaterSourceUUID ...


Unnamed: 0,MethodUUID,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,AllocationAmount,AllocationApplicationDate,AllocationAssociatedConsumptiveUseSiteIDs,AllocationAssociatedWithdrawalSiteIDs,AllocationBasisCV,...,DataPublicationDate,DataPublicationDOI,GeneratedPowerCapacityMW,IrrigatedAcreage,IrrigationMethodCV,LegacyAllocationIDs,PopulationServed,PowerType,PrimaryUseCategory,WaterAllocationNativeURL
0,WSDE_Water Rights,WSDE,WA_2195,WSDE_Allocation All,WA_2,0.0445602,,,,Unknown,...,03/09/2020,,,0.5,,,,,Irrigation,
1,WSDE_Water Rights,WSDE,WA_46066,WSDE_Allocation All,WA_2,0.668403,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
2,WSDE_Water Rights,WSDE,WA_108331,WSDE_Allocation All,WA_2,0.278501,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
3,WSDE_Water Rights,WSDE,WA_30223,WSDE_Allocation All,WA_2,4.45602,,,,Unknown,...,03/09/2020,,,250,,,,,Irrigation,
4,WSDE_Water Rights,WSDE,WA_97966,WSDE_Allocation All,WA_2,0.133681,,,,Unknown,...,03/09/2020,,,11,,,,,Irrigation,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58226,WSDE_Water Rights,WSDE,WA_56095,WSDE_Allocation All,WA_1,0.25,,,,Unknown,...,03/09/2020,,,25,,,,,Irrigation,
58227,WSDE_Water Rights,WSDE,WA_78680,WSDE_Allocation All,WA_1,0.01,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
58228,WSDE_Water Rights,WSDE,WA_98563,WSDE_Allocation All,WA_2,0.891204,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
58229,WSDE_Water Rights,WSDE,WA_63019,WSDE_Allocation All,WA_1,0.075,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,


In [22]:
print("Droping duplicates...")
# Safety net: remove rows that are exact copies of an earlier row.

# duplicated() flags every repeat after the first occurrence of a row.
outdf100Duplicated = outdf100[outdf100.duplicated()]

if len(outdf100Duplicated):
    # Keep a record of the removed rows (written to the current working
    # directory, with the row index as the first column).
    outdf100Duplicated.to_csv("waterallocations_duplicaterows.csv")
    # Keep the first occurrence of each row and renumber the index from 0.
    outdf100 = outdf100.drop_duplicates().reset_index(drop=True)

outdf100

Droping duplicates...


Unnamed: 0,MethodUUID,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,AllocationAmount,AllocationApplicationDate,AllocationAssociatedConsumptiveUseSiteIDs,AllocationAssociatedWithdrawalSiteIDs,AllocationBasisCV,...,DataPublicationDate,DataPublicationDOI,GeneratedPowerCapacityMW,IrrigatedAcreage,IrrigationMethodCV,LegacyAllocationIDs,PopulationServed,PowerType,PrimaryUseCategory,WaterAllocationNativeURL
0,WSDE_Water Rights,WSDE,WA_2195,WSDE_Allocation All,WA_2,0.0445602,,,,Unknown,...,03/09/2020,,,0.5,,,,,Irrigation,
1,WSDE_Water Rights,WSDE,WA_46066,WSDE_Allocation All,WA_2,0.668403,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
2,WSDE_Water Rights,WSDE,WA_108331,WSDE_Allocation All,WA_2,0.278501,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
3,WSDE_Water Rights,WSDE,WA_30223,WSDE_Allocation All,WA_2,4.45602,,,,Unknown,...,03/09/2020,,,250,,,,,Irrigation,
4,WSDE_Water Rights,WSDE,WA_97966,WSDE_Allocation All,WA_2,0.133681,,,,Unknown,...,03/09/2020,,,11,,,,,Irrigation,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58226,WSDE_Water Rights,WSDE,WA_56095,WSDE_Allocation All,WA_1,0.25,,,,Unknown,...,03/09/2020,,,25,,,,,Irrigation,
58227,WSDE_Water Rights,WSDE,WA_78680,WSDE_Allocation All,WA_1,0.01,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
58228,WSDE_Water Rights,WSDE,WA_98563,WSDE_Allocation All,WA_2,0.891204,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,
58229,WSDE_Water Rights,WSDE,WA_63019,WSDE_Allocation All,WA_1,0.075,,,,Unknown,...,03/09/2020,,,,,,,,Irrigation,


In [23]:
print("Checking required is not null...")
# Report (do not drop) every row that is missing a value in any required column.

requiredCols = ["OrganizationUUID", "VariableSpecificUUID", "WaterSourceUUID",
                "MethodUUID", "AllocationPriorityDate"]

# Build the mask from requiredCols itself so the list and the check cannot
# drift apart. A cell counts as missing when it is the empty string or a
# true null (NaN/None); both end up as an empty field in the written CSV.
requiredVals = outdf100[requiredCols]
missingMask = (requiredVals.isna() | requiredVals.eq('')).any(axis=1)

outdf100_nullMand = outdf100.loc[missingMask]

if len(outdf100_nullMand.index) > 0:
    # The row index is written as the first column so offending rows can be
    # located in outdf100.
    outdf100_nullMand.to_csv('ProcessedInputData/waterallocations_mandatoryFieldMissing.csv')  # index=False,

Checking required is not null...


In [24]:
# Display the column labels of outdf100 as they stand after the row-filtering
# and required-field check cells above.
outdf100.columns

Index(['MethodUUID', 'OrganizationUUID', 'SiteUUID', 'VariableSpecificUUID',
       'WaterSourceUUID', 'AllocationAmount', 'AllocationApplicationDate',
       'AllocationAssociatedConsumptiveUseSiteIDs',
       'AllocationAssociatedWithdrawalSiteIDs', 'AllocationBasisCV',
       'AllocationChangeApplicationIndicator',
       'AllocationCommunityWaterSupplySystem', 'AllocationCropDutyAmount',
       'AllocationExpirationDate', 'AllocationLegalStatusCV',
       'AllocationMaximum', 'AllocationNativeID', 'AllocationOwner',
       'AllocationPriorityDate', 'AllocationSDWISIdentifierCV',
       'AllocationTimeframeEnd', 'AllocationTimeframeStart',
       'AllocationTypeCV', 'BeneficialUseCategory',
       'CommunityWaterSupplySystem', 'CropTypeCV', 'CustomerTypeCV',
       'DataPublicationDate', 'DataPublicationDOI', 'GeneratedPowerCapacityMW',
       'IrrigatedAcreage', 'IrrigationMethodCV', 'LegacyAllocationIDs',
       'PopulationServed', 'PowerType', 'PrimaryUseCategory',
       'WaterA

In [25]:
# Solving WaDE 2.0 Upload Issues
############################################################################
#Date Noted: 03/09/2020
#Note: Missing a few columns.
# Bracket assignment is used deliberately: attribute-style assignment
# (outdf100.X = ...) only overwrites a column that already exists; if the
# column were absent it would set a plain Python attribute on the DataFrame
# and no column would be created.
outdf100["DataPublicationDate"] = "03/09/2020"
outdf100["GeneratedPowerCapacityMW"] = ''
outdf100["PowerType"] = ''
# NOTE(review): this overwrites PrimaryUseCategory with '' for every row --
# confirm that discarding any previously populated values is intended.
outdf100["PrimaryUseCategory"] = ''

# Date Noted: 03/03/2020
# Note: Insure single 'AllocationNativeID' entry.
print("Joining outdf duplicates based on AllocationNativeID...")


def _joinDistinct(values):
    """Join the distinct values of one column within a group with commas.

    Values are de-duplicated in first-seen order (dict.fromkeys) rather than
    through set(), whose iteration order for strings can differ between
    Python processes and would make the written file non-reproducible.
    Every value is converted with str(), so all aggregated columns become
    strings.
    """
    return ','.join(str(elem) for elem in dict.fromkeys(values))


# One output row per AllocationNativeID, groups kept in first-appearance order
# (sort=False). reset_index() turns the group key back into a regular column,
# which places AllocationNativeID first.
outdf100 = (outdf100
            .groupby('AllocationNativeID', sort=False)
            .agg(_joinDistinct)
            .reset_index())

Joining outdf duplicates based on AllocationNativeID...


In [26]:
print("Writing outputs...")
# Save the final allocation table to the path held in out_alloc, as UTF-8
# and without the DataFrame's row index.
outdf100.to_csv(
    out_alloc,
    index=False,
    encoding="utf-8",
)

print("Done Water Allocation")

Writing outputs...
Done Water Allocation


In [27]:
# Display the column labels of the frame that was just written out; after the
# groupby/reset_index step AllocationNativeID is the first column.
outdf100.columns

Index(['AllocationNativeID', 'MethodUUID', 'OrganizationUUID', 'SiteUUID',
       'VariableSpecificUUID', 'WaterSourceUUID', 'AllocationAmount',
       'AllocationApplicationDate',
       'AllocationAssociatedConsumptiveUseSiteIDs',
       'AllocationAssociatedWithdrawalSiteIDs', 'AllocationBasisCV',
       'AllocationChangeApplicationIndicator',
       'AllocationCommunityWaterSupplySystem', 'AllocationCropDutyAmount',
       'AllocationExpirationDate', 'AllocationLegalStatusCV',
       'AllocationMaximum', 'AllocationOwner', 'AllocationPriorityDate',
       'AllocationSDWISIdentifierCV', 'AllocationTimeframeEnd',
       'AllocationTimeframeStart', 'AllocationTypeCV', 'BeneficialUseCategory',
       'CommunityWaterSupplySystem', 'CropTypeCV', 'CustomerTypeCV',
       'DataPublicationDate', 'DataPublicationDOI', 'GeneratedPowerCapacityMW',
       'IrrigatedAcreage', 'IrrigationMethodCV', 'LegacyAllocationIDs',
       'PopulationServed', 'PowerType', 'PrimaryUseCategory',
       'WaterA