In [1]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import os
from datetime import datetime
from dateutil.parser import parse

In [2]:
# Working directory: the bare input/output file names defined in this notebook
# are resolved relative to this folder once os.chdir() has run.
# NOTE(review): the path is relative, so running this cell a second time in the
# same kernel resolves "./ProcessedInputData" inside the folder already entered,
# and os.chdir() raises FileNotFoundError unless such a nested folder exists.
working_dir = "./ProcessedInputData"
os.chdir(working_dir)

In [3]:
# Input files (bare names, read from the current working directory)
# main input table; it supplies the df100 frame processed by the cells below
fileInput1 = "wr_v_pod_public_xy.csv" 
# water sources lookup table (read into df400)
inp_wtrsrs="watersources.csv"
# sites lookup table (read into df500)
inp_sitdim = 'sites.csv'

# Output file name for the water allocation table
out_alloc = "waterallocations.csv"    # output

In [4]:
######## WaDE columns

# The following fields differ between the table here (edited by DPL) and the one on the schema website:
#http://schema.westernstateswater.org/tables/Input_AllocationAmounts_fact.html
"""
BeneficialUseCategory, PrimaryUseCategory, AllocationTimeframeStart, AllocationTimeframeEnd, " "
BeneficialUseCategoryCV, PrimaryUseCategoryCV, TimeframeStartDate,	TimeframeEndDate,	Geometry	
"""
# Column names of the target water-allocation table, in output order.
# The UUID columns (OrganizationUUID, SiteUUID, VariableSpecificUUID,
# WaterSourceUUID, MethodUUID) carry the identifiers of the dim tables.
columns = [
    "OrganizationUUID",
    "SiteUUID",
    "VariableSpecificUUID",
    "WaterSourceUUID",
    "MethodUUID",
    "PrimaryUseCategory",
    "BeneficialUseCategory",
    "AllocationNativeID",
    "AllocationTypeCV",
    "AllocationOwner",
    "AllocationApplicationDate",
    "AllocationPriorityDate",
    "AllocationLegalStatusCV",
    "AllocationCropDutyAmount",
    "AllocationExpirationDate",
    "AllocationChangeApplicationIndicator",
    "LegacyAllocationIDs",
    "AllocationBasisCV",
    "AllocationTimeframeStart",
    "AllocationTimeframeEnd",
    "AllocationAmount",
    "AllocationMaximum",
    "PopulationServed",
    "PowerGeneratedGWh",
    "IrrigatedAcreage",
    "AllocationCommunityWaterSupplySystem",
    "AllocationSDWISIdentifierCV",
    "AllocationAssociatedWithdrawalSiteIDs",
    "AllocationAssociatedConsumptiveUseSiteIDs",
    "WaterAllocationNativeURL",
    "CustomerTypeCV",
    "IrrigationMethodCV",
    "CropTypeCV",
    "CommunityWaterSupplySystem",
    "DataPublicationDate",
    "DataPublicationDOI",
]

# Placeholder: a data type per column could be listed here, but none has been needed so far.
dtypesx = ['']

In [5]:
### Target DataFrame: starts with no rows and one column per name in `columns`;
### later cells fill it from df100.

# TODO: assumes dtypes inferred from CO file
# (no dtypes are passed here, so pandas decides them as values are assigned)
outdf100=pd.DataFrame(columns=columns)

In [6]:
print("Reading inputs...")

# Main input table.
# NOTE(review): the saved output of this cell contains the tail of a warning
# ("interactivity=interactivity, compiler=compiler, result=result)"). If it is a
# pandas DtypeWarning about mixed-type columns, consider passing dtype= or
# low_memory=False to the read_csv call that triggers it -- TODO confirm.
df100 = pd.read_csv(fileInput1, encoding = "ISO-8859-1") #, or alternatively encoding = "utf-8"
#print (len(df100.index))

#df100


# sites lookup table
df500 = pd.read_csv(inp_sitdim, encoding = "ISO-8859-1")

# water sources lookup table
df400 = pd.read_csv(inp_wtrsrs, encoding = "ISO-8859-1")
# Drop rows that repeat the same (WaterSourceName, WaterSourceTypeCV) pair; the first row is kept.
# This step becomes unnecessary once the water sources table itself has no duplicates.
df400 = df400.drop_duplicates(subset=['WaterSourceName', 'WaterSourceTypeCV'])
df400

Reading inputs...


  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,WaterSourceUUID,WaterSourceNativeID,WaterSourceName,WaterSourceTypeCV,WaterQualityIndicatorCV,GNISFeatureNameCV,Geometry
0,OR_1,1,FORMOSA 1 ADIT ...,groundwater,Fresh,,
1,OR_2,2,SILVER BUTTE 1 ADIT ...,groundwater,Fresh,,
2,OR_3,3,A WELL ...,groundwater,Fresh,,
3,OR_4,4,A WELL,groundwater,Fresh,,
4,OR_5,5,WELL 1 ...,groundwater,Fresh,,
5,OR_6,6,WELL 2 ...,groundwater,Fresh,,
6,OR_7,7,WELL 3 ...,groundwater,Fresh,,
7,OR_8,8,WELL 4 ...,groundwater,Fresh,,
8,OR_9,9,EAST FORK GRANDE RONDE RIVER ...,surface water,Fresh,,
9,OR_10,10,LITTLE MEADOW CREEK ...,surface water,Fresh,,


In [7]:
# Keep one row per water right: a right may appear on several rows because it
# can have multiple sites/PODs. The first row of each permit_nbr is kept.
# NOTE(review): rows are de-duplicated on permit_nbr alone -- confirm that the
# same number is never reused by rights of a different type.
print("Dropping duplicates...")

# row count before de-duplication
print (len(df100.index))

df100 = df100.drop_duplicates(subset = ['permit_nbr']).reset_index(drop=True)

# row count after de-duplication
print (len(df100.index))

df100

Dropping duplicates...
184377
46334


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,begin_month,begin_day,end_month,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,1.0,1.0,12.0,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,1.0,1.0,12.0,31.0,MIGRT,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,1.0,1.0,12.0,31.0,MIGRT,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,1.0,1.0,12.0,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10.0,Automapped as center of the envelope for the T...
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,1.0,1.0,12.0,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10.0,Automapped as center of the envelope for the T...
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,1.0,1.0,12.0,31.0,RL,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,1.0,1.0,12.0,31.0,MIGRT,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,1.0,1.0,12.0,31.0,DA,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,1.0,1.0,12.0,31.0,SM,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,1.0,1.0,12.0,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1


In [8]:
print("Adding SiteUUID...")

def assignSiteID(colrowValue, df500):
    """Return the SiteUUID value(s) of the sites whose SiteNativeID equals colrowValue.

    Parameters:
        colrowValue: native site identifier to look up.
        df500: sites lookup table with 'SiteNativeID' and 'SiteUUID' columns.

    Returns:
        The matching SiteUUID values joined with ', ' (in table order), or ''
        when colrowValue is blank/null or no site matches.
    """
    # nothing to look up
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''

    matchedUUIDs = df500.loc[df500['SiteNativeID'] == colrowValue, 'SiteUUID']
    # joining an empty selection yields '', which is also the "no match" result
    return ', '.join(map(str, matchedUUIDs))

# Look up the site UUID(s) of every right through its snp_id; rights without a
# matching site get an empty string.
# (The original author also noted pod_location_id here as a possible key.)
df100 = df100.assign(
    SiteUUID=df100.apply(lambda right: assignSiteID(right['snp_id'], df500), axis=1)
)
df100

Adding SiteUUID...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,begin_day,end_month,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,SiteUUID
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,1.0,12.0,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,"OR_1, OR_2"
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,1.0,12.0,31.0,MIGRT,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,OR_3
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,1.0,12.0,31.0,MIGRT,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,OR_4
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,1.0,12.0,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10.0,Automapped as center of the envelope for the T...,OR_5
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,1.0,12.0,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10.0,Automapped as center of the envelope for the T...,"OR_6, OR_7, OR_8, OR_9"
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,1.0,12.0,31.0,RL,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,OR_10
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,1.0,12.0,31.0,MIGRT,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,OR_11
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,1.0,12.0,31.0,DA,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,OR_12
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,1.0,12.0,31.0,SM,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,OR_13
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,1.0,12.0,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,"OR_14, OR_15"


In [9]:
print("Water sources...")

# Maps the wr_type code of a right to the WaterSourceTypeCV term used in the
# water sources lookup table.
WRSourceTypeCVDictOR = {
    "ST":"storage",
    "SW":"surface water",
    "GW":"groundwater"
}

# water source name is "unspecified" when not known
def assignWaterSourceID2(colrowValue11, colrowValue22, df400):
    """Return the WaterSourceUUID matching a water source name and type code.

    Parameters:
        colrowValue11: water source name; null counts as blank, otherwise the
            value is converted to str and stripped.
        colrowValue22: water right type code (a key of WRSourceTypeCVDictOR);
            null counts as blank, otherwise converted to str and stripped.
        df400: water sources lookup table with 'WaterSourceName',
            'WaterSourceTypeCV' and 'WaterSourceUUID' columns.

    Returns:
        The WaterSourceUUID of the first row whose name and type both match, or
        'Unspecificed' when the type code is blank or nothing matches.

    Raises:
        KeyError: for a non-blank type code missing from WRSourceTypeCVDictOR,
            so that gaps in the vocabulary stay visible.
    """
    # NOTE(review): the fallback is spelled 'Unspecificed'; it is kept as is
    # because other tables may rely on this exact value -- confirm before renaming.
    unspecified = 'Unspecificed'

    # Test for null BEFORE str(): str(nan) is 'nan' and str(None) is 'None',
    # which would otherwise be looked up as if they were real values.
    sourceName = '' if pd.isnull(colrowValue11) else str(colrowValue11).strip()
    typeCode = '' if pd.isnull(colrowValue22) else str(colrowValue22).strip()

    # A blank type code can never be mapped, so no source can match.
    if typeCode == '':
        return unspecified

    sourceType = WRSourceTypeCVDictOR[typeCode]
    # NOTE(review): only the incoming name is stripped; names stored with
    # padding in df400 will not match -- confirm whether that is intended.
    matchedUUIDs = df400.loc[(df400['WaterSourceName'] == sourceName)
                             & (df400['WaterSourceTypeCV'] == sourceType),
                             'WaterSourceUUID']
    if matchedUUIDs.empty:
        return unspecified
    return matchedUUIDs.iloc[0]

# Blank out every NaN first so the lookup below receives '' for missing values.
df100 = df100.replace(np.nan, '')

# Resolve each right's (source, wr_type) pair to a water source UUID.
df100 = df100.assign(
    WaterSourceUUID=df100.apply(
        lambda right: assignWaterSourceID2(right['source'], right['wr_type'], df400),
        axis=1)
)

df100

Water sources...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,end_month,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,SiteUUID,WaterSourceUUID
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,12.0,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,"OR_1, OR_2",Unspecificed
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,12.0,31.0,MIGRT,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,OR_3,OR_4
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,12.0,31.0,MIGRT,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,OR_4,OR_4
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,12.0,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,OR_5,OR_4
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,12.0,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,"OR_6, OR_7, OR_8, OR_9",OR_445
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,12.0,31.0,RL,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,OR_10,OR_4
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,12.0,31.0,MIGRT,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,OR_11,OR_4
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,12.0,31.0,DA,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,OR_12,OR_4
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,12.0,31.0,SM,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,OR_13,OR_4
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,12.0,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,"OR_14, OR_15",Unspecificed


In [10]:
print("AllocationTypeCV dictionary and function...")

# claim_char code -> allocation type label
claimCharDictOR = {
    "GR":"groundwater registrations",
    "PC":"power claim",
    "SW":"surface water registrations",
    "KL":"Klamath Adjudication claim",
    "KA":"Klamath Adjudication"
}

# permit_char code -> allocation type label
permitCharDictOR = {
    "E":"enlargement",
    "R":"reservoir",
    "G":"groundwater",
    "U":"underground",
    "S":"surface",
    "AL":"aquifer storage & recovery limited license"
}

def assignAllocTypeCVOR(colrowValue11, colrowValue22):
    """Return the allocation type label for a claim code / permit code pair.

    The claim code wins when it is present; the permit code is used only when
    the claim code is blank.

    Parameters:
        colrowValue11: claim code (a key of claimCharDictOR); null counts as
            blank, otherwise the value is converted to str and stripped.
        colrowValue22: permit code (a key of permitCharDictOR); same handling.

    Returns:
        The label from the matching dictionary, or '' when both codes are blank.

    Raises:
        KeyError: for a non-blank code missing from its dictionary (the lookup
            is deliberately left unguarded, as in the original).
    """
    # Test for null BEFORE str(): str(nan) is 'nan' and str(None) is 'None',
    # which would otherwise be used as dictionary keys and raise KeyError.
    claimCode = '' if pd.isnull(colrowValue11) else str(colrowValue11).strip()
    permitCode = '' if pd.isnull(colrowValue22) else str(colrowValue22).strip()

    if claimCode != '':
        return claimCharDictOR[claimCode]
    if permitCode != '':
        return permitCharDictOR[permitCode]
    return ''


AllocationTypeCV dictionary and function...


In [11]:
print("AllocationTypeCV...")

# Blank out every NaN first so missing codes reach the lookup as ''.
df100 = df100.replace(np.nan, '')

# Derive the allocation type of each right from its claim and permit codes.
df100 = df100.assign(
    AllocationTypeCV=df100.apply(
        lambda right: assignAllocTypeCVOR(right['claim_char'], right['permit_char']),
        axis=1)
)

df100

AllocationTypeCV...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,SiteUUID,WaterSourceUUID,AllocationTypeCV
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,"OR_1, OR_2",Unspecificed,groundwater
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,31.0,MIGRT,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,OR_3,OR_4,groundwater
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,31.0,MIGRT,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,OR_4,OR_4,groundwater
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,OR_5,OR_4,groundwater
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,"OR_6, OR_7, OR_8, OR_9",OR_445,groundwater
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,31.0,RL,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,OR_10,OR_4,groundwater
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,31.0,MIGRT,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,OR_11,OR_4,groundwater
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,31.0,DA,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,OR_12,OR_4,groundwater
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,31.0,SM,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,OR_13,OR_4,groundwater
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,"OR_14, OR_15",Unspecificed,surface


In [12]:
print("AllocationOwner functions...")

def assignownerName(colrowValue1, colrowValue2):
    """Combine two name parts into one owner string.

    Parameters:
        colrowValue1: first name part to appear in the result.
        colrowValue2: second name part to appear in the result.

    Returns:
        Both parts stripped and joined as 'part1, part2'; just one part when
        the other is blank/null; '' when both are blank/null.
    """
    def cleanPart(value):
        # blank, null and whitespace-only values all count as "no value"
        if value == '' or pd.isnull(value):
            return ''
        return str(value).strip()

    # Join the STRIPPED parts; the original joined the raw inputs, so padding
    # survived whenever both parts were present.
    nameParts = [part for part in (cleanPart(colrowValue1), cleanPart(colrowValue2))
                 if part != '']
    return ", ".join(nameParts)


def assignownerNameORCompany(colrowValue1, colrowValue2, colrowValue3):
    """Return the company name when there is one, else the person's name.

    Parameters:
        colrowValue1: company name.
        colrowValue2: first name part passed to assignownerName.
        colrowValue3: second name part passed to assignownerName.

    Returns:
        The stripped company name when it is not blank; otherwise the result
        of assignownerName(colrowValue2, colrowValue3).
    """
    if isinstance(colrowValue1, str):
        # Strip so the result carries no padding and a whitespace-only company
        # name falls back to the person name instead of being returned as is.
        companyName = colrowValue1.strip()
    elif pd.isnull(colrowValue1):
        companyName = ''
    else:
        # non-string, non-null values are passed through unchanged
        companyName = colrowValue1

    if companyName == '':
        return assignownerName(colrowValue2, colrowValue3)
    return companyName

AllocationOwner functions...


In [13]:
print("AllocationOwner...")

# Owner of each right: the company name when present, otherwise the person's
# name built from name_last and name_first.
df100 = df100.assign(
    AllocationOwner=df100.apply(
        lambda right: assignownerNameORCompany(right['name_company'],
                                               right['name_last'],
                                               right['name_first']),
        axis=1)
)
df100

AllocationOwner...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,SiteUUID,WaterSourceUUID,AllocationTypeCV,AllocationOwner
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,"OR_1, OR_2",Unspecificed,groundwater,FORMOSA EXPLORATION INC.
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,MIGRT,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,OR_3,OR_4,groundwater,DUTCH MINING LLC
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,MIGRT,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,OR_4,OR_4,groundwater,KNIFE RIVER CORP.
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,KLS,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,OR_5,OR_4,groundwater,
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,KLS,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,"OR_6, OR_7, OR_8, OR_9",OR_445,groundwater,"CASPER, KENNETH"
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,RL,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,OR_10,OR_4,groundwater,PARKIN FAMILY LLC
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,MIGRT,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,OR_11,OR_4,groundwater,THE TOTTEN GROUP INC.
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,DA,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,OR_12,OR_4,groundwater,"HOUSTON, HOWARD"
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,SM,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,OR_13,OR_4,groundwater,MUNSEN PAVING LLC
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,"OR_14, OR_15",Unspecificed,surface,"CLEMENT, ZELVA"


In [14]:
print("Allocation priority date...")

# input format 1989-11-21T00:00:00.000
def formatDateString(inString):
    """Convert an ISO-style timestamp string to an 'MM/DD/YYYY' date string.

    Parameters:
        inString: timestamp such as '1989-11-21T00:00:00.000'. Any time of day
            is accepted and dropped.

    Returns:
        The date formatted as '%m/%d/%Y', or '' when inString is blank, null,
        not a string, or does not match the expected format.
    """
    try:
        if inString == '' or pd.isnull(inString):
            return ''
        # Parse the time fields instead of requiring the literal
        # 'T00:00:00.000': timestamps with a non-zero time used to be
        # silently turned into ''.
        parsed = datetime.strptime(inString, '%Y-%m-%dT%H:%M:%S.%f')
    except (TypeError, ValueError):
        # TypeError: not a string; ValueError: wrong format (or an input whose
        # emptiness test is ambiguous)
        return ''

    return parsed.date().strftime('%m/%d/%Y')

# Priority date of each right, reformatted from priority_date; values that
# cannot be converted come back as ''.
df100 = df100.assign(
    AllocationPriorityDate=df100.apply(
        lambda right: formatDateString(right['priority_date']), axis=1)
)

df100

Allocation priority date...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,SiteUUID,WaterSourceUUID,AllocationTypeCV,AllocationOwner,AllocationPriorityDate
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,"OR_1, OR_2",Unspecificed,groundwater,FORMOSA EXPLORATION INC.,11/21/1989
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,OR_3,OR_4,groundwater,DUTCH MINING LLC,05/06/1994
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,OR_4,OR_4,groundwater,KNIFE RIVER CORP.,01/18/1995
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,OR_5,OR_4,groundwater,,09/03/1992
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,"OR_6, OR_7, OR_8, OR_9",OR_445,groundwater,"CASPER, KENNETH",12/21/1995
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,OR_10,OR_4,groundwater,PARKIN FAMILY LLC,11/14/1997
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,OR_11,OR_4,groundwater,THE TOTTEN GROUP INC.,03/19/1998
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,OR_12,OR_4,groundwater,"HOUSTON, HOWARD",01/07/2000
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,OR_13,OR_4,groundwater,MUNSEN PAVING LLC,01/09/2001
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,"OR_14, OR_15",Unspecificed,surface,"CLEMENT, ZELVA",08/13/1934


In [15]:
# 
def formatDateString(inString1, inString2):
    """Format inString1 as 'MM/DD/YYYY', but only when both inputs are present.

    NOTE(review): this definition replaces the one-argument formatDateString
    defined earlier in the notebook once this cell has run. The original body
    parsed an undefined name (`inString`) and combined its checks with `|`,
    which binds tighter than `==`, so it always returned ''. Parsing inString1
    is an assumption about the intent -- confirm, or delete this cell if unused.

    Parameters:
        inString1: timestamp such as '1989-11-21T00:00:00.000'; this is the
            value that gets formatted.
        inString2: second value that must also be non-blank; it is only
            checked for presence.

    Returns:
        inString1's date as '%m/%d/%Y', or '' when either input is blank/null
        or inString1 cannot be parsed.
    """
    try:
        if (inString1 == '' or pd.isnull(inString1)
                or inString2 == '' or pd.isnull(inString2)):
            return ''
        # any time of day is accepted and dropped
        parsed = datetime.strptime(inString1, '%Y-%m-%dT%H:%M:%S.%f')
    except (TypeError, ValueError):
        # TypeError: not a string; ValueError: wrong format
        return ''

    return parsed.date().strftime('%m/%d/%Y')

In [16]:
print("Timeframe start...")

# Start of the allocation timeframe as "month/day" without zero padding.
# int() removes the float formatting of the inputs (1.0 -> 1).
# NOTE(review): int() raises ValueError on a blank value, and earlier cells
# replace NaN with '' -- confirm begin_month/begin_day are always populated.
df100 = df100.assign(
    TimeframeStart=df100.apply(
        lambda right: '{}/{}'.format(int(right['begin_month']), int(right['begin_day'])),
        axis=1)
)
df100

Timeframe start...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,rec_creation_date,last_updt_date,feature_quality_code,remarks,SiteUUID,WaterSourceUUID,AllocationTypeCV,AllocationOwner,AllocationPriorityDate,TimeframeStart
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,"OR_1, OR_2",Unspecificed,groundwater,FORMOSA EXPLORATION INC.,11/21/1989,1/1
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,OR_3,OR_4,groundwater,DUTCH MINING LLC,05/06/1994,1/1
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,OR_4,OR_4,groundwater,KNIFE RIVER CORP.,01/18/1995,1/1
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,OR_5,OR_4,groundwater,,09/03/1992,1/1
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,"OR_6, OR_7, OR_8, OR_9",OR_445,groundwater,"CASPER, KENNETH",12/21/1995,1/1
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,OR_10,OR_4,groundwater,PARKIN FAMILY LLC,11/14/1997,1/1
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,OR_11,OR_4,groundwater,THE TOTTEN GROUP INC.,03/19/1998,1/1
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,OR_12,OR_4,groundwater,"HOUSTON, HOWARD",01/07/2000,1/1
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,OR_13,OR_4,groundwater,MUNSEN PAVING LLC,01/09/2001,1/1
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,"OR_14, OR_15",Unspecificed,surface,"CLEMENT, ZELVA",08/13/1934,1/1


In [17]:
print("Timeframe end...")

# End of the allocation timeframe as "month/day" without zero padding.
# int() removes the float formatting of the inputs (12.0 -> 12).
# NOTE(review): int() raises ValueError on a blank value, and earlier cells
# replace NaN with '' -- confirm end_month/end_day are always populated.
df100 = df100.assign(
    TimeframeEnd=df100.apply(
        lambda right: '{}/{}'.format(int(right['end_month']), int(right['end_day'])),
        axis=1)
)
df100

Timeframe end...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,last_updt_date,feature_quality_code,remarks,SiteUUID,WaterSourceUUID,AllocationTypeCV,AllocationOwner,AllocationPriorityDate,TimeframeStart,TimeframeEnd
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,1996-06-01T00:00:00.000,,0 G 10961 1,"OR_1, OR_2",Unspecificed,groundwater,FORMOSA EXPLORATION INC.,11/21/1989,1/1,12/31
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,2001-06-01T00:00:00.000,,0 G 12684 1,OR_3,OR_4,groundwater,DUTCH MINING LLC,05/06/1994,1/1,12/31
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,2001-05-01T00:00:00.000,,0 G 12750 1,OR_4,OR_4,groundwater,KNIFE RIVER CORP.,01/18/1995,1/1,12/31
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,,10,Automapped as center of the envelope for the T...,OR_5,OR_4,groundwater,,09/03/1992,1/1,12/31
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,,10,Automapped as center of the envelope for the T...,"OR_6, OR_7, OR_8, OR_9",OR_445,groundwater,"CASPER, KENNETH",12/21/1995,1/1,12/31
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,2003-10-17T00:00:00.000,,,OR_10,OR_4,groundwater,PARKIN FAMILY LLC,11/14/1997,1/1,12/31
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,2002-04-01T00:00:00.000,,0 G 13584 1,OR_11,OR_4,groundwater,THE TOTTEN GROUP INC.,03/19/1998,1/1,12/31
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,2003-08-28T00:00:00.000,,,OR_12,OR_4,groundwater,"HOUSTON, HOWARD",01/07/2000,1/1,12/31
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,2003-07-21T00:00:00.000,,,OR_13,OR_4,groundwater,MUNSEN PAVING LLC,01/09/2001,1/1,12/31
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,1996-06-01T00:00:00.000,,0 S 11398 1,"OR_14, OR_15",Unspecificed,surface,"CLEMENT, ZELVA",08/13/1934,1/1,12/31


In [18]:
print("Copying all columns...")

# Each pair is (destination column in outdf100, source column in df100).
# Pairs left as comments are mappings the original author had disabled.
# NOTE(review): "TimeframeStart"/"TimeframeEnd" are not in `columns`, which
# lists "AllocationTimeframeStart"/"AllocationTimeframeEnd" instead; the saved
# output shows them appended as extra columns -- confirm which names are wanted.
destCols, srsCols = map(list, zip(*[
    ("SiteUUID", "SiteUUID"),
    ("WaterSourceUUID", "WaterSourceUUID"),
    ("AllocationNativeID", "permit_nbr"),
    # ("AllocationLegalStatusCV", "WaRecProcessStatusTypeCode"),
    ("BeneficialUseCategory", "use_code_description"),
    ("AllocationOwner", "AllocationOwner"),
    ("AllocationTypeCV", "AllocationTypeCV"),
    # ("AllocationApplicationDate", "AllocationApplicationDate"),
    ("AllocationPriorityDate", "AllocationPriorityDate"),
    ("AllocationAmount", "rate_cfs"),
    ("AllocationMaximum", "max_rate_acre_feet"),
    # ("IrrigatedAcreage", "IrrigatedAreaQuantity"),
    # ("AllocationCropDutyAmount", "IRRIGATION_DEPLETION"),
    # ("AllocationExpirationDate", "DATE_TERMINATED"),
    ("TimeframeStart", "TimeframeStart"),
    ("TimeframeEnd", "TimeframeEnd"),
    ("WaterAllocationNativeURL", "wris_link"),
]))

# copy the selected source columns into their destination columns
outdf100[destCols] = df100[srsCols]

outdf100

Copying all columns...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI,TimeframeStart,TimeframeEnd
0,,"OR_1, OR_2",,Unspecificed,,,MINING,10961,groundwater,FORMOSA EXPLORATION INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,,1/1,12/31
1,,OR_3,,OR_4,,,MINING,12684,groundwater,DUTCH MINING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,,1/1,12/31
2,,OR_4,,OR_4,,,MINING,12750,groundwater,KNIFE RIVER CORP.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,,1/1,12/31
3,,OR_5,,OR_4,,,MINING,12779,groundwater,,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,,1/1,12/31
4,,"OR_6, OR_7, OR_8, OR_9",,OR_445,,,MINING,13450,groundwater,"CASPER, KENNETH",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,,1/1,12/31
5,,OR_10,,OR_4,,,MINING,13525,groundwater,PARKIN FAMILY LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,,1/1,12/31
6,,OR_11,,OR_4,,,MINING,13584,groundwater,THE TOTTEN GROUP INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,,1/1,12/31
7,,OR_12,,OR_4,,,MINING,14006,groundwater,"HOUSTON, HOWARD",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,,1/1,12/31
8,,OR_13,,OR_4,,,MINING,15090,groundwater,MUNSEN PAVING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,,1/1,12/31
9,,"OR_14, OR_15",,Unspecificed,,,MINING,11398,surface,"CLEMENT, ZELVA",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,,1/1,12/31


In [19]:
# hard coded
print("Hard coded...")

# The same constant is written to every row of each column below.
# Timeframe start/end used to be hard coded to "01/01" and "12/31" as well;
# those assignments were already disabled in the original.
outdf100 = outdf100.assign(
    OrganizationUUID="OWRD",
    VariableSpecificUUID="OWRD Allocation all",
    MethodUUID="OWRD-Water Rights",
    AllocationBasisCV="Unknown",
    # check this later
    PrimaryUseCategory="Irrigation",
    # date on which this code is run
    DataPublicationDate=datetime.now().strftime('%m/%d/%Y'),
)

outdf100

Hard coded...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI,TimeframeStart,TimeframeEnd
0,OWRD,"OR_1, OR_2",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,10961,groundwater,FORMOSA EXPLORATION INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
1,OWRD,OR_3,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12684,groundwater,DUTCH MINING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
2,OWRD,OR_4,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12750,groundwater,KNIFE RIVER CORP.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
3,OWRD,OR_5,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12779,groundwater,,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
4,OWRD,"OR_6, OR_7, OR_8, OR_9",OWRD Allocation all,OR_445,OWRD-Water Rights,Irrigation,MINING,13450,groundwater,"CASPER, KENNETH",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
5,OWRD,OR_10,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13525,groundwater,PARKIN FAMILY LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
6,OWRD,OR_11,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13584,groundwater,THE TOTTEN GROUP INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
7,OWRD,OR_12,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,14006,groundwater,"HOUSTON, HOWARD",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
8,OWRD,OR_13,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,15090,groundwater,MUNSEN PAVING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
9,OWRD,"OR_14, OR_15",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,11398,surface,"CLEMENT, ZELVA",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31


In [20]:
print("Droping null allocations...")
# Normalise missing values: NaN is replaced by the empty string so that "empty"
# can be tested with a plain string comparison.
outdf100 = outdf100.replace(np.nan, '')

# A row is purged when BOTH AllocationAmount and AllocationMaximum are empty.
# The mask is computed once and reused for the export and for the removal.
noAllocation = (outdf100["AllocationAmount"] == '') & (outdf100["AllocationMaximum"] == '')
outdf100purge = outdf100.loc[noAllocation]
if len(outdf100purge.index) > 0:
    # Keep the purged rows for inspection; the row index is written as the first column.
    outdf100purge.to_csv('waterallocations_missing.csv')    #index=False,
    outdf100 = outdf100.drop(outdf100purge.index)
    outdf100 = outdf100.reset_index(drop=True)

outdf100
#outdf100purge

Droping null allocations...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI,TimeframeStart,TimeframeEnd
0,OWRD,"OR_1, OR_2",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,10961,groundwater,FORMOSA EXPLORATION INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
1,OWRD,OR_3,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12684,groundwater,DUTCH MINING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
2,OWRD,OR_4,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12750,groundwater,KNIFE RIVER CORP.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
3,OWRD,OR_5,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12779,groundwater,,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
4,OWRD,"OR_6, OR_7, OR_8, OR_9",OWRD Allocation all,OR_445,OWRD-Water Rights,Irrigation,MINING,13450,groundwater,"CASPER, KENNETH",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
5,OWRD,OR_10,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13525,groundwater,PARKIN FAMILY LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
6,OWRD,OR_11,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13584,groundwater,THE TOTTEN GROUP INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
7,OWRD,OR_12,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,14006,groundwater,"HOUSTON, HOWARD",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
8,OWRD,OR_13,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,15090,groundwater,MUNSEN PAVING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
9,OWRD,"OR_14, OR_15",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,11398,surface,"CLEMENT, ZELVA",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31


In [21]:
print("Droping null SiteUUIDs...")
# Collect the rows whose SiteUUID is the empty string, then remove exactly those
# rows (by index label) and renumber the remaining rows from 0.
outdf100nullID = outdf100[outdf100["SiteUUID"] == '']
if not outdf100nullID.empty:
    outdf100 = outdf100.drop(outdf100nullID.index).reset_index(drop=True)

outdf100

Droping null SiteUUIDs...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI,TimeframeStart,TimeframeEnd
0,OWRD,"OR_1, OR_2",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,10961,groundwater,FORMOSA EXPLORATION INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
1,OWRD,OR_3,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12684,groundwater,DUTCH MINING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
2,OWRD,OR_4,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12750,groundwater,KNIFE RIVER CORP.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
3,OWRD,OR_5,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12779,groundwater,,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
4,OWRD,"OR_6, OR_7, OR_8, OR_9",OWRD Allocation all,OR_445,OWRD-Water Rights,Irrigation,MINING,13450,groundwater,"CASPER, KENNETH",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
5,OWRD,OR_10,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13525,groundwater,PARKIN FAMILY LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
6,OWRD,OR_11,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13584,groundwater,THE TOTTEN GROUP INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
7,OWRD,OR_12,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,14006,groundwater,"HOUSTON, HOWARD",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
8,OWRD,OR_13,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,15090,groundwater,MUNSEN PAVING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
9,OWRD,"OR_14, OR_15",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,11398,surface,"CLEMENT, ZELVA",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31


In [22]:
print("Droping null Priority date...")
# Flag rows with an empty-string AllocationPriorityDate; the flagged rows are
# kept in outdf100nullPR for inspection and removed from outdf100.
missingPriority = outdf100["AllocationPriorityDate"].eq('')
outdf100nullPR = outdf100.loc[missingPriority]
if missingPriority.any():
    outdf100 = outdf100.drop(outdf100nullPR.index).reset_index(drop=True)

outdf100
#outdf100nullPR

Droping null Priority date...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI,TimeframeStart,TimeframeEnd
0,OWRD,"OR_1, OR_2",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,10961,groundwater,FORMOSA EXPLORATION INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
1,OWRD,OR_3,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12684,groundwater,DUTCH MINING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
2,OWRD,OR_4,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12750,groundwater,KNIFE RIVER CORP.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
3,OWRD,OR_5,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12779,groundwater,,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
4,OWRD,"OR_6, OR_7, OR_8, OR_9",OWRD Allocation all,OR_445,OWRD-Water Rights,Irrigation,MINING,13450,groundwater,"CASPER, KENNETH",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
5,OWRD,OR_10,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13525,groundwater,PARKIN FAMILY LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
6,OWRD,OR_11,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13584,groundwater,THE TOTTEN GROUP INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
7,OWRD,OR_12,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,14006,groundwater,"HOUSTON, HOWARD",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
8,OWRD,OR_13,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,15090,groundwater,MUNSEN PAVING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
9,OWRD,"OR_14, OR_15",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,11398,surface,"CLEMENT, ZELVA",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31


In [23]:
print("Droping null WaterSourceUUID ...")
# Rows with an empty-string WaterSourceUUID. They are stored under their own name
# so that outdf100nullPR (the rows with a missing priority date) is not overwritten.
outdf100nullWS = outdf100.loc[outdf100["WaterSourceUUID"] == '']
if len(outdf100nullWS.index) > 0:
    # Remove exactly the rows collected above, then renumber the index from 0.
    outdf100 = outdf100.drop(outdf100nullWS.index)
    outdf100 = outdf100.reset_index(drop=True)
outdf100

Droping null WaterSourceUUID ...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI,TimeframeStart,TimeframeEnd
0,OWRD,"OR_1, OR_2",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,10961,groundwater,FORMOSA EXPLORATION INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
1,OWRD,OR_3,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12684,groundwater,DUTCH MINING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
2,OWRD,OR_4,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12750,groundwater,KNIFE RIVER CORP.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
3,OWRD,OR_5,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12779,groundwater,,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
4,OWRD,"OR_6, OR_7, OR_8, OR_9",OWRD Allocation all,OR_445,OWRD-Water Rights,Irrigation,MINING,13450,groundwater,"CASPER, KENNETH",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
5,OWRD,OR_10,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13525,groundwater,PARKIN FAMILY LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
6,OWRD,OR_11,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13584,groundwater,THE TOTTEN GROUP INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
7,OWRD,OR_12,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,14006,groundwater,"HOUSTON, HOWARD",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
8,OWRD,OR_13,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,15090,groundwater,MUNSEN PAVING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
9,OWRD,"OR_14, OR_15",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,11398,surface,"CLEMENT, ZELVA",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31


In [24]:
print("Droping duplicates...")
# Safety net: find rows that repeat an earlier row in every column.
isRepeat = outdf100.duplicated()
outdf100Duplicated = outdf100[isRepeat]
if isRepeat.any():
    # Export the repeated rows for inspection (row index included), then keep
    # only the first occurrence of each row and renumber the index from 0.
    outdf100Duplicated.to_csv("waterallocations_duplicaterows.csv")
    outdf100 = outdf100.drop_duplicates().reset_index(drop=True)

outdf100

Droping duplicates...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI,TimeframeStart,TimeframeEnd
0,OWRD,"OR_1, OR_2",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,10961,groundwater,FORMOSA EXPLORATION INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
1,OWRD,OR_3,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12684,groundwater,DUTCH MINING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
2,OWRD,OR_4,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12750,groundwater,KNIFE RIVER CORP.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
3,OWRD,OR_5,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,12779,groundwater,,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
4,OWRD,"OR_6, OR_7, OR_8, OR_9",OWRD Allocation all,OR_445,OWRD-Water Rights,Irrigation,MINING,13450,groundwater,"CASPER, KENNETH",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
5,OWRD,OR_10,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13525,groundwater,PARKIN FAMILY LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
6,OWRD,OR_11,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,13584,groundwater,THE TOTTEN GROUP INC.,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
7,OWRD,OR_12,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,14006,groundwater,"HOUSTON, HOWARD",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
8,OWRD,OR_13,OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,15090,groundwater,MUNSEN PAVING LLC,...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31
9,OWRD,"OR_14, OR_15",OWRD Allocation all,Unspecificed,OWRD-Water Rights,Irrigation,MINING,11398,surface,"CLEMENT, ZELVA",...,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,12/27/2019,,1/1,12/31


In [25]:
print("Checking required is not null...")
# Columns that must be filled in every output row.
# SiteUUID is currently commented out of this list.
requiredCols = ["OrganizationUUID", "VariableSpecificUUID", "WaterSourceUUID",
                "MethodUUID", "AllocationPriorityDate"] #SiteUUID

# A row is reported when ANY required column holds the empty string. The mask is
# built from requiredCols so the list and the check cannot drift apart.
# NOTE(review): this only detects '' — it relies on NaN having been replaced by ''
# earlier in the notebook; confirm if cells are reordered.
outdf100_nullMand = outdf100.loc[(outdf100[requiredCols] == '').any(axis=1)]
if len(outdf100_nullMand.index) > 0:
    # Export the offending rows for inspection; they are NOT removed from outdf100.
    outdf100_nullMand.to_csv('waterallocations_mandatoryFieldMissing.csv')  # index=False,
#ToDO: purge these cells if there is any missing? #For now left to be inspected
#outdf100_nullMand

Checking required is not null...


In [26]:
print("Writing outputs...")
# Save the final allocations table to the output CSV as UTF-8, without the row index.
outdf100.to_csv(path_or_buf=out_alloc, encoding="utf-8", index=False)

print("Done Water Allocation")

Writing outputs...
Done Water Allocation


### Do not run the following with the rest of the code  (it is for inspection)

In [None]:
##### Do not run the following with the rest of the code  (it is for inspection)
print("Long site ids...")

# Input for this inspection: an allocations file read back from disk.
# Note that this cell overwrites the notebook-level outdf100 and out_alloc.
in_alloc = "waterallocations_long.csv"    #output
outdf100 = pd.read_csv(in_alloc, encoding = "ISO-8859-1") #, or alternatively encoding = "utf-8"
print (len(outdf100.index))

# Maximum number of characters accepted in a SiteUUID cell.
maxSiteIdLength = 500

# read_csv loads blank cells as NaN (a float), on which len() raises TypeError.
# Blank cells are therefore treated as zero-length strings before measuring.
siteIdTooLong = outdf100['SiteUUID'].fillna('').astype(str).str.len() > maxSiteIdLength
outdf100Long = outdf100[siteIdTooLong]
if len(outdf100Long.index) > 0:
    print("There are rows with too long siteids")
    # Export the offending rows (row index included), then remove them.
    outdf100Long.to_csv("waterallocations_longsiteid.csv")  # index=False,
    outdf100 = outdf100.drop(outdf100Long.index)
    outdf100 = outdf100.reset_index(drop=True)
#outdf100

# Bare expression in the middle of a cell: it is not rendered (only a cell's
# last expression is displayed); kept for interactive inspection.
outdf100Long

print("Writing outputs...")
# Write the remaining rows to the allocations output file (row index omitted).
out_alloc = "waterallocations.csv"    #output
outdf100.to_csv(out_alloc, index=False, encoding = "utf-8")

print("Done Water Allocation")