In [1]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import os
from datetime import datetime
from dateutil.parser import parse

In [2]:
# working directory
working_dir = "./ProcessedInputData"
os.chdir(working_dir)

In [3]:
# Input files
fileInput1 = "wr_v_pod_public_xy.csv" 
# water sources look up
inp_wtrsrs="watersources.csv"
# sites look up
inp_sitdim = 'sites.csv'

#output: water allocation
out_alloc = "waterallocations.csv"    #output

In [4]:
######## WaDE columns

#the followwing fields have difference between the table here (edited by DPL) and that on the schema website
#http://schema.westernstateswater.org/tables/Input_AllocationAmounts_fact.html
"""
BeneficialUseCategory, PrimaryUseCategory, AllocationTimeframeStart, AllocationTimeframeEnd, " "
BeneficialUseCategoryCV, PrimaryUseCategoryCV, TimeframeStartDate,	TimeframeEndDate,	Geometry	
"""
# UUIDs: Add UUIDs for all dim tables
# OrganizationUUID, SiteUUID, VariableSpecificUUID, WaterSourceUUID, MethodUUID
columns = ["OrganizationUUID", "SiteUUID", "VariableSpecificUUID", "WaterSourceUUID", "MethodUUID", "PrimaryUseCategory",
           "BeneficialUseCategory", "AllocationNativeID", "AllocationTypeCV", "AllocationOwner",
           "AllocationApplicationDate", "AllocationPriorityDate", "AllocationLegalStatusCV", "AllocationCropDutyAmount",
           "AllocationExpirationDate",
           "AllocationChangeApplicationIndicator", "LegacyAllocationIDs", "AllocationBasisCV", "AllocationTimeframeStart",
           "AllocationTimeframeEnd", "AllocationAmount", "AllocationMaximum", "PopulationServed", "PowerType", "GeneratedPowerCapacityMW",
           "IrrigatedAcreage", "AllocationCommunityWaterSupplySystem", "AllocationSDWISIdentifierCV",
           "AllocationAssociatedWithdrawalSiteIDs", "AllocationAssociatedConsumptiveUseSiteIDs", "WaterAllocationNativeURL",
           "CustomerTypeCV", "IrrigationMethodCV", "CropTypeCV", "CommunityWaterSupplySystem", "DataPublicationDate",
           "DataPublicationDOI"]

dtypesx = [''] #here we could theoretically specify data types for each column name, but we didn't need to do that

In [5]:
### target dataFrame

# TODO: assumes dtypes inferred from CO file
outdf100=pd.DataFrame(columns=columns)

In [6]:
print("Reading inputs...")

# 
df100 = pd.read_csv(fileInput1, encoding = "ISO-8859-1") #, or alternatively encoding = "utf-8"
#print (len(df100.index))

# water sources look up
df400 = pd.read_csv(inp_wtrsrs, encoding = "ISO-8859-1")

#df100

Reading inputs...


  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
print("Read Sites file and map pod location to snp id")

# sites look up
df500 = pd.read_csv(inp_sitdim, encoding = "ISO-8859-1")

# the sites uuid is mapped as siteuuid-->sitenativeid(pod_location)--->snp_id(allocationativeid)
def assignNativeAlloc(colrowValue, df100):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        sitl = df100.loc[df100['pod_location_id'] == colrowValue, 'snp_id']
        if not(sitl.empty):            # check if the series is empty
            outList = sitl.iloc[0]
        else:
            outList = ''
    return outList

df500 = df500.assign(snp_id='')  #add new column and make is nan
#pod_location_id
df500['snp_id'] = df500.apply(lambda row: assignNativeAlloc(row['SiteNativeID'], df100), axis=1)

df500

Read Sites file and map pod location to snp id


Unnamed: 0,SiteUUID,SiteNativeID,SiteName,USGSSiteID,SiteTypeCV,Longitude,Latitude,SitePoint,SiteNativeURL,Geometry,...,NHDNetworkStatusCV,NHDProductCV,NHDUpdateDate,NHDReachCode,NHDMeasureNumber,StateCV,HUC8,HUC12,County,snp_id
0,OR_6909,6909,Unspecified,,well,-123.382877,42.855813,,,,...,,,,,,OR,,,,21755
1,OR_6910,6910,Unspecified,,well,-123.383487,42.854551,,,,...,,,,,,OR,,,,21755
2,OR_9355,9355,Unspecified,,well,-123.629420,42.682269,,,,...,,,,,,OR,,,,23327
3,OR_9480,9480,Unspecified,,well,-121.654631,44.301041,,,,...,,,,,,OR,,,,23390
4,OR_9515,9515,Unspecified,,well,-117.488914,44.467820,,,,...,,,,,,OR,,,,23418
5,OR_10663,10663,Unspecified,,well,-117.597891,44.406309,,,,...,,,,,,OR,,,,24080
6,OR_10664,10664,Unspecified,,well,-117.592731,44.395407,,,,...,,,,,,OR,,,,24080
7,OR_10665,10665,Unspecified,,well,-117.617453,44.301031,,,,...,,,,,,OR,,,,24080
8,OR_10666,10666,Unspecified,,well,-117.609214,44.370739,,,,...,,,,,,OR,,,,24080
9,OR_10776,10776,Unspecified,,well,-123.193846,45.550864,,,,...,,,,,,OR,,,,24155


In [8]:
# print("Read water sources file and map sources to snpid...")

# WRSourceTypeCVDictOR_Inv = {
#     "storage":"ST",
#     "surface water":"SW",
#     "groundwater":"GW"
# }

# def assignNativeAllocToSrc(colrowValue1, colrowValue2, df100):
# #     if (colrowValue1 == '') | (colrowValue2 == ''):
# #         outList = ''
# #     else:
#     sitl = df100.loc[(df100['source'] == colrowValue1), 
#                    #& (df100['wr_type'] == WRSourceTypeCVDictOR_Inv[colrowValue2]),
#                    'snp_id']
#     if not(sitl.empty):            # check if the series is empty
#         outList = sitl.iloc[0]   
#     else:
#         outList = ''
#     return outList

# # water sources look up
# df400 = pd.read_csv(inp_wtrsrs, encoding = "ISO-8859-1")
# #
# df400 = df400.assign(snp_id='')  #add new column and make is nan
# #
# df100 = df100.replace(np.nan, '')

# df400 = df400.replace(np.nan, '')

# df400['snp_id'] = df400.apply(lambda row: 
#                         assignNativeAllocToSrc(row['WaterSourceName'], row['WaterSourceTypeCV'], 
#                                                df100), axis=1)
# df400

In [9]:
print("Aggregate amounts for a water right...")

# there may be multiple PODs for each water right, and in this case each pod has allocation amount
# we aggregate these
tarCols = ["snp_id", "rate_cfs", "max_rate_acre_feet"]
df200=pd.DataFrame(columns = tarCols)
df200[tarCols] = df100[tarCols]
# sort=False keeps the order of rows (important to make sure they are same as d100 below)
df300 = df200.groupby("snp_id", sort=False).apply(pd.DataFrame.sum, skipna=False) #agg(np.sum, skipna=False)
#df300 = df200.groupby("snp_id", sort=False).agg({"rate_cfs": np.sum(skipna=False)})

df300R = df300[["rate_cfs", "max_rate_acre_feet"]]
df300R = df300R.reset_index(drop=True)
df300R

Aggregate amounts for a water right...


Unnamed: 0,rate_cfs,max_rate_acre_feet
0,0.0450,
1,0.2200,
2,0.7800,
3,0.5570,
4,1.0000,
5,0.1000,
6,0.5570,
7,0.2229,
8,0.4460,
9,4.0000,


In [10]:
# use only unique water rights that may have multiple sites/pds
print("Dropping duplicate native ids...")

print (len(df100.index))

df100.drop_duplicates(subset = ['snp_id'], inplace=True)   #
df100 = df100.reset_index(drop=True)

print (len(df100.index))

df100

Dropping duplicate native ids...
184377
85607


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,begin_month,begin_day,end_month,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,1.0,1.0,12.0,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,1.0,1.0,12.0,31.0,MIGRT,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,1.0,1.0,12.0,31.0,MIGRT,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,1.0,1.0,12.0,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10.0,Automapped as center of the envelope for the T...
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,1.0,1.0,12.0,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10.0,Automapped as center of the envelope for the T...
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,1.0,1.0,12.0,31.0,RL,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,1.0,1.0,12.0,31.0,MIGRT,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,1.0,1.0,12.0,31.0,DA,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,1.0,1.0,12.0,31.0,SM,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,1.0,1.0,12.0,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1


In [11]:
print("copying aggregated rages...")

df100[["rate_cfs_agg", "max_rate_acre_feet_agg"]] = df300R[["rate_cfs", "max_rate_acre_feet"]]
#df100["rate_cfs"] = df300R["rate_cfs"]
#df100["max_rate_acre_feet"] = df300R["max_rate_acre_feet"]
df100

copying aggregated rages...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,end_month,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,rate_cfs_agg,max_rate_acre_feet_agg
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,12.0,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,0.0450,
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,12.0,31.0,MIGRT,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,0.2200,
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,12.0,31.0,MIGRT,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,0.7800,
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,12.0,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10.0,Automapped as center of the envelope for the T...,0.5570,
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,12.0,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10.0,Automapped as center of the envelope for the T...,1.0000,
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,12.0,31.0,RL,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,0.1000,
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,12.0,31.0,MIGRT,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,0.5570,
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,12.0,31.0,DA,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,0.2229,
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,12.0,31.0,SM,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,0.4460,
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,12.0,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,4.0000,


In [12]:
print("Adding SiteUUID...")

def assignSiteID(colrowValue, df500):
    if colrowValue == '' or pd.isnull(colrowValue):
        outList = ''
    else:
        sitl = df500.loc[df500['snp_id'] == colrowValue, 'SiteUUID']
        if not(sitl.empty):            # check if the series is empty
            outList = ', '.join(str(inx) for inx in sitl) #sil.iloc[0]
        else:
            outList = ''
    return outList

df100 = df100.assign(SiteUUID='')  #add new column and make is nan
#Permit Number
df100['SiteUUID'] = df100.apply(lambda row: assignSiteID(row['snp_id'], df500), axis=1)
#pod_location_id
df100

Adding SiteUUID...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,end_day,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,rate_cfs_agg,max_rate_acre_feet_agg,SiteUUID
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,0.0450,,"OR_6909, OR_6910"
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,31.0,MIGRT,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,0.2200,,OR_9355
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,31.0,MIGRT,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,0.7800,,OR_9480
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10.0,Automapped as center of the envelope for the T...,0.5570,,OR_9515
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,31.0,KLS,OWRD,2007-10-26T12:42:33.000,,10.0,Automapped as center of the envelope for the T...,1.0000,,"OR_10663, OR_10664, OR_10665, OR_10666"
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,31.0,RL,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,0.1000,,OR_10776
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,31.0,MIGRT,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,0.5570,,OR_10862
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,31.0,DA,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,0.2229,,OR_11558
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,31.0,SM,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,0.4460,,OR_12037
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,31.0,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,4.0000,,"OR_35776, OR_35777"


In [13]:
# print("Water sources IDs...")

# def assignWaterSourceID3(colrowValue, df400):
#     if colrowValue == '' or pd.isnull(colrowValue):
#             outList = ''
#         else:
#             sitl = df400.loc[df400['snp_id'] == colrowValue, 'WaterSourceUUID']
#             if not(sitl.empty):            # check if the series is empty
#                 outList = ', '.join(str(inx) for inx in sitl) #sil.iloc[0]
#             else:
#                 outList = ''
#         return outList

# df100 = df100.assign(WaterSourceUUID='')

# df100['WaterSourceUUID'] = df100.apply(lambda row: 
#                         assignWaterSourceID2(row['snp_id'], df400), axis=1)

# df100

In [14]:
print("Water source IDs...")

WRSourceTypeCVDictOR = {
    "ST":"storage",
    "SW":"surface water",
    "GW":"groundwater"
}

def assignWaterSourceID2(colrowValue11, colrowValue22, df400):
    colrowValue1 = str(colrowValue11).strip()
    colrowValue2 = str(colrowValue22).strip()
    if colrowValue1 == '' or colrowValue2 == '':
        outList = ''
    else:
        sitl = df400.loc[(df400['WaterSourceName'] == colrowValue1) 
                       & (df400['WaterSourceTypeCV'] == WRSourceTypeCVDictOR[colrowValue2]),
                       'WaterSourceUUID']
        #print(sitl)
        #print(sitl.empty)
        if not(sitl.empty):            # check if the series is empty
            outList = sitl.iloc[0]   # ', '.join(str(inx) for inx in sitl) #
            # watersourceSer.append(ml.iloc[0])
        else:
            outList = ''
    return outList

df100 = df100.assign(WaterSourceUUID='')

df100 = df100.replace(np.nan, '')

df100['WaterSourceUUID'] = df100.apply(lambda row: 
                        assignWaterSourceID2(row['source'], row['wr_type'], df400), axis=1)

df100

Water source IDs...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,technician_initials,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,rate_cfs_agg,max_rate_acre_feet_agg,SiteUUID,WaterSourceUUID
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,0.045,,"OR_6909, OR_6910",OR_1
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,MIGRT,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,0.22,,OR_9355,OR_3
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,MIGRT,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,0.78,,OR_9480,OR_3
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,KLS,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,0.557,,OR_9515,OR_3
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,KLS,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,1,,"OR_10663, OR_10664, OR_10665, OR_10666",OR_4
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,RL,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,0.1,,OR_10776,OR_3
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,MIGRT,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,0.557,,OR_10862,OR_3
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,DA,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,0.2229,,OR_11558,OR_3
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,SM,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,0.446,,OR_12037,OR_3
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,MIGRT,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,4,,"OR_35776, OR_35777",OR_8


In [15]:
print("AllocationTypeCV...")

claimCharDictOR = {
    "GR":"groundwater registrations",
    "PC":"power claim",
    "SW":"surface water registrations",
    "KL":"Klamath Adjudication claim",
    "KA":"Klamath Adjudication"
}

df100 = df100.assign(AllocationTypeCV='')

df100 = df100.replace(np.nan, '')

df100['AllocationTypeCV'] = df100.apply(lambda row: '' if str(row['claim_char']) == ''
                                        else claimCharDictOR[str(row['claim_char']).strip()],
                                        axis=1)

df100

AllocationTypeCV...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,agency,rec_creation_date,last_updt_date,feature_quality_code,remarks,rate_cfs_agg,max_rate_acre_feet_agg,SiteUUID,WaterSourceUUID,AllocationTypeCV
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,0.045,,"OR_6909, OR_6910",OR_1,
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,OWRD,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,0.22,,OR_9355,OR_3,
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,OWRD,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,0.78,,OR_9480,OR_3,
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,0.557,,OR_9515,OR_3,
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,OWRD,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,1,,"OR_10663, OR_10664, OR_10665, OR_10666",OR_4,
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,ESU,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,0.1,,OR_10776,OR_3,
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,OWRD,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,0.557,,OR_10862,OR_3,
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,ESU,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,0.2229,,OR_11558,OR_3,
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,ESU,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,0.446,,OR_12037,OR_3,
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,OWRD,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,4,,"OR_35776, OR_35777",OR_8,


In [None]:
# outdf100 = outdf100.replace(np.nan, '')

# print("Writing outputs...")
#         #write out
# #output: water allocation
# outdf100 = df100[["snp_id",	"pod_location_id",	"app_char",	"permit_char",
#                  "source_type",	"use_code_description",	"SiteUUID", "source", "wr_type","WaterSourceUUID",
#                  "AllocationTypeCV", "claim_char"]]
# out_alloc = "waterallocations_interm.csv"    #output
# outdf100.to_csv(out_alloc, index=False, encoding = "utf-8")

# print("Done Water Allocation")


In [16]:
print("AllocationOwner functions...")

def assignownerName(colrowValue1, colrowValue2):
    if colrowValue1 == '' or pd.isnull(colrowValue1):
        outList1 = ''
    else:
        outList1 = colrowValue1.strip()  # remove whitespace chars
    if colrowValue2 == '' or pd.isnull(colrowValue2):
        outList2 = ''
    else:
        outList2 = colrowValue2.strip()  # remove whitespace chars

    if outList1 == '' and outList2 == '':
        outList = ''
    elif outList1 == '':
        outList = outList2
    elif outList2 == '':
        outList = outList1
    else:
        outList = ", ".join(map(str, [colrowValue1, colrowValue2]))
    return outList


def assignownerNameORCompany(colrowValue1, colrowValue2, colrowValue3):
    if colrowValue1 == '' or pd.isnull(colrowValue1):
        outList = assignownerName(colrowValue2, colrowValue3)
    else:
        outList = colrowValue1
    return outList

AllocationOwner functions...


In [17]:
print("AllocationOwner...")

df100 = df100.assign(AllocationOwner='')
df100['AllocationOwner'] = df100.apply(lambda row: assignownerNameORCompany(row['name_company'], 
                                                                            row['name_last'], 
                                                                            row['name_first']), axis=1)
df100

AllocationOwner...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,rec_creation_date,last_updt_date,feature_quality_code,remarks,rate_cfs_agg,max_rate_acre_feet_agg,SiteUUID,WaterSourceUUID,AllocationTypeCV,AllocationOwner
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 G 10961 1,0.045,,"OR_6909, OR_6910",OR_1,,FORMOSA EXPLORATION INC.
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,2001-06-01T00:00:00.000,2001-06-01T00:00:00.000,,0 G 12684 1,0.22,,OR_9355,OR_3,,DUTCH MINING LLC
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,2001-05-01T00:00:00.000,2001-05-01T00:00:00.000,,0 G 12750 1,0.78,,OR_9480,OR_3,,KNIFE RIVER CORP.
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,0.557,,OR_9515,OR_3,,
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,2007-10-26T12:42:33.000,,10,Automapped as center of the envelope for the T...,1,,"OR_10663, OR_10664, OR_10665, OR_10666",OR_4,,"CASPER, KENNETH"
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,2003-10-17T00:00:00.000,2003-10-17T00:00:00.000,,,0.1,,OR_10776,OR_3,,PARKIN FAMILY LLC
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,2002-04-01T00:00:00.000,2002-04-01T00:00:00.000,,0 G 13584 1,0.557,,OR_10862,OR_3,,THE TOTTEN GROUP INC.
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,2003-08-28T00:00:00.000,2003-08-28T00:00:00.000,,,0.2229,,OR_11558,OR_3,,"HOUSTON, HOWARD"
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,2003-07-21T00:00:00.000,2003-07-21T00:00:00.000,,,0.446,,OR_12037,OR_3,,MUNSEN PAVING LLC
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,1996-06-01T00:00:00.000,1996-06-01T00:00:00.000,,0 S 11398 1,4,,"OR_35776, OR_35777",OR_8,,"CLEMENT, ZELVA"


In [18]:
print("Allocation priority date...")

# input format 1989-11-21T00:00:00.000
def formatDateString(inString):
    #print(inString)
    try:
        if inString == '' or pd.isnull(inString):
            valndf = ''
        else:
            valD = datetime.strptime(inString, '%Y-%m-%dT00:00:00.000')
            #print(valD)
            valnDd = valD.date()
            #print(valnDd)
            valndf = valnDd.strftime('%m/%d/%Y')
            #print('date:', valndf)
    except:
        valndf = ''

    return valndf

df100 = df100.assign(AllocationPriorityDate='')

df100['AllocationPriorityDate'] = df100.apply(lambda row: 
                                        formatDateString(row['priority_date']), axis=1)

df100

Allocation priority date...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,last_updt_date,feature_quality_code,remarks,rate_cfs_agg,max_rate_acre_feet_agg,SiteUUID,WaterSourceUUID,AllocationTypeCV,AllocationOwner,AllocationPriorityDate
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,1996-06-01T00:00:00.000,,0 G 10961 1,0.045,,"OR_6909, OR_6910",OR_1,,FORMOSA EXPLORATION INC.,11/21/1989
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,2001-06-01T00:00:00.000,,0 G 12684 1,0.22,,OR_9355,OR_3,,DUTCH MINING LLC,05/06/1994
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,2001-05-01T00:00:00.000,,0 G 12750 1,0.78,,OR_9480,OR_3,,KNIFE RIVER CORP.,01/18/1995
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,,10,Automapped as center of the envelope for the T...,0.557,,OR_9515,OR_3,,,09/03/1992
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,,10,Automapped as center of the envelope for the T...,1,,"OR_10663, OR_10664, OR_10665, OR_10666",OR_4,,"CASPER, KENNETH",12/21/1995
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,2003-10-17T00:00:00.000,,,0.1,,OR_10776,OR_3,,PARKIN FAMILY LLC,11/14/1997
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,2002-04-01T00:00:00.000,,0 G 13584 1,0.557,,OR_10862,OR_3,,THE TOTTEN GROUP INC.,03/19/1998
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,2003-08-28T00:00:00.000,,,0.2229,,OR_11558,OR_3,,"HOUSTON, HOWARD",01/07/2000
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,2003-07-21T00:00:00.000,,,0.446,,OR_12037,OR_3,,MUNSEN PAVING LLC,01/09/2001
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,1996-06-01T00:00:00.000,,0 S 11398 1,4,,"OR_35776, OR_35777",OR_8,,"CLEMENT, ZELVA",08/13/1934


In [19]:
# 
def formatDateString(inString1, inString2):
    #print(inString)
    try:
        valndf = str(int(inString156de)).strip() + '/' + str(int(inString2)).strip()
    except:
        valndf = ''

    return valndf;

In [20]:
print("Timeframe start...")

df100 = df100.assign(AllocationTimeframeStart='')

df100['AllocationTimeframeStart'] = df100.apply(lambda row: 
                                        formatDateString(row['begin_month'], row['begin_day']),
                                        axis=1)
df100

Timeframe start...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,remarks,rate_cfs_agg,max_rate_acre_feet_agg,SiteUUID,WaterSourceUUID,AllocationTypeCV,AllocationOwner,AllocationPriorityDate,TimeframeStart,AllocationTimeframeStart
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,0 G 10961 1,0.045,,"OR_6909, OR_6910",OR_1,,FORMOSA EXPLORATION INC.,11/21/1989,,
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,0 G 12684 1,0.22,,OR_9355,OR_3,,DUTCH MINING LLC,05/06/1994,,
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,0 G 12750 1,0.78,,OR_9480,OR_3,,KNIFE RIVER CORP.,01/18/1995,,
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,Automapped as center of the envelope for the T...,0.557,,OR_9515,OR_3,,,09/03/1992,,
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,Automapped as center of the envelope for the T...,1,,"OR_10663, OR_10664, OR_10665, OR_10666",OR_4,,"CASPER, KENNETH",12/21/1995,,
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,,0.1,,OR_10776,OR_3,,PARKIN FAMILY LLC,11/14/1997,,
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,0 G 13584 1,0.557,,OR_10862,OR_3,,THE TOTTEN GROUP INC.,03/19/1998,,
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,,0.2229,,OR_11558,OR_3,,"HOUSTON, HOWARD",01/07/2000,,
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,,0.446,,OR_12037,OR_3,,MUNSEN PAVING LLC,01/09/2001,,
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,0 S 11398 1,4,,"OR_35776, OR_35777",OR_8,,"CLEMENT, ZELVA",08/13/1934,,


In [21]:
print("Timeframe end...")

df100 = df100.assign(AllocationTimeframeEnd='')

df100['AllocationTimeframeEnd'] = df100.apply(lambda row: 
                                            formatDateString(row['end_month'], row['end_day']),
                                            axis=1)
df100

Timeframe end...


Unnamed: 0,X,Y,OBJECTID,pod_display,pod_display_short,wris_link,snp_id,pod_location_id,pod_use_id,app_char,...,rate_cfs_agg,max_rate_acre_feet_agg,SiteUUID,WaterSourceUUID,AllocationTypeCV,AllocationOwner,AllocationPriorityDate,TimeframeStart,AllocationTimeframeStart,AllocationTimeframeEnd
0,5.394125e+05,4.167058e+05,1,Permit: G 10961 * MI,G 10961,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,21755,6909,26859,G,...,0.045,,"OR_6909, OR_6910",OR_1,,FORMOSA EXPLORATION INC.,11/21/1989,,,
1,4.709328e+05,3.559155e+05,3,Permit: G 12684 * MI,G 12684,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23327,9355,29682,G,...,0.22,,OR_9355,OR_3,,DUTCH MINING LLC,05/06/1994,,,
2,1.010124e+06,9.319935e+05,4,Permit: G 12750 * MI,G 12750,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23390,9480,29835,G,...,0.78,,OR_9480,OR_3,,KNIFE RIVER CORP.,01/18/1995,,,
3,2.098075e+06,1.005065e+06,6,Permit: G 12779 * MI,G 12779,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,23418,9515,29873,G,...,0.557,,OR_9515,OR_3,,,09/03/1992,,,
4,2.070442e+06,9.816352e+05,7,Permit: G 13450 * MI,G 13450,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24080,10663,31263,G,...,1,,"OR_10663, OR_10664, OR_10665, OR_10666",OR_4,,"CASPER, KENNETH",12/21/1995,,,
5,6.223012e+05,1.396810e+06,11,Permit: G 13525 * MI,G 13525,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24155,10776,31422,G,...,0.1,,OR_10776,OR_3,,PARKIN FAMILY LLC,11/14/1997,,,
6,9.970726e+05,1.441323e+05,12,Permit: G 13584 * MI,G 13584,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24213,10862,31532,G,...,0.557,,OR_10862,OR_3,,THE TOTTEN GROUP INC.,03/19/1998,,,
7,1.056970e+06,1.444679e+06,13,Permit: G 14006 * MI,G 14006,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24627,11558,32335,G,...,0.2229,,OR_11558,OR_3,,"HOUSTON, HOWARD",01/07/2000,,,
8,1.130385e+06,1.419008e+06,14,Permit: G 15090 * MI,G 15090,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,24805,12037,32736,G,...,0.446,,OR_12037,OR_3,,MUNSEN PAVING LLC,01/09/2001,,,
9,1.900659e+06,1.217460e+06,15,Permit: S 11398 * MI,S 11398,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,38924,35776,42650,S,...,4,,"OR_35776, OR_35777",OR_8,,"CLEMENT, ZELVA",08/13/1934,,,


In [22]:
print("Copying all columns...")
#
destCols=["SiteUUID", "WaterSourceUUID", 
          "AllocationNativeID", 
          #"AllocationLegalStatusCV", 
          "BeneficialUseCategory", 
          "AllocationOwner", 
          "AllocationTypeCV", 
          #"AllocationApplicationDate", 
          "AllocationPriorityDate",
          "AllocationAmount", 
          "AllocationMaximum", 
          #"IrrigatedAcreage",
          #"AllocationCropDutyAmount", "AllocationExpirationDate", 
          "AllocationTimeframeStart", "AllocationTimeframeEnd",
          "WaterAllocationNativeURL"
         ]
#
srsCols=["SiteUUID", "WaterSourceUUID", 
          "snp_id", 
         #"WaRecProcessStatusTypeCode",
          "use_code_description", 
          "AllocationOwner",
          "AllocationTypeCV", 
          #"AllocationApplicationDate",
          "AllocationPriorityDate", 
          "rate_cfs_agg",
          "max_rate_acre_feet_agg",
          #"IrrigatedAreaQuantity",
          #"IRRIGATION_DEPLETION", "DATE_TERMINATED",
          "AllocationTimeframeStart", "AllocationTimeframeEnd",
          "wris_link"
         ]

outdf100[destCols] = df100[srsCols]

outdf100

Copying all columns...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,,"OR_6909, OR_6910",,OR_1,,,MINING,21755,,FORMOSA EXPLORATION INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,
1,,OR_9355,,OR_3,,,MINING,23327,,DUTCH MINING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,
2,,OR_9480,,OR_3,,,MINING,23390,,KNIFE RIVER CORP.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,
3,,OR_9515,,OR_3,,,MINING,23418,,,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,
4,,"OR_10663, OR_10664, OR_10665, OR_10666",,OR_4,,,MINING,24080,,"CASPER, KENNETH",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,
5,,OR_10776,,OR_3,,,MINING,24155,,PARKIN FAMILY LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,
6,,OR_10862,,OR_3,,,MINING,24213,,THE TOTTEN GROUP INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,
7,,OR_11558,,OR_3,,,MINING,24627,,"HOUSTON, HOWARD",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,
8,,OR_12037,,OR_3,,,MINING,24805,,MUNSEN PAVING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,
9,,"OR_35776, OR_35777",,OR_8,,,MINING,38924,,"CLEMENT, ZELVA",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,,


In [23]:
# hard coded
print("Hard coded...")
#hard coded
outdf100.OrganizationUUID = "OWRD"
outdf100.VariableSpecificUUID = "OWRD Allocation all"
outdf100.MethodUUID = "OWRD-Water Rights"
outdf100.AllocationBasisCV = "Unknown"
# check this later
outdf100.PrimaryUseCategory = "Irrigation"
#
#outdf100.AllocationTimeframeStart = "01/01"
#outdf100.AllocationTimeframeEnd = "12/31"

#
outdf100.DataPublicationDate = datetime.now().strftime('%m/%d/%Y')    #"10/31/2019" # edit this to the code run date

outdf100

Hard coded...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,OWRD,"OR_6909, OR_6910",OWRD Allocation all,OR_1,OWRD-Water Rights,Irrigation,MINING,21755,,FORMOSA EXPLORATION INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
1,OWRD,OR_9355,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23327,,DUTCH MINING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
2,OWRD,OR_9480,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23390,,KNIFE RIVER CORP.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
3,OWRD,OR_9515,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23418,,,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
4,OWRD,"OR_10663, OR_10664, OR_10665, OR_10666",OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,24080,,"CASPER, KENNETH",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
5,OWRD,OR_10776,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24155,,PARKIN FAMILY LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
6,OWRD,OR_10862,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24213,,THE TOTTEN GROUP INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
7,OWRD,OR_11558,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24627,,"HOUSTON, HOWARD",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
8,OWRD,OR_12037,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24805,,MUNSEN PAVING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
9,OWRD,"OR_35776, OR_35777",OWRD Allocation all,OR_8,OWRD-Water Rights,Irrigation,MINING,38924,,"CLEMENT, ZELVA",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,


In [24]:
print("Droping null allocations...")
# if both Allocation amount and Allocation maximum are empty drop row and save it to a Allocations_missing.csv
outdf100 = outdf100.replace(np.nan, '') #replace blank strings by NaN,
outdf100purge = outdf100.loc[(outdf100["AllocationAmount"] == '') & (outdf100["AllocationMaximum"] == '')]
if len(outdf100purge.index) > 0:
    outdf100purge.to_csv('waterallocations_missing.csv')    #index=False,
    dropIndex = outdf100.loc[(outdf100["AllocationAmount"] == '') & (outdf100["AllocationMaximum"] == '')].index
    outdf100 = outdf100.drop(dropIndex)
    outdf100 = outdf100.reset_index(drop=True)

outdf100
#outdf100purge

Droping null allocations...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,OWRD,"OR_6909, OR_6910",OWRD Allocation all,OR_1,OWRD-Water Rights,Irrigation,MINING,21755,,FORMOSA EXPLORATION INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
1,OWRD,OR_9355,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23327,,DUTCH MINING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
2,OWRD,OR_9480,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23390,,KNIFE RIVER CORP.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
3,OWRD,OR_9515,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23418,,,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
4,OWRD,"OR_10663, OR_10664, OR_10665, OR_10666",OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,24080,,"CASPER, KENNETH",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
5,OWRD,OR_10776,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24155,,PARKIN FAMILY LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
6,OWRD,OR_10862,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24213,,THE TOTTEN GROUP INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
7,OWRD,OR_11558,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24627,,"HOUSTON, HOWARD",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
8,OWRD,OR_12037,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24805,,MUNSEN PAVING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
9,OWRD,"OR_35776, OR_35777",OWRD Allocation all,OR_8,OWRD-Water Rights,Irrigation,MINING,38924,,"CLEMENT, ZELVA",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,


In [25]:
print("Droping null SiteUUIDs...")
outdf100nullID = outdf100.loc[outdf100["SiteUUID"] == '']
if len(outdf100nullID.index) > 0:
    dropIndex = outdf100.loc[outdf100["SiteUUID"] == ''].index
    outdf100 = outdf100.drop(dropIndex)
    outdf100 = outdf100.reset_index(drop=True)

outdf100

Droping null SiteUUIDs...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,OWRD,"OR_6909, OR_6910",OWRD Allocation all,OR_1,OWRD-Water Rights,Irrigation,MINING,21755,,FORMOSA EXPLORATION INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
1,OWRD,OR_9355,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23327,,DUTCH MINING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
2,OWRD,OR_9480,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23390,,KNIFE RIVER CORP.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
3,OWRD,OR_9515,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23418,,,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
4,OWRD,"OR_10663, OR_10664, OR_10665, OR_10666",OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,24080,,"CASPER, KENNETH",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
5,OWRD,OR_10776,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24155,,PARKIN FAMILY LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
6,OWRD,OR_10862,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24213,,THE TOTTEN GROUP INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
7,OWRD,OR_11558,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24627,,"HOUSTON, HOWARD",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
8,OWRD,OR_12037,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24805,,MUNSEN PAVING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
9,OWRD,"OR_35776, OR_35777",OWRD Allocation all,OR_8,OWRD-Water Rights,Irrigation,MINING,38924,,"CLEMENT, ZELVA",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,


In [26]:
print("Droping null Priority date...")
outdf100nullPR = outdf100.loc[outdf100["AllocationPriorityDate"] == '']
if len(outdf100nullPR.index) > 0:
    dropIndex = outdf100.loc[outdf100["AllocationPriorityDate"] == ''].index
    outdf100 = outdf100.drop(dropIndex)
    outdf100 = outdf100.reset_index(drop=True)

outdf100
#outdf100nullPR

Droping null Priority date...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,OWRD,"OR_6909, OR_6910",OWRD Allocation all,OR_1,OWRD-Water Rights,Irrigation,MINING,21755,,FORMOSA EXPLORATION INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
1,OWRD,OR_9355,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23327,,DUTCH MINING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
2,OWRD,OR_9480,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23390,,KNIFE RIVER CORP.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
3,OWRD,OR_9515,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23418,,,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
4,OWRD,"OR_10663, OR_10664, OR_10665, OR_10666",OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,24080,,"CASPER, KENNETH",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
5,OWRD,OR_10776,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24155,,PARKIN FAMILY LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
6,OWRD,OR_10862,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24213,,THE TOTTEN GROUP INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
7,OWRD,OR_11558,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24627,,"HOUSTON, HOWARD",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
8,OWRD,OR_12037,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24805,,MUNSEN PAVING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
9,OWRD,"OR_35776, OR_35777",OWRD Allocation all,OR_8,OWRD-Water Rights,Irrigation,MINING,38924,,"CLEMENT, ZELVA",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,


In [27]:
print("Droping null WaterSourceUUID ...")
outdf100nullPR = outdf100.loc[outdf100["WaterSourceUUID"] == '']
if len(outdf100nullPR.index) > 0:
    dropIndex = outdf100.loc[outdf100["WaterSourceUUID"] == ''].index
    outdf100 = outdf100.drop(dropIndex)
    outdf100 = outdf100.reset_index(drop=True)
outdf100

Droping null WaterSourceUUID ...


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,OWRD,"OR_6909, OR_6910",OWRD Allocation all,OR_1,OWRD-Water Rights,Irrigation,MINING,21755,,FORMOSA EXPLORATION INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
1,OWRD,OR_9355,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23327,,DUTCH MINING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
2,OWRD,OR_9480,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23390,,KNIFE RIVER CORP.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
3,OWRD,OR_9515,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23418,,,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
4,OWRD,"OR_10663, OR_10664, OR_10665, OR_10666",OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,24080,,"CASPER, KENNETH",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
5,OWRD,OR_10776,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24155,,PARKIN FAMILY LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
6,OWRD,OR_10862,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24213,,THE TOTTEN GROUP INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
7,OWRD,OR_11558,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24627,,"HOUSTON, HOWARD",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
8,OWRD,OR_12037,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24805,,MUNSEN PAVING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
9,OWRD,"OR_35776, OR_35777",OWRD Allocation all,OR_8,OWRD-Water Rights,Irrigation,MINING,38924,,"CLEMENT, ZELVA",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,


In [28]:
print("Droping duplicates...")

print (len(outdf100.index))
#drop duplicate rows; just make sure
outdf100Duplicated=outdf100.loc[outdf100.duplicated()]
if len(outdf100Duplicated.index) > 0:
    outdf100Duplicated.to_csv("waterallocations_duplicaterows.csv")  # index=False,
    outdf100.drop_duplicates(inplace=True)   #
    outdf100 = outdf100.reset_index(drop=True)

print (len(outdf100.index))

outdf100

Droping duplicates...
84912
84912


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
0,OWRD,"OR_6909, OR_6910",OWRD Allocation all,OR_1,OWRD-Water Rights,Irrigation,MINING,21755,,FORMOSA EXPLORATION INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
1,OWRD,OR_9355,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23327,,DUTCH MINING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
2,OWRD,OR_9480,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23390,,KNIFE RIVER CORP.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
3,OWRD,OR_9515,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,23418,,,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
4,OWRD,"OR_10663, OR_10664, OR_10665, OR_10666",OWRD Allocation all,OR_4,OWRD-Water Rights,Irrigation,MINING,24080,,"CASPER, KENNETH",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
5,OWRD,OR_10776,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24155,,PARKIN FAMILY LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
6,OWRD,OR_10862,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24213,,THE TOTTEN GROUP INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
7,OWRD,OR_11558,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24627,,"HOUSTON, HOWARD",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
8,OWRD,OR_12037,OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,MINING,24805,,MUNSEN PAVING LLC,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
9,OWRD,"OR_35776, OR_35777",OWRD Allocation all,OR_8,OWRD-Water Rights,Irrigation,MINING,38924,,"CLEMENT, ZELVA",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,


In [29]:
print("Checking required is not null...")
# check if any cell of these columns is null
requiredCols = ["OrganizationUUID", "VariableSpecificUUID", "WaterSourceUUID", 
                "MethodUUID", "AllocationPriorityDate"] #SiteUUID
# outdf100_nullMand = outdf100.loc[outdf100.isnull().any(axis=1)] --for all cols
# outdf100_nullMand = outdf100.loc[outdf100[requiredCols].isnull().any(axis=1)]
#(outdf100["SiteUUID"].isnull()) |
outdf100_nullMand = outdf100.loc[(outdf100["OrganizationUUID"] == '') |
                                (outdf100["VariableSpecificUUID"] == '') |
                                (outdf100["WaterSourceUUID"] == '') |
                                (outdf100["MethodUUID"] == '') |
                                (outdf100["AllocationPriorityDate"] == '')]
#outdf100_nullMand = outdf100.loc[[False | (outdf100[varName].isnull()) for varName in requiredCols]]
if(len(outdf100_nullMand.index) > 0):
    outdf100_nullMand.to_csv('waterallocations_mandatoryFieldMissing.csv')  # index=False,
#ToDO: purge these cells if there is any missing? #For now left to be inspected
#outdf100_nullMand

Checking required is not null...


In [30]:
print("Writing outputs...")
#write out
outdf100.to_csv(out_alloc, index=False, encoding = "utf-8")

print("Done Water Allocation")

Writing outputs...
Done Water Allocation


### Do not run the following with the rest of the code. It is for checking if the SiteUUID is too long (if it is too long then drop those wthe length > 250 chars long)

In [34]:
##### Do not run the following with the rest of the code  (it is for inspection)
print("Inspect long site ids...")

outdf100Long = outdf100[outdf100['SiteUUID'].apply(lambda x: len(x) > 250)]
if len(outdf100Long.index) > 0:
    print("There are rows with too long siteids")
    outdf100Long.to_csv("waterallocations_longsiteid.csv")  # index=False,
#outdf100

outdf100Long

Inspect long site ids...
There are rows with too long siteids


Unnamed: 0,OrganizationUUID,SiteUUID,VariableSpecificUUID,WaterSourceUUID,MethodUUID,PrimaryUseCategory,BeneficialUseCategory,AllocationNativeID,AllocationTypeCV,AllocationOwner,...,AllocationSDWISIdentifierCV,AllocationAssociatedWithdrawalSiteIDs,AllocationAssociatedConsumptiveUseSiteIDs,WaterAllocationNativeURL,CustomerTypeCV,IrrigationMethodCV,CropTypeCV,CommunityWaterSupplySystem,DataPublicationDate,DataPublicationDOI
1826,OWRD,"OR_270181, OR_270182, OR_270183, OR_270184, OR...",OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,NURSERY USES,168998,,"COOK, CLIVE",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
2998,OWRD,"OR_47451, OR_47452, OR_47453, OR_47454, OR_474...",OWRD Allocation all,OR_579,OWRD-Water Rights,Irrigation,GROUP DOMESTIC,51925,,HAPPY ROCK COLLECTIVE,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
7987,OWRD,"OR_97173, OR_97174, OR_97175, OR_97176, OR_971...",OWRD Allocation all,OR_123,OWRD-Water Rights,Irrigation,DOMESTIC EXPANDED,92674,,"KERBER, LONELLA",...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
13022,OWRD,"OR_193490, OR_193491, OR_193492, OR_193493, OR...",OWRD Allocation all,OR_1817,OWRD-Water Rights,Irrigation,DOMESTIC,145835,,CITY OF DUNES CITY,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
13259,OWRD,"OR_210650, OR_210651, OR_210652, OR_210653, OR...",OWRD Allocation all,OR_3,OWRD-Water Rights,Irrigation,GROUP DOMESTIC,150600,,AVION WATER CO. INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
14057,OWRD,"OR_288173, OR_288174, OR_288175, OR_288176, OR...",OWRD Allocation all,OR_907,OWRD-Water Rights,Irrigation,HUMAN CONSUMPTION,175710,,LAKE OF THE WOODS RECREATION ASSOCIATION; U.S....,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
14754,OWRD,"OR_329829, OR_329830, OR_329831, OR_329832, OR...",OWRD Allocation all,OR_627,OWRD-Water Rights,Irrigation,DOMESTIC EXPANDED,196296,,LOOKINGGLASS OLALLA WATER CONTROL DISTRICT,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
19485,OWRD,"OR_38708, OR_38709, OR_38710, OR_38711, OR_387...",OWRD Allocation all,OR_46,OWRD-Water Rights,Irrigation,SUPPLEMENTAL IRRIGATION,46679,,,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
19492,OWRD,"OR_39211, OR_39212, OR_39213, OR_39214, OR_392...",OWRD Allocation all,OR_3089,OWRD-Water Rights,Irrigation,PRIMARY AND SUPPLEMENTAL IRRIGATION,46997,,CIRCLE FIVE RANCH INC.,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,
19679,OWRD,"OR_44142, OR_44143, OR_44144, OR_44145, OR_441...",OWRD Allocation all,OR_28,OWRD-Water Rights,Irrigation,PRIMARY AND SUPPLEMENTAL IRRIGATION,50063,,COUNTY OF DOUGLAS; PUBLIC WORKS,...,,,,http://apps.wrd.state.or.us/apps/wr/wrinfo/wr_...,,,,,01/12/2020,


In [35]:
##### Do not run the following with the rest of the code  (it is for inspection)
print("Drop long site ids...")

print (len(outdf100.index))
if len(outdf100Long.index) > 0:
    dropIndex = outdf100[outdf100['SiteUUID'].apply(lambda x: len(x) > 250)].index
    outdf100 = outdf100.drop(dropIndex)   #
    outdf100 = outdf100.reset_index(drop=True)
#outdf100

print(len(outdf100.index))

print("Writing outputs...")
#write out
outdf100.to_csv(out_alloc, index=False, encoding = "utf-8")

print("Done Water Allocation")

Drop long site ids...
84912
84878
Writing outputs...
Done Water Allocation
