In [1]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import os
from datetime import datetime
from dateutil.parser import parse
from utilityFunctions import *

In [2]:
# Working directory: every input/output file name below is relative to it.
working_dir = "./ProcessedInputData"
# Resolve the relative path only once per kernel session.  After the first
# os.chdir() the relative path is no longer valid from the new cwd, so simply
# re-running this cell would raise FileNotFoundError (or descend one level
# further); re-using the absolute path makes the cell safe to re-run.
if "_working_dir_abs" not in globals():
    _working_dir_abs = os.path.abspath(working_dir)
os.chdir(_working_dir_abs)

In [3]:
# ---- Inputs (all relative to the working directory) ----
# main input table (read into df100)
fileInput1 = "OSE_Points_of_Diversion.csv"
# look-up table: water sources
inp_wtrsrs = "watersources.csv"
# look-up table: sites
inp_sitdim = "sites.csv"

# ---- Output ----
# water allocations table written at the end of the notebook
out_alloc = "waterallocations.csv"

In [4]:
# ---- WaDE columns ----
#
# The following fields differ between the table here (edited by DPL) and the
# one on the schema website:
# http://schema.westernstateswater.org/tables/Input_AllocationAmounts_fact.html
#
#   names used in this notebook:
#       BeneficialUseCategory, PrimaryUseCategory, AllocationTimeframeStart, AllocationTimeframeEnd, " "
#   the other naming (presumably the schema website's -- confirm):
#       BeneficialUseCategoryCV, PrimaryUseCategoryCV, TimeframeStartDate, TimeframeEndDate, Geometry
#
# UUIDs: add UUIDs for all dim tables
# (OrganizationUUID, SiteUUID, VariableSpecificUUID, WaterSourceUUID, MethodUUID)
columns = [
    "OrganizationUUID",
    "SiteUUID",
    "VariableSpecificUUID",
    "WaterSourceUUID",
    "MethodUUID",
    "PrimaryUseCategory",
    "BeneficialUseCategory",
    "AllocationNativeID",
    "AllocationTypeCV",
    "AllocationOwner",
    "AllocationApplicationDate",
    "AllocationPriorityDate",
    "AllocationLegalStatusCV",
    "AllocationCropDutyAmount",
    "AllocationExpirationDate",
    "AllocationChangeApplicationIndicator",
    "LegacyAllocationIDs",
    "AllocationBasisCV",
    "AllocationTimeframeStart",
    "AllocationTimeframeEnd",
    "AllocationAmount",
    "AllocationMaximum",
    "PopulationServed",
    "PowerGeneratedGWh",
    "IrrigatedAcreage",
    "AllocationCommunityWaterSupplySystem",
    "AllocationSDWISIdentifierCV",
    "AllocationAssociatedWithdrawalSiteIDs",
    "AllocationAssociatedConsumptiveUseSiteIDs",
    "WaterAllocationNativeURL",
    "CustomerTypeCV",
    "IrrigationMethodCV",
    "CropTypeCV",
    "CommunityWaterSupplySystem",
    "DataPublicationDate",
    "DataPublicationDOI",
]

# Placeholder: data types per column name could be listed here, but that has
# not been needed so far.
dtypesx = ['']

In [5]:
# ---- Target DataFrame ----
# Starts without rows; it only carries the WaDE column layout defined in `columns`.
# TODO: assumes dtypes inferred from CO file
outdf100 = pd.DataFrame(data=None, columns=columns)

In [6]:
print("Reading inputs...")

# Read the main table and the two look-up tables.
# TODO: we are joining 'on-left': keep all rows of the master table (check if this needs to be refined)
#
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk.  The warnings captured for these reads look like
# the mixed-type DtypeWarning (presumably -- the warning text is truncated);
# chunk-wise inference can leave e.g. 928 and '928' in the same column, which
# would defeat the later de-duplication and look-ups.

# main table
df100 = pd.read_csv(fileInput1, encoding="ISO-8859-1", low_memory=False)  # or alternatively encoding="utf-8"
print(len(df100.index))

# For quick testing, uncomment to run on the first rows only:
# df100 = df100.head(10000)

# water sources look up
df400 = pd.read_csv(inp_wtrsrs, encoding="ISO-8859-1", low_memory=False)
# Keep one row per WaterSourceName; not necessary once the water sources table
# itself is refined to remove duplicates.
df400 = df400.drop_duplicates(subset=['WaterSourceName'])

# sites look up
df500 = pd.read_csv(inp_sitdim, encoding="ISO-8859-1", low_memory=False)

Reading inputs...


  interactivity=interactivity, compiler=compiler, result=result)


234660


  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
print("Dropping duplicates...")

# Keep a single row per permit number ('pod_nbr') and renumber the index from 0.
# NOTE(review): uniqueness is keyed on 'pod_nbr' alone -- confirm that numbers
# are not reused across 'pod_basin' values.
df100 = df100.drop_duplicates(subset=['pod_nbr']).reset_index(drop=True)

print(len(df100.index))

Dropping duplicates...
83915


In [8]:
print("Adding SiteUUID...")


def _site_uuid_for_row(row):
    """Return assignSiteID's result for this row's permit number, using the sites look-up (df500)."""
    return assignSiteID(row['pod_nbr'], df500)


# Create the column blank first, then fill it row by row.
df100['SiteUUID'] = ''
df100['SiteUUID'] = df100.apply(_site_uuid_for_row, axis=1)

df100

Adding SiteUUID...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,zip,contact_ln,contact_fn,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced,SiteUUID
0,1,1,B,928,,,,11N,10W,22,...,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1,"NM_1,NM_34117,NM_43965,NM_48461,NM_64672,NM_66..."
1,2,2,B,691,,,,10N,10W,03,...,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_2,NM_851,NM_21756,NM_29682,NM_35403,NM_3618..."
2,3,6,B,1077,,,,12N,12W,06,...,88240,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_3,NM_38407,NM_47436,NM_51079,NM_65205,NM_68..."
3,4,7,B,735,,,,13N,08W,23,...,87050,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_4,NM_11155,NM_29875,NM_35466,NM_43151,NM_43..."
4,5,8,B,1094,,,,09N,12W,14,...,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_5,NM_4468,NM_13073,NM_47438,NM_49146,NM_521..."
5,6,9,B,1322,,,,14N,11W,19,...,87045,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_6,NM_26419,NM_29687,NM_47141,NM_56346,NM_67..."
6,7,10,B,1250,,,,12N,12W,06,...,87120,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_7,NM_4488,NM_45081,NM_55438,NM_66537,NM_774..."
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,87503,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_8,NM_468,NM_500,NM_691,NM_35493,NM_45400,NM..."
8,9,12,B,681,,,,12N,08W,36,...,87021,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_9,NM_17682,NM_35467,NM_38418,NM_42251,NM_43..."
9,10,13,B,1290,,,,11N,10W,16,...,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_10,NM_35,NM_10518,NM_29884,NM_36121,NM_3842..."


In [9]:
print("Water sources...")


def _clean_pod_name(row):
    """Return the row's 'pod_name' without surrounding whitespace, or 'Unspecificed' when it is blank."""
    # NOTE(review): the spelling 'Unspecificed' is kept exactly; presumably it
    # has to match an entry in the water sources look-up -- confirm before changing.
    name = str(row['pod_name']).strip()
    if name == '':
        return 'Unspecificed'
    return name


def _water_source_uuid_for_row(row):
    """Return assignWaterSourceID's result for this row's (cleaned) 'pod_name', using df400."""
    return assignWaterSourceID(row['pod_name'], df400)


# Turn every NaN in the frame into an empty string and add the blank target column.
df100 = df100.replace(np.nan, '')
df100['WaterSourceUUID'] = ''

# Normalise the source name first; the look-up below reads the cleaned value.
df100['pod_name'] = df100.apply(_clean_pod_name, axis=1)
df100['WaterSourceUUID'] = df100.apply(_water_source_uuid_for_row, axis=1)

df100

Water sources...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,contact_ln,contact_fn,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced,SiteUUID,WaterSourceUUID
0,1,1,B,928,,,Unspecificed,11N,10W,22,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1,"NM_1,NM_34117,NM_43965,NM_48461,NM_64672,NM_66...",NM_1
1,2,2,B,691,,,Unspecificed,10N,10W,03,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_2,NM_851,NM_21756,NM_29682,NM_35403,NM_3618...",NM_1
2,3,6,B,1077,,,Unspecificed,12N,12W,06,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_3,NM_38407,NM_47436,NM_51079,NM_65205,NM_68...",NM_1
3,4,7,B,735,,,Unspecificed,13N,08W,23,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_4,NM_11155,NM_29875,NM_35466,NM_43151,NM_43...",NM_1
4,5,8,B,1094,,,Unspecificed,09N,12W,14,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_5,NM_4468,NM_13073,NM_47438,NM_49146,NM_521...",NM_1
5,6,9,B,1322,,,Unspecificed,14N,11W,19,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_6,NM_26419,NM_29687,NM_47141,NM_56346,NM_67...",NM_1
6,7,10,B,1250,,,Unspecificed,12N,12W,06,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_7,NM_4488,NM_45081,NM_55438,NM_66537,NM_774...",NM_1
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_8,NM_468,NM_500,NM_691,NM_35493,NM_45400,NM...",NM_2
8,9,12,B,681,,,Unspecificed,12N,08W,36,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_9,NM_17682,NM_35467,NM_38418,NM_42251,NM_43...",NM_1
9,10,13,B,1290,,,Unspecificed,11N,10W,16,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_10,NM_35,NM_10518,NM_29884,NM_36121,NM_3842...",NM_1


In [10]:
print("Beneficial uses...")


def _beneficial_use_for_row(row):
    """Return assignBenUseCategoryNM's result for this row's 'use' value."""
    return assignBenUseCategoryNM(row['use'])


# Create the column blank first, then fill it row by row.
df100['BeneficialUseCategory'] = ''
df100['BeneficialUseCategory'] = df100.apply(_beneficial_use_for_row, axis=1)

df100

Beneficial uses...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,contact_fn,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced,SiteUUID,WaterSourceUUID,BeneficialUseCategory
0,1,1,B,928,,,Unspecificed,11N,10W,22,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1,"NM_1,NM_34117,NM_43965,NM_48461,NM_64672,NM_66...",NM_1,72-12-1 domestic one household
1,2,2,B,691,,,Unspecificed,10N,10W,03,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_2,NM_851,NM_21756,NM_29682,NM_35403,NM_3618...",NM_1,72-12-1 domestic one household
2,3,6,B,1077,,,Unspecificed,12N,12W,06,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_3,NM_38407,NM_47436,NM_51079,NM_65205,NM_68...",NM_1,72-12-1 domestic one household
3,4,7,B,735,,,Unspecificed,13N,08W,23,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_4,NM_11155,NM_29875,NM_35466,NM_43151,NM_43...",NM_1,72-12-1 domestic one household
4,5,8,B,1094,,,Unspecificed,09N,12W,14,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_5,NM_4468,NM_13073,NM_47438,NM_49146,NM_521...",NM_1,72-12-1 domestic one household
5,6,9,B,1322,,,Unspecificed,14N,11W,19,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_6,NM_26419,NM_29687,NM_47141,NM_56346,NM_67...",NM_1,72-12-1 domestic one household
6,7,10,B,1250,,,Unspecificed,12N,12W,06,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_7,NM_4488,NM_45081,NM_55438,NM_66537,NM_774...",NM_1,72-12-1 domestic one household
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_8,NM_468,NM_500,NM_691,NM_35493,NM_45400,NM...",NM_2,72-12-1 domestic one household
8,9,12,B,681,,,Unspecificed,12N,08W,36,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_9,NM_17682,NM_35467,NM_38418,NM_42251,NM_43...",NM_1,72-12-1 domestic one household
9,10,13,B,1290,,,Unspecificed,11N,10W,16,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_10,NM_35,NM_10518,NM_29884,NM_36121,NM_3842...",NM_1,72-12-1 domestic one household


In [11]:
print("Allocation Legal Status...")


def _legal_status_for_row(row):
    """Return assignallocLegalStatausCVNM's result for this row's 'status' value."""
    return assignallocLegalStatausCVNM(row['status'])


# Create the column blank first, then fill it row by row.
df100['AllocationLegalStatusCV'] = ''
df100['AllocationLegalStatusCV'] = df100.apply(_legal_status_for_row, axis=1)

df100

Allocation Legal Status...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced,SiteUUID,WaterSourceUUID,BeneficialUseCategory,AllocationLegalStatusCV
0,1,1,B,928,,,Unspecificed,11N,10W,22,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1,"NM_1,NM_34117,NM_43965,NM_48461,NM_64672,NM_66...",NM_1,72-12-1 domestic one household,Permit
1,2,2,B,691,,,Unspecificed,10N,10W,03,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_2,NM_851,NM_21756,NM_29682,NM_35403,NM_3618...",NM_1,72-12-1 domestic one household,Permit
2,3,6,B,1077,,,Unspecificed,12N,12W,06,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_3,NM_38407,NM_47436,NM_51079,NM_65205,NM_68...",NM_1,72-12-1 domestic one household,Expired
3,4,7,B,735,,,Unspecificed,13N,08W,23,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_4,NM_11155,NM_29875,NM_35466,NM_43151,NM_43...",NM_1,72-12-1 domestic one household,Permit
4,5,8,B,1094,,,Unspecificed,09N,12W,14,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_5,NM_4468,NM_13073,NM_47438,NM_49146,NM_521...",NM_1,72-12-1 domestic one household,Permit
5,6,9,B,1322,,,Unspecificed,14N,11W,19,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_6,NM_26419,NM_29687,NM_47141,NM_56346,NM_67...",NM_1,72-12-1 domestic one household,Expired
6,7,10,B,1250,,,Unspecificed,12N,12W,06,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_7,NM_4488,NM_45081,NM_55438,NM_66537,NM_774...",NM_1,72-12-1 domestic one household,Permit
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_8,NM_468,NM_500,NM_691,NM_35493,NM_45400,NM...",NM_2,72-12-1 domestic one household,Permit
8,9,12,B,681,,,Unspecificed,12N,08W,36,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_9,NM_17682,NM_35467,NM_38418,NM_42251,NM_43...",NM_1,72-12-1 domestic one household,Permit
9,10,13,B,1290,,,Unspecificed,11N,10W,16,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_10,NM_35,NM_10518,NM_29884,NM_36121,NM_3842...",NM_1,72-12-1 domestic one household,Permit


In [12]:
print("AllocationOwner...")


def _owner_for_row(row):
    """Return assignownerName's result for this row's owner last name and first name."""
    return assignownerName(row['own_lname'], row['own_fname'])


# Create the column blank first, then fill it row by row.
df100['AllocationOwner'] = ''
df100['AllocationOwner'] = df100.apply(_owner_for_row, axis=1)

df100

AllocationOwner...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,in_state,podlocdate,loc_error,wr_count,replaced,SiteUUID,WaterSourceUUID,BeneficialUseCategory,AllocationLegalStatusCV,AllocationOwner
0,1,1,B,928,,,Unspecificed,11N,10W,22,...,1,2019-10-01T00:00:00.000Z,0,1,1,"NM_1,NM_34117,NM_43965,NM_48461,NM_64672,NM_66...",NM_1,72-12-1 domestic one household,Permit,"GALLEGOS, BENNIE G"
1,2,2,B,691,,,Unspecificed,10N,10W,03,...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_2,NM_851,NM_21756,NM_29682,NM_35403,NM_3618...",NM_1,72-12-1 domestic one household,Permit,"JICHA, RON"
2,3,6,B,1077,,,Unspecificed,12N,12W,06,...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_3,NM_38407,NM_47436,NM_51079,NM_65205,NM_68...",NM_1,72-12-1 domestic one household,Expired,"SHERFICK, GEORGE"
3,4,7,B,735,,,Unspecificed,13N,08W,23,...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_4,NM_11155,NM_29875,NM_35466,NM_43151,NM_43...",NM_1,72-12-1 domestic one household,Permit,"CANDELARIA, EUCARIO"
4,5,8,B,1094,,,Unspecificed,09N,12W,14,...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_5,NM_4468,NM_13073,NM_47438,NM_49146,NM_521...",NM_1,72-12-1 domestic one household,Permit,"VANCE, WILLIAM A."
5,6,9,B,1322,,,Unspecificed,14N,11W,19,...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_6,NM_26419,NM_29687,NM_47141,NM_56346,NM_67...",NM_1,72-12-1 domestic one household,Expired,"NORRIS, PAMELA J."
6,7,10,B,1250,,,Unspecificed,12N,12W,06,...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_7,NM_4488,NM_45081,NM_55438,NM_66537,NM_774...",NM_1,72-12-1 domestic one household,Permit,"HARRISON, JOHN F."
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_8,NM_468,NM_500,NM_691,NM_35493,NM_45400,NM...",NM_2,72-12-1 domestic one household,Permit,NEW MEXICO E.I.A.
8,9,12,B,681,,,Unspecificed,12N,08W,36,...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_9,NM_17682,NM_35467,NM_38418,NM_42251,NM_43...",NM_1,72-12-1 domestic one household,Permit,"LACKEY, SAM"
9,10,13,B,1290,,,Unspecificed,11N,10W,16,...,1,2019-10-01T00:00:00.000Z,0,1,0,"NM_10,NM_35,NM_10518,NM_29884,NM_36121,NM_3842...",NM_1,72-12-1 domestic one household,Permit,"LYNN, SAMMY D."


In [13]:
print("Allocation priority date...")


def _priority_date_for_row(row):
    """Return formatDateString's result for this row's 'finish_dat' value."""
    return formatDateString(row['finish_dat'])


# The priority date is taken from the 'finish_dat' column.
# NOTE(review): confirm that 'finish_dat' is the right source field for the priority date.
df100['AllocationPriorityDate'] = ''
df100['AllocationPriorityDate'] = df100.apply(_priority_date_for_row, axis=1)

df100

Allocation priority date...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,podlocdate,loc_error,wr_count,replaced,SiteUUID,WaterSourceUUID,BeneficialUseCategory,AllocationLegalStatusCV,AllocationOwner,AllocationPriorityDate
0,1,1,B,928,,,Unspecificed,11N,10W,22,...,2019-10-01T00:00:00.000Z,0,1,1,"NM_1,NM_34117,NM_43965,NM_48461,NM_64672,NM_66...",NM_1,72-12-1 domestic one household,Permit,"GALLEGOS, BENNIE G",
1,2,2,B,691,,,Unspecificed,10N,10W,03,...,2019-10-01T00:00:00.000Z,0,1,0,"NM_2,NM_851,NM_21756,NM_29682,NM_35403,NM_3618...",NM_1,72-12-1 domestic one household,Permit,"JICHA, RON",06/19/1979
2,3,6,B,1077,,,Unspecificed,12N,12W,06,...,2019-10-01T00:00:00.000Z,0,1,0,"NM_3,NM_38407,NM_47436,NM_51079,NM_65205,NM_68...",NM_1,72-12-1 domestic one household,Expired,"SHERFICK, GEORGE",
3,4,7,B,735,,,Unspecificed,13N,08W,23,...,2019-10-01T00:00:00.000Z,0,1,0,"NM_4,NM_11155,NM_29875,NM_35466,NM_43151,NM_43...",NM_1,72-12-1 domestic one household,Permit,"CANDELARIA, EUCARIO",07/03/1980
4,5,8,B,1094,,,Unspecificed,09N,12W,14,...,2019-10-01T00:00:00.000Z,0,1,0,"NM_5,NM_4468,NM_13073,NM_47438,NM_49146,NM_521...",NM_1,72-12-1 domestic one household,Permit,"VANCE, WILLIAM A.",09/30/1985
5,6,9,B,1322,,,Unspecificed,14N,11W,19,...,2019-10-01T00:00:00.000Z,0,1,0,"NM_6,NM_26419,NM_29687,NM_47141,NM_56346,NM_67...",NM_1,72-12-1 domestic one household,Expired,"NORRIS, PAMELA J.",06/15/2001
6,7,10,B,1250,,,Unspecificed,12N,12W,06,...,2019-10-01T00:00:00.000Z,0,1,0,"NM_7,NM_4488,NM_45081,NM_55438,NM_66537,NM_774...",NM_1,72-12-1 domestic one household,Permit,"HARRISON, JOHN F.",01/06/1993
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,2019-10-01T00:00:00.000Z,0,1,0,"NM_8,NM_468,NM_500,NM_691,NM_35493,NM_45400,NM...",NM_2,72-12-1 domestic one household,Permit,NEW MEXICO E.I.A.,08/30/1977
8,9,12,B,681,,,Unspecificed,12N,08W,36,...,2019-10-01T00:00:00.000Z,0,1,0,"NM_9,NM_17682,NM_35467,NM_38418,NM_42251,NM_43...",NM_1,72-12-1 domestic one household,Permit,"LACKEY, SAM",05/15/1979
9,10,13,B,1290,,,Unspecificed,11N,10W,16,...,2019-10-01T00:00:00.000Z,0,1,0,"NM_10,NM_35,NM_10518,NM_29884,NM_36121,NM_3842...",NM_1,72-12-1 domestic one household,Permit,"LYNN, SAMMY D.",04/11/1995


In [14]:
print("Copying all columns...")

# (WaDE destination column, df100 source column) pairs; values are copied one-to-one.
# Destination columns with no source yet (their candidate sources are noted
# for reference): AllocationTypeCV <- PERMIT_TYPE, AllocationApplicationDate,
# AllocationAmount, IrrigatedAcreage <- Areas_of_Use.SHAPE.AREA,
# AllocationCropDutyAmount <- IRRIGATION_DEPLETION,
# AllocationExpirationDate <- DATE_TERMINATED,
# AllocationTimeframeStart <- USE_BEG_DATE, AllocationTimeframeEnd <- USE_END_DATE.
columnPairs = [
    ("SiteUUID", "SiteUUID"),
    ("WaterSourceUUID", "WaterSourceUUID"),
    ("AllocationNativeID", "OBJECTID"),
    ("AllocationLegalStatusCV", "AllocationLegalStatusCV"),
    ("BeneficialUseCategory", "BeneficialUseCategory"),
    ("AllocationOwner", "AllocationOwner"),
    ("AllocationPriorityDate", "AllocationPriorityDate"),
    ("AllocationMaximum", "total_div"),
]
destCols = [pair[0] for pair in columnPairs]
sourCols = [pair[1] for pair in columnPairs]

outdf100[destCols] = df100[sourCols]

Copying all columns...


In [15]:
print("Hard coded...")

# Constant values shared by every output row.
# Columns are set with [] instead of attribute assignment: `df.name = value`
# only updates a column that already exists; for any other name pandas merely
# sets a Python attribute on the DataFrame object, which never reaches the CSV.
outdf100["OrganizationUUID"] = "NMOSE"
outdf100["VariableSpecificUUID"] = "NMOSE Allocation All"
outdf100["MethodUUID"] = "NM_WaterAllocation"
outdf100["AllocationBasisCV"] = "Unknown"
# TODO: check this later
outdf100["PrimaryUseCategory"] = "Irrigation"
# Fix: these values used to be assigned to `TimeframeStart` / `TimeframeEnd`,
# which are not columns of the output table, so AllocationTimeframeStart and
# AllocationTimeframeEnd were silently left empty.
outdf100["AllocationTimeframeStart"] = "01/01"
outdf100["AllocationTimeframeEnd"] = "12/31"
# Publication date is the date this code is run (MM/DD/YYYY).
outdf100["DataPublicationDate"] = datetime.now().strftime('%m/%d/%Y')

Hard coded...


In [16]:
print("Droping null allocations...")


def _is_blank(values):
    """Return a boolean mask that is True where a value is NaN/None or an empty (whitespace-only) string."""
    # astype(str) keeps the string comparison valid for numeric columns too.
    return values.isna() | (values.astype(str).str.strip() == '')


# If both AllocationAmount and AllocationMaximum are missing, drop the row and
# save it to waterallocations_missing.csv.
# Fix: the previous `== ''` test never matched NaN.  AllocationAmount is not
# populated anywhere in this notebook (it stays NaN), so no row could ever be
# purged.
purgeMask = _is_blank(outdf100["AllocationAmount"]) & _is_blank(outdf100["AllocationMaximum"])
outdf100purge = outdf100.loc[purgeMask]
if len(outdf100purge.index) > 0:
    outdf100purge.to_csv('waterallocations_missing.csv')    # index is written on purpose (index=False not set)
    outdf100 = outdf100.drop(outdf100purge.index).reset_index(drop=True)

Droping null allocations...


  result = method(y)


In [17]:
print("Droping null SiteUUIDs...")

# Remove the rows whose SiteUUID is an empty string, then renumber the index.
blankSiteMask = outdf100["SiteUUID"] == ''
outdf100nullID = outdf100.loc[blankSiteMask]
if blankSiteMask.any():
    outdf100 = outdf100.drop(outdf100nullID.index).reset_index(drop=True)

Droping null SiteUUIDs...


In [18]:
print("Droping null Priority date...")

# Remove the rows whose AllocationPriorityDate is an empty string, then renumber the index.
blankPriorityMask = outdf100["AllocationPriorityDate"] == ''
outdf100nullPR = outdf100.loc[blankPriorityMask]
if blankPriorityMask.any():
    outdf100 = outdf100.drop(outdf100nullPR.index).reset_index(drop=True)

Droping null Priority date...


In [19]:
print("Droping null WaterSourceUUID ...")

# Remove the rows whose WaterSourceUUID is an empty string, then renumber the index.
# (The selected rows are kept under the name outdf100nullPR, as before.)
blankSourceMask = outdf100["WaterSourceUUID"] == ''
outdf100nullPR = outdf100.loc[blankSourceMask]
if blankSourceMask.any():
    outdf100 = outdf100.drop(outdf100nullPR.index).reset_index(drop=True)

Droping null WaterSourceUUID ...


In [20]:
print("Droping duplicates...")

# Safety net: fully identical rows are saved for inspection and then removed
# (the first occurrence of each is kept).
duplicateMask = outdf100.duplicated()
outdf100Duplicated = outdf100.loc[duplicateMask]
if duplicateMask.any():
    outdf100Duplicated.to_csv("waterallocations_duplicaterows.csv")  # index is written (index=False not set)
    outdf100 = outdf100.drop_duplicates().reset_index(drop=True)

Droping duplicates...


In [21]:
print("Checking required is not null...")

# Columns that must not be blank (SiteUUID is not part of this check).
requiredCols = ["OrganizationUUID", "VariableSpecificUUID", "WaterSourceUUID",
                "MethodUUID", "AllocationPriorityDate"]

# A row is reported when at least one required column holds an empty string.
blankRequiredMask = (outdf100[requiredCols] == '').any(axis=1)
outdf100_nullMand = outdf100.loc[blankRequiredMask]
if blankRequiredMask.any():
    outdf100_nullMand.to_csv('waterallocations_mandatoryFieldMissing.csv')  # index is written (index=False not set)
# TODO: purge these rows if anything is missing? For now they are left to be inspected.

Checking required is not null...


In [22]:
print("Writing outputs...")

# Save the final allocations table as UTF-8, without the index column.
outdf100.to_csv(path_or_buf=out_alloc, encoding="utf-8", index=False)

print("Done Water Allocation")

Writing outputs...
Done Water Allocation


In [3]:
##### Do not run the following with the rest of the code (it is for inspection)
print("Long site ids...")

# Input: an allocations file to be filtered
in_alloc = "waterallocations_long.csv"
outdf100 = pd.read_csv(in_alloc, encoding="ISO-8859-1")  # or alternatively encoding="utf-8"
print(len(outdf100.index))

# Rows whose SiteUUID text is longer than this many characters are removed.
maxSiteUUIDLen = 500
# astype(str) first: a blank SiteUUID cell comes back from read_csv as NaN
# (a float), and calling len() on it raised TypeError.
tooLongMask = outdf100['SiteUUID'].astype(str).str.len() > maxSiteUUIDLen
outdf100Long = outdf100[tooLongMask]
if len(outdf100Long.index) > 0:
    print("There are rows with too long siteids")
    outdf100Long.to_csv("waterallocations_longsiteid.csv")  # index is written (index=False not set)
    outdf100 = outdf100[~tooLongMask].reset_index(drop=True)

print("Writing outputs...")
# Output: water allocation table without the over-long rows
out_alloc = "waterallocations.csv"
outdf100.to_csv(out_alloc, index=False, encoding="utf-8")

print("Done Water Allocation")

Long site ids...
60926
There are rows with too long siteids
Writing outputs...
Done Water Allocation
