In [1]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import os
import beneficialUseDictionary
from utilityFunctions import *

In [2]:
# Working directory: the bare file names used by this notebook resolve against it.
working_dir = "./ProcessedInputData"
# os.chdir with a relative path is not idempotent: running this cell a second
# time would look for ProcessedInputData inside ProcessedInputData and raise
# FileNotFoundError. Only change directory when we are not already in it.
if os.path.basename(os.getcwd()) != os.path.basename(os.path.normpath(working_dir)):
    os.chdir(working_dir)

In [3]:
# Input file (resolved against the current working directory).
# The literal used to end with a stray space ("...csv "), which fails to open
# on platforms that do not trim trailing blanks from paths.
fileInput1 = "OSE_Points_of_Diversion.csv"

# Output file for the water sources table.
out_put = "watersources.csv"

In [4]:
# WaDE water sources table: output column names, in output order.
columns = [
    'WaterSourceUUID',
    'WaterSourceNativeID',
    'WaterSourceName',
    'WaterSourceTypeCV',
    'WaterQualityIndicatorCV',
    'GNISFeatureNameCV',
    'Geometry',
]

# Tab-separated type names, split over two strings.
# NOTE(review): not referenced by any other visible cell, and it lists eight
# type names for seven columns -- confirm whether it is still needed.
dtypesx = [
    'BigInt	NVarChar(250)	NVarChar(250)	NVarChar(250)	NVarChar(100)	NVarChar(100)',
    'NVarChar(250)	Geometry',
]

In [5]:
# Target frame: starts with no rows and only the WaDE column layout.

# TODO: assumes dtypes inferred from CO file
outdf100 = pd.DataFrame(data=None, columns=columns)

In [6]:
print("Reading inputs...")

# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. The saved output of this cell contains what looks
# like pandas' mixed-types DtypeWarning, which the chunked default can emit.
df100 = pd.read_csv(fileInput1, encoding="ISO-8859-1", low_memory=False)  # alternatively encoding="utf-8"

print(len(df100.index))

# For quick test runs, uncomment to keep only the first 10000 rows.
#df100 = df100.head(10000)

#df100 = df100.replace('', np.nan)
df100.head(5)

Reading inputs...


  interactivity=interactivity, compiler=compiler, result=result)


234660


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,state,zip,contact_ln,contact_fn,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced
0,1,1,B,928,,,,11N,10W,22,...,NM,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1
1,2,2,B,691,,,,10N,10W,3,...,NM,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0
2,3,6,B,1077,,,,12N,12W,6,...,NM,88240,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0
3,4,7,B,735,,,,13N,08W,23,...,NM,87050,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0
4,5,8,B,1094,,,,09N,12W,14,...,NM,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0


In [None]:
# Show every input column name as a plain Python list.
df100.columns.tolist()

In [7]:
print ("Ground and Surface Water source types")

# The former bare df100.assign(...) calls were removed: assign returns a new
# frame, so their discarded results never changed df100.

# Groundwater type: pass each "grnd_wtr_s" value through
# assignGroundWaterSourceTypeCV (presumably provided by the utilityFunctions
# star-import -- its mapping rules are not visible here).
df100["GroundWaterSourceTypeCV"] = df100["grnd_wtr_s"].apply(assignGroundWaterSourceTypeCV)

# Surface water flag: 'Surface Water' when the "surface_co" value, truncated to
# an integer, is positive; otherwise ''. Missing or non-numeric values are
# treated as 0 instead of raising from int().
surfaceCount = pd.to_numeric(df100["surface_co"], errors="coerce").fillna(0).astype(int)
df100["SurfaceWaterSourceTypeCV"] = np.where(surfaceCount > 0, 'Surface Water', '')

# Preview only; the full frame has a few hundred thousand rows.
df100.head(10)

Ground and Surface Water source types


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,contact_ln,contact_fn,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced,GroundWaterSourceTypeCV,SurfaceWaterSourceTypeCV
0,1,1,B,928,,,,11N,10W,22,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1,Groundwater/Shallow,
1,2,2,B,691,,,,10N,10W,03,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,
2,3,6,B,1077,,,,12N,12W,06,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,,
3,4,7,B,735,,,,13N,08W,23,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,
4,5,8,B,1094,,,,09N,12W,14,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,
5,6,9,B,1322,,,,14N,11W,19,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Dry,
6,7,10,B,1250,,,,12N,12W,06,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,
8,9,12,B,681,,,,12N,08W,36,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,
9,10,13,B,1290,,,,11N,10W,16,...,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,


In [8]:
print ("Water source type")

# The former bare df100.assign(WaterSourceTypeCV='') call was removed: its
# result was discarded, so it had no effect.

groundType = df100['GroundWaterSourceTypeCV']
surfaceType = df100['SurfaceWaterSourceTypeCV']
hasGround = groundType != ''
hasSurface = surfaceType != ''

# Same decision table as the previous row-by-row loop, applied column-wise.
# Start from "Unknown" and overwrite in increasing priority:
#   surface only    -> surface type
#   ground only     -> ground type
#   ground+surface  -> "<ground>,<surface>"
waterSourceType = pd.Series("Unknown", index=df100.index, dtype=object)
waterSourceType = waterSourceType.mask(hasSurface, surfaceType)
waterSourceType = waterSourceType.mask(hasGround, groundType)
waterSourceType = waterSourceType.mask(hasGround & hasSurface, groundType + "," + surfaceType)

df100['WaterSourceTypeCV'] = waterSourceType

# Preview only.
df100.head(10)

Water source type


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,contact_fn,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced,GroundWaterSourceTypeCV,SurfaceWaterSourceTypeCV,WaterSourceTypeCV
0,1,1,B,928,,,,11N,10W,22,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1,Groundwater/Shallow,,Groundwater/Shallow
1,2,2,B,691,,,,10N,10W,03,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Groundwater/Shallow
2,3,6,B,1077,,,,12N,12W,06,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,,,Unknown
3,4,7,B,735,,,,13N,08W,23,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Groundwater/Shallow
4,5,8,B,1094,,,,09N,12W,14,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Groundwater/Shallow
5,6,9,B,1322,,,,14N,11W,19,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Dry,,Groundwater/Dry
6,7,10,B,1250,,,,12N,12W,06,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Groundwater/Shallow
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Groundwater/Shallow
8,9,12,B,681,,,,12N,08W,36,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Groundwater/Shallow
9,10,13,B,1290,,,,11N,10W,16,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Groundwater/Shallow


In [9]:
print ("Water source name")

# The former bare df100.assign(WaterSourceName='') call was removed: its result
# was discarded, so it had no effect.

# Blank out missing values across the frame so empty names compare equal to ''.
df100 = df100.replace(np.nan, '')

# Use pod_name (unmodified) as the water source name; names that are empty
# after stripping whitespace get the placeholder instead.
# NOTE(review): "Unspecificed" looks like a misspelling of "Unspecified". It is
# kept as-is because the value is written to the output and may be matched
# elsewhere -- confirm before renaming.
nameIsBlank = df100["pod_name"].astype(str).str.strip() == ''
df100["WaterSourceName"] = df100["pod_name"].mask(nameIsBlank, "Unspecificed")

# if name unspecified type unknown
df100.loc[df100['WaterSourceName'] == "Unspecificed", 'WaterSourceTypeCV'] = 'Unknown'

# Preview only.
df100.head(10)

Water source name


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced,GroundWaterSourceTypeCV,SurfaceWaterSourceTypeCV,WaterSourceTypeCV,WaterSourceName
0,1,1,B,928,,,,11N,10W,22,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1,Groundwater/Shallow,,Unknown,Unspecificed
1,2,2,B,691,,,,10N,10W,03,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Unknown,Unspecificed
2,3,6,B,1077,,,,12N,12W,06,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,,,Unknown,Unspecificed
3,4,7,B,735,,,,13N,08W,23,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Unknown,Unspecificed
4,5,8,B,1094,,,,09N,12W,14,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Unknown,Unspecificed
5,6,9,B,1322,,,,14N,11W,19,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Dry,,Unknown,Unspecificed
6,7,10,B,1250,,,,12N,12W,06,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Unknown,Unspecificed
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Groundwater/Shallow,OTERO-3
8,9,12,B,681,,,,12N,08W,36,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Unknown,Unspecificed
9,10,13,B,1290,,,,11N,10W,16,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Groundwater/Shallow,,Unknown,Unspecificed


In [10]:
print ("Map columns")

# Positional pairing: destCols[i] in outdf100 receives srsCols[i] from df100.
destCols = ['WaterSourceName', 'WaterSourceTypeCV']
srsCols = ['WaterSourceName', 'WaterSourceTypeCV']

for destName, srcName in zip(destCols, srsCols):
    outdf100[destName] = df100[srcName]


Map columns


In [11]:
print("Hard coded ...")

# Same constant for every row of the output table.
outdf100["WaterQualityIndicatorCV"] = "Fresh"


Hard coded ...


In [12]:
print("Dropping duplicates...")

# Row count before de-duplication.
rowsBefore = len(outdf100.index)
print(rowsBefore)

# Keep the first row seen for each WaterSourceName, then renumber the index.
# (Other candidate key columns, not used: 'WaterSourceTypeCV','WaterQualityIndicatorCV')
outdf100 = (
    outdf100
    .drop_duplicates(subset=['WaterSourceName'])
    .reset_index(drop=True)
)

# Row count after de-duplication.
rowsAfter = len(outdf100.index)
print(rowsAfter)

outdf100

Dropping duplicates...
234660
7830


Unnamed: 0,WaterSourceUUID,WaterSourceNativeID,WaterSourceName,WaterSourceTypeCV,WaterQualityIndicatorCV,GNISFeatureNameCV,Geometry
0,,,Unspecificed,Unknown,Fresh,,
1,,,OTERO-3,Groundwater/Shallow,Fresh,,
2,,,WR 7,Groundwater/Shallow,Fresh,,
3,,,WR 10,Groundwater/Shallow,Fresh,,
4,,,MJ,Groundwater/Shallow,Fresh,,
5,,,TRCPC-6,Groundwater/Artesian,Fresh,,
6,,,TRCPC-5,Groundwater/Artesian,Fresh,,
7,,,OTERO-2,Groundwater/Shallow,Fresh,,
8,,,DAB,Groundwater/Shallow,Fresh,,
9,,,WR 5,Groundwater/Shallow,Fresh,,


In [14]:
print ("Water source native id and UUID")

#9.12.19 Adel: For water sources table, how about we do an incremental ID? like 1, 2, 3 etc?
# Renumber the index, then assign native IDs 1..N in row order.
outdf100 = outdf100.reset_index(drop=True)
outdf100['WaterSourceNativeID'] = range(1, len(outdf100.index) + 1)

print("Adding UUID...")
# UUID is "NM_<native id>". Built column-wise instead of with a row-wise apply,
# which is slower and, on an empty frame, yields a DataFrame rather than a
# Series and so cannot be assigned to a single column.
outdf100['WaterSourceUUID'] = "NM_" + outdf100['WaterSourceNativeID'].astype(str)

# Columns that were never filled are written out as empty strings, not NaN.
outdf100 = outdf100.replace(np.nan, '')

outdf100

Water source native id and UUID
Adding UUID...


Unnamed: 0,WaterSourceUUID,WaterSourceNativeID,WaterSourceName,WaterSourceTypeCV,WaterQualityIndicatorCV,GNISFeatureNameCV,Geometry
0,NM_1,1,Unspecificed,Unknown,Fresh,,
1,NM_2,2,OTERO-3,Groundwater/Shallow,Fresh,,
2,NM_3,3,WR 7,Groundwater/Shallow,Fresh,,
3,NM_4,4,WR 10,Groundwater/Shallow,Fresh,,
4,NM_5,5,MJ,Groundwater/Shallow,Fresh,,
5,NM_6,6,TRCPC-6,Groundwater/Artesian,Fresh,,
6,NM_7,7,TRCPC-5,Groundwater/Artesian,Fresh,,
7,NM_8,8,OTERO-2,Groundwater/Shallow,Fresh,,
8,NM_9,9,DAB,Groundwater/Shallow,Fresh,,
9,NM_10,10,WR 5,Groundwater/Shallow,Fresh,,


In [15]:
# Persist the water sources table as UTF-8 CSV, without the index column.
outdf100.to_csv(path_or_buf=out_put, encoding="utf-8", index=False)

print("Done watersources")

Done watersources
