## Sites_dim
Code to generate sites.csv as input to the WaDE db for NM (New Mexico) water rights

In [1]:
import numpy as np
import pandas as pd
import os
from datetime import datetime
from dateutil.parser import parse
from pyproj import CRS, Transformer, Proj
from utilityFunctions import *

In [2]:
# working directory
# The kernel changes into this folder, so the relative file names used by later cells
# (the input CSV and every CSV written out) resolve inside it.
# NOTE(review): re-running this cell in the same kernel presumably fails, because the
# relative path is then looked up from inside the folder already entered — confirm.
working_dir = "ProcessedInputData"
os.chdir(working_dir)

In [3]:
# Input files
# Points-of-diversion table loaded by the "Reading inputs" cell.
# The name must not end in whitespace: a trailing blank inside the quotes becomes part
# of the path and the file is then not found on file systems that do not strip it.
fileInput1 = "OSE_Points_of_Diversion.csv"

# output sites
# Name of the final CSV written by the last cell of the notebook.
out_sitdim = 'sites.csv'

In [4]:
# Column headers of the output sites table, in the order they are written out.
# 10.24.19 note: rename 'WaDESiteUUID' to 'SiteUUID'
# (the list below still carries 'WaDESiteUUID').
columns = [
    'WaDESiteUUID',
    'SiteNativeID',
    'SiteName',
    'USGSSiteID',
    'SiteTypeCV',
    'Longitude',
    'Latitude',
    'SitePoint',
    'SiteNativeURL',
    'Geometry',
    'CoordinateMethodCV',
    'CoordinateAccuracy',
    'GNISCodeCV',
    'EPSGCodeCV',
    'NHDNetworkStatusCV',
    'NHDProductCV',
    'NHDUpdateDate',
    'NHDReachCode',
    'NHDMeasureNumber',
    'StateCV',
]

# These are not used currently. Data types inferred from the inputs
# Reference only: tab-separated type names spread over three strings (8 + 7 + 5 = 20
# entries, the same count as `columns`). Nothing in this notebook reads `dtypesx`.
# NOTE(review): presumably the entries follow the order of `columns` — confirm.
dtypesx = ['NVarChar(55)	NVarChar(50)	NVarChar(500)	NVarChar(250)	NVarChar(100)	Double	Double	Geometry',
           'NVarChar(250)	Geometry	NVarChar(100)	NVarChar(255)	NVarChar(50)	NVarChar(50)	NVarChar(50)',
           'NVarChar(50)	Date	NVarChar(50)	NVarChar(50)	NChar(5)']

In [5]:
# create target dataframe

# Empty frame that carries only the output column headers from `columns`;
# the "Direct mapping columns" cell fills it from df100.
# No dtypes are declared here, so each column takes whatever dtype pandas infers
# when values are assigned.
#assumes dtypes inferred from CO file
# NOTE(review): "CO file" is not referenced anywhere else in this notebook — confirm
# whether the remark above still applies.
outdf100=pd.DataFrame(columns=columns)

In [6]:
print("Reading inputs...")

# Load the points-of-diversion table (the single input of this notebook).
# "utf-8" is the alternative encoding should the export change.
read_options = {"encoding": "ISO-8859-1"}
df100 = pd.read_csv(fileInput1, **read_options)

# Row count, printed as a quick check on the load.
row_total = len(df100.index)
print(row_total)

# For a quick test run, truncate the frame here, e.g. df100 = df100.head(10000)

df100.head()

Reading inputs...


  interactivity=interactivity, compiler=compiler, result=result)


234660


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,state,zip,contact_ln,contact_fn,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced
0,1,1,B,928,,,,11N,10W,22,...,NM,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1
1,2,2,B,691,,,,10N,10W,3,...,NM,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0
2,3,6,B,1077,,,,12N,12W,6,...,NM,88240,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0
3,4,7,B,735,,,,13N,08W,23,...,NM,87050,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0
4,5,8,B,1094,,,,09N,12W,14,...,NM,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0


In [7]:
# Show every input column name as a plain Python list.
df100.columns.tolist()

['OBJECTID_1',
 'OBJECTID',
 'pod_basin',
 'pod_nbr',
 'pod_suffix',
 'ref',
 'pod_name',
 'tws',
 'rng',
 'sec',
 'qtr_4th',
 'qtr_16th',
 'qtr_64th',
 'blk',
 'zone',
 'x',
 'y',
 'grant',
 'legal',
 'county',
 'start_date',
 'finish_dat',
 'plug_date',
 'pcw_rcv_da',
 'elevation',
 'depth_well',
 'grnd_wtr_s',
 'percent_sh',
 'depth_wate',
 'log_file_d',
 'sched_date',
 'use_of_wel',
 'pump_type',
 'pump_seria',
 'discharge',
 'aquifer',
 'sys_date',
 'subdiv_nam',
 'subdiv_loc',
 'restrict',
 'lat_deg',
 'lat_min',
 'lat_sec',
 'lon_deg',
 'lon_min',
 'lon_sec',
 'surface_co',
 'estimate_y',
 'pod_status',
 'casing_siz',
 'ditch_name',
 'utm_zone',
 'easting',
 'northing',
 'datum',
 'utm_source',
 'utm_accura',
 'xy_source',
 'xy_accurac',
 'lat_lon_so',
 'lat_lon_ac',
 'tract_nbr',
 'map_nbr',
 'surv_map',
 'other_loc',
 'pod_rec_nb',
 'cfs_start_',
 'cfs_end_md',
 'cfs_cnv_fa',
 'cs_code',
 'wrats_s_id',
 'utm_error',
 'pod_sub_ba',
 'well_tag',
 'static_lev',
 'pod_file',
 'bas

In [8]:
print ("Site names...")

# Blank out missing values so the emptiness test below only has to look for ''.
df100 = df100.replace(np.nan, '')

# SiteName is the ditch name; rows whose ditch name is blank (or whitespace only)
# get the placeholder "Not Provided".
# The former `df100.assign(SiteName='')` was removed: DataFrame.assign returns a new
# frame and that result was never stored, so the call had no effect.
ditch_names = df100["ditch_name"]
has_ditch_name = ditch_names.astype(str).str.strip() != ''
df100["SiteName"] = ditch_names.where(has_ditch_name, "Not Provided")

# Preview only; the full frame has far too many rows to display usefully.
df100.head(10)

Site names...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,zip,contact_ln,contact_fn,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced,SiteName
0,1,1,B,928,,,,11N,10W,22,...,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1,Not Provided
1,2,2,B,691,,,,10N,10W,03,...,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided
2,3,6,B,1077,,,,12N,12W,06,...,88240,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided
3,4,7,B,735,,,,13N,08W,23,...,87050,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided
4,5,8,B,1094,,,,09N,12W,14,...,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided
5,6,9,B,1322,,,,14N,11W,19,...,87045,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided
6,7,10,B,1250,,,,12N,12W,06,...,87120,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,87503,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided
8,9,12,B,681,,,,12N,08W,36,...,87021,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided
9,10,13,B,1290,,,,11N,10W,16,...,87020,,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided


In [9]:
print ("Surface and Ground water SiteTypeCV...")

# The two former `df100.assign(...)` calls were removed: their results were never
# stored, so they had no effect.

# A non-blank 'grnd_wtr_s' marks the row as ground water.
is_ground_water = df100["grnd_wtr_s"].astype(str).str.strip() != ''
df100["GroundWaterSiteTypeCV"] = np.where(is_ground_water, 'Ground Water', '')

# A positive 'surface_co' marks the row as surface water.
# Blank or non-numeric cells are coerced to NaN and therefore count as "not surface
# water" instead of raising, as the previous per-row int() conversion did (earlier
# cells replace NaN with '', which int() cannot parse).
# np.trunc keeps the previous truncating comparison, so e.g. 0.5 is still not > 0.
surface_code = np.trunc(pd.to_numeric(df100["surface_co"], errors="coerce"))
is_surface_water = surface_code > 0
df100["SurfaceWaterSiteTypeCV"] = np.where(is_surface_water, 'Surface Water', '')

# Preview only.
df100.head(10)

Surface and Ground water SiteTypeCV...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,contact_fn,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced,SiteName,GroundWaterSiteTypeCV,SurfaceWaterSiteTypeCV
0,1,1,B,928,,,,11N,10W,22,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1,Not Provided,Ground Water,
1,2,2,B,691,,,,10N,10W,03,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,
2,3,6,B,1077,,,,12N,12W,06,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,,
3,4,7,B,735,,,,13N,08W,23,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,
4,5,8,B,1094,,,,09N,12W,14,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,
5,6,9,B,1322,,,,14N,11W,19,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,
6,7,10,B,1250,,,,12N,12W,06,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,
8,9,12,B,681,,,,12N,08W,36,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,
9,10,13,B,1290,,,,11N,10W,16,...,,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,


In [10]:
print ("SiteTypeCV...")

# Combine the two flag columns built in the previous cell into one SiteTypeCV value.
# The former `df100.assign(SiteTypeCV='')` was removed (its result was never stored),
# and the row-by-row loop is replaced by a single vectorised selection.
has_ground = (df100['GroundWaterSiteTypeCV'] != '').to_numpy()
has_surface = (df100['SurfaceWaterSiteTypeCV'] != '').to_numpy()

# np.select takes the first matching condition, so the "both" case must come first.
df100['SiteTypeCV'] = np.select(
    [has_ground & has_surface, has_ground, has_surface],
    ['Surfacewater_Groundwater', 'Ground Water', 'Surface Water'],
    default="Unknown",  # hard code "Unknown" for SiteTypeCV if it is missing
)

# Preview only.
df100.head(10)

SiteTypeCV...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,nmwrrs_wrs,in_state,podlocdate,loc_error,wr_count,replaced,SiteName,GroundWaterSiteTypeCV,SurfaceWaterSiteTypeCV,SiteTypeCV
0,1,1,B,928,,,,11N,10W,22,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,1,Not Provided,Ground Water,,Ground Water
1,2,2,B,691,,,,10N,10W,03,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,,Ground Water
2,3,6,B,1077,,,,12N,12W,06,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,,,Unknown
3,4,7,B,735,,,,13N,08W,23,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,,Ground Water
4,5,8,B,1094,,,,09N,12W,14,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,,Ground Water
5,6,9,B,1322,,,,14N,11W,19,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,,Ground Water
6,7,10,B,1250,,,,12N,12W,06,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,,Ground Water
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,,Ground Water
8,9,12,B,681,,,,12N,08W,36,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,,Ground Water
9,10,13,B,1290,,,,11N,10W,16,...,http://nmwrrs.ose.state.nm.us/ReportDispatcher...,1,2019-10-01T00:00:00.000Z,0,1,0,Not Provided,Ground Water,,Ground Water


The following three cells generate longitude and latitude. The first two derive lon/lat from the UTM easting and northing. The third reads the longitude/latitude values directly from the input table, but too many rows lack valid lon/lat inputs, so it is better to use the code that converts from UTM coordinates.

In [12]:
print("Project to longitude/ latitude ")

# Projection object for the UTM easting/northing columns. Calling it with
# inverse=True converts projected (x, y) back to (longitude, latitude).
# NOTE(review): `init=` is the legacy pyproj spelling and may emit a deprecation
# warning on newer releases — confirm against the installed version.
utmProj = Proj(init="epsg:26913", proj="utm", zone=13)

# drop rows with no x or y coordinate
df100 = df100.replace(np.nan, '')
dropIndex = df100.loc[(df100['easting'] == '') | (df100['northing'] == '')].index
if len(dropIndex) > 0:
    df100 = df100.drop(dropIndex)
    df100 = df100.reset_index(drop=True)

lonList = []
latList = []

# Walk the two coordinate columns together. The previous loop iterated with
# `index, row` but looked the values up through an unrelated name `ix`, which is
# either undefined on a fresh kernel or left over from another cell (in which case
# every row received the same coordinates).
for x1, y1 in zip(df100['easting'], df100['northing']):
    try:
        lon, lat = utmProj(float(x1), float(y1), inverse=True)
    except (TypeError, ValueError):
        # Coordinate text that cannot be read as a number: keep the row but leave
        # both values blank so the "Dropping empty lat/lon" cell can report it.
        lon, lat = '', ''
    lonList.append(lon)
    latList.append(lat)

df100['Longitude'] = lonList
df100['Latitude'] = latList

# Preview only.
df100.head(10)

Project to longitude/ latitude II 


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,wr_count,replaced,SiteName,GroundWaterSiteTypeCV,SurfaceWaterSiteTypeCV,SiteTypeCV,Longitude,Latitude,Longitude2,Latitude2
0,1,1,B,928,,,,11N,10W,22,...,1,1,Not Provided,Ground Water,,Ground Water,35.162720,-107.882240,-107.687825,32.904489
1,2,2,B,691,,,,10N,10W,03,...,1,0,Not Provided,Ground Water,,Ground Water,35.125275,-107.881248,-107.687825,32.904489
2,3,6,B,1077,,,,12N,12W,06,...,1,0,Not Provided,,,Unknown,35.301357,-108.153120,-107.687825,32.904489
3,4,7,B,735,,,,13N,08W,23,...,1,0,Not Provided,Ground Water,,Ground Water,35.336345,-107.649451,-107.687825,32.904489
4,5,8,B,1094,,,,09N,12W,14,...,1,0,Not Provided,Ground Water,,Ground Water,35.007850,-108.079968,-107.687825,32.904489
5,6,9,B,1322,,,,14N,11W,19,...,1,0,Not Provided,Ground Water,,Ground Water,35.425315,-108.035686,-107.687825,32.904489
6,7,10,B,1250,,,,12N,12W,06,...,1,0,Not Provided,Ground Water,,Ground Water,35.304423,-108.145455,-107.687825,32.904489
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,1,0,Not Provided,Ground Water,,Ground Water,35.316696,-107.812161,-107.687825,32.904489
8,9,12,B,681,,,,12N,08W,36,...,1,0,Not Provided,Ground Water,,Ground Water,35.230103,-107.639806,-107.687825,32.904489
9,10,13,B,1290,,,,11N,10W,16,...,1,0,Not Provided,Ground Water,,Ground Water,35.183262,-107.892067,-107.687825,32.904489


In [None]:
print("Project to longitude/ latitude 2 ")

# Placeholder columns; both are overwritten with the projected values at the end.
df100 = df100.assign(Longitude2='', Latitude2='')

# use pyproj to project to lat lon
crs_from = CRS("EPSG:26913")
crs_to = CRS('EPSG:4326')  # CRS("WGS84")
transformer = Transformer.from_crs(crs_from, crs_to)

# Rows lacking either UTM coordinate cannot be projected: remove them and renumber.
df100 = df100.replace(np.nan, '')
coords_missing = (df100['easting'] == '') | (df100['northing'] == '')
dropIndex = df100.index[coords_missing]
if len(dropIndex) > 0:
    df100 = df100.drop(dropIndex).reset_index(drop=True)

lonList, latList = [], []
for pos in range(len(df100.index)):
    easting = df100.loc[pos, 'easting']
    northing = df100.loc[pos, 'northing']
    try:
        # The result is unpacked as (lat, lon), exactly as this cell has always done.
        lat, lon = transformer.transform(float(easting), float(northing))
    except:
        # Any failure leaves both values blank for this row.
        lon = lat = ''
    lonList.append(lon)
    latList.append(lat)

df100['Longitude2'] = lonList
df100['Latitude2'] = latList

df100

In [13]:
print ("Longitude and latitude 3")


def _dms_to_decimal(degrees, minutes, seconds):
    """Combine degree/minute/second parts into decimal degrees.

    Returns '' when the parts cannot be added as numbers (for example the blank
    strings that replace missing values in earlier cells).
    NOTE(review): the parts are simply summed, no hemisphere sign is applied —
    confirm how the source encodes western longitudes.
    """
    try:
        return degrees + minutes / 60.0 + seconds / 3600.0
    except TypeError:
        return ''


# The former `df100.assign(Longitude3='')` / `df100.assign(Latitude3='')` calls were
# removed: their results were never stored. The bare `except` clauses were narrowed
# to TypeError so that a missing input column now fails loudly instead of silently
# producing an all-blank column.
lonList = [_dms_to_decimal(d, m, s)
           for d, m, s in zip(df100['lon_deg'], df100['lon_min'], df100['lon_sec'])]
latList = [_dms_to_decimal(d, m, s)
           for d, m, s in zip(df100['lat_deg'], df100['lat_min'], df100['lat_sec'])]

df100['Longitude3'] = lonList
df100['Latitude3'] = latList

# Preview only.
df100.head(10)

Longitude and latitude 3


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,SiteName,GroundWaterSiteTypeCV,SurfaceWaterSiteTypeCV,SiteTypeCV,Longitude,Latitude,Longitude2,Latitude2,Longitude3,Latitude3
0,1,1,B,928,,,,11N,10W,22,...,Not Provided,Ground Water,,Ground Water,35.162720,-107.882240,-107.687825,32.904489,0.000000,0.000000
1,2,2,B,691,,,,10N,10W,03,...,Not Provided,Ground Water,,Ground Water,35.125275,-107.881248,-107.687825,32.904489,0.000000,0.000000
2,3,6,B,1077,,,,12N,12W,06,...,Not Provided,,,Unknown,35.301357,-108.153120,-107.687825,32.904489,0.000000,0.000000
3,4,7,B,735,,,,13N,08W,23,...,Not Provided,Ground Water,,Ground Water,35.336345,-107.649451,-107.687825,32.904489,0.000000,0.000000
4,5,8,B,1094,,,,09N,12W,14,...,Not Provided,Ground Water,,Ground Water,35.007850,-108.079968,-107.687825,32.904489,0.000000,0.000000
5,6,9,B,1322,,,,14N,11W,19,...,Not Provided,Ground Water,,Ground Water,35.425315,-108.035686,-107.687825,32.904489,0.000000,0.000000
6,7,10,B,1250,,,,12N,12W,06,...,Not Provided,Ground Water,,Ground Water,35.304423,-108.145455,-107.687825,32.904489,0.000000,0.000000
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,Not Provided,Ground Water,,Ground Water,35.316696,-107.812161,-107.687825,32.904489,0.000000,0.000000
8,9,12,B,681,,,,12N,08W,36,...,Not Provided,Ground Water,,Ground Water,35.230103,-107.639806,-107.687825,32.904489,0.000000,0.000000
9,10,13,B,1290,,,,11N,10W,16,...,Not Provided,Ground Water,,Ground Water,35.183262,-107.892067,-107.687825,32.904489,0.000000,0.000000


Use the CoordinateMethodCV and CoordinateAccuracy derived from the UTM fields, because the input lat/lon values are sparse.

In [14]:
print("coordinatemethodCV UTM...")

# Translate each 'utm_source' value with coordMethCV (not defined in this notebook;
# presumably supplied by the utilityFunctions star-import).
# The former `df100.assign(CoordinateMethodCV='')` was removed: its result was never
# stored, so it had no effect.
df100["CoordinateMethodCV"] = df100['utm_source'].apply(coordMethCV)

# Preview only.
df100.head(10)

coordinatemethodCV UTM...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,GroundWaterSiteTypeCV,SurfaceWaterSiteTypeCV,SiteTypeCV,Longitude,Latitude,Longitude2,Latitude2,Longitude3,Latitude3,CoordinateMethodCV
0,1,1,B,928,,,,11N,10W,22,...,Ground Water,,Ground Water,35.162720,-107.882240,-107.687825,32.904489,0.000000,0.000000,PLSS
1,2,2,B,691,,,,10N,10W,03,...,Ground Water,,Ground Water,35.125275,-107.881248,-107.687825,32.904489,0.000000,0.000000,PLSS
2,3,6,B,1077,,,,12N,12W,06,...,,,Unknown,35.301357,-108.153120,-107.687825,32.904489,0.000000,0.000000,PLSS
3,4,7,B,735,,,,13N,08W,23,...,Ground Water,,Ground Water,35.336345,-107.649451,-107.687825,32.904489,0.000000,0.000000,PLSS
4,5,8,B,1094,,,,09N,12W,14,...,Ground Water,,Ground Water,35.007850,-108.079968,-107.687825,32.904489,0.000000,0.000000,PLSS
5,6,9,B,1322,,,,14N,11W,19,...,Ground Water,,Ground Water,35.425315,-108.035686,-107.687825,32.904489,0.000000,0.000000,PLSS
6,7,10,B,1250,,,,12N,12W,06,...,Ground Water,,Ground Water,35.304423,-108.145455,-107.687825,32.904489,0.000000,0.000000,PLSS
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,Ground Water,,Ground Water,35.316696,-107.812161,-107.687825,32.904489,0.000000,0.000000,PLSS
8,9,12,B,681,,,,12N,08W,36,...,Ground Water,,Ground Water,35.230103,-107.639806,-107.687825,32.904489,0.000000,0.000000,PLSS
9,10,13,B,1290,,,,11N,10W,16,...,Ground Water,,Ground Water,35.183262,-107.892067,-107.687825,32.904489,0.000000,0.000000,PLSS


In [15]:
print("coordinate accuracy UTM...")

# Translate each 'utm_accura' value with coordMethAcc (not defined in this notebook;
# presumably supplied by the utilityFunctions star-import).
# The former `df100.assign(CoordinateAccuracy='')` was removed: its result was never
# stored, so it had no effect.
df100["CoordinateAccuracy"] = df100['utm_accura'].apply(coordMethAcc)

# Preview only.
df100.head(10)

coordinate accuracy UTM...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,SurfaceWaterSiteTypeCV,SiteTypeCV,Longitude,Latitude,Longitude2,Latitude2,Longitude3,Latitude3,CoordinateMethodCV,CoordinateAccuracy
0,1,1,B,928,,,,11N,10W,22,...,,Ground Water,35.162720,-107.882240,-107.687825,32.904489,0.000000,0.000000,PLSS,
1,2,2,B,691,,,,10N,10W,03,...,,Ground Water,35.125275,-107.881248,-107.687825,32.904489,0.000000,0.000000,PLSS,
2,3,6,B,1077,,,,12N,12W,06,...,,Unknown,35.301357,-108.153120,-107.687825,32.904489,0.000000,0.000000,PLSS,
3,4,7,B,735,,,,13N,08W,23,...,,Ground Water,35.336345,-107.649451,-107.687825,32.904489,0.000000,0.000000,PLSS,
4,5,8,B,1094,,,,09N,12W,14,...,,Ground Water,35.007850,-108.079968,-107.687825,32.904489,0.000000,0.000000,PLSS,
5,6,9,B,1322,,,,14N,11W,19,...,,Ground Water,35.425315,-108.035686,-107.687825,32.904489,0.000000,0.000000,PLSS,
6,7,10,B,1250,,,,12N,12W,06,...,,Ground Water,35.304423,-108.145455,-107.687825,32.904489,0.000000,0.000000,PLSS,
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,,Ground Water,35.316696,-107.812161,-107.687825,32.904489,0.000000,0.000000,PLSS,
8,9,12,B,681,,,,12N,08W,36,...,,Ground Water,35.230103,-107.639806,-107.687825,32.904489,0.000000,0.000000,PLSS,
9,10,13,B,1290,,,,11N,10W,16,...,,Ground Water,35.183262,-107.892067,-107.687825,32.904489,0.000000,0.000000,PLSS,


In [16]:
print("coordinatemethodCV Lat/Lon...")

# Same translation as for the UTM source, applied to the lat/lon source column.
# The result lands in a separate "...2" column; the direct-mapping cell further down
# copies only the UTM-based CoordinateMethodCV into the output.
# The former `df100.assign(CoordinateMethodCV2='')` was removed: its result was never
# stored, so it had no effect.
df100["CoordinateMethodCV2"] = df100['lat_lon_so'].apply(coordMethCV)

# Preview only.
df100.head(10)

coordinatemethodCV Lat/Lon...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,SiteTypeCV,Longitude,Latitude,Longitude2,Latitude2,Longitude3,Latitude3,CoordinateMethodCV,CoordinateAccuracy,CoordinateMethodCV2
0,1,1,B,928,,,,11N,10W,22,...,Ground Water,35.162720,-107.882240,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified
1,2,2,B,691,,,,10N,10W,03,...,Ground Water,35.125275,-107.881248,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified
2,3,6,B,1077,,,,12N,12W,06,...,Unknown,35.301357,-108.153120,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified
3,4,7,B,735,,,,13N,08W,23,...,Ground Water,35.336345,-107.649451,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified
4,5,8,B,1094,,,,09N,12W,14,...,Ground Water,35.007850,-108.079968,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified
5,6,9,B,1322,,,,14N,11W,19,...,Ground Water,35.425315,-108.035686,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified
6,7,10,B,1250,,,,12N,12W,06,...,Ground Water,35.304423,-108.145455,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,Ground Water,35.316696,-107.812161,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified
8,9,12,B,681,,,,12N,08W,36,...,Ground Water,35.230103,-107.639806,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified
9,10,13,B,1290,,,,11N,10W,16,...,Ground Water,35.183262,-107.892067,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified


In [17]:
print("coordinate accuracy Lat/Lon...")

# Same translation as for the UTM accuracy, applied to the lat/lon accuracy column.
# The result lands in a separate "...2" column; the direct-mapping cell further down
# copies only the UTM-based CoordinateAccuracy into the output.
# The former `df100.assign(CoordinateAccuracy2='')` was removed: its result was never
# stored, so it had no effect.
df100["CoordinateAccuracy2"] = df100['lat_lon_ac'].apply(coordMethAcc)

# Preview only.
df100.head(10)

coordinate accuracy Lat/Lon...


Unnamed: 0,OBJECTID_1,OBJECTID,pod_basin,pod_nbr,pod_suffix,ref,pod_name,tws,rng,sec,...,Longitude,Latitude,Longitude2,Latitude2,Longitude3,Latitude3,CoordinateMethodCV,CoordinateAccuracy,CoordinateMethodCV2,CoordinateAccuracy2
0,1,1,B,928,,,,11N,10W,22,...,35.162720,-107.882240,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified,
1,2,2,B,691,,,,10N,10W,03,...,35.125275,-107.881248,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified,
2,3,6,B,1077,,,,12N,12W,06,...,35.301357,-108.153120,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified,
3,4,7,B,735,,,,13N,08W,23,...,35.336345,-107.649451,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified,
4,5,8,B,1094,,,,09N,12W,14,...,35.007850,-108.079968,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified,
5,6,9,B,1322,,,,14N,11W,19,...,35.425315,-108.035686,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified,
6,7,10,B,1250,,,,12N,12W,06,...,35.304423,-108.145455,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified,
7,8,11,B,415,O-10,,OTERO-3,13N,09W,32,...,35.316696,-107.812161,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified,
8,9,12,B,681,,,,12N,08W,36,...,35.230103,-107.639806,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified,
9,10,13,B,1290,,,,11N,10W,16,...,35.183262,-107.892067,-107.687825,32.904489,0.000000,0.000000,PLSS,,Unspecified,


In [22]:
print("Direct mapping columns...")

# Output column -> df100 column it is copied from, without any transformation.
direct_map = {
    'SiteNativeID': 'pod_nbr',
    'SiteName': 'SiteName',
    'SiteTypeCV': 'SiteTypeCV',
    'Longitude': 'Longitude',
    'Latitude': 'Latitude',
    'CoordinateMethodCV': 'CoordinateMethodCV',
    'CoordinateAccuracy': 'CoordinateAccuracy',
}
destCols = list(direct_map.keys())
srsCols = list(direct_map.values())

outdf100[destCols] = df100[srsCols]

# Columns that received no value are NaN at this point; turn them into blank cells.
outdf100 = outdf100.replace(np.nan, '')

Direct mapping columns...


In [23]:
print("Dropping empty lat/lon")
# Rows with a blank longitude or latitude are written to a side file for inspection
# and then removed from the output frame.
latlon_blank = (outdf100['Longitude'] == '') | (outdf100['Latitude'] == '')
outdf100purge = outdf100.loc[latlon_blank]
if len(outdf100purge.index) > 0:
    outdf100purge.to_csv('sites_latlon_missing.csv')    #index=False,
    dropIndex = outdf100purge.index
    outdf100 = outdf100.drop(dropIndex).reset_index(drop=True)

Dropping empty lat/lon


In [24]:
print("Dropping duplicates...")
# A site is identified by the combination of native ID, name, type and coordinates
# (lat/lon added to the key on 10.24.19); repeated combinations are removed.
site_key = ['SiteNativeID', 'SiteName', 'SiteTypeCV', 'Longitude', 'Latitude']

# Row counts before and after, to show how many rows were removed.
print(len(outdf100.index))
outdf100 = outdf100.drop_duplicates(subset=site_key).reset_index(drop=True)
print(len(outdf100.index))

Dropping duplicates...
234660
226021


In [25]:
# hardcoded columns
print("Hard coded")

# Every row gets the same EPSG code.
outdf100['EPSGCodeCV'] = 'EPSG:4326'


Hard coded


In [26]:
print("Check Site Native IDs are duplicated")

# Rows whose SiteNativeID already appeared in an earlier row.
repeat_mask = outdf100.duplicated(subset=['SiteNativeID'])
siteNativeIDdup = outdf100.loc[repeat_mask]

# Flag read by the "Adding SiteUUID" cell to pick how the UUID is built.
siteNIdDup = len(siteNativeIDdup.index) > 0
if siteNIdDup:
    print("Site Native IDs are duplicated")

siteNativeIDdup

Check Site Native IDs are duplicated
Site Native IDs are duplicated


Unnamed: 0,WaDESiteUUID,SiteNativeID,SiteName,USGSSiteID,SiteTypeCV,Longitude,Latitude,SitePoint,SiteNativeURL,Geometry,CoordinateMethodCV,CoordinateAccuracy,GNISCodeCV,EPSGCodeCV,NHDNetworkStatusCV,NHDProductCV,NHDUpdateDate,NHDReachCode,NHDMeasureNumber,StateCV
34,,1290,Not Provided,,Unknown,35.183262,-107.892067,,,,PLSS,,,EPSG:4326,,,,,,
59,,28,Not Provided,,Ground Water,35.238990,-107.874688,,,,PLSS,,,EPSG:4326,,,,,,
61,,28,Not Provided,,Ground Water,35.240834,-107.879027,,,,PLSS,,,EPSG:4326,,,,,,
72,,28,Not Provided,,Ground Water,35.229883,-107.865754,,,,PLSS,,,EPSG:4326,,,,,,
89,,1112,LA LUZ CREEK,,Unknown,32.991030,-105.849650,,,,PLSS,,,EPSG:4326,,,,,,
126,,28,Not Provided,,Ground Water,35.231684,-107.865817,,,,PLSS,,,EPSG:4326,,,,,,
154,,1019,Not Provided,,Ground Water,35.415175,-108.088613,,,,PLSS,,,EPSG:4326,,,,,,
158,,179,Not Provided,,Ground Water,32.974331,-105.554967,,,,PLSS,,,EPSG:4326,,,,,,
203,,28,Not Provided,,Ground Water,35.240711,-107.856863,,,,PLSS,,,EPSG:4326,,,,,,
215,,28,Not Provided,,Ground Water,35.224511,-107.868036,,,,PLSS,,,EPSG:4326,,,,,,


In [27]:
print("Adding SiteUUID...")

if siteNIdDup:
    # 10.24.19 create unique site uuid
    # Native IDs repeat, so the UUID is "NM_" plus the 1-based row number.
    outdf100 = outdf100.reset_index(drop=True)
    row_numbers = range(1, len(outdf100.index) + 1)
    outdf100['WaDESiteUUID'] = ["_".join(["NM", str(n)]) for n in row_numbers]
else:
    # Native IDs are unique: the UUID is "NM_" plus the native ID, and stays blank
    # where the native ID itself is blank.
    outdf100['WaDESiteUUID'] = [
        '' if str(native_id) == '' else "_".join(["NM", str(native_id)])
        for native_id in outdf100['SiteNativeID']
    ]

outdf100

Adding SiteUUID...


Unnamed: 0,WaDESiteUUID,SiteNativeID,SiteName,USGSSiteID,SiteTypeCV,Longitude,Latitude,SitePoint,SiteNativeURL,Geometry,CoordinateMethodCV,CoordinateAccuracy,GNISCodeCV,EPSGCodeCV,NHDNetworkStatusCV,NHDProductCV,NHDUpdateDate,NHDReachCode,NHDMeasureNumber,StateCV
0,NM_1,928,Not Provided,,Ground Water,35.162720,-107.882240,,,,PLSS,,,EPSG:4326,,,,,,
1,NM_2,691,Not Provided,,Ground Water,35.125275,-107.881248,,,,PLSS,,,EPSG:4326,,,,,,
2,NM_3,1077,Not Provided,,Unknown,35.301357,-108.153120,,,,PLSS,,,EPSG:4326,,,,,,
3,NM_4,735,Not Provided,,Ground Water,35.336345,-107.649451,,,,PLSS,,,EPSG:4326,,,,,,
4,NM_5,1094,Not Provided,,Ground Water,35.007850,-108.079968,,,,PLSS,,,EPSG:4326,,,,,,
5,NM_6,1322,Not Provided,,Ground Water,35.425315,-108.035686,,,,PLSS,,,EPSG:4326,,,,,,
6,NM_7,1250,Not Provided,,Ground Water,35.304423,-108.145455,,,,PLSS,,,EPSG:4326,,,,,,
7,NM_8,415,Not Provided,,Ground Water,35.316696,-107.812161,,,,PLSS,,,EPSG:4326,,,,,,
8,NM_9,681,Not Provided,,Ground Water,35.230103,-107.639806,,,,PLSS,,,EPSG:4326,,,,,,
9,NM_10,1290,Not Provided,,Ground Water,35.183262,-107.892067,,,,PLSS,,,EPSG:4326,,,,,,


In [28]:
print("Droping duplicates...")

# replace NaN with blank cells
outdf100 = outdf100.replace(np.nan, '')

# Safety net: rows identical to an earlier row in every column are saved to a side
# file and removed, keeping the first occurrence.
repeated_rows = outdf100.duplicated()
outdf100Duplicated = outdf100.loc[repeated_rows]
if len(outdf100Duplicated.index) > 0:
    outdf100Duplicated.to_csv("sites_duplicaterows.csv")  # index=False,
    outdf100 = outdf100.loc[~repeated_rows].reset_index(drop=True)

outdf100

Droping duplicates...


Unnamed: 0,WaDESiteUUID,SiteNativeID,SiteName,USGSSiteID,SiteTypeCV,Longitude,Latitude,SitePoint,SiteNativeURL,Geometry,CoordinateMethodCV,CoordinateAccuracy,GNISCodeCV,EPSGCodeCV,NHDNetworkStatusCV,NHDProductCV,NHDUpdateDate,NHDReachCode,NHDMeasureNumber,StateCV
0,NM_1,928,Not Provided,,Ground Water,35.162720,-107.882240,,,,PLSS,,,EPSG:4326,,,,,,
1,NM_2,691,Not Provided,,Ground Water,35.125275,-107.881248,,,,PLSS,,,EPSG:4326,,,,,,
2,NM_3,1077,Not Provided,,Unknown,35.301357,-108.153120,,,,PLSS,,,EPSG:4326,,,,,,
3,NM_4,735,Not Provided,,Ground Water,35.336345,-107.649451,,,,PLSS,,,EPSG:4326,,,,,,
4,NM_5,1094,Not Provided,,Ground Water,35.007850,-108.079968,,,,PLSS,,,EPSG:4326,,,,,,
5,NM_6,1322,Not Provided,,Ground Water,35.425315,-108.035686,,,,PLSS,,,EPSG:4326,,,,,,
6,NM_7,1250,Not Provided,,Ground Water,35.304423,-108.145455,,,,PLSS,,,EPSG:4326,,,,,,
7,NM_8,415,Not Provided,,Ground Water,35.316696,-107.812161,,,,PLSS,,,EPSG:4326,,,,,,
8,NM_9,681,Not Provided,,Ground Water,35.230103,-107.639806,,,,PLSS,,,EPSG:4326,,,,,,
9,NM_10,1290,Not Provided,,Ground Water,35.183262,-107.892067,,,,PLSS,,,EPSG:4326,,,,,,


In [29]:
print("Checking required isnot null...")
# Columns that must not be blank in the output.
# NOTE(review): no visible cell fills 'GNISCodeCV', so presumably every row ends up
# in the report below — confirm whether that is intended.
requiredCols = ['WaDESiteUUID', 'SiteName', 'CoordinateMethodCV', 'GNISCodeCV', 'EPSGCodeCV']

# replace NaN with blank cells
outdf100 = outdf100.replace(np.nan, '')

# A row is reported when at least one required column is blank.
blank_required = (outdf100[requiredCols] == '').any(axis=1)
outdf100_nullMand = outdf100.loc[blank_required]

if len(outdf100_nullMand.index) > 0:
    outdf100_nullMand.to_csv('sites_mandatoryFieldMissing.csv')  # index=False,

# ToDO: purge these rows if anything is missing? For now they are only reported for inspection.

Checking required isnot null...


In [30]:
print("Writing out...")

#write out
# Final sites table: written to the file named by `out_sitdim`, without the index
# column, encoded as UTF-8. The path is relative, so the file lands in the current
# working directory.
outdf100.to_csv(out_sitdim, index=False, encoding = "utf-8")

print("Done sites")

Writing out...
Done sites
