# Pre-processing New Mexico Site Specific Public Supply Water Use data for WaDE upload.
Date Updated: 10/11/2022

In [1]:
# working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd # the library that lets us read in shapefiles

# visulizaiton
import matplotlib.pyplot as plot
import seaborn as sns

# Cleanup
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Working Directory and Input File
workingDir = "G:/Shared drives/WaDE Data/NewMexico/SS_PublicSupplyWaterUse/RawInputData"

os.chdir(workingDir)

## Inputs and Dataframe Creation

In [3]:
# POD site with timeseries data
fileInput = "NMPWSPoints_input.csv"
dfpd = pd.read_csv(fileInput)

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfpd:
    dfpd['WaDEUUID'] = "nmPD" + dfpd.index.astype(str)
    dfpd.to_csv('NMPWSPoints_input.csv', index=False)

print(len(dfpd))
dfpd.head(1)

723


Unnamed: 0,OID_,OBJECTID,DWB_STATUS,ID,DWB_WebLink_No,WaterSystem_ID,DWB_SYSTEM_NAME,Public_Water_System_Name_2019,OSE_File_Number,OSE2015_Name_Public_Water_Syste,City,F2015_GW_AFY,F2015_SW_AFY,F2015Total_AFY,F2010_SW_AFY,F2010_GW_AFY,F2010Total_AFY,F2015_GPCD,F2010_GPCD,WWC,MGW,MSW,F2015_Population,F2010_Population,CO_2019,X_2019,Y_2019,Status_OSE,WP_Region,GW_Basin2019,SW_Basin2019,CountyName_HA,HUC_8,BoundaryQuality,DWB_POPULATION,WEC,Latitude,Longitude,WaDEUUID
0,160,161,A,1.0,NM3500101,NM3500101,BEARCAT HOMEOWNERS ASSOCIATION,BEARCAT HOMEOWNERS ASSOCIATION,E-3254,Bearcat Homeowners Assn.,Tijeras,4.0,,4.0,,6.575,6.575,61.022088,58.697655,,Y,,58.519138,100.0,1,383672.5839,3867264.585,Active,13.0,E,RG,BERNALILLO,13050001.0,Best approximation,60.0,0.0,34.941213,-106.273851,nmPD0


In [4]:
# Only looking at "Active" records.
dfpd = dfpd[dfpd['DWB_STATUS'] == 'A']
print(len(dfpd))
dfpd['DWB_STATUS'].unique()

577


array(['A'], dtype=object)

In [5]:
# POU site data (only)
fileInput = "nm_pws_input.csv"
dfpu = pd.read_csv(fileInput)

# WaDE UUID tracker for data assessment
if 'WaDEUUID' not in dfpu:
    dfpu['WaDEUUID'] = "nmPU" + dfpu.index.astype(str)
    dfpu.to_csv('nm_pws_input.csv', index=False)

print(len(dfpu))
dfpu.head(1)

605


Unnamed: 0,OID_,OBJECTI,ID,DWB_WL_,WtrS_ID,P_W_S_N,OSE_F_N,OSE2015,City,F2015_GW,F2015_S,F2015T_,F2010_S,F2010_GW,F2010T_,F2015_GP,F2010_GP,WWC,MGW,MSW,F2015_P,F2010_P,CO_2019,X_2019,Y_2019,Stt_OSE,WP_Regn,GW_B201,SW_B201,DWB_SYS,DWB_STA,CntN_HA,HUC_8,BndryQl,DWB_POP,WEC,Wt_S_ID,PblcSyN,CN,UTMX,UTMY,CITY_1,Plygn_B,SW_BASI,GW_BASI,SHAPE_L,SHAPE_A,cent_Lat,cent_Long,WaDEUUID
0,0,1,0.0,,,,,,Santa Fe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,3,,,,,,,,0.0,0,NM3500626,LAS CAMPANAS WATER SYSTEM,49.0,404430,3952699,,pws-provided waterlines,RG,RG,19925.996784,14449560.0,35.713671,-106.056724,nmPU0


## POD Time Series Data
- 4 datasets between 2010, 2015 SW & GW.

In [6]:
# F2010_GW_AFY
dfpd_10GW = pd.DataFrame(index=dfpd.index)

# Variable Info
dfpd_10GW['in_VariableCV'] = "Withdrawal"
dfpd_10GW['in_VariableSpecificCV'] = ""

# Water Source Info
dfpd_10GW['in_WaterSourceTypeCV'] = "Groundwater" #change here

# Site Info
dfpd_10GW['in_County'] = dfpd['CountyName_HA']
dfpd_10GW['in_HUC8'] = dfpd['HUC_8']
dfpd_10GW['in_Latitude'] = dfpd['Latitude']
dfpd_10GW['in_Longitude'] = dfpd['Longitude']
dfpd_10GW['in_PODorPOUSite'] = "POD"
dfpd_10GW['in_SiteName'] = "Unspecified"
dfpd_10GW['in_SiteNativeID'] = dfpd['ID']

# Site VariableAmounts Info
dfpd_10GW['in_Amount'] = dfpd['F2010_GW_AFY'] #change here
dfpd_10GW['in_BeneficialUseCategory'] = "Unspecified"
dfpd_10GW['in_PopulationServed'] = dfpd['F2010_Population'] #change here
dfpd_10GW['in_ReportYearCV'] = "2010" #change here
dfpd_10GW['in_TimeframeEnd'] = "12/31/" + dfpd_10GW['in_ReportYearCV'].astype(str)
dfpd_10GW['in_TimeframeStart'] = "01/01/" + dfpd_10GW['in_ReportYearCV'].astype(str)

# link to site data
dfpd_10GW['linkKey'] = dfpd['Public_Water_System_Name_2019']

print(len(dfpd_10GW))
dfpd_10GW.head(1)

577


Unnamed: 0,in_VariableCV,in_VariableSpecificCV,in_WaterSourceTypeCV,in_County,in_HUC8,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_Amount,in_BeneficialUseCategory,in_PopulationServed,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,linkKey
0,Withdrawal,,Groundwater,BERNALILLO,13050001.0,34.941213,-106.273851,POD,Unspecified,1.0,6.575,Unspecified,100.0,2010,12/31/2010,01/01/2010,BEARCAT HOMEOWNERS ASSOCIATION


In [7]:
# F2010_SW_AFY
dfpd_10SW = pd.DataFrame(index=dfpd.index)

# Variable Info
dfpd_10SW['in_VariableCV'] = "Withdrawal"
dfpd_10SW['in_VariableSpecificCV'] = ""

# Water Source Info
dfpd_10SW['in_WaterSourceTypeCV'] = "Surface Water" #change here

# Site Info
dfpd_10SW['in_County'] = dfpd['CountyName_HA']
dfpd_10SW['in_HUC8'] = dfpd['HUC_8']
dfpd_10SW['in_Latitude'] = dfpd['Latitude']
dfpd_10SW['in_Longitude'] = dfpd['Longitude']
dfpd_10SW['in_PODorPOUSite'] = "POD"
dfpd_10SW['in_SiteName'] = "Unspecified"
dfpd_10SW['in_SiteNativeID'] = dfpd['ID']

# Site VariableAmounts Info
dfpd_10SW['in_Amount'] = dfpd['F2010_SW_AFY'] #change here
dfpd_10SW['in_BeneficialUseCategory'] = "Unspecified"
dfpd_10SW['in_PopulationServed'] = dfpd['F2010_Population'] #change here
dfpd_10SW['in_ReportYearCV'] = "2010" #change here
dfpd_10SW['in_TimeframeEnd'] = "12/31/" + dfpd_10GW['in_ReportYearCV'].astype(str)
dfpd_10SW['in_TimeframeStart'] = "01/01/" + dfpd_10GW['in_ReportYearCV'].astype(str)

# link to site data
dfpd_10SW['linkKey'] = dfpd['Public_Water_System_Name_2019']

print(len(dfpd_10SW))
dfpd_10SW.head(1)

577


Unnamed: 0,in_VariableCV,in_VariableSpecificCV,in_WaterSourceTypeCV,in_County,in_HUC8,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_Amount,in_BeneficialUseCategory,in_PopulationServed,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,linkKey
0,Withdrawal,,Surface Water,BERNALILLO,13050001.0,34.941213,-106.273851,POD,Unspecified,1.0,,Unspecified,100.0,2010,12/31/2010,01/01/2010,BEARCAT HOMEOWNERS ASSOCIATION


In [8]:
# F2015_GW_AFY
dfpd_15GW = pd.DataFrame(index=dfpd.index)

# Variable Info
dfpd_15GW['in_VariableCV'] = "Withdrawal"
dfpd_15GW['in_VariableSpecificCV'] = ""

# Water Source Info
dfpd_15GW['in_WaterSourceTypeCV'] = "Groundwater" #change here

# Site Info
dfpd_15GW['in_County'] = dfpd['CountyName_HA']
dfpd_15GW['in_HUC8'] = dfpd['HUC_8']
dfpd_15GW['in_Latitude'] = dfpd['Latitude']
dfpd_15GW['in_Longitude'] = dfpd['Longitude']
dfpd_15GW['in_PODorPOUSite'] = "POD"
dfpd_15GW['in_SiteName'] = "Unspecified"
dfpd_15GW['in_SiteNativeID'] = dfpd['ID']

# Site VariableAmounts Info
dfpd_15GW['in_Amount'] = dfpd['F2015_GW_AFY'] #change here
dfpd_15GW['in_BeneficialUseCategory'] = "Unspecified"
dfpd_15GW['in_PopulationServed'] = dfpd['F2015_Population'] #change here
dfpd_15GW['in_ReportYearCV'] = "2015" #change here
dfpd_15GW['in_TimeframeEnd'] = "12/31/" + dfpd_15GW['in_ReportYearCV'].astype(str)
dfpd_15GW['in_TimeframeStart'] = "01/01/" + dfpd_15GW['in_ReportYearCV'].astype(str)

# link to site data
dfpd_15GW['linkKey'] = dfpd['Public_Water_System_Name_2019']

print(len(dfpd_15GW))
dfpd_15GW.head(1)

577


Unnamed: 0,in_VariableCV,in_VariableSpecificCV,in_WaterSourceTypeCV,in_County,in_HUC8,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_Amount,in_BeneficialUseCategory,in_PopulationServed,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,linkKey
0,Withdrawal,,Groundwater,BERNALILLO,13050001.0,34.941213,-106.273851,POD,Unspecified,1.0,4.0,Unspecified,58.519138,2015,12/31/2015,01/01/2015,BEARCAT HOMEOWNERS ASSOCIATION


In [9]:
# F2015_SW_AFY
dfpd_15SW = pd.DataFrame(index=dfpd.index)

# Variable Info
dfpd_15SW['in_VariableCV'] = "Withdrawal"
dfpd_15SW['in_VariableSpecificCV'] = ""

# Water Source Info
dfpd_15SW['in_WaterSourceTypeCV'] = "Surface Water" #change here

# Site Info
dfpd_15SW['in_County'] = dfpd['CountyName_HA']
dfpd_15SW['in_HUC8'] = dfpd['HUC_8']
dfpd_15SW['in_Latitude'] = dfpd['Latitude']
dfpd_15SW['in_Longitude'] = dfpd['Longitude']
dfpd_15SW['in_PODorPOUSite'] = "POD"
dfpd_15SW['in_SiteName'] = "Unspecified"
dfpd_15SW['in_SiteNativeID'] = dfpd['ID']

# Site VariableAmounts Info
dfpd_15SW['in_Amount'] = dfpd['F2015_SW_AFY'] #change here
dfpd_15SW['in_BeneficialUseCategory'] = "Unspecified"
dfpd_15SW['in_PopulationServed'] = dfpd['F2015_Population'] #change here
dfpd_15SW['in_ReportYearCV'] = "2015" #change here
dfpd_15SW['in_TimeframeEnd'] = "12/31/" + dfpd_15SW['in_ReportYearCV'].astype(str)
dfpd_15SW['in_TimeframeStart'] = "01/01/" + dfpd_15SW['in_ReportYearCV'].astype(str)

# link to site data
dfpd_15SW['linkKey'] = dfpd['Public_Water_System_Name_2019']

print(len(dfpd_15SW))
dfpd_15SW.head(1)

577


Unnamed: 0,in_VariableCV,in_VariableSpecificCV,in_WaterSourceTypeCV,in_County,in_HUC8,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_Amount,in_BeneficialUseCategory,in_PopulationServed,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,linkKey
0,Withdrawal,,Surface Water,BERNALILLO,13050001.0,34.941213,-106.273851,POD,Unspecified,1.0,,Unspecified,58.519138,2015,12/31/2015,01/01/2015,BEARCAT HOMEOWNERS ASSOCIATION


In [10]:
# Concatenate POD Data Together
frames = [dfpd_10GW, dfpd_10SW, dfpd_15GW, dfpd_15SW]
dfpd_out = pd.concat(frames).reset_index(drop=True)
print(len(dfpd_out))

2308


## POU Site Data
- just to show POU data for the POD timeseries above.

In [11]:
# left-join POD to POU data
dfpu = pd.merge(dfpu, dfpd, left_on='Wt_S_ID', right_on='WaterSystem_ID', how='left')
print(len(dfpu))
dfpu.head(1)

605


Unnamed: 0,OID__x,OBJECTI,ID_x,DWB_WL_,WtrS_ID,P_W_S_N,OSE_F_N,OSE2015,City_x,F2015_GW,F2015_S,F2015T_,F2010_S,F2010_GW,F2010T_,F2015_GP,F2010_GP,WWC_x,MGW_x,MSW_x,F2015_P,F2010_P,CO_2019_x,X_2019_x,Y_2019_x,Stt_OSE,WP_Regn,GW_B201,SW_B201,DWB_SYS,DWB_STA,CntN_HA,HUC_8_x,BndryQl,DWB_POP,WEC_x,Wt_S_ID,PblcSyN,CN,UTMX,UTMY,CITY_1,Plygn_B,SW_BASI,GW_BASI,SHAPE_L,SHAPE_A,cent_Lat,cent_Long,WaDEUUID_x,OID__y,OBJECTID,DWB_STATUS,ID_y,DWB_WebLink_No,WaterSystem_ID,DWB_SYSTEM_NAME,Public_Water_System_Name_2019,OSE_File_Number,OSE2015_Name_Public_Water_Syste,City_y,F2015_GW_AFY,F2015_SW_AFY,F2015Total_AFY,F2010_SW_AFY,F2010_GW_AFY,F2010Total_AFY,F2015_GPCD,F2010_GPCD,WWC_y,MGW_y,MSW_y,F2015_Population,F2010_Population,CO_2019_y,X_2019_y,Y_2019_y,Status_OSE,WP_Region,GW_Basin2019,SW_Basin2019,CountyName_HA,HUC_8_y,BoundaryQuality,DWB_POPULATION,WEC_y,Latitude,Longitude,WaDEUUID_y
0,0,1,0.0,,,,,,Santa Fe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,3,,,,,,,,0.0,0,NM3500626,LAS CAMPANAS WATER SYSTEM,49.0,404430,3952699,,pws-provided waterlines,RG,RG,19925.996784,14449560.0,35.713671,-106.056724,nmPU0,528.0,529.0,A,73.0,NM3500626,NM3500626,LAS CAMPANAS WATER SYSTEM,LAS CAMPANAS WATER SYSTEM,,Las Campanas,Santa Fe,222.65,15.0,237.62,,224.0,224.0,242.992988,499.9344,,Y,Y,873.0,400.0,49.0,407657.8522,3952562.037,Active,3.0,RG,RG,SANTA FE,13020201.0,Excellent,1495.0,6.0,35.712938,-106.057896,nmPD54


In [12]:
#Place of Use dataframe
dfpu_out = pd.DataFrame(index=dfpu.index)

# Variable Info
dfpu_out['in_VariableCV'] = "Withdrawal"
dfpu_out['in_VariableSpecificCV'] = ""

# Water Source Info
dfpu_out['in_WaterSourceTypeCV'] = "Unspecified"

# Site Info
dfpu_out['in_County'] = dfpu['CountyName_HA'] # from POD
dfpu_out['in_HUC8'] = dfpu['HUC_8_x']
dfpu_out['in_Latitude'] = dfpu['cent_Lat']
dfpu_out['in_Longitude'] = dfpu['cent_Long']
dfpu_out['in_PODorPOUSite'] = "POU"
dfpu_out['in_SiteName'] = dfpu['PblcSyN']
dfpu_out['in_SiteNativeID'] = dfpu['Wt_S_ID']

# Site VariableAmounts Info
dfpu_out['in_Amount'] = ""
dfpu_out['in_BeneficialUseCategory'] = "Unspecified"
dfpu_out['in_PopulationServed'] = ""
dfpu_out['in_ReportYearCV'] = ""
dfpu_out['in_TimeframeEnd'] = ""
dfpu_out['in_TimeframeStart'] = ""

# link to site data
dfpu_out['linkKey'] = dfpu['PblcSyN']

print(len(dfpu_out))
dfpu_out.head(1)

605


Unnamed: 0,in_VariableCV,in_VariableSpecificCV,in_WaterSourceTypeCV,in_County,in_HUC8,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_Amount,in_BeneficialUseCategory,in_PopulationServed,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,linkKey
0,Withdrawal,,Unspecified,SANTA FE,,35.713671,-106.056724,POU,LAS CAMPANAS WATER SYSTEM,NM3500626,,Unspecified,,,,,LAS CAMPANAS WATER SYSTEM


## Concatenate POD with POU data Site Data

In [13]:
frames = [dfpd_out, dfpu_out]
dfout = pd.concat(frames).reset_index(drop=True)
print(len(dfout))

2913


## WaDE Custom Elements (due to missing info)

In [14]:
# Creating WaDE Custom VariableSpecificCV
# ----------------------------------------------------------------------------------------------------

def createVariableSpecificCV(inV, inBU, inWST):
    inV = str(inV).strip()
    inBU = str(inBU).strip().title()
    inWST = str(inWST).strip()
    
    outString = inV + "_Annual_" +  inBU + "_" + inWST
    
    return outString

dfout['in_VariableSpecificCV'] = dfout.apply(lambda row: createVariableSpecificCV(row['in_VariableCV'], 
                                                                                     row['in_BeneficialUseCategory'],
                                                                                     row['in_WaterSourceTypeCV']), axis=1)
dfout['in_VariableSpecificCV'].unique()

array(['Withdrawal_Annual_Unspecified_Groundwater',
       'Withdrawal_Annual_Unspecified_Surface Water',
       'Withdrawal_Annual_Unspecified_Unspecified'], dtype=object)

In [15]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    string1 = str(colrowValue)
    outstring = "WaDNMD_WS" + string1
    return outstring

dfWaterSourceNativeID = pd.DataFrame()
dfWaterSourceNativeID['in_WaterSourceTypeCV'] = dfout['in_WaterSourceTypeCV']
dfWaterSourceNativeID = dfWaterSourceNativeID.drop_duplicates()

dftemp = pd.DataFrame(index=dfWaterSourceNativeID.index)
dftemp["Count"] = range(1, len(dftemp.index) + 1)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(lambda row: assignWaterSourceNativeID(row['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retreive WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    if (A == '') or (pd.isnull(A)):
        outList = ''
    else:
        ml = dfWaterSourceNativeID.loc[(dfWaterSourceNativeID['in_WaterSourceTypeCV'] == A), 'in_WaterSourceNativeID']
        if not (ml.empty):  # check if the series is empty
            outList = ml.iloc[0]
        else:
            outList = ''
    return outList

dfout['in_WaterSourceNativeID'] = dfout.apply(lambda row: retrieveWaterSourceNativeID(row['in_WaterSourceTypeCV']), axis=1)
dfout['in_WaterSourceNativeID'].unique()

array(['WaDNMD_WS1', 'WaDNMD_WS2', 'WaDNMD_WS3'], dtype=object)

In [16]:
# if native site ID is not provdied / fill in blanks
# Creating WaDE Custom site native ID for easy site identification
# row.name = dataframe index of that row
# ----------------------------------------------------------------------------------------------------

def retrieveSiteNativeID(val, valIndex):
    if (val == '') or (pd.isnull(val)):
        outString = "WaDNMD_S" + str(valIndex)
    else:
        outString = val
    return outString

dfout['in_SiteNativeID'] = dfout.apply(lambda row: retrieveSiteNativeID(row['in_SiteNativeID'], row.name), axis=1)
dfout['in_SiteNativeID'].unique()

array([1.0, 4.0, 5.0, ..., 'NM3553501', 'NM3521001', 'NM3524130'],
      dtype=object)

## Cleaning Output
- checking & changing data type & format.

In [17]:
# Convert History Year to YYYY-MM-DD format.

dfout['in_TimeframeEnd'] = pd.to_datetime(dfout['in_TimeframeEnd'], errors = 'coerce')
dfout['in_TimeframeEnd'] = pd.to_datetime(dfout["in_TimeframeEnd"].dt.strftime('%m/%d/%Y'))

dfout['in_TimeframeStart'] = pd.to_datetime(dfout['in_TimeframeStart'], errors = 'coerce')
dfout['in_TimeframeStart'] = pd.to_datetime(dfout["in_TimeframeStart"].dt.strftime('%m/%d/%Y'))

dfout.head(1)

Unnamed: 0,in_VariableCV,in_VariableSpecificCV,in_WaterSourceTypeCV,in_County,in_HUC8,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_Amount,in_BeneficialUseCategory,in_PopulationServed,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,linkKey,in_WaterSourceNativeID
0,Withdrawal,Withdrawal_Annual_Unspecified_Groundwater,Groundwater,BERNALILLO,13050001.0,34.941213,-106.273851,POD,Unspecified,1.0,6.575,Unspecified,100.0,2010,2010-12-31,2010-01-01,BEARCAT HOMEOWNERS ASSOCIATION,WaDNMD_WS1


In [18]:
# Converting numbers that are in string to float.

# in_Latitude & in_Longitude
dfout['in_Latitude'] = pd.to_numeric(dfout['in_Latitude'], errors='coerce')
dfout['in_Longitude'] = pd.to_numeric(dfout['in_Longitude'], errors='coerce')

# in_Amount
dfout['in_Amount'] = pd.to_numeric(dfout['in_Amount'], errors='coerce')

#in_ReportYearCV
# having some issues converting this to an int
dfout['in_ReportYearCV'] = pd.to_numeric(dfout['in_ReportYearCV'], errors='coerce')
dfout['in_ReportYearCV'] = dfout['in_ReportYearCV'].fillna(0).astype('int64')

#in_PopulationServed
# having some issues converting this to an int
dfout['in_PopulationServed'] = pd.to_numeric(dfout['in_PopulationServed'], errors='coerce')
dfout['in_PopulationServed'] = dfout['in_PopulationServed'].fillna(0).astype('int64')

dfout.head(1)

Unnamed: 0,in_VariableCV,in_VariableSpecificCV,in_WaterSourceTypeCV,in_County,in_HUC8,in_Latitude,in_Longitude,in_PODorPOUSite,in_SiteName,in_SiteNativeID,in_Amount,in_BeneficialUseCategory,in_PopulationServed,in_ReportYearCV,in_TimeframeEnd,in_TimeframeStart,linkKey,in_WaterSourceNativeID
0,Withdrawal,Withdrawal_Annual_Unspecified_Groundwater,Groundwater,BERNALILLO,13050001.0,34.941213,-106.273851,POD,Unspecified,1.0,6.575,Unspecified,100,2010,2010-12-31,2010-01-01,BEARCAT HOMEOWNERS ASSOCIATION,WaDNMD_WS1


## Shapefile Data
- For attaching gemetry to csv inputs.

In [19]:
# PoU Shapefile Data
# Shapefile input
dfPoUshapetemp = gpd.read_file('shapefile/nm_pws_areas/nm_pws.shp')
print(len(dfPoUshapetemp))
dfPoUshapetemp.head(3)

605


Unnamed: 0,OBJECTI,ID,DWB_WL_,WtrS_ID,P_W_S_N,OSE_F_N,OSE2015,City,F2015_GW,F2015_S,F2015T_,F2010_S,F2010_GW,F2010T_,F2015_GP,F2010_GP,WWC,MGW,MSW,F2015_P,F2010_P,CO_2019,X_2019,Y_2019,Stt_OSE,WP_Regn,GW_B201,SW_B201,DWB_SYS,DWB_STA,CntN_HA,HUC_8,BndryQl,DWB_POP,WEC,Wt_S_ID,PblcSyN,CN,UTMX,UTMY,CITY_1,Plygn_B,SW_BASI,GW_BASI,SHAPE_L,SHAPE_A,cent_Lat,cent_Long,geometry
0,1,,,,,,,Santa Fe,,,,,,,,,,,,,,,,,,3,,,,,,,,,,NM3500626,LAS CAMPANAS WATER SYSTEM,49.0,404430,3952699,,pws-provided waterlines,RG,RG,19925.996784,14449560.0,35.713671,-106.056724,"POLYGON ((-106.03632 35.71107, -106.04264 35.7..."
1,2,,,,,,,Santa Fe,,,,,,,,,,,,,,,,,,3,,,,,,,,,,NM3505226,SUNLIT HILLS OF SANTA FE,49.0,416727,3939560,,pws-provided parcels,RG,RG,33507.155613,10118860.0,35.596275,-105.919275,"MULTIPOLYGON (((-105.90099 35.60942, -105.9029..."
2,3,,,,,,,Santa Fe,,,,,,,,,,,,,,,,,,3,,,,,,,,,,NM3505126,CITY OF SANTA FE WATER SYSTEM,49.0,412430,3947467,,pws-provided waterlines,RG,RG,111834.662607,96548580.0,35.666978,-105.969838,"POLYGON ((-106.01869 35.62822, -106.01868 35.6..."


In [20]:
columnsList = ['in_SiteNativeID', 'geometry']
dfPoUshape = pd.DataFrame(columns=columnsList)
dfPoUshape['in_SiteNativeID'] = dfPoUshapetemp['Wt_S_ID'].astype(str)
dfPoUshape['geometry'] = dfPoUshapetemp['geometry']
dfPoUshape = dfPoUshape.drop_duplicates(subset=None, keep='first', inplace=False, ignore_index=False)
dfPoUshape.head(3)

Unnamed: 0,in_SiteNativeID,geometry
0,NM3500626,"POLYGON ((-106.03632 35.71107, -106.04264 35.7..."
1,NM3505226,"MULTIPOLYGON (((-105.90099 35.60942, -105.9029..."
2,NM3505126,"POLYGON ((-106.01869 35.62822, -106.01868 35.6..."


## Export Outputs

In [21]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(dfout.dtypes)

in_VariableCV                       object
in_VariableSpecificCV               object
in_WaterSourceTypeCV                object
in_County                           object
in_HUC8                             object
in_Latitude                        float64
in_Longitude                       float64
in_PODorPOUSite                     object
in_SiteName                         object
in_SiteNativeID                     object
in_Amount                          float64
in_BeneficialUseCategory            object
in_PopulationServed                  int64
in_ReportYearCV                      int64
in_TimeframeEnd             datetime64[ns]
in_TimeframeStart           datetime64[ns]
linkKey                             object
in_WaterSourceNativeID              object
dtype: object


In [22]:
#Exporting to Finished File
dfout.to_csv('P_nmSSPWMain.csv', index=False)  # The output
dfPoUshape.to_csv('P_nmSSPWGeometry.csv', index=False) # The output geometry.