# Working with NM Aggregated Data

Pre-processing the state's input data for a smoother upload to the WaDE 2.0 database. Uses geopandas to read in the county shapefile (.shp) and converts the shapes to WKT for the ReportingUnit geometry.

Notes
- Date Updated: 05/03/2022
- There are 9 beneficial uses and 2 different water source types = 18 different time series of interest.

In [1]:
# Needed libraries
# NOTE(review): np and datetime are not referenced in the cells visible in this notebook.
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

# Working Directory
# The relative 'RawInputData/...' paths used by later cells are resolved against this folder.
workingDir = "G:/Shared drives/WaDE Data/NewMexico/AggregatedAmounts"
os.chdir(workingDir)

## Water Use Data
- 6 different sheets, one per report year: 1990, 1995, 2000, 2005, 2010, & 2015
- Merge into one dataframe

In [2]:
# timeseries input file
# combine multiple sheets to one dataFrame
# use sheet name as ReportYearCV

# The context manager closes the workbook's file handle once every sheet has been read.
with pd.ExcelFile('RawInputData/Summary of withdrawals by county 90-15_input.xlsx') as workbook:
    sheets = workbook.sheet_names
    # ignore_index=True gives the combined frame unique 0..n-1 row labels; without it each
    # sheet's labels restart at 0, leaving duplicate labels in df.index that later
    # column assignments / alignments would have to cope with.
    df = pd.concat(
        [pd.read_excel(workbook, sheet_name=s).assign(ReportYearCV=s) for s in sheets],
        ignore_index=True,
    )
print(len(df))
df.head()

1782


Unnamed: 0,CN,COUNTY,CAT,WSW,WGW,TW,ReportYearCV
0,1,Bernalillo,Public Water Supply,0.0,125483.15625,125483.15625,1990
1,1,Bernalillo,Domestic (self-supplied),0.0,3561.899902,3561.899902,1990
2,1,Bernalillo,Irrigated Agriculture,73727.0,4037.0,77764.0,1990
3,1,Bernalillo,Livestock (self-supplied),36.330002,753.200012,789.530029,1990
4,1,Bernalillo,Commercial (self-supplied),0.0,3711.300049,3711.300049,1990


In [3]:
# list each distinct ReportYearCV value, sorted, as a quoted comma-terminated line
reportYears = df['ReportYearCV'].sort_values().unique()
for reportYear in reportYears:
    print('"' + reportYear + '",')

"1990",
"1995",
"2000",
"2005",
"2010",
"2015",


In [4]:
# list each distinct CAT (beneficial use category) value, sorted, as a quoted comma-terminated line
categories = df['CAT'].sort_values().unique()
for category in categories:
    print('"' + category + '",')

"Commercial (self-supplied)",
"Domestic (self-supplied)",
"Industrial (self-supplied)",
"Irrigated Agriculture",
"Livestock (self-supplied)",
"Mining (self-supplied)",
"Power (self-supplied)",
"Public Water Supply",
"Reservoir Evaporation",


In [5]:
# list each distinct CN value, sorted, as a quoted comma-terminated line
countyNumbers = df['CN'].sort_values().unique()
for countyNumber in countyNumbers:
    print('"' + str(countyNumber) + '",')

"1",
"3",
"5",
"6",
"7",
"9",
"11",
"13",
"15",
"17",
"19",
"21",
"23",
"25",
"27",
"28",
"29",
"31",
"33",
"35",
"37",
"39",
"41",
"43",
"45",
"47",
"49",
"51",
"53",
"55",
"57",
"59",
"61",


## Extract timeseries info

### groundwater (WGW)

In [6]:
# 1, "Withdrawal_Annual_Commercial_Groundwater"
# County reporting-unit records of the WGW amount for one beneficial use category.

df1_cg = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Commercial (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WGW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WGW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=1,
            in_VariableSpecificCV="Withdrawal_Annual_Commercial_Groundwater",  # update this
            in_WaterSourceTypeCV="Groundwater",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df1_cg))
df1_cg.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Bernalillo,1,County,1990,3711.300049,Commercial (self-supplied)


In [7]:
# 2, "Withdrawal_Annual_Domestic_Groundwater"
# County reporting-unit records of the WGW amount for one beneficial use category.

df2_dg = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Domestic (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WGW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WGW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=2,
            in_VariableSpecificCV="Withdrawal_Annual_Domestic_Groundwater",  # update this
            in_WaterSourceTypeCV="Groundwater",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df2_dg))
df2_dg.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,2,Withdrawal_Annual_Domestic_Groundwater,Groundwater,Bernalillo,1,County,1990,3561.899902,Domestic (self-supplied)


In [8]:
# 3, "Withdrawal_Annual_Industrial_Groundwater"
# County reporting-unit records of the WGW amount for one beneficial use category.

df3_ig = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Industrial (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WGW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WGW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=3,
            in_VariableSpecificCV="Withdrawal_Annual_Industrial_Groundwater",  # update this
            in_WaterSourceTypeCV="Groundwater",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df3_ig))
df3_ig.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,3,Withdrawal_Annual_Industrial_Groundwater,Groundwater,Bernalillo,1,County,1990,485.049988,Industrial (self-supplied)


In [9]:
# 4, "Withdrawal_Annual_Irrigated Agriculture_Groundwater"
# County reporting-unit records of the WGW amount for one beneficial use category.

df4_ag = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Irrigated Agriculture',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WGW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WGW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=4,
            in_VariableSpecificCV="Withdrawal_Annual_Irrigated Agriculture_Groundwater",  # update this
            in_WaterSourceTypeCV="Groundwater",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df4_ag))
df4_ag.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,4,Withdrawal_Annual_Irrigated Agriculture_Ground...,Groundwater,Bernalillo,1,County,1990,4037.0,Irrigated Agriculture


In [10]:
# 5, "Withdrawal_Annual_Livestock_Groundwater"
# County reporting-unit records of the WGW amount for one beneficial use category.

df5_lg = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Livestock (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WGW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WGW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=5,
            in_VariableSpecificCV="Withdrawal_Annual_Livestock_Groundwater",  # update this
            in_WaterSourceTypeCV="Groundwater",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df5_lg))
df5_lg.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,5,Withdrawal_Annual_Livestock_Groundwater,Groundwater,Bernalillo,1,County,1990,753.200012,Livestock (self-supplied)


In [11]:
# 6, "Withdrawal_Annual_Mining_Groundwater"
# County reporting-unit records of the WGW amount for one beneficial use category.

df6_mg = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Mining (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WGW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WGW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=6,
            in_VariableSpecificCV="Withdrawal_Annual_Mining_Groundwater",  # update this
            in_WaterSourceTypeCV="Groundwater",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df6_mg))
df6_mg.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,6,Withdrawal_Annual_Mining_Groundwater,Groundwater,Bernalillo,1,County,1990,324.73999,Mining (self-supplied)


In [12]:
# 7, "Withdrawal_Annual_Power_Groundwater"
# County reporting-unit records of the WGW amount for one beneficial use category.

df7_pg = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Power (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WGW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WGW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=7,
            in_VariableSpecificCV="Withdrawal_Annual_Power_Groundwater",  # update this
            in_WaterSourceTypeCV="Groundwater",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df7_pg))
df7_pg.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,7,Withdrawal_Annual_Power_Groundwater,Groundwater,Bernalillo,1,County,1990,179.360001,Power (self-supplied)


In [13]:
# 8, "Withdrawal_Annual_Public Supply_Groundwater"
# County reporting-unit records of the WGW amount for one beneficial use category.

df8_psg = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Public Water Supply',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WGW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WGW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=8,
            in_VariableSpecificCV="Withdrawal_Annual_Public Supply_Groundwater",  # update this
            in_WaterSourceTypeCV="Groundwater",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df8_psg))
df8_psg.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,8,Withdrawal_Annual_Public Supply_Groundwater,Groundwater,Bernalillo,1,County,1990,125483.15625,Public Water Supply


In [14]:
# 9, "Withdrawal_Annual_Reservoir Evaporation_Groundwater"
# County reporting-unit records of the WGW amount for one beneficial use category.

df9_reg = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Reservoir Evaporation',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WGW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WGW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=9,
            in_VariableSpecificCV="Withdrawal_Annual_Reservoir Evaporation_Groundwater",  # update this
            in_WaterSourceTypeCV="Groundwater",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df9_reg))
df9_reg.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,9,Withdrawal_Annual_Reservoir Evaporation_Ground...,Groundwater,Bernalillo,1,County,1990,0.0,Reservoir Evaporation


In [15]:
# Concatenate Groundwater timeseries DataFrames
# ignore_index=True numbers the stacked rows 0..n-1 in one step
dfground = pd.concat(
    [df1_cg, df2_dg, df3_ig, df4_ag, df5_lg, df6_mg, df7_pg, df8_psg, df9_reg],
    ignore_index=True,
)

print(len(dfground))
dfground.head(1)

1782


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Bernalillo,1,County,1990,3711.300049,Commercial (self-supplied)


### surface water (WSW)

In [16]:
# 10, "Withdrawal_Annual_Commercial_Surface Water"
# County reporting-unit records of the WSW amount for one beneficial use category.

df10_cs = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Commercial (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WSW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WSW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=10,
            in_VariableSpecificCV="Withdrawal_Annual_Commercial_Surface Water",  # update this
            in_WaterSourceTypeCV="Surface Water",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df10_cs))
df10_cs.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,10,Withdrawal_Annual_Commercial_Surface Water,Surface Water,Bernalillo,1,County,1990,0.0,Commercial (self-supplied)


In [17]:
# 11, "Withdrawal_Annual_Domestic_Surface Water"
# County reporting-unit records of the WSW amount for one beneficial use category.

df11_ds = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Domestic (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WSW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WSW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=11,
            in_VariableSpecificCV="Withdrawal_Annual_Domestic_Surface Water",  # update this
            in_WaterSourceTypeCV="Surface Water",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df11_ds))
df11_ds.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,11,Withdrawal_Annual_Domestic_Surface Water,Surface Water,Bernalillo,1,County,1990,0.0,Domestic (self-supplied)


In [18]:
# 12, "Withdrawal_Annual_Industrial_Surface Water"
# County reporting-unit records of the WSW amount for one beneficial use category.

df12_is = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Industrial (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WSW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WSW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=12,
            in_VariableSpecificCV="Withdrawal_Annual_Industrial_Surface Water",  # update this
            in_WaterSourceTypeCV="Surface Water",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df12_is))
df12_is.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,12,Withdrawal_Annual_Industrial_Surface Water,Surface Water,Bernalillo,1,County,1990,0.0,Industrial (self-supplied)


In [19]:
# 13, "Withdrawal_Annual_Irrigated Agriculture_Surface Water"
# County reporting-unit records of the WSW amount for one beneficial use category.

df13_as = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Irrigated Agriculture',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WSW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WSW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=13,
            in_VariableSpecificCV="Withdrawal_Annual_Irrigated Agriculture_Surface Water",  # update this
            in_WaterSourceTypeCV="Surface Water",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df13_as))
df13_as.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,13,Withdrawal_Annual_Irrigated Agriculture_Surfac...,Surface Water,Bernalillo,1,County,1990,73727.0,Irrigated Agriculture


In [20]:
# 14, "Withdrawal_Annual_Livestock_Surface Water"
# County reporting-unit records of the WSW amount for one beneficial use category.

df14_ls = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Livestock (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WSW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WSW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=14,
            in_VariableSpecificCV="Withdrawal_Annual_Livestock_Surface Water",  # update this
            in_WaterSourceTypeCV="Surface Water",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df14_ls))
df14_ls.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,14,Withdrawal_Annual_Livestock_Surface Water,Surface Water,Bernalillo,1,County,1990,36.330002,Livestock (self-supplied)


In [21]:
# 15, "Withdrawal_Annual_Mining_Surface Water"
# County reporting-unit records of the WSW amount for one beneficial use category.

df15_ms = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Mining (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WSW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WSW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=15,
            in_VariableSpecificCV="Withdrawal_Annual_Mining_Surface Water",  # update this
            in_WaterSourceTypeCV="Surface Water",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df15_ms))
df15_ms.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,15,Withdrawal_Annual_Mining_Surface Water,Surface Water,Bernalillo,1,County,1990,0.0,Mining (self-supplied)


In [22]:
# 16, "Withdrawal_Annual_Power_Surface Water"
# County reporting-unit records of the WSW amount for one beneficial use category.

df16_ps = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Power (self-supplied)',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WSW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WSW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=16,
            in_VariableSpecificCV="Withdrawal_Annual_Power_Surface Water",  # update this
            in_WaterSourceTypeCV="Surface Water",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df16_ps))
df16_ps.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,16,Withdrawal_Annual_Power_Surface Water,Surface Water,Bernalillo,1,County,1990,0.0,Power (self-supplied)


In [23]:
# 17, "Withdrawal_Annual_Public Supply_Surface Water"
# County reporting-unit records of the WSW amount for one beneficial use category.

df17_pss = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Public Water Supply',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WSW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WSW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=17,
            in_VariableSpecificCV="Withdrawal_Annual_Public Supply_Surface Water",  # update this
            in_WaterSourceTypeCV="Surface Water",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df17_pss))
df17_pss.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,17,Withdrawal_Annual_Public Supply_Surface Water,Surface Water,Bernalillo,1,County,1990,0.0,Public Water Supply


In [24]:
# 18, "Withdrawal_Annual_Reservoir Evaporation_Surface Water"
# County reporting-unit records of the WSW amount for one beneficial use category.

df18_res = (
    # filter by beneficial use, keeping only the source columns that feed the output
    df.loc[df['CAT'] == 'Reservoir Evaporation',  # update this
           ['COUNTY', 'CN', 'ReportYearCV', 'WSW', 'CAT']]  # update this
    # ReportingUnit & AggregatedAmounts Info: source column -> WaDE input name
    .rename(columns={'COUNTY': 'in_ReportingUnitName',
                     'CN': 'in_ReportingUnitNativeID',
                     'ReportYearCV': 'in_ReportYearCV',
                     'WSW': 'in_Amount',  # update this
                     'CAT': 'in_BeneficialUseCategory'})
    # Variable, WaterSource & ReportingUnit type: constant for every row of this series
    .assign(dfNum=18,
            in_VariableSpecificCV="Withdrawal_Annual_Reservoir Evaporation_Surface Water",  # update this
            in_WaterSourceTypeCV="Surface Water",  # update this
            in_ReportingUnitTypeCV="County")
    # output column order, then a fresh 0..n-1 index
    .loc[:, ['dfNum', 'in_VariableSpecificCV', 'in_WaterSourceTypeCV',
             'in_ReportingUnitName', 'in_ReportingUnitNativeID', 'in_ReportingUnitTypeCV',
             'in_ReportYearCV', 'in_Amount', 'in_BeneficialUseCategory']]
    .reset_index(drop=True)
)

print(len(df18_res))
df18_res.head(1)

198


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,18,Withdrawal_Annual_Reservoir Evaporation_Surfac...,Surface Water,Bernalillo,1,County,1990,0.0,Reservoir Evaporation


In [25]:
# Concatenate Surface Water timeseries DataFrames
# ignore_index=True numbers the stacked rows 0..n-1 in one step
dfsurface = pd.concat(
    [df10_cs, df11_ds, df12_is, df13_as, df14_ls, df15_ms, df16_ps, df17_pss, df18_res],
    ignore_index=True,
)

print(len(dfsurface))
dfsurface.head(1)

1782


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,10,Withdrawal_Annual_Commercial_Surface Water,Surface Water,Bernalillo,1,County,1990,0.0,Commercial (self-supplied)


### Concatenate together

In [26]:
# Concatenate Groundwater & Surface Water timeseries DataFrames together into single output
# groundwater rows come first, then surface water; rows are renumbered 0..n-1
dfout = pd.concat([dfground, dfsurface], ignore_index=True)

print(len(dfout))
dfout.head(1)

3564


Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory
0,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Bernalillo,1,County,1990,3711.300049,Commercial (self-supplied)


## WaDE Custom Elements (due to missing info)

In [27]:
# Adding timeframe start & end values.
# Not provided, so they are built from the report year: 01/01/<year> and 12/31/<year>.

reportYearText = dfout['in_ReportYearCV'].astype(str)
dfout['in_TimeframeStart'] = '01/01/' + reportYearText
dfout['in_TimeframeEnd'] = '12/31/' + reportYearText
dfout.head()

Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory,in_TimeframeStart,in_TimeframeEnd
0,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Bernalillo,1,County,1990,3711.300049,Commercial (self-supplied),01/01/1990,12/31/1990
1,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Catron,3,County,1990,16.35,Commercial (self-supplied),01/01/1990,12/31/1990
2,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Chaves,5,County,1990,2801.659912,Commercial (self-supplied),01/01/1990,12/31/1990
3,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Cibola,6,County,1990,53.779999,Commercial (self-supplied),01/01/1990,12/31/1990
4,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Colfax,7,County,1990,362.649994,Commercial (self-supplied),01/01/1990,12/31/1990


In [28]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Build a WaDE custom water source native ID.

    The given value is converted with str() and appended to the
    "WaDENM_WS" prefix, e.g. 1 -> "WaDENM_WS1".
    """
    return "WaDENM_WS" + str(colrowValue)

# One row per distinct water source type; each keeps the row label of its first occurrence in dfout.
dfWaterSourceNativeID = dfout[['in_WaterSourceTypeCV']].drop_duplicates()

# Number the distinct types 1..n in order of first appearance, then turn each number into an ID.
dftemp = pd.DataFrame({"Count": range(1, len(dfWaterSourceNativeID.index) + 1)},
                      index=dfWaterSourceNativeID.index)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp["Count"].map(assignWaterSourceNativeID)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    """Look up the native ID assigned to water source type ``A``.

    Reads the module-level ``dfWaterSourceNativeID`` table and returns the
    'in_WaterSourceNativeID' of the first row whose 'in_WaterSourceTypeCV'
    equals ``A``. Returns '' when ``A`` is '' or null, or when no row matches.
    """
    # blank / missing type: nothing to look up
    if (A == '') or (pd.isnull(A)):
        return ''

    matches = dfWaterSourceNativeID.loc[
        dfWaterSourceNativeID['in_WaterSourceTypeCV'] == A, 'in_WaterSourceNativeID']
    return '' if matches.empty else matches.iloc[0]

# Look up the native ID for each row's water source type (element-wise over the one column needed).
dfout['in_WaterSourceNativeID'] = dfout['in_WaterSourceTypeCV'].map(retrieveWaterSourceNativeID)
dfout

Unnamed: 0,dfNum,in_VariableSpecificCV,in_WaterSourceTypeCV,in_ReportingUnitName,in_ReportingUnitNativeID,in_ReportingUnitTypeCV,in_ReportYearCV,in_Amount,in_BeneficialUseCategory,in_TimeframeStart,in_TimeframeEnd,in_WaterSourceNativeID
0,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Bernalillo,1,County,1990,3711.300049,Commercial (self-supplied),01/01/1990,12/31/1990,WaDENM_WS1
1,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Catron,3,County,1990,16.350000,Commercial (self-supplied),01/01/1990,12/31/1990,WaDENM_WS1
2,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Chaves,5,County,1990,2801.659912,Commercial (self-supplied),01/01/1990,12/31/1990,WaDENM_WS1
3,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Cibola,6,County,1990,53.779999,Commercial (self-supplied),01/01/1990,12/31/1990,WaDENM_WS1
4,1,Withdrawal_Annual_Commercial_Groundwater,Groundwater,Colfax,7,County,1990,362.649994,Commercial (self-supplied),01/01/1990,12/31/1990,WaDENM_WS1
...,...,...,...,...,...,...,...,...,...,...,...,...
3559,18,Withdrawal_Annual_Reservoir Evaporation_Surfac...,Surface Water,Socorro,53,County,2015,7570.000000,Reservoir Evaporation,01/01/2015,12/31/2015,WaDENM_WS2
3560,18,Withdrawal_Annual_Reservoir Evaporation_Surfac...,Surface Water,Taos,55,County,2015,226.205000,Reservoir Evaporation,01/01/2015,12/31/2015,WaDENM_WS2
3561,18,Withdrawal_Annual_Reservoir Evaporation_Surfac...,Surface Water,Torrance,57,County,2015,0.000000,Reservoir Evaporation,01/01/2015,12/31/2015,WaDENM_WS2
3562,18,Withdrawal_Annual_Reservoir Evaporation_Surfac...,Surface Water,Union,59,County,2015,171.380000,Reservoir Evaporation,01/01/2015,12/31/2015,WaDENM_WS2


## Shapefile Data

In [29]:
# County Shapefile Data
# read with geopandas; the resulting frame has a 'geometry' column (see the display below)
shapeInput = "RawInputData/NMCountShapeFile/CountyShape.shp"
dfshape = gpd.read_file(shapeInput)

print(dfshape.shape[0])
dfshape.head(3)

33


Unnamed: 0,GEOID,NAME,StateNum,State_RU,Name_State,Shape_Leng,Shape_Area,geometry
0,35011,De Baca,35,35-35011,De Baca_35,3.45123,0.592267,"POLYGON ((-104.89241 34.25992, -104.89202 34.6..."
1,35029,Luna,35,35-35029,Luna_35,3.504984,0.734031,"POLYGON ((-108.22981 32.20716, -108.22934 32.5..."
2,35033,Mora,35,35-35033,Mora_35,3.907811,0.499959,"POLYGON ((-105.72471 35.90021, -105.71861 35.9..."


In [30]:
# shapefile output dataframe: reporting unit name + its geometry
# NOTE(review): 'in_Geomerty' looks like a misspelling of 'in_Geometry'. It is kept as-is because
# this frame is exported to CSV further down, so the name ends up in that file's header — confirm
# nothing reads that header before renaming.
columnList = ['in_ReportingUnitName',
              'in_Geomerty']
sourceColumns = [dfshape['NAME'], dfshape['geometry']]
dfshapeOut = pd.DataFrame(dict(zip(columnList, sourceColumns)))

# drop repeated (name, geometry) rows and renumber 0..n-1
dfshapeOut = dfshapeOut.drop_duplicates().reset_index(drop=True)
print(len(dfshapeOut))
dfshapeOut.head(3)

33


Unnamed: 0,in_ReportingUnitName,in_Geomerty
0,De Baca,"POLYGON ((-104.89241 34.25992, -104.89202 34.6..."
1,Luna,"POLYGON ((-108.22981 32.20716, -108.22934 32.5..."
2,Mora,"POLYGON ((-105.72471 35.90021, -105.71861 35.9..."


## Inspect Output Data & Export

In [31]:
# print the dtype of every dfout column with pandas' row/column display limits lifted
with pd.option_context('display.max_columns', None, 'display.max_rows', None):
    print(dfout.dtypes)

dfNum                         int64
in_VariableSpecificCV        object
in_WaterSourceTypeCV         object
in_ReportingUnitName         object
in_ReportingUnitNativeID      int64
in_ReportingUnitTypeCV       object
in_ReportYearCV              object
in_Amount                   float64
in_BeneficialUseCategory     object
in_TimeframeStart            object
in_TimeframeEnd              object
in_WaterSourceNativeID       object
dtype: object


In [32]:
# print the dtype of every dfshapeOut column with pandas' row/column display limits lifted
with pd.option_context('display.max_columns', None, 'display.max_rows', None):
    print(dfshapeOut.dtypes)

in_ReportingUnitName      object
in_Geomerty             geometry
dtype: object


In [33]:
# Export out to CSV.
# (frame, destination) pairs, written in this order; the row index is not written to either file
for outFrame, outPath in (
    (dfout, 'RawInputData/P_nmAgMaster.csv'),  # The output.
    (dfshapeOut, 'RawInputData/P_nmAgGeometry.csv'),  # The output geometry.
):
    outFrame.to_csv(outPath, index=False)

In [34]:
# list each distinct in_ReportingUnitNativeID in the output, sorted, as a quoted comma-terminated line
nativeIDs = dfout['in_ReportingUnitNativeID'].sort_values().unique()
for nativeID in nativeIDs:
    print('"' + str(nativeID) + '",')

"1",
"3",
"5",
"6",
"7",
"9",
"11",
"13",
"15",
"17",
"19",
"21",
"23",
"25",
"27",
"28",
"29",
"31",
"33",
"35",
"37",
"39",
"41",
"43",
"45",
"47",
"49",
"51",
"53",
"55",
"57",
"59",
"61",
