# Working with NM Aggregated Data

Pre-processing the input data for a smoother upload of the state data to the WaDE 2.0 database.
Using geopandas to read in the shapefile, and converting to WKT for the ReportingUnit geometry.

In [1]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

In [2]:
# Set the working directory, then load both raw inputs into DataFrames.
# NOTE(review): this absolute path is machine-specific; change it to your local
# copy of the project before running. Every path below is relative to it.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/NewMexico/AggregatedAmounts"
os.chdir(workingDir)

# Excel workbook input: first sheet only, skipping the first row of the sheet
# so the following row is used as the header.
# `encoding` is not a read_excel parameter (it has no effect on Excel parsing
# and current pandas raises TypeError for it), so it is not passed.
fileInput1 = "RawInputData/Summary of withdrawals by county 90-15.xlsx"
df = pd.read_excel(fileInput1, header=0, sheet_name=0, skiprows=1)

# Shapefile input
# NOTE(review): the name AZ_AMA looks like a leftover from another state's
# notebook; the file read here is the county shapefile under NMCountShapeFile.
AZ_AMA = gpd.read_file('RawInputData/NMCountShapeFile/CountyShape.shp')
dfshape = pd.DataFrame(AZ_AMA)

In [3]:
# Check the spreadsheet input (first sheet of the Excel workbook read above)
df

Unnamed: 0,CN,COUNTY,CAT,WSW,WGW,TW
0,1,Bernalillo,Public Water Supply,0.000000,125483.156250,125483.156250
1,1,Bernalillo,Domestic (self-supplied),0.000000,3561.899902,3561.899902
2,1,Bernalillo,Irrigated Agriculture,73727.000000,4037.000000,77764.000000
3,1,Bernalillo,Livestock (self-supplied),36.330002,753.200012,789.530029
4,1,Bernalillo,Commercial (self-supplied),0.000000,3711.300049,3711.300049
...,...,...,...,...,...,...
292,61,Valencia,Commercial (self-supplied),0.000000,1025.689941,1025.689941
293,61,Valencia,Industrial (self-supplied),0.000000,84.800003,84.800003
294,61,Valencia,Mining (self-supplied),0.000000,3.600000,3.600000
295,61,Valencia,Power (self-supplied),0.000000,0.000000,0.000000


In [4]:
# Check the shapefile input (attribute columns plus the geometry column)
dfshape

Unnamed: 0,GEOID,NAME,StateNum,State_RU,Name_State,geometry
0,35011,De Baca,35,35-35011,De Baca_35,"POLYGON ((-104.89241 34.25992, -104.89202 34.6..."
1,35029,Luna,35,35-35029,Luna_35,"POLYGON ((-108.22981 32.20716, -108.22934 32.5..."
2,35033,Mora,35,35-35033,Mora_35,"POLYGON ((-105.72471 35.90021, -105.71861 35.9..."
3,35045,San Juan,35,35-35045,San Juan_35,"POLYGON ((-109.04602 36.25059, -109.04522 36.9..."
4,35041,Roosevelt,35,35-35041,Roosevelt_35,"POLYGON ((-103.94902 34.39014, -103.94878 34.6..."
5,35051,Sierra,35,35-35051,Sierra_35,"POLYGON ((-108.00060 33.47801, -107.93896 33.4..."
6,35021,Harding,35,35-35021,Harding_35,"POLYGON ((-104.43904 36.19482, -104.42679 36.1..."
7,35007,Colfax,35,35-35007,Colfax_35,"POLYGON ((-105.37770 36.55230, -105.37530 36.5..."
8,35061,Valencia,35,35-35061,Valencia_35,"POLYGON ((-107.20333 34.78303, -107.20288 34.9..."
9,35057,Torrance,35,35-35057,Torrance_35,"POLYGON ((-106.47118 34.60842, -106.46487 34.6..."


In [5]:
# Print the dtype of every column in df, with the pandas row/column display
# limits lifted only for the duration of this block.
showAll = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*showAll):
    print(df.dtypes)

CN          int64
COUNTY     object
CAT        object
WSW       float64
WGW       float64
TW        float64
dtype: object


In [6]:
# Combine the workbook's sheets into one DataFrame, tagging each sheet's rows
# with a report year.

startYear = 1990
endYear = 2015
numSheets = 5
# Sheet i is labelled with the i-th of numSheets evenly spaced values between
# startYear and endYear, truncated to an integer when stored in the column.
# NOTE(review): with these settings the labels are 1990, 1996, 2002, 2008, 2015.
# Confirm against the workbook's sheet names that these are the real report
# years; if not, replace yearList with an explicit list of years.
yearList = np.linspace(startYear, endYear, numSheets)
df100_list = []

for isx in range(numSheets):
    # Use a loop-local name so the `df` displayed by earlier cells is not overwritten.
    # `encoding` is not a read_excel parameter (current pandas raises TypeError
    # for it), so it is not passed.
    sheet_df = pd.read_excel(fileInput1, header=0, sheet_name=isx, skiprows=1)
    sheet_df = sheet_df.assign(ReportYearCV=yearList[isx])
    sheet_df = sheet_df.astype({"ReportYearCV": int})
    df100_list.append(sheet_df)

# sort=True orders the columns alphabetically; each sheet keeps its own row index here.
df100 = pd.concat(df100_list, sort=True)

df100

Unnamed: 0,CAT,CN,COUNTY,ReportYearCV,TW,WGW,WSW
0,Public Water Supply,1,Bernalillo,1990,125483.156250,125483.156250,0.000000
1,Domestic (self-supplied),1,Bernalillo,1990,3561.899902,3561.899902,0.000000
2,Irrigated Agriculture,1,Bernalillo,1990,77764.000000,4037.000000,73727.000000
3,Livestock (self-supplied),1,Bernalillo,1990,789.530029,753.200012,36.330002
4,Commercial (self-supplied),1,Bernalillo,1990,3711.300049,3711.300049,0.000000
...,...,...,...,...,...,...,...
292,Livestock (self-supplied),61,Valencia,2015,888.255259,841.114994,47.140265
293,Mining (self-supplied),61,Valencia,2015,178.559000,178.559000,0.000000
294,Power (self-supplied),61,Valencia,2015,6.000000,6.000000,0.000000
295,Public Water Supply,61,Valencia,2015,6553.686693,6553.686693,0.000000


In [7]:
print("WaterSourceTypeCV and Amount...")

# Each input row carries two amounts: groundwater (WGW) and surface water (WSW).
# Build one table per source with a common "Amount" column and a
# WaterSourceTypeCV label, then stack them (groundwater rows first).
# NOTE: this cell overwrites df100, so it cannot be re-run on its own; re-run
# the sheet-combining cell first (WGW/WSW no longer exist after this step).
keyColumns = ['COUNTY', 'CAT', 'ReportYearCV']

df100_1 = (
    df100[keyColumns + ['WGW']]
    .rename(columns={"WGW": "Amount"})
    .assign(WaterSourceTypeCV="Ground Water")
)
df100_2 = (
    df100[keyColumns + ['WSW']]
    .rename(columns={"WSW": "Amount"})
    .assign(WaterSourceTypeCV="Surface Water")
)

# Stack the two; sort=True orders columns alphabetically and ignore_index
# gives the result a fresh 0..n-1 index.
df100 = pd.concat([df100_1, df100_2], sort=True, ignore_index=True)

print(len(df100.index))

# Only the last expression of a cell is displayed, so the former bare
# head()/tail() calls had no effect and are dropped.
df100

WaterSourceTypeCV and Amount...
2970


Unnamed: 0,Amount,CAT,COUNTY,ReportYearCV,WaterSourceTypeCV
0,125483.156250,Public Water Supply,Bernalillo,1990,Ground Water
1,3561.899902,Domestic (self-supplied),Bernalillo,1990,Ground Water
2,4037.000000,Irrigated Agriculture,Bernalillo,1990,Ground Water
3,753.200012,Livestock (self-supplied),Bernalillo,1990,Ground Water
4,3711.300049,Commercial (self-supplied),Bernalillo,1990,Ground Water
...,...,...,...,...,...
2965,47.140265,Livestock (self-supplied),Valencia,2015,Surface Water
2966,0.000000,Mining (self-supplied),Valencia,2015,Surface Water
2967,0.000000,Power (self-supplied),Valencia,2015,Surface Water
2968,0.000000,Public Water Supply,Valencia,2015,Surface Water


In [8]:
# Transfer geometry from dfshape by matching on the county name
def retrieveGeometry(colrowValue, dfshape):
    """Look up the geometry for one name in a shape table.

    Parameters
    ----------
    colrowValue : value compared against the ``NAME`` column of ``dfshape``.
    dfshape : DataFrame with ``NAME`` and ``geometry`` columns.

    Returns
    -------
    The ``geometry`` value of the first row whose ``NAME`` equals
    ``colrowValue``, or ``''`` when ``colrowValue`` is empty or null, or
    when no row matches.
    """
    # An empty or missing key is never looked up.
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''

    matches = dfshape.loc[dfshape['NAME'] == colrowValue, 'geometry']
    # If several rows share the name, the first one is used.
    return '' if matches.empty else matches.iloc[0]

# Attach a geometry to every record by looking its COUNTY up in dfshape.
# Only the COUNTY value is needed, so apply over that one column instead of
# building a full row Series per record with DataFrame.apply(axis=1).
# Counties with no matching NAME in dfshape get '' (see retrieveGeometry).
df100['Geometry'] = df100['COUNTY'].apply(lambda county: retrieveGeometry(county, dfshape))
df100

Unnamed: 0,Amount,CAT,COUNTY,ReportYearCV,WaterSourceTypeCV,Geometry
0,125483.156250,Public Water Supply,Bernalillo,1990,Ground Water,"POLYGON ((-107.196763 35.219458, -106.658138 3..."
1,3561.899902,Domestic (self-supplied),Bernalillo,1990,Ground Water,"POLYGON ((-107.196763 35.219458, -106.658138 3..."
2,4037.000000,Irrigated Agriculture,Bernalillo,1990,Ground Water,"POLYGON ((-107.196763 35.219458, -106.658138 3..."
3,753.200012,Livestock (self-supplied),Bernalillo,1990,Ground Water,"POLYGON ((-107.196763 35.219458, -106.658138 3..."
4,3711.300049,Commercial (self-supplied),Bernalillo,1990,Ground Water,"POLYGON ((-107.196763 35.219458, -106.658138 3..."
...,...,...,...,...,...,...
2965,47.140265,Livestock (self-supplied),Valencia,2015,Surface Water,"POLYGON ((-107.203335 34.78303, -107.202882 34..."
2966,0.000000,Mining (self-supplied),Valencia,2015,Surface Water,"POLYGON ((-107.203335 34.78303, -107.202882 34..."
2967,0.000000,Power (self-supplied),Valencia,2015,Surface Water,"POLYGON ((-107.203335 34.78303, -107.202882 34..."
2968,0.000000,Public Water Supply,Valencia,2015,Surface Water,"POLYGON ((-107.203335 34.78303, -107.202882 34..."


In [10]:
# Export to CSV

# Write df100 to a CSV file, leaving out the DataFrame index
outputFile = 'RawInputData/P_NMagg.csv'
df100.to_csv(outputFile, index=False)