In [1]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import os
from datetime import datetime
from dateutil.parser import parse
from utilityFunctions import *

In [2]:
# Run the rest of the notebook from the folder holding the input workbooks;
# the relative file names used below are resolved against it.
working_dir = "C:/tseg/NMTest/aggregatedamounts/"
os.chdir(path=working_dir)

In [3]:
# Column layout of the reporting-units output table, in output order.
target_columns = [
    "ReportingUnitUUID",
    "ReportingUnitNativeID",
    "ReportingUnitName",
    "ReportingUnitTypeCV",
    "ReportingUnitUpdateDate",
    "ReportingUnitProductVersion",
    "StateCV",
    "EPSGCodeCV",
    "Geometry",
]

In [4]:
# Empty output frame: no rows yet, one column per entry of target_columns.
outdf100 = pd.DataFrame(data=None, columns=target_columns)

In [5]:
# Input workbooks, given relative to the working directory.
fileInput1 = "Summary of withdrawals by county 90-15.xlsx"       # per-county summary
fileInput2 = "Summary of withdrawals by River Basin 90-15.xlsx"  # per-river-basin summary

In [6]:
# Preview the first sheet of the county workbook to check its column names.
# The first spreadsheet row is skipped and the next row is used as the header.
# `encoding` is not a read_excel parameter (recent pandas raises TypeError for
# it), and .xlsx files carry their own encoding, so it is not passed.
df10 = pd.read_excel(fileInput1, header=0, sheet_name=0, skiprows=1)
list(df10.columns)

['CN', 'COUNTY', 'CAT', 'WSW', 'WGW', 'TW']

In [7]:
# Combine the first `numSheets` sheets of the county workbook into one
# DataFrame, tagging the rows of each sheet with a report year.

startYear = 1990
endYear = 2015
numSheets = 5
# NOTE(review): linspace(1990, 2015, 5) is spaced 6.25 years apart, so after the
# integer cast below the labels are 1990, 1996, 2002, 2008, 2015. Confirm that
# the sheet count and these year labels match the workbook (six sheets at a
# 5-year spacing would need numSheets = 6).
yearList = np.linspace(startYear, endYear, numSheets)
df100_list = []
for isx in range(numSheets):
    # `encoding` is not a read_excel parameter (recent pandas raises TypeError
    # for it) and has no meaning for .xlsx input, so it is not passed.
    df10 = pd.read_excel(fileInput1, header=0, sheet_name=isx, skiprows=1)
    # Label every row of this sheet with its year, truncated to an integer.
    df10 = df10.assign(ReportYearCV=yearList[isx])
    df10.ReportYearCV = df10.ReportYearCV.astype(int)
    df100_list.append(df10)

# sort=True orders the combined columns alphabetically; the per-sheet row index
# is kept, so index values repeat across sheets.
df100 = pd.concat(df100_list, sort=True)

df100

Unnamed: 0,CAT,CN,COUNTY,ReportYearCV,TW,WGW,WSW
0,Public Water Supply,1,Bernalillo,1990,125483.156250,125483.156250,0.000000
1,Domestic (self-supplied),1,Bernalillo,1990,3561.899902,3561.899902,0.000000
2,Irrigated Agriculture,1,Bernalillo,1990,77764.000000,4037.000000,73727.000000
3,Livestock (self-supplied),1,Bernalillo,1990,789.530029,753.200012,36.330002
4,Commercial (self-supplied),1,Bernalillo,1990,3711.300049,3711.300049,0.000000
5,Industrial (self-supplied),1,Bernalillo,1990,485.049988,485.049988,0.000000
6,Mining (self-supplied),1,Bernalillo,1990,324.739990,324.739990,0.000000
7,Power (self-supplied),1,Bernalillo,1990,179.360001,179.360001,0.000000
8,Reservoir Evaporation,1,Bernalillo,1990,0.000000,0.000000,0.000000
9,Public Water Supply,3,Catron,1990,125.440002,125.440002,0.000000


In [8]:
print("Copying County name...")

# Only the county name is taken from the input. Keep the first row seen for
# each distinct COUNTY value and renumber the remaining rows from zero.
df100 = df100.drop_duplicates(subset=["COUNTY"]).reset_index(drop=True)

# Number of distinct counties found.
print(len(df100.index))

# Seed the output table: one row per county, named after it.
outdf100["ReportingUnitName"] = df100["COUNTY"]

Copying County name...
33


In [9]:
# ReportingUnitNativeID: auto-generated sequential identifier, 1..N in row order.
outdf100['ReportingUnitNativeID'] = range(1, len(outdf100.index) + 1)

# ReportingUnitUUID: "NM_" followed by the native ID (e.g. "NM_1").
# Built column-wise instead of with a row-wise apply: same values, no Python
# call per row, and the result is still a Series when the frame has no rows.
outdf100['ReportingUnitUUID'] = "NM_" + outdf100['ReportingUnitNativeID'].astype(str)


In [10]:
# Constant values written to every reporting-unit row.

# The same WKT polygon is assigned to every row; it is not looked up per county.
sampleWKT = 'POLYGON((-99.54319297853704 37.15853229006052, -97.26976797641987 37.15759429005948, -105.11636298372741 37.14764529005038, -104.52740598317905 37.15119229005359, -104.09963198278069 37.15376929005606, -103.56062798227867 37.156443290058405, -103.12301898187116 37.157137290059154, -103.08639398183686 37.15689329005886, -103.00203898175846 37.156332290058344, -99.90287697887197 37.162385290064094, -99.54319297853704 37.15853229006052))'

outdf100["StateCV"] = "NM"
outdf100["ReportingUnitTypeCV"] = "County"
outdf100["EPSGCodeCV"] = "EPSG:4326"
outdf100["Geometry"] = sampleWKT

# Columns that were never filled still hold NaN; turn those cells into empty
# strings so they are written out as blanks.
outdf100 = outdf100.replace(to_replace=np.nan, value='')
outdf100.head(5)

Unnamed: 0,ReportingUnitUUID,ReportingUnitNativeID,ReportingUnitName,ReportingUnitTypeCV,ReportingUnitUpdateDate,ReportingUnitProductVersion,StateCV,EPSGCodeCV,Geometry
0,NM_1,1,Bernalillo,County,,,NM,EPSG:4326,"POLYGON((-99.54319297853704 37.15853229006052,..."
1,NM_2,2,Catron,County,,,NM,EPSG:4326,"POLYGON((-99.54319297853704 37.15853229006052,..."
2,NM_3,3,Chaves,County,,,NM,EPSG:4326,"POLYGON((-99.54319297853704 37.15853229006052,..."
3,NM_4,4,Cibola,County,,,NM,EPSG:4326,"POLYGON((-99.54319297853704 37.15853229006052,..."
4,NM_5,5,Colfax,County,,,NM,EPSG:4326,"POLYGON((-99.54319297853704 37.15853229006052,..."


In [11]:
print("Writing out...")

# Save the reporting-units table as UTF-8 CSV in the working directory,
# leaving out the DataFrame index column.
out_repunit = 'reportingunits.csv'
outdf100.to_csv(
    out_repunit,
    index=False,
    encoding="utf-8",
)

print("Done sites")

Writing out...
Done sites
