This notebook prepares the "reportingunits.csv" file for New Mexico (NM), where the aggregations are done at the river-basin level.
A separate script, "reportingunits_NN.py", aggregates at the county level.

In [2]:
#!/usr/bin/env python
import pandas as pd
import numpy as np
import os
from datetime import datetime
from dateutil.parser import parse
import shapefile
import pygeoif
import json
import geopandas as gpd
from utilityFunctions import *

In [3]:
# working directory
# All input files (Excel workbooks, shapefile) and the output CSV are resolved
# relative to this folder.
working_dir = "../AggregatedAmounts/RawInputData/"

# Remember the directory the kernel started in. `working_dir` is relative, so
# calling os.chdir(working_dir) a second time (e.g. when this cell is re-run)
# would otherwise be applied on top of the previous change and point at the
# wrong folder.
try:
    _notebook_start_dir
except NameError:
    _notebook_start_dir = os.getcwd()

os.chdir(os.path.join(_notebook_start_dir, working_dir))

In [4]:
# Column layout of the output "reportingunits.csv", in the order it is written.
target_columns = [
    "ReportingUnitUUID",
    "ReportingUnitNativeID",
    "ReportingUnitName",
    "ReportingUnitTypeCV",
    "ReportingUnitUpdateDate",
    "ReportingUnitProductVersion",
    "StateCV",
    "EPSGCodeCV",
    "Geometry",
]

In [5]:
# Empty output table with the target column layout; later cells fill it in.
outdf100 = pd.DataFrame(data=None, columns=target_columns)

In [6]:
# Input workbooks, looked up relative to the working directory.
# Going by the file names: fileInput1 is the by-county summary and fileInput2
# the by-river-basin summary.
fileInput1 = "Summary of withdrawals by county 90-15.xlsx"
fileInput2 = "Summary of withdrawals by River Basin 90-15.xlsx"

In [7]:
# Peek at the first sheet of the river-basin workbook to confirm its column
# layout. skiprows=1 drops the first row of the sheet, so header=0 takes the
# column names from the row that follows it.
#
# `encoding` is intentionally not passed: it is not a read_excel parameter, and
# newer pandas releases raise TypeError for unknown keyword arguments.
df10 = pd.read_excel(fileInput2, header=0, sheet_name=0, skiprows=1)
list(df10.columns)

['RVB', 'CAT', 'WSW', 'WGW', 'TW']

In [9]:
# combine multiple sheets to one dataFrame
#
# Each sheet of the river-basin workbook is read separately, tagged with a
# report year, and the pieces are stacked into `df100`.

startYear = 1990
endYear = 2015
numSheets = 5

# One year label per sheet, evenly spaced between startYear and endYear.
# NOTE(review): with 5 sheets the spacing is 6.25 years, so after integer
# truncation the labels are 1990, 1996, 2002, 2008, 2015. Confirm these match
# the years the sheets actually hold (and that the workbook has 5 sheets).
yearList = np.linspace(startYear, endYear, numSheets)

df100_list = []
for isx in range(numSheets):
    # `encoding` is not a read_excel parameter; newer pandas releases raise
    # TypeError for it, so it is not passed.
    df10 = pd.read_excel(fileInput2, header=0, sheet_name=isx, skiprows=1)
    # int() truncates the fractional part, the same as the former astype(int).
    df10 = df10.assign(ReportYearCV=int(yearList[isx]))
    df100_list.append(df10)

# sort=True orders the columns alphabetically in the combined frame.
df100 = pd.concat(df100_list, sort=True)

df100

Unnamed: 0,CAT,RVB,ReportYearCV,TW,WGW,WSW
0,Public Water Supply,AWR,1990,6.308600e+03,3503.840000,2804.760000
1,Domestic (self-supplied),AWR,1990,6.111500e+02,611.150000,0.000000
2,Irrigated Agriculture,AWR,1990,2.937790e+05,105199.000000,188580.000000
3,Livestock (self-supplied),AWR,1990,4.149130e+03,3197.640000,951.490000
4,Commercial (self-supplied),AWR,1990,5.877100e+02,378.150000,209.560000
5,Industrial (self-supplied),AWR,1990,0.000000e+00,0.000000,0.000000
6,Mining (self-supplied),AWR,1990,2.936900e+02,293.690000,0.000000
7,Power (self-supplied),AWR,1990,0.000000e+00,0.000000,0.000000
8,Reservoir Evaporation,AWR,1990,6.292140e+04,0.000000,62921.400000
9,Public Water Supply,TG,1990,2.437476e+04,24374.760000,0.000000


In [10]:
print("Copying basin name...")

# Only the river-basin code (RVB) is needed from the input.
# Keep the first row seen for each basin code and renumber the rows from 0.
df100 = df100.drop_duplicates(subset=["RVB"]).reset_index(drop=True)

# Number of distinct basins.
print(len(df100.index))

outdf100["ReportingUnitName"] = df100["RVB"]

Copying basin name...
6


In [11]:
# ReportingUnitNativeID: auto-generated running number, starting at 1.
outdf100['ReportingUnitNativeID'] = range(1, len(outdf100.index) + 1)

# ReportingUnitUUID: "NM_" followed by the native ID (e.g. "NM_1").
# Built with vectorized string concatenation rather than a row-wise apply();
# this yields the same strings and also works when the frame has no rows.
outdf100['ReportingUnitUUID'] = "NM_" + outdf100['ReportingUnitNativeID'].astype(str)


In [16]:
# make sure the shapefile is in the same working directory
#
# Reads the basin shapefile, prints its attribute table for inspection, and
# builds a lookup from basin region name (REG_NAME) to its geometry.

print("Geometry...")

basins = "WUR_surface_Basins.shp"

sf = shapefile.Reader(basins)

shapes = sf.shapes()
fields = sf.fields
print (fields)
records = sf.records()
print(records)

# Basin codes taken from the spreadsheet, shown for comparison with the
# region names stored in the shapefile.
print(outdf100["ReportingUnitName"])

# The first entry of sf.fields is the 'DeletionFlag' pseudo-field; the real
# attribute columns start at index 1.
fields = sf.fields[1:]
field_names = [field[0] for field in fields]

# One {field_name: value} dictionary per basin record.
# The earlier version of this loop tested atr['STATE'] and referenced an
# undefined name `action`; this shapefile has no STATE field, so the cell
# stopped with a KeyError on the first record.
basin_attributes = [dict(zip(field_names, r.record)) for r in sf.shapeRecords()]

# Convert every shape to a pygeoif geometry (use geom.wkt for the WKT text).
# They are kept as a plain list: the basins are areas, so collecting them in a
# pygeoif.MultiPoint (a collection of points) is not appropriate.
gm = [pygeoif.geometry.as_shape(sp) for sp in shapes]

# REG_NAME -> geometry. shapes() and shapeRecords() are both read in file
# order, so the two lists are zipped position by position.
basin_geometries = {
    atr['REG_NAME']: geom for atr, geom in zip(basin_attributes, gm)
}

Geometry...
[('DeletionFlag', 'C', 1, 0), ['REG_NAME', 'C', 60, 0], ['Shape_Leng', 'F', 19, 11], ['Shape_Area', 'F', 19, 11], ['Acre', 'F', 19, 11]]
[Record #0: ['Upper Colorado Region', 21759.7389687, 21136137.6785, 6248871.35774], Record #1: ['Arkansas-White-Red Region', 290931.572706, 2579621573.23, 11302813.135], Record #2: ['Rio Grande Region', 413346.403199, 7966971248.07, 32070699.4266], Record #3: ['Lower Colorado Region', 396911.568088, 3530633008.58, 8535605.24968], Record #4: ['Pecos Region', 709376.617843, 9442245477.58, 16263638.3], Record #5: ['Texas-Gulf Region', 136903.395422, 747261081.817, 3400856.08862]]
0    AWR
1     TG
2      P
3     RG
4     UC
5     LC
Name: ReportingUnitName, dtype: object


In [2]:
# Convert the basin shapefile to GeoJSON and load it back as a plain dict.
geojson_file = "basins_geojson"
geodf = gpd.read_file(basins)
geodf.to_file(geojson_file, driver = "GeoJSON")

with open(geojson_file) as geofile:
    basinsJson = json.load(geofile)

features = basinsJson['features']

# Feature ids. If the written file carries no "id" member on a feature, fall
# back to the feature's position so the cell does not fail with a KeyError.
idS = [feat.get('id', str(k)) for k, feat in enumerate(features)]

# `locations` was referenced here without being defined anywhere in the
# notebook (NameError). Select every basin.
# NOTE(review): narrow this list if only some features are wanted.
locations = idS

# Region names of the selected features.
text = [
    feat['properties']['REG_NAME']
    for feat, feat_id in zip(features, idS)
    if feat_id in locations
]
text

In [None]:
# Scratch notes kept as a bare string literal: nothing inside it is executed.
# It holds example snippets for iterating over shapefile records.
# NOTE(review): the snippet refers to a 'STATE' field and a name `action`,
# neither of which appears elsewhere in this notebook -- treat it as
# reference material only, or delete the cell.
"""
fields = sf.fields[1:] 
field_names = [field[0] for field in fields] 
# construction of a dctionary field_name:value  
for r in sf.shapeRecords():  
    atr = dict(zip(field_names, r.record)) 
    if atr['STATE'] == 'New Mexico':
        action

>>> for shapeRec in sf.iterShapeRecords():
...     # do something here
...     pass


"""

In [None]:
# hardcoded
# Values that are identical for every reporting unit written by this notebook.

# NOTE(review): placeholder geometry -- the same polygon is written for every
# basin rather than the basin's own outline.
sampleWKT =  'POLYGON((-99.54319297853704 37.15853229006052, -97.26976797641987 37.15759429005948, -105.11636298372741 37.14764529005038, -104.52740598317905 37.15119229005359, -104.09963198278069 37.15376929005606, -103.56062798227867 37.156443290058405, -103.12301898187116 37.157137290059154, -103.08639398183686 37.15689329005886, -103.00203898175846 37.156332290058344, -99.90287697887197 37.162385290064094, -99.54319297853704 37.15853229006052))'

constant_columns = {
    "StateCV": "NM",
    "ReportingUnitTypeCV": "River Basin",
    "EPSGCodeCV": "EPSG:4326",
    "Geometry": sampleWKT,
}
for column_name, constant_value in constant_columns.items():
    outdf100[column_name] = constant_value

# replace NaN with blank cells
outdf100 = outdf100.replace(np.nan, '')
outdf100.head(5)

In [None]:
print("Writing out...")

# Write the reporting-unit table as CSV into the current working directory,
# without the DataFrame index column.
out_repunit = 'reportingunits.csv'
outdf100.to_csv(out_repunit, index=False, encoding = "utf-8")

# This notebook writes reporting units, not sites; the message now says so.
print("Done reporting units")