# Working with NV Aggregated Data

Pre-processing input data for a smoother upload experience of the state data to the WaDE 2.0 database.
Using geopandas to read in the shapefiles, and converting to WKT for the ReportingUnit geometry.

#### Notes:
- NA

In [None]:
# Needed libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.options.display.max_columns = 999  # show up to 999 columns when a DataFrame is displayed in the notebook

# Work from the raw-input folder so the relative file names used below resolve against it
workingDir = "G:/Shared drives/WaDE Data/Nevada/AggregatedAmounts/RawInputData"
os.chdir(workingDir)

# Pumpage Data

In [None]:
# CSV input files: one basin-level and one county-level pumpage inventory per year.
basin2015Input = "StateInv_2015_BasinPumpage.csv"
basin2017Input = "StateInv_2017_BasinPumpage.csv"
county2015Input = "StateInv_2015_CountyPumpage.csv"
# Previously this pointed at the 2015 county file, so the rows labelled 2017 were 2015 data.
county2017Input = "StateInv_2017_CountyPumpage.csv"

In [None]:
# Basin pumpage, 2015: load the CSV and tag every row with its year and reporting unit type.
dfb15 = pd.read_csv(basin2015Input).assign(Year="2015", in_ReportingUnitType="Basin")
print(dfb15.shape[0])
dfb15.head(1)

In [None]:
# Basin pumpage, 2017: load the CSV and tag every row with its year and reporting unit type.
dfb17 = pd.read_csv(basin2017Input).assign(Year="2017", in_ReportingUnitType="Basin")
print(dfb17.shape[0])
dfb17.head(1)

In [None]:
# County pumpage, 2015: load the CSV and tag every row with its year and reporting unit type.
dfc15 = pd.read_csv(county2015Input).assign(Year="2015", in_ReportingUnitType="County")
print(dfc15.shape[0])
dfc15.head(1)

In [None]:
# County pumpage, 2017: load the CSV and tag every row with its year and reporting unit type.
dfc17 = pd.read_csv(county2017Input).assign(Year="2017", in_ReportingUnitType="County")
print(dfc17.shape[0])
dfc17.head(1)

### Concatenate Files

In [None]:
# Stack the 2015 and 2017 basin tables under a fresh 0..n-1 index.
frames = [dfb15, dfb17]
dfbtemp = pd.concat(frames, ignore_index=True)

# Copy the basin ID and name into the WaDE input columns, then discard the
# source columns that are no longer needed.
dfbtemp = dfbtemp.assign(
    in_ReportingUnitNativeID=dfbtemp['BasinID'],
    in_ReportingUnitName=dfbtemp['BasinName'],
).drop(columns=['OID_', 'BasinID', 'BasinName'])
print(dfbtemp.shape[0])
dfbtemp.head(3)

In [None]:
# Reshape the wide basin table (one column per beneficial-use code) into long
# form: one output row per input row and beneficial-use code, with the code in
# NV_BenUse and that column's value in in_Amount.
#
# The pieces are combined with a single pd.concat because DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0.

# Identifying columns carried onto every output row.
idColumnsList = ["Year", "in_ReportingUnitType", "in_ReportingUnitNativeID", "in_ReportingUnitName"]

# Beneficial-use codes; each one must be a column of dfbtemp.
columnsList = [
    'COM',
    'CON',
    'DOM',
    'ENV',
    'IND',
    'IRR',
    'MM',
    'MUN',
    'OTH',
    'PWR',
    'QM',
    'REC',
    'STK',
    'WLD']
lenList = len(columnsList)

# Template holding only the identifying columns; explicit copy so dfbtemp is
# never modified through it.
dfbtemp2 = dfbtemp[idColumnsList].copy()

# One copy of the template per beneficial-use code. assign() returns a new
# frame each time, so the pieces do not share data.
basinFrames = [
    dfbtemp2.assign(
        NV_BenUse=benUse,
        in_Amount=dfbtemp[benUse],
        in_ReportingUnitType="Basin",
    )
    for benUse in columnsList
]

# Row labels repeat once per code here; they are reset when the basin and
# county outputs are combined later.
dfBasinOut = pd.concat(basinFrames)

print(len(dfBasinOut.index))
dfBasinOut

In [None]:
# Stack the 2015 and 2017 county tables under a fresh 0..n-1 index.
frames = [dfc15, dfc17]
dfctemp = pd.concat(frames, ignore_index=True)

# The native ID starts out blank; the county name becomes the reporting unit
# name. The source columns are then discarded.
dfctemp = dfctemp.assign(
    in_ReportingUnitNativeID="",
    in_ReportingUnitName=dfctemp['County'],
).drop(columns=['OID_', 'County'])
print(dfctemp.shape[0])
dfctemp.head(3)

In [None]:
# Creating WaDE custom reporting unit native ID for easy site identification
# ----------------------------------------------------------------------------------------------------

# Create temp ReportingUnitNativeID dataframe of unique reporting units.
def assignReportingUnitNativeID(colrowValue):
    """Return a WaDE custom native ID: the text "WaDENV_RU" followed by str(colrowValue)."""
    return "WaDENV_RU" + str(colrowValue)

# Distinct (name, type) pairs of the county reporting units, keeping the row
# labels they have in dfctemp.
dfReportingUnitNativeID = dfctemp[['in_ReportingUnitName', 'in_ReportingUnitType']].drop_duplicates()

# Number the distinct units 1..N on that same index, then turn each number
# into its ID string so the result aligns row-for-row on assignment.
dftemp = pd.DataFrame(index=dfReportingUnitNativeID.index)
dftemp["Count"] = range(1, dftemp.shape[0] + 1)
dfReportingUnitNativeID['in_ReportingUnitNativeID'] = dftemp.apply(
    lambda rec: assignReportingUnitNativeID(rec['Count']), axis=1)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE custom reporting unit native ID
def retrieveReportingUnitNativeID(A, B):
    """Look up the generated native ID for a reporting unit.

    A is compared with the in_ReportingUnitName column and B with the
    in_ReportingUnitType column of dfReportingUnitNativeID.

    Returns the in_ReportingUnitNativeID of the first matching row, or ''
    when both inputs are empty strings, both are null, or no row matches.
    """
    bothBlank = A == '' and B == ''
    if bothBlank or (pd.isnull(A) and pd.isnull(B)):
        return ''

    lookup = dfReportingUnitNativeID
    isMatch = (lookup['in_ReportingUnitName'] == A) & (lookup['in_ReportingUnitType'] == B)
    matchedIDs = lookup.loc[isMatch, 'in_ReportingUnitNativeID']
    return '' if matchedIDs.empty else matchedIDs.iloc[0]

# Fill in the generated native ID for every county row, matched on name and type.
dfctemp['in_ReportingUnitNativeID'] = [
    retrieveReportingUnitNativeID(unitName, unitType)
    for unitName, unitType in zip(dfctemp['in_ReportingUnitName'], dfctemp['in_ReportingUnitType'])
]
dfctemp

In [None]:
# Reshape the wide county table (one column per beneficial-use code) into long
# form: one output row per input row and beneficial-use code, with the code in
# NV_BenUse and that column's value in in_Amount.
#
# The pieces are combined with a single pd.concat because DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0.

# Identifying columns carried onto every output row.
idColumnsList = ["Year", "in_ReportingUnitType", "in_ReportingUnitNativeID", "in_ReportingUnitName"]

# Beneficial-use codes; each one must be a column of dfctemp.
columnsList = [
    'COM',
    'CON',
    'DOM',
    'ENV',
    'IND',
    'IRR',
    'MM',
    'MUN',
    'OTH',
    'PWR',
    'QM',
    'REC',
    'STK',
    'WLD']
lenList = len(columnsList)

# Template holding only the identifying columns; explicit copy so dfctemp is
# never modified through it.
dfctemp2 = dfctemp[idColumnsList].copy()

# One copy of the template per beneficial-use code. assign() returns a new
# frame each time, so the pieces do not share data.
countyFrames = [
    dfctemp2.assign(
        NV_BenUse=benUse,
        in_Amount=dfctemp[benUse],
        in_ReportingUnitType="County",
    )
    for benUse in columnsList
]

# Row labels repeat once per code here; they are reset when the basin and
# county outputs are combined later.
dfCountyOut = pd.concat(countyFrames)

print(len(dfCountyOut.index))
dfCountyOut

In [None]:
# Combine the long-form basin and county tables under one fresh 0..n-1 index.
frames = [dfBasinOut, dfCountyOut]
dfout = pd.concat(frames, ignore_index=True)
print(dfout.shape[0])
dfout

In [None]:
# TimeframeStart & TimeframeEnd: 01/01 and 12/31 of each row's Year.
yearText = dfout['Year'].astype(str)
dfout['in_TimeframeStart'] = '01/01/' + yearText
dfout['in_TimeframeEnd'] = '12/31/' + yearText
dfout

In [None]:
# For creating BeneficialUse: maps the pumpage column codes to category names.
# NOTE(review): "OTH" is one of the pumpage columns but has no entry here, so
# it falls through to "Unspecified" -- confirm that this is intended.
BeneficialUseDict = {
    "COM" : "Commercial",
    "CON" : "Construction",
    "DOM" : "Domestic",
    "ENV" : "Environmental",
    "IND" : "Industrial",
    "IRR" : "Irrigation",
    "MM" : "Mining and Milling",
    "MUN" : "Municipal",
    "PWR" : "Power",
    "QM" : "Quasi-Municipal",
    "REC" : "Recreational",
    "STK" : "Stockwater",
    "WLD" : "Wildlife"}
def assignBeneficialUse(colrowValue):
    """Return the beneficial-use category name for a pumpage column code.

    Leading and trailing whitespace is ignored. Null values, empty strings and
    codes without an entry in BeneficialUseDict all return "Unspecified".
    """
    # Null check first: comparing pd.NA with '' yields pd.NA, which cannot be
    # used in a boolean test.
    if pd.isnull(colrowValue) or colrowValue == '':
        return "Unspecified"

    # str() lets non-string values fall through to "Unspecified" instead of
    # failing on .strip(); dict.get replaces the former bare except.
    return BeneficialUseDict.get(str(colrowValue).strip(), "Unspecified")

# Translate each row's beneficial-use code into its category name.
dfout['in_BeneficialUseCategory'] = dfout['NV_BenUse'].apply(assignBeneficialUse)
dfout['in_BeneficialUseCategory'].unique()

In [None]:
# in_VariableSpecificCV: the beneficial-use category wrapped in a fixed prefix and suffix.
benUseCategory = dfout['in_BeneficialUseCategory']
dfout['in_VariableSpecificCV'] = "Withdrawal_Annual_" + benUseCategory + "_Groundwater"
dfout['in_VariableSpecificCV'].unique()

# Shapefile Data

In [None]:
# Read the basin shapefile and report how many features it holds.
basinShape = gpd.read_file('shapefile/NVBasinShapefile.shp')
print(basinShape.shape[0])
basinShape.head(3)

In [None]:
# Basin geometry lookup: name, constant type "Basin" and geometry, with exact
# duplicate rows removed (first occurrence kept, row labels unchanged).
columnsList = ['RU_Name', 'RU_Type', 'geometry']
dfNVBshape = pd.DataFrame(
    {
        'RU_Name': basinShape['BasinName'],
        'RU_Type': "Basin",
        'geometry': basinShape['geometry'],
    },
    columns=columnsList,
).drop_duplicates()
dfNVBshape.head(3)

In [None]:
# Read the county shapefile and report how many features it holds.
countyShape = gpd.read_file('shapefile/NVCountyShapefile.shp')
print(countyShape.shape[0])
countyShape.head(3)

In [None]:
# County geometry lookup: name, constant type "County" and geometry, with exact
# duplicate rows removed (first occurrence kept, row labels unchanged).
columnsList = ['RU_Name', 'RU_Type', 'geometry']
dfNVCshape = pd.DataFrame(
    {
        'RU_Name': countyShape['County'],
        'RU_Type': "County",
        'geometry': countyShape['geometry'],
    },
    columns=columnsList,
).drop_duplicates()
dfNVCshape.head(3)

In [None]:
# Stack the basin and county geometry lookups under one fresh 0..n-1 index.
frames = [dfNVBshape, dfNVCshape]
dfAllShape = pd.concat(frames, ignore_index=True)
dfAllShape

### Inspect Output Data & Export

In [None]:
# Lift the row/column display limits so the dtype of every dfout column is printed.
showAll = pd.option_context('display.max_rows', None, 'display.max_columns', None)
with showAll:
    print(dfout.dtypes)

In [None]:
# Lift the row/column display limits so the dtype of every dfAllShape column is printed.
showAll = pd.option_context('display.max_rows', None, 'display.max_columns', None)
with showAll:
    print(dfAllShape.dtypes)

In [None]:
# Write both results to CSV in the working directory, without the index column.
exportTargets = (
    (dfout, 'P_nvAggMaster.csv'),      # the aggregated-amounts output
    (dfAllShape, 'P_nvGeometry.csv'),  # the reporting-unit geometry output
)
for outFrame, outName in exportTargets:
    outFrame.to_csv(outName, index=False)