# NE Aggregated Data

Preprocessing input data for a smoother upload experience of the state data to the WaDE 2.0 database.
Using geopandas to read in the shp files, and converting to WKT for the ReportingUnit geometry.

Notes
- Will create 6 separate dataframes per reporting-unit level (basin and subbasin) to hold variable-specific data inputs, then merge them into a single larger output dataframe.

In [None]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
# Let Jupyter render up to 999 columns of a DataFrame instead of truncating.
pd.options.display.max_columns = 999

# Every relative file name used below is resolved against this directory.
# NOTE(review): absolute path on one user's machine -- adjust before running elsewhere.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Nebraska/AggregatedAmounts/RawInputData"
os.chdir(workingDir)

# Water Use Data - Basin

In [None]:
# Basin-level CSV input file. The name is relative, so it is looked up in the
# working directory selected with os.chdir in the setup cell.
fileInput = "INSIGHT_FinalBasinData_20151102_input.csv"

In [None]:
# Load the basin CSV named by fileInput, then report its row count and preview it.
df_basin = pd.read_csv(fileInput)
print(len(df_basin.index))
df_basin.head(3)

In [None]:
# Annual Groundwater Supply -- basin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_basin column it is copied from.
agws_ba_values = {
    'in_MethodTypeCV': "Modeled",
    'in_VariableSpecificCV': "Annual Groundwater Supply",
    'in_WaterSourceTypeCV': "Groundwater",
    'in_ReportingUnitName': df_basin['Basin'],
    'in_ReportingUnitNativeID': df_basin['BasinID'],
    'in_ReportingUnitTypeCV': "Basin",
    'in_ReportYearCV': df_basin['Year'],
    'in_Amount': df_basin['GWDP_Annual'],
}
columnList = list(agws_ba_values)

# One output row per input row, sharing df_basin's index.
df_AGWS_ba = pd.DataFrame(columns=columnList, index=df_basin.index)
for col_name, col_value in agws_ba_values.items():
    df_AGWS_ba[col_name] = col_value

# Row count and a short preview.
print(len(df_AGWS_ba))
df_AGWS_ba.head(3)

In [None]:
# Annual Surface Water Supply -- basin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_basin column it is copied from.
asws_ba_values = {
    'in_MethodTypeCV': "Modeled",
    'in_VariableSpecificCV': "Annual Surface Water Supply",
    'in_WaterSourceTypeCV': "Surface Water",
    'in_ReportingUnitName': df_basin['Basin'],
    'in_ReportingUnitNativeID': df_basin['BasinID'],
    'in_ReportingUnitTypeCV': "Basin",
    'in_ReportYearCV': df_basin['Year'],
    'in_Amount': df_basin['SWDTotal_Annual'],
}
columnList = list(asws_ba_values)

# One output row per input row, sharing df_basin's index.
df_ASWS_ba = pd.DataFrame(columns=columnList, index=df_basin.index)
for col_name, col_value in asws_ba_values.items():
    df_ASWS_ba[col_name] = col_value

# Row count and a short preview.
print(len(df_ASWS_ba))
df_ASWS_ba.head(3)

In [None]:
# Annual Groundwater Demand -- basin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_basin column it is copied from.
agwd_ba_values = {
    'in_MethodTypeCV': "Modeled",
    'in_VariableSpecificCV': "Annual Groundwater Demand",
    'in_WaterSourceTypeCV': "Groundwater",
    'in_ReportingUnitName': df_basin['Basin'],
    'in_ReportingUnitNativeID': df_basin['BasinID'],
    'in_ReportingUnitTypeCV': "Basin",
    'in_ReportYearCV': df_basin['Year'],
    'in_Amount': df_basin['GWCTotal_Annual'],
}
columnList = list(agwd_ba_values)

# One output row per input row, sharing df_basin's index.
df_AGWD_ba = pd.DataFrame(columns=columnList, index=df_basin.index)
for col_name, col_value in agwd_ba_values.items():
    df_AGWD_ba[col_name] = col_value

# Row count and a short preview.
print(len(df_AGWD_ba))
df_AGWD_ba.head(3)

In [None]:
# Annual Surface Water Demand -- basin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_basin column it is copied from.
aswd_ba_values = {
    'in_MethodTypeCV': "Modeled",
    'in_VariableSpecificCV': "Annual Surface Water Demand",
    'in_WaterSourceTypeCV': "Surface Water",
    'in_ReportingUnitName': df_basin['Basin'],
    'in_ReportingUnitNativeID': df_basin['BasinID'],
    'in_ReportingUnitTypeCV': "Basin",
    'in_ReportYearCV': df_basin['Year'],
    'in_Amount': df_basin['SWDemandTotal_Annual'],
}
columnList = list(aswd_ba_values)

# One output row per input row, sharing df_basin's index.
df_ASWD_ba = pd.DataFrame(columns=columnList, index=df_basin.index)
for col_name, col_value in aswd_ba_values.items():
    df_ASWD_ba[col_name] = col_value

# Row count and a short preview.
print(len(df_ASWD_ba))
df_ASWD_ba.head(3)

In [None]:
# Total Non-Consumptive Demand -- basin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_basin column it is copied from.
# NOTE(review): the method type here is "Estimated", while the subbasin table
# for the same variable (df_TNCD_sb) uses "Modeled" -- confirm which is intended.
tncd_ba_values = {
    'in_MethodTypeCV': "Estimated",
    'in_VariableSpecificCV': "Total Non-Consumptive Demand",
    'in_WaterSourceTypeCV': "Surface and Groundwater",
    'in_ReportingUnitName': df_basin['Basin'],
    'in_ReportingUnitNativeID': df_basin['BasinID'],
    'in_ReportingUnitTypeCV': "Basin",
    'in_ReportYearCV': df_basin['Year'],
    'in_Amount': df_basin['TotalNonConsumptiveUse_Annual'],
}
columnList = list(tncd_ba_values)

# One output row per input row, sharing df_basin's index.
df_TNCD_ba = pd.DataFrame(columns=columnList, index=df_basin.index)
for col_name, col_value in tncd_ba_values.items():
    df_TNCD_ba[col_name] = col_value

# Row count and a short preview.
print(len(df_TNCD_ba))
df_TNCD_ba.head(3)

In [None]:
# Net Surface Water Loss Demand -- basin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_basin column it is copied from.
# NOTE(review): the method type here is "Estimated", while the subbasin table
# for the same variable (df_NSWLD_sb) uses "Modeled" -- confirm which is intended.
nswld_ba_values = {
    'in_MethodTypeCV': "Estimated",
    'in_VariableSpecificCV': "Net Surface Water Loss Demand",
    'in_WaterSourceTypeCV': "Surface Water",
    'in_ReportingUnitName': df_basin['Basin'],
    'in_ReportingUnitNativeID': df_basin['BasinID'],
    'in_ReportingUnitTypeCV': "Basin",
    'in_ReportYearCV': df_basin['Year'],
    'in_Amount': df_basin['NetSurfaceWaterLoss_Annual'],
}
columnList = list(nswld_ba_values)

# One output row per input row, sharing df_basin's index.
df_NSWLD_ba = pd.DataFrame(columns=columnList, index=df_basin.index)
for col_name, col_value in nswld_ba_values.items():
    df_NSWLD_ba[col_name] = col_value

# Row count and a short preview.
print(len(df_NSWLD_ba))
df_NSWLD_ba.head(3)

In [None]:
# Stack the six basin-level variable tables into one basin output frame.
# Row labels are carried over from each table as-is.
frames = [
    df_AGWS_ba,
    df_ASWS_ba,
    df_AGWD_ba,
    df_ASWD_ba,
    df_TNCD_ba,
    df_NSWLD_ba,
]
df_BasinOUT = pd.concat(frames)

# Total row count, then display the combined frame.
print(len(df_BasinOUT.index))
df_BasinOUT

# Water Use Data - Subbasin

In [None]:
# Subbasin-level CSV input file. This rebinds fileInput (previously the basin
# file); the name is relative to the current working directory.
fileInput = "INSIGHT_FinalSubBasinData_20151102_input.csv"

In [None]:
# Load the subbasin CSV named by fileInput, then report its row count and preview it.
df_subbasin = pd.read_csv(fileInput)
print(len(df_subbasin.index))
df_subbasin.head(3)

In [None]:
# Annual Groundwater Supply -- subbasin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_subbasin column it is copied from.
agws_sb_values = {
    'in_MethodTypeCV': "Modeled",
    'in_VariableSpecificCV': "Annual Groundwater Supply",
    'in_WaterSourceTypeCV': "Groundwater",
    'in_ReportingUnitName': df_subbasin['Subbasin'],
    'in_ReportingUnitNativeID': df_subbasin['SubID'],
    'in_ReportingUnitTypeCV': "Subbasin",
    'in_ReportYearCV': df_subbasin['Year'],
    'in_Amount': df_subbasin['GWDP_Annual'],
}
columnList = list(agws_sb_values)

# One output row per input row, sharing df_subbasin's index.
df_AGWS_sb = pd.DataFrame(columns=columnList, index=df_subbasin.index)
for col_name, col_value in agws_sb_values.items():
    df_AGWS_sb[col_name] = col_value

# Row count and a short preview.
print(len(df_AGWS_sb))
df_AGWS_sb.head(3)

In [None]:
# Annual Surface Water Supply -- subbasin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_subbasin column it is copied from.
asws_sb_values = {
    'in_MethodTypeCV': "Modeled",
    'in_VariableSpecificCV': "Annual Surface Water Supply",
    'in_WaterSourceTypeCV': "Surface Water",
    'in_ReportingUnitName': df_subbasin['Subbasin'],
    'in_ReportingUnitNativeID': df_subbasin['SubID'],
    'in_ReportingUnitTypeCV': "Subbasin",
    'in_ReportYearCV': df_subbasin['Year'],
    'in_Amount': df_subbasin['SWDTotal_Annual'],
}
columnList = list(asws_sb_values)

# One output row per input row, sharing df_subbasin's index.
df_ASWS_sb = pd.DataFrame(columns=columnList, index=df_subbasin.index)
for col_name, col_value in asws_sb_values.items():
    df_ASWS_sb[col_name] = col_value

# Row count and a short preview.
print(len(df_ASWS_sb))
df_ASWS_sb.head(3)

In [None]:
# Annual Groundwater Demand -- subbasin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_subbasin column it is copied from.
agwd_sb_values = {
    'in_MethodTypeCV': "Modeled",
    'in_VariableSpecificCV': "Annual Groundwater Demand",
    'in_WaterSourceTypeCV': "Groundwater",
    'in_ReportingUnitName': df_subbasin['Subbasin'],
    'in_ReportingUnitNativeID': df_subbasin['SubID'],
    'in_ReportingUnitTypeCV': "Subbasin",
    'in_ReportYearCV': df_subbasin['Year'],
    'in_Amount': df_subbasin['GWCTotal_Annual'],
}
columnList = list(agwd_sb_values)

# One output row per input row, sharing df_subbasin's index.
df_AGWD_sb = pd.DataFrame(columns=columnList, index=df_subbasin.index)
for col_name, col_value in agwd_sb_values.items():
    df_AGWD_sb[col_name] = col_value

# Row count and a short preview.
print(len(df_AGWD_sb))
df_AGWD_sb.head(3)

In [None]:
# Annual Surface Water Demand -- subbasin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_subbasin column it is copied from.
aswd_sb_values = {
    'in_MethodTypeCV': "Modeled",
    'in_VariableSpecificCV': "Annual Surface Water Demand",
    'in_WaterSourceTypeCV': "Surface Water",
    'in_ReportingUnitName': df_subbasin['Subbasin'],
    'in_ReportingUnitNativeID': df_subbasin['SubID'],
    'in_ReportingUnitTypeCV': "Subbasin",
    'in_ReportYearCV': df_subbasin['Year'],
    'in_Amount': df_subbasin['SWDemandTotal_Annual'],
}
columnList = list(aswd_sb_values)

# One output row per input row, sharing df_subbasin's index.
df_ASWD_sb = pd.DataFrame(columns=columnList, index=df_subbasin.index)
for col_name, col_value in aswd_sb_values.items():
    df_ASWD_sb[col_name] = col_value

# Row count and a short preview.
print(len(df_ASWD_sb))
df_ASWD_sb.head(3)

In [None]:
# Total Non-Consumptive Demand -- subbasin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_subbasin column it is copied from.
# NOTE(review): the method type here is "Modeled", while the basin table for
# the same variable (df_TNCD_ba) uses "Estimated" -- confirm which is intended.
tncd_sb_values = {
    'in_MethodTypeCV': "Modeled",
    'in_VariableSpecificCV': "Total Non-Consumptive Demand",
    'in_WaterSourceTypeCV': "Surface and Groundwater",
    'in_ReportingUnitName': df_subbasin['Subbasin'],
    'in_ReportingUnitNativeID': df_subbasin['SubID'],
    'in_ReportingUnitTypeCV': "Subbasin",
    'in_ReportYearCV': df_subbasin['Year'],
    'in_Amount': df_subbasin['TotalNonConsumptiveUse_Annual'],
}
columnList = list(tncd_sb_values)

# One output row per input row, sharing df_subbasin's index.
df_TNCD_sb = pd.DataFrame(columns=columnList, index=df_subbasin.index)
for col_name, col_value in tncd_sb_values.items():
    df_TNCD_sb[col_name] = col_value

# Row count and a short preview.
print(len(df_TNCD_sb))
df_TNCD_sb.head(3)

In [None]:
# Net Surface Water Loss Demand -- subbasin level.
# Each output column is mapped to either a constant (broadcast to every row)
# or the df_subbasin column it is copied from.
# NOTE(review): the method type here is "Modeled", while the basin table for
# the same variable (df_NSWLD_ba) uses "Estimated" -- confirm which is intended.
nswld_sb_values = {
    'in_MethodTypeCV': "Modeled",
    'in_VariableSpecificCV': "Net Surface Water Loss Demand",
    'in_WaterSourceTypeCV': "Surface Water",
    'in_ReportingUnitName': df_subbasin['Subbasin'],
    'in_ReportingUnitNativeID': df_subbasin['SubID'],
    'in_ReportingUnitTypeCV': "Subbasin",
    'in_ReportYearCV': df_subbasin['Year'],
    'in_Amount': df_subbasin['NetSurfaceWaterLoss_Annual'],
}
columnList = list(nswld_sb_values)

# One output row per input row, sharing df_subbasin's index.
df_NSWLD_sb = pd.DataFrame(columns=columnList, index=df_subbasin.index)
for col_name, col_value in nswld_sb_values.items():
    df_NSWLD_sb[col_name] = col_value

# Row count and a short preview.
print(len(df_NSWLD_sb))
df_NSWLD_sb.head(3)

In [None]:
# Stack the six subbasin-level variable tables into one subbasin output frame.
# Row labels are carried over from each table as-is.
frames = [
    df_AGWS_sb,
    df_ASWS_sb,
    df_AGWD_sb,
    df_ASWD_sb,
    df_TNCD_sb,
    df_NSWLD_sb,
]
df_SubbasinOUT = pd.concat(frames)

# Total row count, then display the combined frame.
print(len(df_SubbasinOUT.index))
df_SubbasinOUT

### Concatenate Basin & Subbasin DataFrames

In [None]:
# Concatenate the Basin and Subbasin DataFrames into the single master table.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it each
# stacked variable table keeps the row labels of its source CSV, so the same
# label would occur many times in df_out.
frames = [df_BasinOUT, df_SubbasinOUT]
df_out = pd.concat(frames, ignore_index=True)

# Total row count, then display the combined frame.
print(len(df_out))
df_out

## WaDE Custom Elements (due to missing state info)

In [None]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the custom water source ID for one counter value.

    The value is converted with str() and appended to the fixed prefix
    "WaDENE_WS", e.g. 1 -> "WaDENE_WS1".
    """
    return f"WaDENE_WS{colrowValue!s}"

# Lookup table: one row per distinct water source type found in df_out
# (first occurrence kept, original row labels retained).
dfWaterSourceNativeID = df_out[['in_WaterSourceTypeCV']].drop_duplicates()

# Running counter 1..k over those rows, kept in a helper frame on the same index.
dftemp = pd.DataFrame(
    {"Count": range(1, len(dfWaterSourceNativeID.index) + 1)},
    index=dfWaterSourceNativeID.index,
)

# Turn each counter into its "WaDENE_WS<n>" identifier, in row order.
dfWaterSourceNativeID['in_WaterSourceNativeID'] = [
    assignWaterSourceNativeID(counter) for counter in dftemp["Count"]
]

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    """Look up the custom native ID for water source type ``A``.

    Reads the module-level ``dfWaterSourceNativeID`` table and returns the
    ``in_WaterSourceNativeID`` of the first row whose ``in_WaterSourceTypeCV``
    equals ``A``. Returns '' when ``A`` is blank or null, or when no row matches.
    """
    # Blank or missing input: nothing to look up.
    if A == '' or pd.isnull(A):
        return ''

    type_matches = dfWaterSourceNativeID['in_WaterSourceTypeCV'] == A
    found = dfWaterSourceNativeID.loc[type_matches, 'in_WaterSourceNativeID']
    return '' if found.empty else found.iloc[0]

# Tag every row with the custom native ID that matches its water source type,
# then display the updated frame.
source_types = df_out['in_WaterSourceTypeCV']
df_out['in_WaterSourceNativeID'] = source_types.apply(retrieveWaterSourceNativeID)
df_out

# Shapefile Data

In [None]:
# Basin Shapefile Data: read the basin shapefile (path relative to the working
# directory) with geopandas, then report its row count and preview it.
BasinSHPInput = "InsightShapeFiles/Basin.shp"
BasinShape = gpd.read_file(BasinSHPInput)

print(len(BasinShape.index))
BasinShape.head(3)

In [None]:
# Basin output shapefile dataframe.
# Each output column is mapped to either a constant or the BasinShape column
# it is copied from.
# NOTE(review): 'in_Geomerty' looks like a misspelling of "Geometry". It ends
# up as a column header in the exported CSV, so confirm what the downstream
# loader expects before renaming it.
basin_shp_values = {
    'in_ReportingUnitName': BasinShape['Basin'],
    'in_ReportingUnitNativeID': BasinShape['BID'],
    'in_ReportingUnitTypeCV': "Basin",
    'in_Geomerty': BasinShape['geometry'],
}
columnList = list(basin_shp_values)

# One output row per shapefile record, sharing BasinShape's index.
df_basin_shp = pd.DataFrame(columns=columnList, index=BasinShape.index)
for col_name, col_value in basin_shp_values.items():
    df_basin_shp[col_name] = col_value

# Row count and a short preview.
print(len(df_basin_shp))
df_basin_shp.head(3)

In [None]:
# Subbasin Shapefile Data: read the subbasin shapefile (path relative to the
# working directory) with geopandas, then report its row count and preview it.
SubbasinSHPInput = "InsightShapeFiles/Subbasin.shp"
SubbasinShape = gpd.read_file(SubbasinSHPInput)

print(len(SubbasinShape.index))
SubbasinShape.head(3)

In [None]:
# Subbasin output shapefile dataframe.
# Each output column is mapped to either a constant or the SubbasinShape
# column it is copied from.
# NOTE(review): 'in_Geomerty' looks like a misspelling of "Geometry". It ends
# up as a column header in the exported CSV, so confirm what the downstream
# loader expects before renaming it.
subbasin_shp_values = {
    'in_ReportingUnitName': SubbasinShape['Subbasin'],
    'in_ReportingUnitNativeID': SubbasinShape['SubID'],
    'in_ReportingUnitTypeCV': "Subbasin",
    'in_Geomerty': SubbasinShape['geometry'],
}
columnList = list(subbasin_shp_values)

# One output row per shapefile record, sharing SubbasinShape's index.
df_subbasin_shp = pd.DataFrame(columns=columnList, index=SubbasinShape.index)
for col_name, col_value in subbasin_shp_values.items():
    df_subbasin_shp[col_name] = col_value

# Row count and a short preview.
print(len(df_subbasin_shp))
df_subbasin_shp.head(3)

### Concatenate Basin & Subbasin DataFrames

In [None]:
# Concatenate the Basin and Subbasin shape DataFrames into one geometry table.
# ignore_index=True gives the result a fresh 0..n-1 index, so row labels from
# the two shapefiles cannot collide.
frames = [df_basin_shp, df_subbasin_shp]
df_shape_out = pd.concat(frames, ignore_index=True)

# Total row count, then display the combined frame.
print(len(df_shape_out))
df_shape_out

### Inspect Output Data & Export

In [None]:
# Print the dtype of every df_out column; row/column display limits are lifted
# only for the duration of the with-block.
unlimited_display = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*unlimited_display):
    print(df_out.dtypes)

In [None]:
# Print the dtype of every df_shape_out column; row/column display limits are
# lifted only for the duration of the with-block.
unlimited_display = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*unlimited_display):
    print(df_shape_out.dtypes)

In [None]:
# Export out to CSV. Both files go to the current working directory and omit
# the row index.
export_targets = (
    (df_out, 'P_neAggMaster.csv'),          # The output.
    (df_shape_out, 'P_neAggGeometry.csv'),  # The output geometry.
)
for export_frame, export_name in export_targets:
    export_frame.to_csv(export_name, index=False)