# Working with WY Aggregated Data

Pre-processing input data for a smoother upload experience of the state data to the WaDE 2.0 database.
Using geopandas to read in the shapefile, and converting to WKT for ReportingUnit geometry.

In [None]:
#Needed Libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
import geopandas as gpd # the library that lets us read in shapefiles
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

#Setting work directory, reading inputs, creating dataframe
# NOTE(review): this is an absolute, machine-specific path; it must be edited
# before the notebook can run on another machine.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Wyoming/AggregatedAmounts/RawInputData"
os.chdir(workingDir)  # the relative input/output file names used later resolve against this directory

# Water Use Data

In [None]:
#CSV input file
# File name only; it is resolved against the current working directory when read.
fileInput = "WYAggData_input.csv"

In [None]:
# Read the input CSV into a dataframe and display it to check the input.
# NOTE(review): the original comment read "check input 2011"; what "2011"
# refers to is not evident from this file.
df = pd.read_csv(fileInput)
df

In [None]:
# Column labels "2000" through "2018" (as strings), one per report year.
droplist = [str(year) for year in range(2000, 2019)]

# Keep only the non-year columns of the input. reset_index() renumbers the
# rows and keeps the previous row labels in a new 'index' column.
dftempK = df.drop(columns=droplist).reset_index()
dftempK

In [None]:
# Remove the three descriptive columns from the input, leaving the remaining
# (year) columns in a new dataframe.
dftempY = df.drop(columns=['Water Use by Basin', 'UseType', 'Source'])
dftempY

# dftempY = dftempY.T
# dftempY

In [None]:
# Reshape from wide to long: one output row per (input row, column) pair.
dftempY = (
    dftempY.T                                    # column labels become the row index
    .unstack()                                   # Series keyed by (input row label, column label)
    .reset_index(level=1, name='Amount')         # column label -> 'level_1', cell value -> 'Amount'
    .rename(columns={'level_1': 'ReportYear'})   # the former column label is the report year
    [['Amount', 'ReportYear']]                   # fix the output column order
)
dftempY

In [None]:
# Attach the descriptive columns from dftempK. Assigning a Series as a column
# aligns on index labels, so every long-format row receives the values of the
# input row whose label it carries (assumes dftempY is still indexed by the
# input row labels at this point).
for descCol in ['Water Use by Basin', 'UseType', 'Source']:
    dftempY[descCol] = dftempK[descCol]

dftempY

## WaDE Custom Elements (due to missing state info)

In [None]:
# Creating WaDE Custom reporting unit native ID for easy identification
# ----------------------------------------------------------------------------------------------------

# Create temp ReportingUnit native id dataframe of unique site.
def assignReportNID(colrowValue):
    """Return the WaDE custom ReportingUnit native ID for a sequence value.

    The value is converted with str() and appended to the "WaDEWY_RU" prefix.
    """
    return f"WaDEWY_RU{colrowValue!s}"

# One row per distinct basin name (first occurrence kept, with its row label).
dfReportNativeID = (
    dftempY[['Water Use by Basin']]
    .rename(columns={'Water Use by Basin': 'in_ReportingUnitName'})
    .drop_duplicates()
)

# Number the distinct names 1..N on the same index, then turn each number
# into a native ID string.
dftemp = pd.DataFrame(
    {"Count": range(1, len(dfReportNativeID.index) + 1)},
    index=dfReportNativeID.index,
)
dfReportNativeID['in_ReportingUnitNativeID'] = dftemp.apply(
    lambda rec: assignReportNID(rec['Count']), axis=1
)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom ReportingUnit native id
def retrieveReportNID(A):
    """Look up the WaDE custom ReportingUnit native ID for a reporting unit name.

    Returns '' when A is an empty string or null, or when no row of
    dfReportNativeID has that name; otherwise returns the ID of the first
    matching row.
    """
    if A == '' or pd.isnull(A):
        return ''
    isMatch = dfReportNativeID['in_ReportingUnitName'] == A
    matches = dfReportNativeID.loc[isMatch, 'in_ReportingUnitNativeID']
    return '' if matches.empty else matches.iloc[0]

# Map each record's basin name to its WaDE custom ReportingUnit native ID.
# Series.map calls the lookup once per value of the one column it needs,
# rather than building a full row Series for every record as
# DataFrame.apply(axis=1) does; it also yields an empty Series (instead of an
# empty frame) when there are no rows.
dftempY['in_ReportingUnitNativeID'] = dftempY['Water Use by Basin'].map(retrieveReportNID)
dftempY

In [None]:
# Creating WaDE Custom water source native ID for easy water source identification
# ----------------------------------------------------------------------------------------------------

# Create temp WaterSourceNativeID dataframe of unique water source.
def assignWaterSourceNativeID(colrowValue):
    """Return the WaDE custom water source native ID for a sequence value.

    The value is converted with str() and appended to the "WaDEWY_WS" prefix.
    """
    return f"WaDEWY_WS{colrowValue!s}"

# One row per distinct 'Source' value (first occurrence kept, with its row label).
dfWaterSourceNativeID = (
    dftempY[['Source']]
    .rename(columns={'Source': 'in_WaterSourceTypeCV'})
    .drop_duplicates()
)

# Number the distinct sources 1..N on the same index, then turn each number
# into a native ID string.
dftemp = pd.DataFrame(
    {"Count": range(1, len(dfWaterSourceNativeID.index) + 1)},
    index=dfWaterSourceNativeID.index,
)
dfWaterSourceNativeID['in_WaterSourceNativeID'] = dftemp.apply(
    lambda rec: assignWaterSourceNativeID(rec['Count']), axis=1
)

# ----------------------------------------------------------------------------------------------------

# Retrieve WaDE Custom water source native ID
def retrieveWaterSourceNativeID(A):
    """Look up the WaDE custom water source native ID for a source value.

    Returns '' when A is an empty string or null, or when no row of
    dfWaterSourceNativeID has that value in 'in_WaterSourceTypeCV'; otherwise
    returns the ID of the first matching row.
    """
    if A == '' or pd.isnull(A):
        return ''
    isMatch = dfWaterSourceNativeID['in_WaterSourceTypeCV'] == A
    matches = dfWaterSourceNativeID.loc[isMatch, 'in_WaterSourceNativeID']
    return '' if matches.empty else matches.iloc[0]

# Map each record's 'Source' value to its WaDE custom water source native ID.
# Series.map calls the lookup once per value of the one column it needs,
# rather than building a full row Series for every record as
# DataFrame.apply(axis=1) does; it also yields an empty Series (instead of an
# empty frame) when there are no rows.
dftempY['in_WaterSourceNativeID'] = dftempY['Source'].map(retrieveWaterSourceNativeID)
dftempY

# Shapefile Data

In [None]:
# Shapefile input
# The path is relative, so it is resolved against the current working directory.
shpInput = "WYBasinShapefile/BasinShapefile.shp"
BasinShape = gpd.read_file(shpInput)  # read the shapefile with geopandas
BasinShape

In [None]:
#Create geo output
# Copy the reporting unit name and geometry out of the shapefile table into a
# plain two-column dataframe, then drop fully duplicated rows (first kept).
columnsList = ['RU_Name', 'geometry']
dfshape = pd.DataFrame(columns=columnsList)
for outCol, shpCol in zip(columnsList, ['Reportin_3', 'geometry']):
    dfshape[outCol] = BasinShape[shpCol]
dfshape = dfshape.drop_duplicates()
dfshape

### Inspect Output Data & Export

In [None]:
# Print the dtype of every column of dftempY; the option context lifts the
# pandas row/column display limits for this print only.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(dftempY.dtypes)

In [None]:
# Print the dtype of every column of dfshape; the option context lifts the
# pandas row/column display limits for this print only.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(dfshape.dtypes)

In [None]:
# Export out to CSV (the dataframe index is not written to either file).
exportTargets = (
    (dftempY, 'P_wyAggMaster.csv'),  # The output.
    (dfshape, 'P_wyGeometry.csv'),   # The output geometry.
)
for outFrame, outName in exportTargets:
    outFrame.to_csv(outName, index=False)