# Pre-processing Arizona Allocation data for WaDEQA upload.
Date Updated: 04/06/2020
Purpose:  To pre-process the Arizona data into one master file for simple DataFrame creation and extraction

Notes:
We are going to include all AZ groundwater for now.  They don't traditionally track priority date, but we want to include the volume and site info.

In [None]:
# Needed libraries
import os
import numpy as np
import pandas as pd
from datetime import datetime
pd.set_option('display.max_columns', 999)  # How to display all columns of a Pandas DataFrame in Jupyter Notebook

## Groundwater

In [None]:
# Groundwater input file names.  They are relative, so they resolve against the
# working directory selected below.
WellRegistry_Input = "WELLS_wellRegistry_input.csv"
GWSI_Input = "GWSI_SITES_input.csv"

# Switch the process working directory to the groundwater raw-input folder.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Arizona/WaterAllocation/RawInputData/Groundwater"
os.chdir(workingDir)

In [None]:
# Groundwater DataFrame creation.

# Well registry: REGISTRY_I is copied into a string column, REG_ID, that serves as
# the join key.
# NOTE(review): if REGISTRY_I is parsed as float (e.g. because of blanks), astype(str)
# yields text such as "123.0" - confirm the key format against the input file.
df_WR = pd.read_csv(WellRegistry_Input)
df_WR['REG_ID'] = df_WR['REGISTRY_I'].astype(str)

# GWSI sites: read REG_ID as text so the key has the same type on both sides of
# the merge.  Left to type inference, a numeric-looking column can come back as
# integers (or as a mix of integers and strings), which never equal the string
# keys built above.
df_GWSI = pd.read_csv(GWSI_Input, dtype={'REG_ID': str})

# Merge together into one.  A left join keeps every well-registry row; registry
# rows without a GWSI match get NaN in the GWSI columns.
df_WR_GWSI = pd.merge(df_WR, df_GWSI, left_on='REG_ID', right_on='REG_ID', how='left')
df_WR_GWSI.head(3)

In [None]:
# Create longitude and latitude values from the UTM easting/northing columns.
# The projection below is configured as UTM zone 12 on the WGS84 ellipsoid and is
# called with inverse=True, with the result unpacked as (long, lat).
# The original author believed AZ data is WGS 84 / UTM zone 12N - EPSG:32612.
# NOTE(review): confirm the source datum and that every well falls in zone 12.

from pyproj import Proj
myProj = Proj(proj='utm',zone=12, ellps='WGS84', preserve_units=False)
long, lat = myProj(df_WR_GWSI['UTM_X_METE'].values, df_WR_GWSI['UTM_Y_METE'].values, inverse=True)
# Store the converted coordinates under the output column names used later.
df_WR_GWSI['in_Latitude'] = lat
df_WR_GWSI['in_Longitude'] = long
df_WR_GWSI = df_WR_GWSI.replace(np.nan, '')  # Replaces NaN values with blank.
df_WR_GWSI.head(3)

In [None]:
# Create the (empty) output dataframe for groundwater.
# columnslist names every output column; the frame is built with the same index as
# df_WR_GWSI so that later column assignments line up row for row.
# NOTE(review): "in_SiteUUID" and "in_WaterSourceUUID" each appear twice in this
# list, so the frame gets duplicate column names.  The surface-water cell uses the
# same list; change both together or the later concatenation will not line up.
columnslist = [
    ### Method Info ###
    "in_ApplicableResourceTypeCV",
    
    ### Water Source Info ###
    "in_WaterSourceUUID",
    "in_WSGeometry",
    "in_GNISFeatureNameCV",
    "in_WaterQualityIndicatorCV",
    "in_WaterSourceName",
    "in_WaterSourceNativeID",
    "in_WaterSourceTypeCV",
    
    ### Site Info ###
    "in_SiteUUID",
    "in_CoordinateAccuracy",
    "in_CoordinateMethodCV",
    "in_County",
    "in_EPSGCodeCV",
    "in_Geometry",
    "in_GNISCodeCV",
    "in_HUC12",
    "in_HUC8",
    "in_Latitude",
    "in_Longitude",
    "in_NHDNetworkStatusCV",
    "in_NHDProductCV",
    "in_PODorPOUSite",
    "in_SiteName",
    "in_SiteNativeID",
    "in_SitePoint",
    "in_SiteTypeCV",
    "in_StateCV",
    "in_USGSSiteID",
    
    ### AllocationAmount_fact Info ###
    "in_AllocationApplicationDate",
    "in_AllocationAssociatedConsumptiveUseSiteIDs",
    "in_AllocationAssociatedWithdrawalSiteIDs",
    "in_AllocationBasisCV",
    "in_AllocationChangeApplicationIndicator",
    "in_AllocationCommunityWaterSupplySystem",
    "in_AllocationCropDutyAmount",
    "in_AllocationExpirationDate",
    "in_AllocationFlow_CFS",
    "in_AllocationLegalStatusCV",
    "in_AllocationNativeID",
    "in_AllocationOwner",
    "in_AllocationPriorityDate",
    "in_AllocationSDWISIdentifierCV",
    "in_AllocationTimeframeEnd",
    "in_AllocationTimeframeStart",
    "in_AllocationTypeCV",
    "in_AllocationVolume_AF",
    "in_BeneficialUseCategory",
    "in_CommunityWaterSupplySystem",
    "in_CropTypeCV",
    "in_CustomerTypeCV",
    "in_DataPublicationDate",
    "in_DataPublicationDOI",
    "in_ExemptOfVolumeFlowPriority",
    "in_GeneratedPowerCapacityMW",
    "in_IrrigatedAcreage",
    "in_IrrigationMethodCV",
    "in_LegacyAllocationIDs",
    "in_MethodUUID",
    "in_OrganizationUUID",
    "in_PopulationServed",
    "in_PowerType",
    "in_PrimaryUseCategory",
    "in_SiteUUID",  # duplicate of the Site Info entry above
    "in_VariableSpecificUUID",
    "in_WaterAllocationNativeURL",
    "in_WaterSourceUUID"  # duplicate of the Water Source Info entry above
]

# Every cell starts out empty (NaN); the next cell fills the columns that have data.
dfground = pd.DataFrame(columns=columnslist, index=df_WR_GWSI.index)

In [None]:
#############################################################################################
# Fill the groundwater output frame in a single pass.  Literal values are broadcast
# to every row; columns taken from df_WR_GWSI line up on the index the two frames
# share.  Columns not named here stay empty.
dfground = dfground.assign(**{
    # Method
    'in_ApplicableResourceTypeCV': 'Groundwater',

    # WaterSource
    'in_WaterSourceTypeCV': 'groundwater/well',
    'in_WaterSourceName': "Unspecified",

    # Site
    'in_County': df_WR_GWSI['COUNTY'],
    'in_Latitude': df_WR_GWSI['in_Latitude'],
    'in_Longitude': df_WR_GWSI['in_Longitude'],
    'in_PODorPOUSite': 'POD',
    'in_SiteName': "Groundwater",
    'in_SiteNativeID': df_WR_GWSI['REGISTRY_I'],
    'in_SiteTypeCV': df_WR_GWSI['WELL_TYPE_'],

    # AllocationAmount_fact
    # NOTE(review): PUMPRATE is copied into the CFS column with no unit
    # conversion - confirm the units of the source column.
    'in_AllocationFlow_CFS': df_WR_GWSI['PUMPRATE'],
    'in_AllocationNativeID': df_WR_GWSI['REGISTRY_I'],
    'in_AllocationOwner': df_WR_GWSI['OWNER_NAME'],
    'in_AllocationPriorityDate': '',
    'in_AllocationTimeframeEnd': '12/31',
    'in_AllocationTimeframeStart': '01/01',
    'in_AllocationTypeCV': df_WR_GWSI['WELL_TYPE_'],
    'in_BeneficialUseCategory': df_WR_GWSI['WATER_USE'],
    'in_ExemptOfVolumeFlowPriority': 1,
})

dfground

In [None]:
# Remove fully identical rows, then blank out every remaining NaN cell.
dfground = dfground.drop_duplicates().replace(np.nan, '')

## Surface Water

In [None]:
# Surface-water inputs.  Switch to the surface-water raw-input folder first; the
# file names below are relative to it.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Arizona/WaterAllocation/RawInputData/Surface_Water"
os.chdir(workingDir)

SWR_fillings_input = "SWR_fillings_POD_input.csv"
SWshed_input = "ADWR_Surface_Watersheds_POD_input.csv"

# Surface-water DataFrame creation.  Both files are decoded as ISO-8859-1.
df_SWRfill = pd.read_csv(SWR_fillings_input, encoding="ISO-8859-1")
df_SWshed = pd.read_csv(SWshed_input, encoding="ISO-8859-1")

In [None]:
# Restructure df_SWshed.
# - Rows whose WATER USE is "ANNUAL USE" are kept as the one-row-per-record frame.
# - All other rows are grouped by REG. NO and their values combined into a single
#   comma-separated string per column.
# - BenUseDict then maps REG. NO -> combined WATER USE text, for lookup against the
#   ANNUAL USE rows.

df_SWshed_AU = df_SWshed.loc[df_SWshed["WATER USE"] == "ANNUAL USE"].reset_index()
df_SWshed_Ben = df_SWshed.loc[df_SWshed["WATER USE"] != "ANNUAL USE"].reset_index()


def _join_unique(values):
    """Return the distinct non-null entries of *values* as one comma-separated string.

    Entries are converted to text and sorted so the result is the same on every
    run (plain set iteration order is not stable for strings).  Missing values
    are skipped so the text "nan" never ends up in the combined string.
    """
    return ','.join(sorted({str(elem) for elem in values if not pd.isnull(elem)}))


df_SWshed_Ben = df_SWshed_Ben.groupby('REG. NO').agg(_join_unique).replace(np.nan, '').reset_index()
df_SWshed_Ben['BenUse'] = df_SWshed_Ben['WATER USE']
df_SWshed_Ben['REGNO'] = df_SWshed_Ben['REG. NO']

# REG. NO -> combined beneficial-use text.
BenUseDict = pd.Series(df_SWshed_Ben.BenUse.values, index = df_SWshed_Ben.REGNO).to_dict()
def retrieveBenUse(colrowValue, lookup=None):
    """Return the combined beneficial-use text for one registration number.

    Args:
        colrowValue: The registration number to look up.
        lookup: Optional mapping of registration number -> beneficial-use text.
            When omitted, the module-level ``BenUseDict`` is used.

    Returns:
        The mapped text, or '' when the value is blank, null, not present in
        the mapping, or cannot be used as a mapping key.
    """
    if colrowValue == '' or pd.isnull(colrowValue):
        return ''
    # Resolve the default only when a lookup is actually needed.
    if lookup is None:
        lookup = BenUseDict
    try:
        return lookup.get(colrowValue, '')
    except TypeError:
        # Unhashable values cannot be mapping keys; treat them as "no match".
        return ''
# Attach the combined beneficial-use text to each ANNUAL USE row via its REG. NO.
# Mapping over the key column avoids building a row object per record, and it
# still yields a Series when the frame has no rows (a row-wise DataFrame.apply
# can hand back a DataFrame in that case, which cannot be assigned to one column).
df_SWshed_AU['BenUse'] = df_SWshed_AU['REG. NO'].map(retrieveBenUse)

In [None]:
# Merge the surface-water filings with the ANNUAL USE watershed rows.
# Left join: every filing row is kept; filings with no matching REG. NO get NaN in
# the watershed columns.
# NOTE(review): FILE_NO and REG. NO must share the same type and format for keys
# to match - confirm against the input files.
df_SWFill_SWshed = pd.merge(df_SWRfill, df_SWshed_AU, left_on='FILE_NO', right_on='REG. NO', how='left')

In [None]:
# Create longitude and latitude values from the UTM easting/northing columns.
# The projection below is configured as UTM zone 12 on the WGS84 ellipsoid and is
# called with inverse=True, with the result unpacked as (long, lat).
# The original author believed AZ data is WGS 84 / UTM zone 12N - EPSG:32612.
# NOTE(review): the source columns are named *_UTMNAD83 while the projection uses
# ellps='WGS84' - confirm the datum difference is acceptable for this data.

from pyproj import Proj
myProj = Proj(proj='utm',zone=12, ellps='WGS84', preserve_units=False)
long, lat = myProj(df_SWFill_SWshed['X_UTMNAD83'].values, df_SWFill_SWshed['Y_UTMNAD83'].values, inverse=True)
# Store the converted coordinates under the output column names used later.
df_SWFill_SWshed['in_Latitude'] = lat
df_SWFill_SWshed['in_Longitude'] = long
df_SWFill_SWshed = df_SWFill_SWshed.replace(np.nan, '')  # Replaces NaN values with blank.
df_SWFill_SWshed.head(3)

In [None]:
# Creating AllocationFlow_CFS.
# The quantity arrives as a value and a unit.  Only values reported in
# "Cubic Feet Per Second" are kept as a flow; every other unit is left blank.


def CreateFlow_CFS(val, unit):
    """Return *val* as a float flow in CFS, or "" when it is not a usable CFS value.

    Args:
        val: The reported quantity (number or numeric text).
        unit: The reported unit text.

    Returns:
        ``float(val)`` when *unit* is exactly "Cubic Feet Per Second" and *val*
        is numeric; otherwise the empty string.
    """
    if val == '' or pd.isnull(val):
        return ""
    if unit != "Cubic Feet Per Second":
        return ""
    try:
        return float(val)
    except (TypeError, ValueError):
        # Non-numeric text or an unsupported type: leave the flow blank.
        return ""

# Compute the CFS flow for every record from its value/unit pair.
# Walking the two columns in lockstep avoids building a row object per record and
# produces a plain list, so the assignment also works when the frame has no rows.
df_SWFill_SWshed['in_AllocationFlow_CFS'] = [
    CreateFlow_CFS(val, unit)
    for val, unit in zip(df_SWFill_SWshed['QUANTITY_Val'], df_SWFill_SWshed['QUANTITY_Unit'])
]
df_SWFill_SWshed.head(3)

In [None]:
# Creating AllocationVolume_AF.
# The quantity arrives as a value and a unit; the value is converted to acre-feet
# according to the unit text.
#
# Units handled:
#   "Acre-Feet Per Annum"
#   "Acre-Feet"
#   "Acre-Feet Total"
#   "ACRES"
#   "CFT - Cubic Feet Total"
#   "Feet"
#   "Gallons"
#   "Gallons Per Annum"
#   "Miners Inches Per Annum"
#   "MIT - Miners Inches Total"

# Units converted by division: acre-feet = value / divisor.
_AF_DIVISOR_BY_UNIT = {
    "Acre-Feet Per Annum": 1.0,
    "Acre-Feet": 1.0,
    "Acre-Feet Total": 1.0,
    # NOTE(review): acres are an area, yet the value is passed through unchanged
    # as acre-feet - confirm this is intended.
    "ACRES": 1.0,
    # One acre-foot is exactly 43,560 cubic feet (43,560 sq ft x 1 ft).
    "CFT - Cubic Feet Total": 43560.0,
    # NOTE(review): "Feet" is converted as if it were cubic feet - confirm.
    "Feet": 43560.0,
    # Gallons per acre-foot.
    "Gallons": 325851.0,
    "Gallons Per Annum": 325851.0,
}

# Units converted by multiplication: acre-feet = value * factor.
# NOTE(review): the derivation of the miner's-inch factor is not documented
# here - confirm it.
_AF_MULTIPLIER_BY_UNIT = {
    "Miners Inches Per Annum": 0.055214457974269576,
    "MIT - Miners Inches Total": 0.055214457974269576,
}


def CreateVolume_AF(val, unit):
    """Return *val* converted to acre-feet, or "" when it cannot be converted.

    Args:
        val: The reported quantity (number or numeric text).
        unit: The reported unit text; must match one of the handled units exactly.

    Returns:
        The volume in acre-feet as a float, or the empty string when *val* is
        blank/null/non-numeric or *unit* is not a handled volume unit.
    """
    if val == '' or pd.isnull(val):
        return ""
    try:
        number = float(val)
        if unit in _AF_DIVISOR_BY_UNIT:
            return number / _AF_DIVISOR_BY_UNIT[unit]
        if unit in _AF_MULTIPLIER_BY_UNIT:
            return number * _AF_MULTIPLIER_BY_UNIT[unit]
    except (TypeError, ValueError):
        # Non-numeric value, or a unit that cannot be used as a lookup key.
        return ""
    # Unit is not a volume unit handled here (for example a flow unit).
    return ""

# Compute the acre-feet volume for every record from its value/unit pair.
# Walking the two columns in lockstep avoids building a row object per record and
# produces a plain list, so the assignment also works when the frame has no rows.
df_SWFill_SWshed['in_AllocationVolume_AF'] = [
    CreateVolume_AF(val, unit)
    for val, unit in zip(df_SWFill_SWshed['QUANTITY_Val'], df_SWFill_SWshed['QUANTITY_Unit'])
]
df_SWFill_SWshed.head(3)

In [None]:
# Create the (empty) output dataframe for surface water.
# columnslist names every output column; the frame is built with the same index as
# df_SWFill_SWshed so that later column assignments line up row for row.
# NOTE(review): "in_SiteUUID" and "in_WaterSourceUUID" each appear twice in this
# list, so the frame gets duplicate column names.  The groundwater cell uses the
# same list; change both together or the later concatenation will not line up.
columnslist = [
    ### Method Info ###
    "in_ApplicableResourceTypeCV",
    
    ### Water Source Info ###
    "in_WaterSourceUUID",
    "in_WSGeometry",
    "in_GNISFeatureNameCV",
    "in_WaterQualityIndicatorCV",
    "in_WaterSourceName",
    "in_WaterSourceNativeID",
    "in_WaterSourceTypeCV",
    
    ### Site Info ###
    "in_SiteUUID",
    "in_CoordinateAccuracy",
    "in_CoordinateMethodCV",
    "in_County",
    "in_EPSGCodeCV",
    "in_Geometry",
    "in_GNISCodeCV",
    "in_HUC12",
    "in_HUC8",
    "in_Latitude",
    "in_Longitude",
    "in_NHDNetworkStatusCV",
    "in_NHDProductCV",
    "in_PODorPOUSite",
    "in_SiteName",
    "in_SiteNativeID",
    "in_SitePoint",
    "in_SiteTypeCV",
    "in_StateCV",
    "in_USGSSiteID",
    
    ### AllocationAmount_fact Info ###
    "in_AllocationApplicationDate",
    "in_AllocationAssociatedConsumptiveUseSiteIDs",
    "in_AllocationAssociatedWithdrawalSiteIDs",
    "in_AllocationBasisCV",
    "in_AllocationChangeApplicationIndicator",
    "in_AllocationCommunityWaterSupplySystem",
    "in_AllocationCropDutyAmount",
    "in_AllocationExpirationDate",
    "in_AllocationFlow_CFS",
    "in_AllocationLegalStatusCV",
    "in_AllocationNativeID",
    "in_AllocationOwner",
    "in_AllocationPriorityDate",
    "in_AllocationSDWISIdentifierCV",
    "in_AllocationTimeframeEnd",
    "in_AllocationTimeframeStart",
    "in_AllocationTypeCV",
    "in_AllocationVolume_AF",
    "in_BeneficialUseCategory",
    "in_CommunityWaterSupplySystem",
    "in_CropTypeCV",
    "in_CustomerTypeCV",
    "in_DataPublicationDate",
    "in_DataPublicationDOI",
    "in_ExemptOfVolumeFlowPriority",
    "in_GeneratedPowerCapacityMW",
    "in_IrrigatedAcreage",
    "in_IrrigationMethodCV",
    "in_LegacyAllocationIDs",
    "in_MethodUUID",
    "in_OrganizationUUID",
    "in_PopulationServed",
    "in_PowerType",
    "in_PrimaryUseCategory",
    "in_SiteUUID",  # duplicate of the Site Info entry above
    "in_VariableSpecificUUID",
    "in_WaterAllocationNativeURL",
    "in_WaterSourceUUID"  # duplicate of the Water Source Info entry above
]

# Every cell starts out empty (NaN); the next cell fills the columns that have data.
df_Surface = pd.DataFrame(columns=columnslist, index=df_SWFill_SWshed.index)

In [None]:
# For creating WaterSourceName
def assignWaterSourceName(colrowValue):
    """Return a cleaned water-source name, or "Unspecified" when none is given.

    Args:
        colrowValue: The raw water-source value (any type; converted to text).

    Returns:
        The value as text with surrounding whitespace removed, or "Unspecified"
        when the value is null, empty, or whitespace only.
    """
    if pd.isnull(colrowValue):
        return "Unspecified"
    name = str(colrowValue).strip()
    # Check emptiness after stripping so whitespace-only input is not returned as ''.
    return name if name else "Unspecified"

# For creating SiteNativeID
def assignSiteNativeID(colrowValue):
    """Return a cleaned site native ID, or "Unspecified" when none is given.

    Args:
        colrowValue: The raw site identifier (any type; converted to text).

    Returns:
        The value as text with surrounding whitespace removed, or "Unspecified"
        when the value is null, empty, or whitespace only.
    """
    # Test for null before converting to text: once converted, None and NaN
    # become the ordinary strings 'None' and 'nan' and no longer look missing.
    if pd.isnull(colrowValue):
        return "Unspecified"
    site_id = str(colrowValue).strip()
    return site_id if site_id else "Unspecified"

#############################################################################################
# Fill the surface-water output frame.  Literal values are broadcast to every row;
# columns taken from df_SWFill_SWshed line up on the index the two frames share.
# Columns not assigned here stay empty.

#Method
df_Surface['in_ApplicableResourceTypeCV'] = 'Surface Water'

#WaterSource
df_Surface['in_WaterSourceTypeCV'] = 'Surface Water'
# Map over the single source column rather than applying row by row: it is
# cheaper and still yields a Series when the frame has no rows.
df_Surface['in_WaterSourceName'] = df_SWFill_SWshed['WATERSOURC'].map(assignWaterSourceName)

#Site
df_Surface['in_County'] = df_SWFill_SWshed['COUNTY']
df_Surface['in_Latitude'] = df_SWFill_SWshed['in_Latitude']
df_Surface['in_Longitude'] = df_SWFill_SWshed['in_Longitude']
df_Surface['in_PODorPOUSite'] = "POD"
df_Surface['in_SiteName'] = "Surface Water"
df_Surface['in_SiteNativeID'] = df_SWFill_SWshed['CADASTRAL'].map(assignSiteNativeID)
df_Surface['in_SiteTypeCV'] = "Unspecified"

#AllocationAmount_fact
df_Surface['in_AllocationFlow_CFS'] = df_SWFill_SWshed['in_AllocationFlow_CFS']
df_Surface['in_AllocationVolume_AF'] = df_SWFill_SWshed['in_AllocationVolume_AF']
# NOTE(review): the "_x" suffix is what pandas gives a left-hand column whose name
# collided in the merge - presumably STATUS exists in both inputs; confirm.
df_Surface['in_AllocationLegalStatusCV'] = df_SWFill_SWshed['STATUS_x']
df_Surface['in_AllocationNativeID'] = df_SWFill_SWshed['FILE_NO']
df_Surface['in_AllocationOwner'] = df_SWFill_SWshed['HLDRNAME']
df_Surface['in_AllocationTimeframeEnd'] = '12/31'
df_Surface['in_AllocationTimeframeStart'] = '01/01'
df_Surface['in_AllocationPriorityDate'] = df_SWFill_SWshed['PRIOR_DATE']
df_Surface['in_BeneficialUseCategory'] = df_SWFill_SWshed['BenUse']
df_Surface['in_ExemptOfVolumeFlowPriority'] = 0

In [None]:
# Drop Duplicates
# NaN cells are blanked afterwards, as is done for the groundwater frame, so both
# halves of the concatenated output use '' for missing values.
df_Surface = df_Surface.drop_duplicates().replace(np.nan, '')

## Concatenate and Export Outputs

In [None]:
# Concatenate the groundwater and surface-water frames row-wise into one output frame.
# Each frame keeps its own index labels (ignore_index is not set), so labels can
# repeat in dfout.
frames = [dfground, df_Surface]
dfout = pd.concat(frames)

In [None]:
# Print the dtype of every output column.  The option context lifts the pandas
# row/column display limits for this block only.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(dfout.dtypes)

In [None]:
# Export to the finished file.
# Switch to the parent raw-input folder, which is where the output file is written.
workingDir = "C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/Arizona/WaterAllocation/RawInputData"
os.chdir(workingDir)

# index=False leaves the DataFrame index out of the CSV.
dfout.to_csv('P_ArizonaMaster.csv', index=False)  # The output