In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import os
from utilityFunctions import assignReportingUnitsID

In [None]:
# Working directory. Defaults to the original author's checkout, but can be
# overridden through the WADE_WORKING_DIR environment variable so the notebook
# runs on other machines without editing this cell.
working_dir = os.environ.get(
    "WADE_WORKING_DIR",
    "/Users/joseph/Desktop/WSWC/DataMigration/MappingStatesDataToWaDE2.0/Wyoming/AggregatedAmounts/",
)
os.chdir(working_dir)
# Sub-directory destinations, relative to working_dir. The trailing slash is
# required because file paths are built by plain string concatenation.
PID = "ProcessedInputData/"
RD = "RawInputData/"

# Set year as most current data permits; year columns after it are not pivoted.
year = 2018


In [None]:
# Column layout of the aggregated-amounts output table, in output order.
target_columns = [
    "OrganizationUUID",
    "VariableSpecificUUID",
    "ReportingUnitUUID",
    "PrimaryUseCategory",
    "BeneficialUseCategory",
    "WaterSourceUUID",
    "MethodUUID",
    "TimeframeStart",
    "TimeframeEnd",
    "DataPublicationDate",
    "DataPublicationDOI",
    "ReportYearCV",
    "Amount",
    "PopulationServed",
    "PowerGeneratedGWh",
    "IrrigatedAcreage",
    "InterbasinTransferToID",
    "InterbasinTransferFromID",
    "CustomerTypeCV",
    "AllocationCropDutyAmount",
    "IrrigationMethodCV",
    "CropTypeCV",
    "CommunityWaterSupplySystem",
    "SDWISIdentifierCV",
]
# Empty frame carrying only the column layout; later cells fill it in.
outdf100 = pd.DataFrame(columns=target_columns)

In [None]:
# Input workbook with the water use data (read from the raw-input folder).
# The name is split across two literals only to keep the line short.
fileInput1 = (
    "Agricultural, Municipal, Domestic, and Industrial Use Indecies"
    "_Draft_For Delivery to WWDC_2019.0703.xlsx"
)

# Lookup table of reporting units
inp_repunts = "reportingunits.csv"
# Output file for the aggregated amounts
out_agamount = "aggregatedamounts.csv"

In [None]:
# Load the inputs into dataframes.
# Aggregated water use data: the third sheet row is the header, the two
# unnamed columns get descriptive names, and every NaN becomes the sentinel
# 'blank' so the following cells can detect gaps with a plain equality check.
df = (
    pd.read_excel(RD + fileInput1, header=2)
    .rename(columns={'Unnamed: 1': 'UseType', 'Unnamed: 2': 'Source'})
    .fillna('blank')
)
# Reporting units (basins) lookup
df400 = pd.read_csv(PID + inp_repunts, encoding="ISO-8859-1")


In [None]:
# Distribute basin names and use types over blanks: a 'blank' cell inherits
# the closest non-blank value above it in the same column.
# Done as a vectorized forward-fill instead of two row-by-row loops, which also
# removes the dependence on a row labelled 0 existing in the index.
for label_column in ['Water Use by Basin', 'UseType']:
    # Hide the sentinel so ffill treats those cells as gaps.
    carried_down = df[label_column].where(df[label_column] != 'blank').ffill()
    # Blanks above the first real value have nothing to inherit; they keep
    # the 'blank' sentinel, exactly as before.
    df[label_column] = carried_down.fillna('blank')

In [None]:
# Pivot the sheet from horizontal (one column per year) to vertical (one row
# per basin / use type / source / year).
print('Pivoting data...')
# Delete rows with Source = "Total"
df = df[df.Source != 'Total']

id_columns = ['Water Use by Basin', 'UseType', 'Source']
# Year columns are the integer-labelled headers no later than `year`.
# np.integer is accepted too, since the header type depends on how the
# workbook was parsed; bool is excluded because it is a subclass of int.
year_columns = [
    col for col in df.columns
    if isinstance(col, (int, np.integer)) and not isinstance(col, bool) and col <= year
]
if not year_columns:
    raise ValueError(
        'No integer year columns <= {} found in the input sheet.'.format(year))

# Build one long-format frame per year and concatenate once at the end
# (DataFrame.append was removed in pandas 2.0).
yearly_frames = []
for col in year_columns:
    yearly = df[id_columns].copy()
    # Non-numeric cells (e.g. the 'blank' sentinel) become NaN so that the
    # null-amount cell further down can drop them.
    yearly['Amount'] = pd.to_numeric(df[col], errors='coerce')
    yearly['Year'] = int(col)
    yearly_frames.append(yearly)
    print(col)

df3 = pd.concat(yearly_frames, ignore_index=True)

# Remove the statewide index rows; a boolean filter does not fail when the
# label is absent, unlike drop() on the index.
df3 = df3[df3['Water Use by Basin'] != 'Statewide Water Use Index'].reset_index(drop=True)
df3['Year'] = df3['Year'].astype(int)
df100 = df3.copy()

In [None]:
# Water source id for each amount use type.
print("WatersourceUUID and amount...")
# Map the native source labels onto the WaterSourceUUID values that the
# metadata cell writes to watersources.csv.
df100 = df100.replace(
    ['Surface Water', 'Ground Water', 'Cross Basin Diversion'],
    ['Fresh_Surface', 'Fresh_Ground', 'CrossBasin_Diversion'])

# Assign reporting unit from reportingunits.csv (df400).
# NOTE(review): assignReportingUnitsID comes from utilityFunctions; presumably
# it returns the matching UUID, or '' when no basin matches -- confirm.
df100['ReportingUnitUUID'] = df100['Water Use by Basin'].apply(
    lambda basin: assignReportingUnitsID(basin, df400))

# Previously the source labels were replaced on df100 while the reporting
# units were written to df3, and the copy cell below reads df3 -- so the
# replaced labels never reached the output. Keep both frames identical.
df3 = df100.copy()

In [None]:
print("Copying columns...")
# Pairs of (column in df3, column in outdf100) to carry over.
column_pairs = [
    ("Source", "WaterSourceUUID"),
    ("ReportingUnitUUID", "ReportingUnitUUID"),
    ("Year", "ReportYearCV"),
    ("Amount", "Amount"),
    ("UseType", "BeneficialUseCategory"),
]
srsCols = [source for source, _ in column_pairs]
destCols = [destination for _, destination in column_pairs]

# Copy one column at a time, in the listed order.
for destination, source in zip(destCols, srsCols):
    outdf100[destination] = df3[source]

In [None]:
# Hardcoded values: the same constant is written to every row of the column.
hardcoded_values = {
    "OrganizationUUID": "WWDO",
    "VariableSpecificUUID": "Consumptive Use",
    "MethodUUID": "WWDO_Water_uses",
    # check this later
    "PrimaryUseCategory": "Irrigation",
    "TimeframeStart": "01/01",
    "TimeframeEnd": "12/31",
    # Publication date is the day the notebook is run.
    "DataPublicationDate": datetime.now().strftime('%m/%d/%Y'),
}
for column_name, constant in hardcoded_values.items():
    outdf100[column_name] = constant

In [None]:
# Rows without an Amount are saved to a _missing.csv file and then removed.
print("Droping null amounts...")
# NaN becomes an empty string everywhere, so "missing" is a simple == '' test.
outdf100 = outdf100.replace(np.nan, '')

amount_missing = outdf100["Amount"] == ''
outdf100purge = outdf100.loc[amount_missing]
if amount_missing.any():
    # The index is written too, so it shows each row's position before removal.
    outdf100purge.to_csv(PID+'aggregatedallocations_missing.csv')
    outdf100 = outdf100.drop(outdf100purge.index).reset_index(drop=True)

In [None]:
# Rows whose reporting unit UUID is an empty string are removed (not saved).
print("Droping null ReportingUnitID ...")
reporting_unit_missing = outdf100["ReportingUnitUUID"] == ''
outdf100nullPR = outdf100.loc[reporting_unit_missing]
if reporting_unit_missing.any():
    outdf100 = outdf100.drop(outdf100nullPR.index).reset_index(drop=True)

In [None]:
# Safety net: save fully duplicated rows to a CSV, then keep only the first
# occurrence of each.
print("Droping duplicates...")
is_repeat = outdf100.duplicated()
outdf100Duplicated = outdf100.loc[is_repeat]
if is_repeat.any():
    outdf100Duplicated.to_csv(PID+"aggregatedallocations_duplicaterows.csv")
    outdf100 = outdf100.drop_duplicates().reset_index(drop=True)

In [None]:
print('Generating WaDE metadata tables...')
#TODO:  Need hardcoded values for all of these.  Adapting Tsegenah's from New Mexico for time being.
# Each table below is built from literals and written to its own CSV under
# PID. The unused `dtypesx` assignments were removed (the Methods one was a
# stale copy of the Organizations schema), and each table now has its own
# column/value names instead of reusing `columns` and `inpVals`.

# Methods: one row; MethodUUID matches the value stamped on the output rows.
method_columns = ['MethodUUID', 'MethodName', 'MethodDescription', 'MethodNEMILink',
                  'ApplicableResourceTypeCV', 'MethodTypeCV', 'DataCoverageValue',
                  'DataQualityValueCV', 'DataConfidenceValue']
method_values = ['WWDO_Water_uses', 'Wyoming Water Uses', 'Withdrawal Volume Estimate',
                 np.nan, 'Unspecified', 'Water use', np.nan, np.nan, np.nan]
outdf_methods = pd.DataFrame([method_values], columns=method_columns)
outdf_methods.to_csv(PID+'methods.csv', index=False)


# Water sources: one row per source id used for the Source column.
water_source_columns = ['WaterSourceUUID', 'WaterSourceNativeID', 'WaterSourceName',
                        'WaterSourceTypeCV', 'WaterQualityIndicatorCV',
                        'GNISFeatureNameCV', 'Geometry']
water_source_ids = ['Fresh_Surface', 'Fresh_Ground', 'CrossBasin_Diversion']
# Columns not listed in the dict are created empty (NaN) by `columns=`.
outdf_waterSources = pd.DataFrame(
    {
        'WaterSourceUUID': water_source_ids,
        'WaterSourceNativeID': water_source_ids,
        'WaterSourceTypeCV': 'Unspecified',
        'WaterQualityIndicatorCV': 'Fresh',
    },
    columns=water_source_columns,
)
outdf_waterSources.to_csv(PID+'watersources.csv', index=False)


# Variables: one row; VariableSpecificUUID matches the output rows.
variable_columns = ['VariableSpecificUUID', 'VariableSpecificCV', 'VariableCV',
                    'AggregationStatisticCV', 'AggregationInterval',
                    'AggregationIntervalUnitCV', 'ReportYearStartMonth',
                    'ReportYearTypeCV', 'AmountUnitCV', 'MaximumAmountUnitCV']
variable_values = ['Consumptive Use', 'Consumptive Use', 'Consumptive Use', 'Cumulative',
                   1, 'Year', '1-Oct', 'WaterYear', 'Acre feet', '']
outdf_variables = pd.DataFrame([variable_values], columns=variable_columns)
outdf_variables.to_csv(PID+'variables.csv', index=False)

# Organizations: one row; OrganizationUUID matches the output rows.
organization_columns = ['OrganizationUUID', 'OrganizationName', 'State',
                        'OrganizationPurview', 'OrganizationWebsite',
                        'OrganizationPhoneNumber', 'OrganizationContactName',
                        'OrganizationContactEmail', 'DataMappingURL']
print("Columns...")
organization_values = [
    'WWDO', 'Wyoming Water Development Office', 'Wyoming',
    np.nan, 'http://wwdc.state.wy.us', '307-777-7626', 'Mabel Jones', 'mabel.jones1@wyo.gov',
    'https://github.com/WSWCWaterDataExchange/MappingStatesDataToWaDE2.0/blob/master/Wyoming/ReadMe.md']
outdf_organizations = pd.DataFrame([organization_values], columns=organization_columns)
outdf_organizations.to_csv(PID+'organizations.csv', index=False)


In [None]:
# Write the aggregated amounts table. The path uses the PID constant, like
# every other output in this notebook, instead of repeating the folder name.
print("Writing outputs...")
outdf100.to_csv(PID + out_agamount, index=False, encoding="utf-8")


print("Aggregate water allocation done!")