# Assign RegulatoryOverlayUUIDs Values to Water Right Sites.csv
- Purpose: Assign RegulatoryOverlayUUIDs values to the state water right sites.csv file.
- Note: Requires the completed, pre-processed sites.csv file from the state "WaterAllocation/ProcessedInputData" folder to be copied over to the "Regulatory/ProcessedInputData" folder.

In [None]:
# Needed Libraries / Modules

# ---- working with data ----
import os  # native operating system interaction
import numpy as np  # mathematical array manipulation
import pandas as pd  # data structure and data analysis
import geopandas as gpd  # geo-data structure and data analysis

# ---- visualization ----
import matplotlib.pyplot as plt  # plotting library
import seaborn as sns  # plotting library

# ---- API data retrieval ----
import requests  # http requests
import json  # JSON parse

# ---- Cleanup ----
import re  # string regular expression manipulation
from datetime import datetime  # date and time manipulation
# Pandas display preferences for notebook output
pd.set_option('display.max_columns', 999)  # show up to 999 columns when a DataFrame is rendered
pd.set_option('display.float_format', '{:.5f}'.format)  # fixed 5-decimal floats; suppresses scientific notation

In [None]:
# Inputs
varWaDEDataType = "Regulatory"  # WaDE data type; also the name of the sub-folder appended below

# Working directory path for this data type.
# NOTE(review): the path is only built and printed in this cell -- nothing visible here calls
# os.chdir(), so the relative "ProcessedInputData/..." paths used in later cells resolve against
# whatever the current working directory already is. Confirm that is intended.
workingDirString = f"C:/Users/rjame/Documents/WSWC Documents/MappingStatesDataToWaDE2.0/2_CodeMappingTemplates/{varWaDEDataType}"
print('The working Directory is:', workingDirString)

## Input Data
- state water right sites.csv
- state regulatory regulatoryreportingunits.csv
- state regulatory reportingunits.csv 

#### wade water right site.csv data
- convert to geodataframe

In [None]:
# State Water Right sites.csv file
# NOTE(review): this reads straight from the shared-drive WaterAllocation folder (New Mexico)
# rather than from a local "ProcessedInputData" copy -- confirm this is the intended source.
dfs = pd.read_csv(filepath_or_buffer='G:/Shared drives/WaDE Data/NewMexico/WaterAllocation/ProcessedInputData/sites.csv')
print(dfs.shape[0])  # number of site records loaded
dfs.head(n=1)  # preview the first record

In [None]:
# Map out points
# NOTE(review): `gplt` and `gcrs` are not imported in the visible cells -- presumably
# `import geoplot as gplt` and `import geoplot.crs as gcrs`; confirm before running.

# background map of the contiguous USA used as the subplot
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa'))
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator())

# build point geometries from the Longitude / Latitude columns, then wrap the sites as a geodataframe
sitePoints = gpd.points_from_xy(dfs['Longitude'].astype(float), dfs['Latitude'].astype(float))
gdfs = gpd.GeoDataFrame(dfs, geometry=sitePoints, crs="EPSG:4326")

# overlay the sites on the background map, colored by PODorPOUSite
gplt.pointplot(gdfs, hue='PODorPOUSite', legend=True, legend_var='hue', ax=ax)

#### wade regulatoryreportingunits.csv data

In [None]:
# Load the regulatory-reporting-units bridge table (path is relative to the current working directory)
inputFile = "ProcessedInputData/regulatoryreportingunits.csv"
df_rru = pd.read_csv(filepath_or_buffer=inputFile)
print(df_rru.shape[0])  # number of records loaded
df_rru.head(n=1)  # preview the first record

#### wade reportingunits.csv data

In [None]:
# Load the reporting units table (path is relative to the current working directory)
inputFile = "ProcessedInputData/reportingunits.csv"
dfru = pd.read_csv(filepath_or_buffer=inputFile)
print(dfru.shape[0])  # number of records loaded
dfru.head(n=1)  # preview the first record

## Extract RegulatoryOverlay from Reporting Units and assign to WR Sites
- Repeat the scripts below once for each ReportingUnitTypeCV type.
- Merge all geo-dataframes into one output.

#### Regulatory Area Data #1

In [None]:
# Reporting units of area data type #1, each paired with its RegulatoryOverlayUUID.
# Replace the placeholder string below with the actual ReportingUnitTypeCV value.
dfru1 = (
    dfru.loc[dfru['ReportingUnitTypeCV'] == "{name of ReportingUnitTypeCV of area data type #1}"]
    .copy()
    .merge(
        df_rru[['ReportingUnitUUID', 'RegulatoryOverlayUUID']],
        on='ReportingUnitUUID',
        how='left',  # keep every reporting unit, even those without an overlay match
    )
)
print(dfru1.shape[0])  # number of rows after the merge
dfru1.head(n=1)  # preview the first record

In [None]:
# Convert dataframe -to- geodataframe & plot
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa')) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Keep only rows that actually carry WKT text. Blank cells in a CSV loaded with the default
# pd.read_csv settings arrive as NaN rather than "", so a plain `!= ""` test lets them through
# to from_wkt; check for both missing and empty values.
hasGeometry = dfru1['Geometry'].notna() & (dfru1['Geometry'] != "")
dfru1 = dfru1[hasGeometry].reset_index(drop=True)
dfru1['Geometry'] = gpd.GeoSeries.from_wkt(dfru1['Geometry'], crs="EPSG:4326") # parse WKT text into geometry objects
gdfru1 = gpd.GeoDataFrame(dfru1, geometry=dfru1['Geometry'], crs="EPSG:4326") # convert to geodataframe
gplt.polyplot(gdfru1, ax=ax) # draw the reporting unit shapes on the background map

In [None]:
# Select sites within polygon.
# Spatial join: keep each site that lies within a reporting unit shape and attach that unit's
# ReportingUnitUUID / RegulatoryOverlayUUID. A site inside several shapes yields one row per shape.
# `predicate=` replaces the `op=` keyword, which was deprecated in geopandas 0.10 and later removed.
areaShapes = gdfru1[['ReportingUnitUUID', 'RegulatoryOverlayUUID', 'geometry']]
gdfs_ru1 = gpd.sjoin(left_df=gdfs, right_df=areaShapes, predicate='within').replace(np.nan, "")
print(len(gdfs_ru1))  # number of site / reporting unit matches
gdfs_ru1.head()

In [None]:
# plot the selected points
# background map of the contiguous USA used as the subplot
basemapPath = gplt.datasets.get_path('contiguous_usa')
contiguous_usa = gpd.read_file(basemapPath)
webProjection = gcrs.WebMercator()
ax = gplt.webmap(contiguous_usa, projection=webProjection)

# overlay only the sites returned by the spatial join, colored by PODorPOUSite
gplt.pointplot(gdfs_ru1, hue='PODorPOUSite', legend=True, legend_var='hue', ax=ax)

In [None]:
# set RegulatoryOverlayUUIDs
# copy the joined overlay id into the RegulatoryOverlayUUIDs field, then discard the join helper columns
joinHelperColumns = ['RegulatoryOverlayUUID', 'geometry', 'index_right', 'ReportingUnitUUID']
gdfs_ru1['RegulatoryOverlayUUIDs'] = gdfs_ru1['RegulatoryOverlayUUID']
gdfs_ru1 = gdfs_ru1.drop(columns=joinHelperColumns)
gdfs_ru1.head(n=1)  # preview the first record

#### Regulatory Area Data #2

In [None]:
# dfru2 = dfru.copy()
# etc etc

## Concatenate all Regulatory Area Data Types together
- drop geometry from the wade wr sites.csv geodataframe

In [None]:
# Concatenate dataframes
# the full site list goes in without its geometry column, so sites that matched no area are still kept
gdfs = gdfs.drop(columns=['geometry'])

frames = [gdfs, gdfs_ru1]  # list all out dataframes here
outdf = pd.concat(frames).drop_duplicates()
outdf = outdf.reset_index(drop=True)
outdf = outdf.replace(np.nan, "")  # blank out missing values after de-duplicating
print(outdf.shape[0])  # number of rows after concatenation

In [None]:
# groupby() fields on SiteNativeID
# Collapse to one row per SiteNativeID. For every other column the distinct non-empty values
# are converted to strings and comma-joined.
# dict.fromkeys() de-duplicates while keeping first-seen order; iterating a set() here would
# give an arbitrary order that can change between runs, making the exported CSV unstable.
outdf = outdf.groupby('SiteNativeID').agg(
    lambda x: ','.join(str(elem) for elem in dict.fromkeys(x) if elem != '')
).replace(np.nan, "").reset_index()
print(len(outdf))  # number of unique sites

# Inspect Output Data & Export

In [None]:
# Print the column names, non-null counts and dtypes of the output dataframe
outdf.info()

In [None]:
# Display the output dataframe for a visual check before exporting
outdf

In [None]:
# Export out to CSV.
# The path is relative to the current working directory (the Regulatory data folder);
# the dataframe index is not written.
outdf.to_csv(path_or_buf='ProcessedInputData/sites.csv', index=False)