# Assign RegulatoryOverlayUUIDs Values to Water Right sites.csv
Purpose: To assign RegulatoryOverlayUUIDs values to the state's water right sites.csv file.

Notes: 
- requires the completed pre-processed sites.csv & watersources.csv files from the state "WaterAllocation/ProcessedInputData" folder to be copied over to the "Regulatory/ProcessedInputData" folder.  Rename them to wr_sites & wr_watersources to preserve the inputs (note: the Input Data cell reads them as zipped files, wr_sites.zip & wr_watersources.zip).
- requires knowing how the unique WaterSourceTypeCV values of the water right sites match up to the corresponding WaterSourceTypeCV values of the regulatoryoverlays for each group of reportingunits.

In [None]:
# Needed Libraries / Modules

# ---- working with data ----
import os  # native operating system interaction
import numpy as np  # mathematical array manipulation
import pandas as pd  # data structure and data analysis
import geopandas as gpd  # geo-data structure and data analysis

# ---- visualization ----
import matplotlib.pyplot as plt  # plotting library
import seaborn as sns  # plotting library
import geoplot as gplt # for plotting maps
import geoplot.crs as gcrs #used to pull in webdata

# ---- API data retrieval ----
import requests  # http requests
import json  # JSON parse

# ---- Cleanup ----
import re  # string regular expression manipulation
from datetime import datetime  # date and time manipulation
def _fixed_five_decimals(x):
    """Format a float with five fixed decimals (no scientific notation)."""
    return '%.5f' % x


# Show every column when a DataFrame is rendered in the notebook.
pd.set_option('display.max_columns', 999)
# Render floats through the fixed-decimal formatter above.
pd.set_option('display.float_format', _fixed_five_decimals)

In [None]:
# ---- working directory ----
# Relative "ProcessedInputData/..." paths in this notebook resolve against this folder.
workingDirString = "G:/Shared drives/WaDE Data/Utah/Regulatory"  # set working directory folder string here
os.chdir(workingDirString)
print('The working Directory is:', workingDirString)

## Input Data

In [None]:
# Regulatory Input Data
# (overlays, reporting units, and the table linking overlay UUIDs to reporting unit UUIDs)
dfro, dfru, dfrru = (
    pd.read_csv(csvPath)
    for csvPath in (
        "ProcessedInputData/regulatoryoverlays.csv",
        "ProcessedInputData/reportingunits.csv",
        "ProcessedInputData/regulatoryreportingunits.csv",
    )
)

# Water right Input Data
# (zipped CSVs; pandas infers the compression from the ".zip" extension)
dfws, dfs = (
    pd.read_csv(zipPath)
    for zipPath in (
        'ProcessedInputData/wr_watersources.zip',
        'ProcessedInputData/wr_sites.zip',
    )
)

In [None]:
#### regulatory watersource info with reporting unit info

# merge regulatoryoverlays -to- regulatoryreportingunits -to- reportingunits
# Step 1: attach a ReportingUnitUUID to every overlay record.
overlayCols = ['RegulatoryOverlayUUID', 'RegulatoryOverlayTypeCV', 'WaterSourceTypeCV']
linkCols = ['RegulatoryOverlayUUID', 'ReportingUnitUUID']
dfro = dfro[overlayCols].merge(dfrru[linkCols], on='RegulatoryOverlayUUID', how='left')

# Step 2: bring the overlay columns onto the reporting units (left join keeps every reporting unit).
dfru = dfru.merge(dfro, on='ReportingUnitUUID', how='left')

# Sanity checks on the merged table.
print(dfru['RegulatoryOverlayTypeCV'].unique())  # overlay types available to filter on below
print(dfru['WaterSourceTypeCV'].unique())  # water source types carried by the overlays
print(len(dfru))
dfru.head(1)

In [None]:
#### water right watersource info with site info

# explode site.csv on WaterSourceUUIDs
# Split the comma-separated WaterSourceUUIDs string so each UUID gets its own row.
splitSourceIds = dfs['WaterSourceUUIDs'].str.split(',')
dfs = dfs.assign(WaterSourceUUIDs=splitSourceIds)
dfs = dfs.explode('WaterSourceUUIDs')
dfs = dfs.reset_index(drop=True)

# merge watersources to dfs via WaterSourceUUIDs -to- WaterSourceUUID
sourceTypeLookup = dfws[['WaterSourceUUID', 'WaterSourceTypeCV']]
dfs = dfs.merge(sourceTypeLookup, left_on='WaterSourceUUIDs', right_on='WaterSourceUUID', how='left')
print(dfs['WaterSourceTypeCV'].unique())  # water source types present on the sites
print(len(dfs))
dfs.head(1)

## Extract RegulatoryOverlay from Reporting Units and assign to WR Sites
- repeat the scripts below once per regulatory area type of interest (the cells filter on the RegulatoryOverlayTypeCV column).
- merge all geo-dataframes into one output

#### Regulatory Area Data #1

In [None]:
# Take an independent copy of the rows of the reportingunits dataframe
# whose RegulatoryOverlayTypeCV equals the chosen type.
# note unique WaterSourceTypeCV

nameOfReportingUnitTypeVar = "Water Right Areas" # change here for RegulatoryOverlayTypeCV of interest
isSelectedType = dfru['RegulatoryOverlayTypeCV'] == nameOfReportingUnitTypeVar
dfru1 = dfru.loc[isSelectedType].copy()
print(len(dfru1))
print(dfru1['WaterSourceTypeCV'].unique())
dfru1.head(1)

In [None]:
# Convert dataframe -to- geodataframe & plot
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa')) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Drop reporting units without usable WKT. pd.read_csv loads blank cells as NaN
# (not ""), so comparing against "" alone would let them through to from_wkt.
hasGeometry = dfru1['Geometry'].notna() & (dfru1['Geometry'] != "")
dfru1 = dfru1[hasGeometry].reset_index(drop=True)
dfru1['Geometry'] = gpd.GeoSeries.from_wkt(dfru1['Geometry'], crs="EPSG:4326")
gdfru1 = gpd.GeoDataFrame(dfru1, geometry=dfru1['Geometry'], crs="EPSG:4326") # convert to geodataframe
gplt.polyplot(gdfru1, ax=ax)

In [None]:
# Start from a copy of the water right sites dataframe and keep only POD sites.
# Optionally narrow by WaterSourceTypeCV (template below) to match the reportingunits dataframe above.

dfs1 = dfs.copy()
# nameOfWaterSourceTypeCV= "add name here" # change here for WaterSourceTypeCV of interest
# dfs1 = dfs1[dfs1['WaterSourceTypeCV'] == nameOfWaterSourceTypeCV]
isPod = dfs1['PODorPOUSite'].eq('POD')
dfs1 = dfs1.loc[isPod]
print(len(dfs1))
print(dfs1['WaterSourceTypeCV'].unique())
dfs1.head(1)

In [None]:
# Convert extracted water right sites -to- geodataframe & plot
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa')) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Build point geometry from the Longitude (x) and Latitude (y) columns.
siteLon = dfs1['Longitude'].astype(float)
siteLat = dfs1['Latitude'].astype(float)
sitePoints = gpd.points_from_xy(siteLon, siteLat)
gdfs1 = gpd.GeoDataFrame(dfs1, geometry=sitePoints, crs="EPSG:4326")
gplt.pointplot(gdfs1, hue='WaterSourceTypeCV', legend=True, legend_var='hue', ax=ax)

In [None]:
# Select sites within polygon.
# `predicate` replaces the `op` keyword, which geopandas deprecated in 0.10 and removed in 1.0.
overlayPolygons = gdfru1[['ReportingUnitUUID', 'RegulatoryOverlayUUID', 'geometry']]
gdfs1_ru1 = gpd.sjoin(left_df=gdfs1, right_df=overlayPolygons, predicate='within').replace(np.nan, "")
print(len(gdfs1_ru1))
gdfs1_ru1.head()

In [None]:
# plot the selected points
backgroundPath = gplt.datasets.get_path('contiguous_usa')
contiguous_usa = gpd.read_file(backgroundPath) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Draw the matched sites on the background map, colored by water source type.
gplt.pointplot(gdfs1_ru1, ax=ax, hue='WaterSourceTypeCV', legend=True, legend_var='hue')

In [None]:
# set RegulatoryOverlayUUIDs
# Copy the matched overlay UUID into the RegulatoryOverlayUUIDs column,
# then discard the columns that only existed for the join.
joinHelperCols = ['RegulatoryOverlayUUID', 'geometry', 'index_right', 'ReportingUnitUUID', 'WaterSourceUUID', 'WaterSourceTypeCV']
gdfs1_ru1['RegulatoryOverlayUUIDs'] = gdfs1_ru1['RegulatoryOverlayUUID']
gdfs1_ru1 = gdfs1_ru1.drop(columns=joinHelperCols)
gdfs1_ru1.head(1)

#### Regulatory Area Data #2

In [None]:
# Take an independent copy of the rows of the reportingunits dataframe
# whose RegulatoryOverlayTypeCV equals the chosen type.
# note unique WaterSourceTypeCV

nameOfReportingUnitTypeVar = "Groundwater Policy Management" # change here for RegulatoryOverlayTypeCV of interest
isSelectedType = dfru['RegulatoryOverlayTypeCV'] == nameOfReportingUnitTypeVar
dfru2 = dfru.loc[isSelectedType].copy()
print(len(dfru2))
print(dfru2['WaterSourceTypeCV'].unique())
dfru2.head(1)

In [None]:
# Convert dataframe -to- geodataframe & plot
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa')) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Drop reporting units without usable WKT. pd.read_csv loads blank cells as NaN
# (not ""), so comparing against "" alone would let them through to from_wkt.
hasGeometry = dfru2['Geometry'].notna() & (dfru2['Geometry'] != "")
dfru2 = dfru2[hasGeometry].reset_index(drop=True)
dfru2['Geometry'] = gpd.GeoSeries.from_wkt(dfru2['Geometry'], crs="EPSG:4326")
gdfru2 = gpd.GeoDataFrame(dfru2, geometry=dfru2['Geometry'], crs="EPSG:4326") # convert to geodataframe
gplt.polyplot(gdfru2, ax=ax)

In [None]:
# Start from a copy of the water right sites dataframe and keep only POD sites
# whose WaterSourceTypeCV is in the list below (chosen to match the reportingunits dataframe above).

dfs2 = dfs.copy()
# NOTE(review): 'Abandonded Well' is kept exactly as written; it has to match the
# WaterSourceTypeCV values stored in the data -- confirm the spelling there.
nameOfWaterSourceTypeCVList = ['Abandonded Well', 'Underground', 'Spring' ] # change here for WaterSourceTypeCV of interest
isWantedSource = dfs2['WaterSourceTypeCV'].isin(nameOfWaterSourceTypeCVList)
isPod = dfs2['PODorPOUSite'].eq('POD')
dfs2 = dfs2.loc[isWantedSource & isPod]
print(len(dfs2))
print(dfs2['WaterSourceTypeCV'].unique())
dfs2.head(1)

In [None]:
# Convert extracted water right sites -to- geodataframe & plot
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa')) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Build point geometry from the Longitude (x) and Latitude (y) columns.
siteLon = dfs2['Longitude'].astype(float)
siteLat = dfs2['Latitude'].astype(float)
sitePoints = gpd.points_from_xy(siteLon, siteLat)
gdfs2 = gpd.GeoDataFrame(dfs2, geometry=sitePoints, crs="EPSG:4326")
gplt.pointplot(gdfs2, hue='WaterSourceTypeCV', legend=True, legend_var='hue', ax=ax)

In [None]:
# Select sites within polygon.
# `predicate` replaces the `op` keyword, which geopandas deprecated in 0.10 and removed in 1.0.
overlayPolygons = gdfru2[['ReportingUnitUUID', 'RegulatoryOverlayUUID', 'geometry']]
gdfs2_ru2 = gpd.sjoin(left_df=gdfs2, right_df=overlayPolygons, predicate='within').replace(np.nan, "")
print(len(gdfs2_ru2))
gdfs2_ru2.head()

In [None]:
# plot the selected points
backgroundPath = gplt.datasets.get_path('contiguous_usa')
contiguous_usa = gpd.read_file(backgroundPath) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Draw the matched sites on the background map, colored by water source type.
gplt.pointplot(gdfs2_ru2, ax=ax, hue='WaterSourceTypeCV', legend=True, legend_var='hue')

In [None]:
# set RegulatoryOverlayUUIDs
# Copy the matched overlay UUID into the RegulatoryOverlayUUIDs column,
# then discard the columns that only existed for the join.
joinHelperCols = ['RegulatoryOverlayUUID', 'geometry', 'index_right', 'ReportingUnitUUID', 'WaterSourceUUID', 'WaterSourceTypeCV']
gdfs2_ru2['RegulatoryOverlayUUIDs'] = gdfs2_ru2['RegulatoryOverlayUUID']
gdfs2_ru2 = gdfs2_ru2.drop(columns=joinHelperCols)
gdfs2_ru2.head(1)

#### Regulatory Area Data #3

In [None]:
# Take an independent copy of the rows of the reportingunits dataframe
# whose RegulatoryOverlayTypeCV equals the chosen type.
# note unique WaterSourceTypeCV

nameOfReportingUnitTypeVar = "Basins Closed to New Appropriations" # change here for RegulatoryOverlayTypeCV of interest
isSelectedType = dfru['RegulatoryOverlayTypeCV'] == nameOfReportingUnitTypeVar
dfru3 = dfru.loc[isSelectedType].copy()
print(len(dfru3))
print(dfru3['WaterSourceTypeCV'].unique())
dfru3.head(1)

In [None]:
# Convert dataframe -to- geodataframe & plot
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa')) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Drop reporting units without usable WKT. pd.read_csv loads blank cells as NaN
# (not ""), so comparing against "" alone would let them through to from_wkt.
hasGeometry = dfru3['Geometry'].notna() & (dfru3['Geometry'] != "")
dfru3 = dfru3[hasGeometry].reset_index(drop=True)
dfru3['Geometry'] = gpd.GeoSeries.from_wkt(dfru3['Geometry'], crs="EPSG:4326")
gdfru3 = gpd.GeoDataFrame(dfru3, geometry=dfru3['Geometry'], crs="EPSG:4326") # convert to geodataframe
gplt.polyplot(gdfru3, ax=ax)

In [None]:
# Start from a copy of the water right sites dataframe and keep only POD sites.
# Optionally narrow by WaterSourceTypeCV (template below) to match the reportingunits dataframe above.

dfs3 = dfs.copy()
# nameOfWaterSourceTypeCV= "add name here" # change here for WaterSourceTypeCV of interest
# dfs3 = dfs3[dfs3['WaterSourceTypeCV'] == nameOfWaterSourceTypeCV]
isPod = dfs3['PODorPOUSite'].eq('POD')
dfs3 = dfs3.loc[isPod]
print(len(dfs3))
print(dfs3['WaterSourceTypeCV'].unique())
dfs3.head(1)

In [None]:
# Convert extracted water right sites -to- geodataframe & plot
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa')) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Build point geometry from the Longitude (x) and Latitude (y) columns.
siteLon = dfs3['Longitude'].astype(float)
siteLat = dfs3['Latitude'].astype(float)
sitePoints = gpd.points_from_xy(siteLon, siteLat)
gdfs3 = gpd.GeoDataFrame(dfs3, geometry=sitePoints, crs="EPSG:4326")
gplt.pointplot(gdfs3, hue='WaterSourceTypeCV', legend=True, legend_var='hue', ax=ax)

In [None]:
# Select sites within polygon.
# `predicate` replaces the `op` keyword, which geopandas deprecated in 0.10 and removed in 1.0.
overlayPolygons = gdfru3[['ReportingUnitUUID', 'RegulatoryOverlayUUID', 'geometry']]
gdfs3_ru3 = gpd.sjoin(left_df=gdfs3, right_df=overlayPolygons, predicate='within').replace(np.nan, "")
print(len(gdfs3_ru3))
gdfs3_ru3.head()

In [None]:
# plot the selected points
backgroundPath = gplt.datasets.get_path('contiguous_usa')
contiguous_usa = gpd.read_file(backgroundPath) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Draw the matched sites on the background map, colored by water source type.
gplt.pointplot(gdfs3_ru3, ax=ax, hue='WaterSourceTypeCV', legend=True, legend_var='hue')

In [None]:
# set RegulatoryOverlayUUIDs
# Copy the matched overlay UUID into the RegulatoryOverlayUUIDs column,
# then discard the columns that only existed for the join.
joinHelperCols = ['RegulatoryOverlayUUID', 'geometry', 'index_right', 'ReportingUnitUUID', 'WaterSourceUUID', 'WaterSourceTypeCV']
gdfs3_ru3['RegulatoryOverlayUUIDs'] = gdfs3_ru3['RegulatoryOverlayUUID']
gdfs3_ru3 = gdfs3_ru3.drop(columns=joinHelperCols)
gdfs3_ru3.head(1)

#### Regulatory Area Data #4

In [None]:
# Take an independent copy of the rows of the reportingunits dataframe
# whose RegulatoryOverlayTypeCV equals the chosen type.
# note unique WaterSourceTypeCV

nameOfReportingUnitTypeVar = "Areas Open to Limited Appropriation" # change here for RegulatoryOverlayTypeCV of interest
isSelectedType = dfru['RegulatoryOverlayTypeCV'] == nameOfReportingUnitTypeVar
dfru4 = dfru.loc[isSelectedType].copy()
print(len(dfru4))
print(dfru4['WaterSourceTypeCV'].unique())
dfru4.head(1)

In [None]:
# Convert dataframe -to- geodataframe & plot
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa')) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Drop reporting units without usable WKT. pd.read_csv loads blank cells as NaN
# (not ""), so comparing against "" alone would let them through to from_wkt.
hasGeometry = dfru4['Geometry'].notna() & (dfru4['Geometry'] != "")
dfru4 = dfru4[hasGeometry].reset_index(drop=True)
dfru4['Geometry'] = gpd.GeoSeries.from_wkt(dfru4['Geometry'], crs="EPSG:4326")
gdfru4 = gpd.GeoDataFrame(dfru4, geometry=dfru4['Geometry'], crs="EPSG:4326") # convert to geodataframe
gplt.polyplot(gdfru4, ax=ax)

In [None]:
# Start from a copy of the water right sites dataframe and keep only POD sites.
# Optionally narrow by WaterSourceTypeCV (template below) to match the reportingunits dataframe above.

dfs4 = dfs.copy()
# nameOfWaterSourceTypeCV= "add name here" # change here for WaterSourceTypeCV of interest
# dfs4 = dfs4[dfs4['WaterSourceTypeCV'] == nameOfWaterSourceTypeCV]
isPod = dfs4['PODorPOUSite'].eq('POD')
dfs4 = dfs4.loc[isPod]
print(len(dfs4))
print(dfs4['WaterSourceTypeCV'].unique())
dfs4.head(1)

In [None]:
# Convert extracted water right sites -to- geodataframe & plot
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa')) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Build point geometry from the Longitude (x) and Latitude (y) columns.
siteLon = dfs4['Longitude'].astype(float)
siteLat = dfs4['Latitude'].astype(float)
sitePoints = gpd.points_from_xy(siteLon, siteLat)
gdfs4 = gpd.GeoDataFrame(dfs4, geometry=sitePoints, crs="EPSG:4326")
gplt.pointplot(gdfs4, hue='WaterSourceTypeCV', legend=True, legend_var='hue', ax=ax)

In [None]:
# Select sites within polygon.
# `predicate` replaces the `op` keyword, which geopandas deprecated in 0.10 and removed in 1.0.
overlayPolygons = gdfru4[['ReportingUnitUUID', 'RegulatoryOverlayUUID', 'geometry']]
gdfs4_ru4 = gpd.sjoin(left_df=gdfs4, right_df=overlayPolygons, predicate='within').replace(np.nan, "")
print(len(gdfs4_ru4))
gdfs4_ru4.head()

In [None]:
# plot the selected points
backgroundPath = gplt.datasets.get_path('contiguous_usa')
contiguous_usa = gpd.read_file(backgroundPath) # use for background map in subplot
ax = gplt.webmap(contiguous_usa, projection=gcrs.WebMercator()) # set subplot

# Draw the matched sites on the background map, colored by water source type.
gplt.pointplot(gdfs4_ru4, ax=ax, hue='WaterSourceTypeCV', legend=True, legend_var='hue')

In [None]:
# set RegulatoryOverlayUUIDs
# Copy the matched overlay UUID into the RegulatoryOverlayUUIDs column,
# then discard the columns that only existed for the join.
joinHelperCols = ['RegulatoryOverlayUUID', 'geometry', 'index_right', 'ReportingUnitUUID', 'WaterSourceUUID', 'WaterSourceTypeCV']
gdfs4_ru4['RegulatoryOverlayUUIDs'] = gdfs4_ru4['RegulatoryOverlayUUID']
gdfs4_ru4 = gdfs4_ru4.drop(columns=joinHelperCols)
gdfs4_ru4.head(1)

## Concatenate all Regulatory Area Data Types together
- drop geometry from the wade wr sites.csv geodataframe

In [None]:
# Concatenate dataframes into single output
# Every per-type result must be listed here; gdfs4_ru4 (Regulatory Area Data #4)
# was previously computed but left out, so its overlay UUIDs never reached the output.
frames = [dfs, gdfs1_ru1, gdfs2_ru2, gdfs3_ru3, gdfs4_ru4] # list all out dataframes here
outdfs = pd.concat(frames)
outdfs = outdfs.drop_duplicates().reset_index(drop=True).replace(np.nan, "")

# Collapse to one row per SiteUUID: for every other column, join the distinct
# non-empty values with commas. dict.fromkeys de-duplicates while keeping
# first-seen order; iterating a set() of strings can order them differently
# from run to run, which made the exported CSV irreproducible.
outdfs = outdfs.groupby('SiteUUID').agg(lambda x: ','.join([str(elem) for elem in dict.fromkeys(x) if elem != ""])).replace(np.nan, "").reset_index()
print(len(outdfs))
outdfs.head(1)

# Inspect Output Data & Export

In [None]:
# Print the column names, dtypes, and non-null counts of the final table before export.
outdfs.info()

In [None]:
# Display the final dataframe for a visual check.
outdfs

In [None]:
# Export out to CSV.
# The path is relative, so the file lands under the current working directory.
outputCsvPath = 'ProcessedInputData/sites.csv'  # this is in the Regulatory data folder
outdfs.to_csv(outputCsvPath, index=False)