# ESMR data from open data
https://data.ca.gov/dataset/water-quality-effluent-electronic-self-monitoring-report-esmr-data

In [1]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
from holoviews import opts, dim
import holoviews as hv
import panel as pn
import hvplot.pandas
import pandas as pd
import os
import warnings
warnings.filterwarnings("ignore")


pn.extension()

hv.extension('bokeh')

In [3]:
from esmr_data import esmr

In [5]:
esmr_file = '../tests/data/esmr-analytical-export_years-2006-2024_2024-03-13.csv'
#esmr_file = '../tests/data/esmr-analytical-export_year-2024_2024-12-02.csv'
df = esmr.read_data_csv(esmr_file)

In [6]:
data_dict = pd.read_csv('../tests/data/esmr_data_dictionary.csv')

# Using class structure

In [7]:
data = esmr.ESMR(df)

In [8]:
region_names = data.get_region_names()
region_names[0:5]

array(['Region 9 - San Diego', 'Region 2 - San Francisco Bay',
       'Region 3 - Central Coast', 'Region 5S - Sacramento',
       'Region 8 - Santa Ana'], dtype='<U31')

In [9]:
facility_names = data.get_facility_names()
facility_names[0:5]

array(['San Juan Capistrano GW TP', 'Calera Creek Water Recycling Plant',
       'SOCWA Aliso Creek Ocean Outfall',
       'PG&E Diablo Canyon Power Plant', 'SCRWA WWTP'], dtype='<U75')

In [10]:
cols = ['facility_name','facility_place_id']
data.df[cols].groupby(cols).count().reset_index().to_csv('facility_name_id.csv')

In [12]:
cols = ['location','location_place_type']
data.get_facility('White Slough Water Pollution Control Facility').df[cols].astype(str).groupby(cols).count()

location,location_place_type
EFF-001,Effluent Monitoring
INF-001,Influent Monitoring
RSW-001,Receiving Water Monitoring
RSW-003,Receiving Water Monitoring
RSW-005,Receiving Water Monitoring
SPL-001,Internal Monitoring
UVS-001,Internal Monitoring


In [13]:
v=data.get_facility('White Slough Water Pollution Control Facility').get_location('UVS-001').get_parameter('Flow').get_variables()[0]
v.result.hvplot()

In [14]:
v=data.get_facility('White Slough Water Pollution Control Facility').get_location('EFF-001').get_parameter('Flow').get_variables()[0]

In [15]:
v.df.hvplot.table()

In [16]:
sac_facility_names = data.get_facility_names(region_names[0])
print('expect facilities for a region to be less in number than all regions')
assert len(sac_facility_names) < len(facility_names)

expect facilities for a region to be less in number than all regions


In [17]:
df.location_place_type.unique()

['Effluent Monitoring', 'Influent Monitoring', 'Groundwater Monitoring', 'Receiving Water Monitoring', 'Recycled Water Monitoring', 'Internal Monitoring', 'Biosolids Monitoring', NaN, 'Stormwater Monitoring', 'Non-point Source Monitoring']
Categories (9, object): ['Effluent Monitoring', 'Influent Monitoring', 'Receiving Water Monitoring', 'Biosolids Monitoring', ..., 'Recycled Water Monitoring', 'Groundwater Monitoring', 'Stormwater Monitoring', 'Non-point Source Monitoring']

In [18]:
facility = data.get_facility('City of Jackson WWTP')
facility

Facility(name='City of Jackson WWTP', region='Region 5S - Sacramento', place_id=214642)

In [19]:
facility.get_location_names()

array(['EFF-001', 'INF-001', 'RSW-001', 'RSW-002', 'SPL-001', 'UVS-001'],
      dtype='<U64')

In [20]:
location = facility.get_location(facility.get_location_names()[0])
location

Location(name='EFF-001', facility=Facility(name='City of Jackson WWTP', region='Region 5S - Sacramento', place_id=214642), place_id=array(['740325.0', '752557.0'], dtype='<U32'), place_type='Effluent Monitoring', desc=array(["A location where a representative sample of the effluent from the facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged into Jackson Creek. [Latitude: 38� 30' 28  N; Longitude: -120� 14' 04  W]",
       'A location where a representative sample of the effluent from the Facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged to Jackson Creek.'],
      dtype='<U265'))

In [21]:
location.get_parameter('Flow')

Parameter(name='Flow', location=Location(name='EFF-001', facility=Facility(name='City of Jackson WWTP', region='Region 5S - Sacramento', place_id=214642), place_id=array(['740325.0', '752557.0'], dtype='<U32'), place_type='Effluent Monitoring', desc=array(["A location where a representative sample of the effluent from the facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged into Jackson Creek. [Latitude: 38� 30' 28  N; Longitude: -120� 14' 04  W]",
       'A location where a representative sample of the effluent from the Facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged to Jackson Creek.'],
      dtype='<U265')))

In [22]:
location.get_parameter_names()

array(['Chronic Toxicity', 'Temperature', 'Flow',
       'Biochemical Oxygen Demand (BOD) (5-day @ 20 Deg. C)', 'pH',
       'Total Suspended Solids (TSS)', 'Chlorine, Total Residual',
       'Methyl Tert-butyl Ether (MTBE)', 'Ammonia, Total (as N)',
       'Turbidity', 'Dissolved Oxygen',
       'Electrical Conductivity @ 25 Deg. C', 'Settleable Solids',
       'Total Coliform', 'Nitrate, Total (as NO3)',
       'Total Dissolved Solids (TDS)', 'Acute Toxicity',
       'Mercury, Total Recoverable', 'Bromoform', 'Chloroform',
       'Dichlorobromomethane', 'Tetrachloroethene',
       'Nitrite Plus Nitrate (as N)', 'Nitrate, Total (as N)',
       'Copper, Total Recoverable', 'Zinc, Total Recoverable',
       'Nitrite, Total (as N)',
       'Total Suspended Solids (TSS), Percent Removal',
       '2,6-Dinitrotoluene', 'Diazinon', 'Cyanide, Total (as CN)',
       'BOD5 @ 20 Deg. C, Percent Removal', '1,2-Diphenylhydrazine',
       'Iron, Total Recoverable', 'Dibromochloromethane',
       'M

In [23]:
location.get_parameter('Flow').get_variables()

[Variable(name='Flow', calculated_method='', units='MGD', parameter=Parameter(name='Flow', location=Location(name='EFF-001', facility=Facility(name='City of Jackson WWTP', region='Region 5S - Sacramento', place_id=214642), place_id=array(['740325.0', '752557.0'], dtype='<U32'), place_type='Effluent Monitoring', desc=array(["A location where a representative sample of the effluent from the facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged into Jackson Creek. [Latitude: 38� 30' 28  N; Longitude: -120� 14' 04  W]",
        'A location where a representative sample of the effluent from the Facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged to Jackson Creek.'],
       dtype='<U265'))), analytical_method_code='DU', qualifier='='),
 Variable(name='Flow', calculated_method='Average Monthly (AMEL)', units='MGD', parameter=Parameter(name='Flow', l

In [24]:
location.get_parameter('Flow').get_variables()[0].result.hvplot()

In [25]:
location.get_parameter(
    'Electrical Conductivity @ 25 Deg. C').get_variables()[0].result.hvplot.scatter()

In [26]:
vars = location.get_parameter('Flow').get_variables()
for v in vars:
    print(v)

Variable(name='Flow', calculated_method='', units='MGD', parameter=Parameter(name='Flow', location=Location(name='EFF-001', facility=Facility(name='City of Jackson WWTP', region='Region 5S - Sacramento', place_id=214642), place_id=array(['740325.0', '752557.0'], dtype='<U32'), place_type='Effluent Monitoring', desc=array(["A location where a representative sample of the effluent from the facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged into Jackson Creek. [Latitude: 38� 30' 28  N; Longitude: -120� 14' 04  W]",
       'A location where a representative sample of the effluent from the Facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged to Jackson Creek.'],
      dtype='<U265'))), analytical_method_code='DU', qualifier='=')
Variable(name='Flow', calculated_method='Average Monthly (AMEL)', units='MGD', parameter=Parameter(name='Flow', locati

In [27]:
vars[1].result.resample('M').mean().to_period().hvplot.bar(rot=90)

TypeError: bad operand type for abs(): 'pandas._libs.tslibs.offsets.MonthEnd'

:Bars   [sampling_datetime]   (Flow [Average Monthly (AMEL)] (MGD) )

In [28]:
location.get_parameter('Flow').get_variables()[0].result.hvplot()

In [29]:
location.get_parameter('Temperature').get_variables()[0].result.hvplot()

In [30]:
for i, p in enumerate(location.get_parameter_names()):
    print(location.get_parameter(p).name)
    if i > 5:
        break;

Chronic Toxicity
Temperature
Flow
Biochemical Oxygen Demand (BOD) (5-day @ 20 Deg. C)
pH
Total Suspended Solids (TSS)
Chlorine, Total Residual


In [31]:
# read analytical_method_code from here
analytical_codes = pd.read_csv('../tests/data/analytical_method_list.csv')

FileNotFoundError: [Errno 2] No such file or directory: '../tests/data/analytical_method_list.csv'

In [32]:
# .analytical_method_code.unique()
location.get_parameter('1,2-Diphenylhydrazine')

Parameter(name='1,2-Diphenylhydrazine', location=Location(name='EFF-001', facility=Facility(name='City of Jackson WWTP', region='Region 5S - Sacramento', place_id=214642), place_id=array(['740325.0', '752557.0'], dtype='<U32'), place_type='Effluent Monitoring', desc=array(["A location where a representative sample of the effluent from the facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged into Jackson Creek. [Latitude: 38� 30' 28  N; Longitude: -120� 14' 04  W]",
       'A location where a representative sample of the effluent from the Facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged to Jackson Creek.'],
      dtype='<U265')))

In [33]:
location.get_parameter('1,2-Diphenylhydrazine')

Parameter(name='1,2-Diphenylhydrazine', location=Location(name='EFF-001', facility=Facility(name='City of Jackson WWTP', region='Region 5S - Sacramento', place_id=214642), place_id=array(['740325.0', '752557.0'], dtype='<U32'), place_type='Effluent Monitoring', desc=array(["A location where a representative sample of the effluent from the facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged into Jackson Creek. [Latitude: 38� 30' 28  N; Longitude: -120� 14' 04  W]",
       'A location where a representative sample of the effluent from the Facility can be collected after all treatment processes and prior to commingling with other waste streams or being discharged to Jackson Creek.'],
      dtype='<U265')))

In [34]:
parameters = ['Flow', 'Temperature', 'pH',
              'Electrical Conductivity @ 25 Deg. C', 'Chloroform', 'Dissolved Oxygen']
hv.Layout([location.get_parameter(p).get_variables()[0].result.hvplot()
          for p in parameters]).cols(1)