In [1]:
import pandas as pd
import numpy as np

# **UKWIR DATASET Visualization**

### This is a notebook to showcase the recently released UKWIR dataset on water quality from effluents of sewage treatment plants and surface water. An interactive Web application is beign created from the dataset.


**Source: [UKWIR- Chemical Investigation Programme](https://ukwir.org/chemical-investigations-programme-EIR-Database)

In [45]:
#Import cleaned Dataset
#Rows with null values were removed and the data with dates before 2015 were also removed - dates are suppoed to be from 2015 - 2020 according to the documentation.

swt_data = pd.read_csv('effluent_data.csv')
swt_data

Unnamed: 0,SampleDateTime,TreatmentPlant,SampleDateTime.1,SampleLocationName,SampleValue,NameDeterminandName,UnitsName,BelowMinReading,Year,Month
0,2015-11-19,Driffield STW,2015-11-19,Treatment Effluent,0.0140,triclosan,micrograms per litre,Yes,2015,11
1,2015-04-06,Pen-y-bont STW,2015-04-06,Treatment Effluent,0.0005,BDE 99,micrograms per litre,Yes,2015,4
2,2015-04-06,Pen-y-bont STW,2015-04-06,Treatment Effluent,0.0005,BDE 100,micrograms per litre,Yes,2015,4
3,2015-04-06,Pen-y-bont STW,2015-04-06,Treatment Effluent,0.0005,BDE 153,micrograms per litre,Yes,2015,4
4,2015-04-06,Pen-y-bont STW,2015-04-06,Treatment Effluent,0.0005,BDE 154,micrograms per litre,Yes,2015,4
...,...,...,...,...,...,...,...,...,...,...
638118,2020-01-28,Nettleham STW,2020-01-28,Treatment Effluent,0.0002,Tributyltin,micrograms per litre,Yes,2020,1
638119,2020-01-28,Nettleham STW,2020-01-28,Treatment Effluent,0.1000,Octylphenols,micrograms per litre,Yes,2020,1
638120,2020-01-28,Nettleham STW,2020-01-28,Treatment Effluent,0.1300,Nonylphenol,micrograms per litre,No,2020,1
638121,2020-01-28,Nettleham STW,2020-01-28,Treatment Effluent,19.3000,total oxidised nitrogen as N,milligrammes per litre,No,2020,1


In [48]:
swt_data.SampleDateTime = pd.to_datetime(swt_data.SampleDateTime, format='%m/%d/%Y')

In [49]:
import plotly.graph_objs as go
import plotly.express as px


In [50]:
T_plant = ['Nettleham STW', 'Diss STW']

print(len(T_plant))

2


In [37]:
chemicals =  swt_data.NameDeterminandName.unique()

chemical = ['total phosphorus as P', 'soluble reactive phosphate as P',
       'ammoniacal nitrogen as N', 'nickel dissolved', 'nickel total',
       'lead total', 'copper total', 'lead dissolved',
       'mono nonylphenol ethoxylates', 'Biochemical Oxygen Demand',
       'total organic carbon', 'total suspended solids',
       'total oxidised nitrogen as N', 'Chemical Oxygen Demand',]


print(len(chemical))

14


In [54]:
BOD = swt_data[swt_data['NameDeterminandName']== 'Biochemical Oxygen Demand']

BOD.sort_values(by=['SampleDateTime'], inplace = True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [56]:
BOD_diss = BOD[BOD['TreatmentPlant'] == 'Diss STW' ]

BOD_diss

Unnamed: 0,SampleDateTime,TreatmentPlant,SampleDateTime.1,SampleLocationName,SampleValue,NameDeterminandName,UnitsName,BelowMinReading,Year,Month
454922,2018-05-12,Diss STW,2018-05-12,Treatment Effluent,3.2,Biochemical Oxygen Demand,milligrammes per litre,No,2018,5
487633,2018-09-18,Diss STW,2018-09-18,Treatment Effluent,1.91,Biochemical Oxygen Demand,milligrammes per litre,No,2018,9
450339,2018-10-30,Diss STW,2018-10-30,Treatment Effluent,2.69,Biochemical Oxygen Demand,milligrammes per litre,No,2018,10
558114,2019-01-23,Diss STW,2019-01-23,Treatment Effluent,2.38,Biochemical Oxygen Demand,milligrammes per litre,No,2019,1
571018,2019-02-28,Diss STW,2019-02-28,Treatment Effluent,1.91,Biochemical Oxygen Demand,milligrammes per litre,No,2019,2
571118,2019-04-06,Diss STW,2019-04-06,Treatment Effluent,1.48,Biochemical Oxygen Demand,milligrammes per litre,No,2019,4
570890,2019-04-24,Diss STW,2019-04-24,Treatment Effluent,2.0,Biochemical Oxygen Demand,milligrammes per litre,No,2019,4
553026,2019-06-12,Diss STW,2019-06-12,Treatment Effluent,2.77,Biochemical Oxygen Demand,milligrammes per litre,No,2019,6
600991,2019-06-26,Diss STW,2019-06-26,Treatment Effluent,2.08,Biochemical Oxygen Demand,milligrammes per litre,No,2019,6
558503,2019-08-08,Diss STW,2019-08-08,Treatment Effluent,0.83,Biochemical Oxygen Demand,milligrammes per litre,Yes,2019,8


In [58]:
fig = go.Figure([go.Scatter(x=BOD['SampleDateTime'], y=BOD[BOD['TreatmentPlant'] == 'Diss STW' ]['SampleValue'])])
fig.show()

In [55]:
# Plot of line graph of parameters time against SWP



quality = [{
    'x': BOD['SampleDateTime'],
    'y': BOD[BOD['TreatmentPlant']== plant]['SampleValue'],
    'name' : plant,
    'mode': 'lines'

} for plant in T_plant]


layout = go.Layout(title='BOD values in selected Treatment sites')

fig = go.Figure (data=quality, layout=layout)

fig.show()