![title](DEP_LOGO_2.jpg)

# Trend Station Water Quality Data 
## Web Application Demo - Expanded Parameters

In [25]:
#HIDDEN

import ipywidgets as widgets
from ipywidgets import interact
import pandas as pd
import csv
from geopy.geocoders import Nominatim
from time import sleep
import folium
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
from folium.plugins import MarkerCluster
import seaborn as sns
from scipy import stats
from scipy.stats import linregress

# Show at most 30 columns when a DataFrame is rendered in the notebook.
# NOTE(review): the parameter tables built below use up to 41 column labels,
# so wide tables may be shown truncated -- confirm this cap is intended.
pd.set_option('display.max_columns', 30)

## Explore Available Trend Stations:

In [26]:
#HIDDEN

# Base class of every geopy error; imported here because this cell is the
# only place that needs it.
from geopy.exc import GeopyError

# Station metadata. Missing values become 0, and rows whose latitude is 0
# (i.e. was missing or zero) are excluded from the map.
dfhead = pd.read_csv('gm_export_TS_Location.csv')
dfhead = dfhead.fillna(0)
dfheadloc = dfhead[dfhead.latitude != 0]

# Nominatim requires an application-specific user agent; the sample string
# from the geopy documentation ("specify_your_app_name_here") is rejected by
# recent geopy releases, so a real identifier is used instead.
geolocator = Nominatim(user_agent="wv_trend_station_water_quality_demo")

# geocode() returns None when nothing matches and raises a GeopyError on
# service/network problems; neither should prevent the map from rendering.
try:
    location = geolocator.geocode("Flatwoods, WV")
except GeopyError:
    location = None

if location is not None:
    longitude = location.longitude
    latitude = location.latitude
else:
    # Fall back to the centre of the plotted stations.
    longitude = dfheadloc['longitude'].mean()
    latitude = dfheadloc['latitude'].mean()

# create map of West Virginia using latitude and longitude values
ts_locations = folium.Map(control_scale=True, width='100%', height='100%', location=[latitude, longitude], tiles="OpenStreetMap", zoom_start=7.4)

#folium.TileLayer('MapQuest Open Aerial').add_to(ts_locations)

# add markers to map: one red circle per station, with the station id as popup
for lat, lng, label in zip(dfheadloc['latitude'], dfheadloc['longitude'], dfheadloc['station_id']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7,
        parse_html=True).add_to(ts_locations)

# Last expression of the cell: renders the map in the notebook.
ts_locations

## Select Trend Station:

In [27]:
#HIDDEN

def unique_sorted_values(array):
    """Return the distinct values of *array* as an ascending list.

    *array* must provide ``unique()`` returning an object with ``tolist()``
    (it is called with a pandas Series in this notebook).
    """
    return sorted(array.unique().tolist())

# Combined water-quality results; missing values are replaced with 0.
df_comb = pd.read_csv('TS_New_Combined_Final.csv').fillna(0)

# Station metadata, restricted to rows with a non-zero latitude.
dfhead = pd.read_csv('gm_export_TS_Location.csv').fillna(0)
dfheadloc = dfhead[dfhead['latitude'] != 0]

# One output area per result view; they are filled by common_filtering1 and
# displayed by the cells further down.
output_two, output_three, output_four, output_five, output_six = (
    widgets.Output() for _ in range(5)
)
plot1_output = widgets.Output()
plot_output = widgets.Output()

# Station selector listing every distinct station id in ascending order.
dropdown_stations = widgets.Dropdown(
    description='Trend Station',
    options=unique_sorted_values(df_comb['station_id']),
)


# Column labels given, by position, to the parameters of one sample batch.
_PARAM_COLUMN_LABELS = ['FIELD ph', 'FIELD CONDUCTIVITY','TEMPERATURE','FLOW', 'LAB CONDUCTIVITY', 'SULFATE', 'DISSOLVED SOLIDS', 'SUSPENDED SOLIDS', 'LAB ph', 'ALKALINITY', 'HOT ACIDITY', 'MINERAL ACIDITY', 'IRON_D', 'MANGANESE_D', 'ALUMINUM_D', 'IRON_T', 'MANGANESE_T', 'ALUMINUM_T', 'ANTIMONY', 'ARSENIC', 'BERYLLIUM', 'CADMIUM', 'CHROMIUM', 'COPPER', 'LEAD', 'MERCURY', 'NICKEL', 'SELENIUM', 'SILVER', 'THALLIUM', 'ZINC', 'CYANIDE', 'PHENOLS', 'NITRATE', 'SODIUM', 'CALCIUM', 'MAGNESIUM', 'POTASSIUM', 'BI-CARBONATE', 'CARBONATE', 'PHOSPHATE']


def _batch_as_row(station_rows, batch_id):
    """Return one sample batch of a station as a single wide row.

    The rows of *station_rows* whose ``test_batch_id`` equals *batch_id* are
    reduced to ``param_name``/value columns, transposed so that every
    parameter becomes a column, and relabelled with the fixed label list.

    Raises:
        ValueError: raised by pandas when the batch does not contain exactly
            as many parameter rows as there are labels (labels are assigned
            by position).
    """
    batch = station_rows[station_rows['test_batch_id'] == batch_id]
    batch = batch.drop(['station_id', 'sample_id', 'fraction', 'result_unit', 'test_batch_id'], axis=1)
    batch = batch.set_index('param_name').transpose()
    batch.columns = list(_PARAM_COLUMN_LABELS)
    # After the transpose the row is labelled 'result_value'; use the batch id.
    return batch.rename(index={'result_value': batch_id})


def common_filtering1(station):
    """Rebuild every station-dependent table and plot for *station*.

    Reads the module-level ``df_comb`` and ``dfheadloc`` frames, and publishes
    its results through module globals that the later cells rely on:

    * ``ts``             -- the selected station id
    * ``common_filter``  -- rows of ``dfheadloc`` for the station
    * ``df_comb_concat`` -- one row per sample batch, one column per parameter,
      indexed by the batch id parsed as a datetime
    * ``dfdesc``         -- ``df_comb_concat.describe()``
    * ``dfcorr``         -- ``df_comb_concat.corr()``

    The tables are shown in ``output_two`` .. ``output_six`` and the
    correlation heatmap in ``plot_output``.

    Args:
        station: station id to show; compared for equality with the
            ``station_id`` columns.
    """
    global ts, df_comb_concat, dfdesc, dfcorr, common_filter

    # Wipe everything that belongs to the previously selected station,
    # including the time-series plot, which would otherwise keep showing it.
    for stale_output in (output_two, output_three, output_four, output_five,
                         output_six, plot_output, plot1_output):
        stale_output.clear_output()

    ts = station

    common_filter = dfheadloc[(dfheadloc.station_id == station)]
    # Exact match on the requested station (a substring/regex match could pull
    # in rows of other stations whose id merely contains this one).
    station_rows = df_comb[(df_comb.station_id == station)]

    with output_two:
        display(common_filter)

    with output_three:
        display(station_rows)

    # The batches in which BERYLLIUM was reported define the sample dates.
    # NOTE(review): BERYLLIUM is presumably used because it is present in
    # every complete batch -- confirm against the data.
    is_beryllium = station_rows.param_name == 'BERYLLIUM'
    dates = station_rows.loc[is_beryllium, 'test_batch_id'].unique()

    if len(dates) == 0:
        # Nothing to tabulate: publish empty frames so the parameter plots
        # report "no data" instead of showing the previous station.
        df_comb_concat = pd.DataFrame()
        dfdesc = pd.DataFrame()
        dfcorr = pd.DataFrame()
        with output_four:
            print('NO DATA EXISTS FOR THE CHOSEN STATION! PLEASE SELECT ANOTHER STATION.')
        return

    wide = pd.concat([_batch_as_row(station_rows, batch_id) for batch_id in dates])
    wide = wide.fillna(0)
    # Keep a parameter when ANY batch has a non-zero value for it.
    wide = wide.loc[:, (wide != 0).any(axis=0)]
    wide.index = pd.to_datetime(wide.index)
    wide.index.name = str(station)
    df_comb_concat = wide

    dfdesc = df_comb_concat.describe()
    dfdesc.index.name = str(ts)

    corr = df_comb_concat.corr()
    # The published copy carries the station id as index name; the heatmap
    # uses the unnamed frame so no extra axis label is drawn.
    dfcorr = corr.copy()
    dfcorr.index.name = str(ts)

    with output_four:
        display(df_comb_concat)

    with output_five:
        display(dfdesc)

    with output_six:
        display(dfcorr)

    with plot_output:
        width = 50
        height = 50
        fig = plt.figure(figsize=(width, height))
        ax1 = fig.add_subplot(111)
        ax1.grid(True)
        sns.heatmap(corr, annot=True, fmt=".2f", annot_kws={"size": 25}, cbar_kws={'label': 'Pearson R'}, ax=ax1)
        # The heatmap labels the ticks with the frame's own column names;
        # only their size needs adjusting.
        ax1.tick_params(labelsize=28)
        plt.title('Trend Station Parameter Comparison - ' + str(ts), fontsize=70)
        # The colorbar is the last axes added to the figure.
        ax1.figure.axes[-1].set_ylabel('Pearson R', size=50)
        plt.show()
    
def dropdown_stations_eventhandler(change):
    """Refresh all station views with the newly selected dropdown value."""
    selected_station = change['new']
    common_filtering1(selected_station)


# React only to changes of the dropdown's selected value.
dropdown_stations.observe(names='value', handler=dropdown_stations_eventhandler)


display(dropdown_stations)

Dropdown(description='Trend Station', options=('TS001', 'TS002', 'TS003', 'TS004', 'TS005', 'TS006', 'TS007', …

## Selected Trend Station Location Information:

In [28]:
#HIDDEN

# Output area that common_filtering1 fills with the dfheadloc rows of the
# selected station.
display(output_two)

Output()

## Water Quality Data from Selected Trend Station:

In [29]:
#HIDDEN

# Output area that common_filtering1 fills with df_comb_concat (the
# per-batch parameter table of the selected station).
display(output_four)

Output()

## Water Quality Statistics for Selected Trend Station Data:

In [30]:
#HIDDEN

# Output area that common_filtering1 fills with dfdesc, the describe()
# summary of df_comb_concat.
display(output_five)

Output()

## Pearson R Coefficients for Selected Trend Station Water Quality Parameters:

In [31]:
#HIDDEN

# Output area that common_filtering1 fills with dfcorr, the corr() matrix
# of df_comb_concat.
display(output_six)

Output()

## Plot Water Quality Time-Series Data from Selected Trend Station:

In [32]:
#HIDDEN

# Parameter names offered in the time-series dropdown.
labels = [
    'FIELD ph', 'FIELD CONDUCTIVITY', 'TEMPERATURE', 'FLOW',
    'LAB CONDUCTIVITY', 'SULFATE', 'DISSOLVED SOLIDS', 'SUSPENDED SOLIDS',
    'LAB ph', 'ALKALINITY', 'HOT ACIDITY', 'MINERAL ACIDITY',
    'IRON_D', 'MANGANESE_D', 'ALUMINUM_D',
    'IRON_T', 'MANGANESE_T', 'ALUMINUM_T',
    'ANTIMONY', 'ARSENIC', 'BERYLLIUM', 'CADMIUM', 'CHROMIUM', 'COPPER',
    'LEAD', 'MERCURY', 'NICKEL', 'SELENIUM', 'SILVER', 'THALLIUM', 'ZINC',
    'CYANIDE', 'PHENOLS', 'NITRATE', 'SODIUM', 'CALCIUM', 'MAGNESIUM',
    'POTASSIUM', 'BI-CARBONATE', 'CARBONATE', 'PHOSPHATE',
]

# Fresh output area for the time-series plot (replaces the one created in
# the station-selection cell).
plot1_output = widgets.Output()

button1 = widgets.Button(description='Display Plot')
dropdown_param = widgets.Dropdown(description='Parameter', options=labels)

def common_filtering5(param):
    """Plot the time series of *param* for the selected station.

    Draws ``df_comb_concat[param]`` against the frame's index into
    ``plot1_output``, titled with the station id ``ts``. Both names are
    module globals created by ``common_filtering1``; a message is printed
    instead of a plot when no station has been selected yet or when the
    station's table has no column named *param*.

    Args:
        param: column label of ``df_comb_concat`` to plot.
    """
    plot1_output.clear_output()

    # df_comb_concat (and ts) only exist after a station has been selected;
    # look the frame up defensively instead of raising NameError.
    data = globals().get('df_comb_concat')

    with plot1_output:
        if data is None:
            print('NO TREND STATION HAS BEEN SELECTED YET! PLEASE SELECT A TREND STATION FIRST.')
            return

        if param not in data.columns:
            print('NO DATA EXISTS FOR THE CHOSEN STATION/PARAMETER COMBINATION! PLEASE SELECT ANOTHER PARAMETER.')
            return

        plt.figure(figsize=(20, 8))
        plt.plot(data[param], 'go--', linewidth=1, markersize=6)
        plt.grid()
        plt.ylabel(param, fontsize=15)
        plt.xlabel('Date', fontsize=15)
        plt.title(str(ts), fontsize=25)
        plt.show()
    
def dropdown_param_eventhandler(change):
    """Plot the newly selected parameter when the dropdown value changes."""
    common_filtering5(change.new)


def button1_eventhandler(button):
    """Plot the currently selected parameter when 'Display Plot' is clicked.

    The dropdown observer only fires on a *change* of value, so without this
    handler the initially selected parameter could never be plotted and the
    button did nothing.
    """
    common_filtering5(dropdown_param.value)


dropdown_param.observe(dropdown_param_eventhandler, names='value')
button1.on_click(button1_eventhandler)

display(dropdown_param)
display(button1)

Dropdown(description='Parameter', options=('FIELD ph', 'FIELD CONDUCTIVITY', 'TEMPERATURE', 'FLOW', 'LAB CONDU…

Button(description='Display Plot', style=ButtonStyle())

In [33]:
#HIDDEN

# Register pandas' date/time converters with matplotlib before showing the
# time-series plot area.
# NOTE(review): presumably needed because the plotted index is a
# DatetimeIndex -- confirm it is still required with the installed pandas.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# Output area that common_filtering5 draws the time-series plot into.
display(plot1_output)

Output()

## Trend Station Parameter Correlogram:

In [34]:
#HIDDEN

# Output area that common_filtering1 draws the correlation heatmap into.
display(plot_output)

Output()

## Trend Station Parameter Comparison by Linear Regression:

In [35]:
#HIDDEN

# Parameter names offered in the X and Y regression dropdowns.
labels = [
    'FIELD ph', 'FIELD CONDUCTIVITY', 'TEMPERATURE', 'FLOW',
    'LAB CONDUCTIVITY', 'SULFATE', 'DISSOLVED SOLIDS', 'SUSPENDED SOLIDS',
    'LAB ph', 'ALKALINITY', 'HOT ACIDITY', 'MINERAL ACIDITY',
    'IRON_D', 'MANGANESE_D', 'ALUMINUM_D',
    'IRON_T', 'MANGANESE_T', 'ALUMINUM_T',
    'ANTIMONY', 'ARSENIC', 'BERYLLIUM', 'CADMIUM', 'CHROMIUM', 'COPPER',
    'LEAD', 'MERCURY', 'NICKEL', 'SELENIUM', 'SILVER', 'THALLIUM', 'ZINC',
    'CYANIDE', 'PHENOLS', 'NITRATE', 'SODIUM', 'CALCIUM', 'MAGNESIUM',
    'POTASSIUM', 'BI-CARBONATE', 'CARBONATE', 'PHOSPHATE',
]

# Output area for the regression plot and its correlation summary.
plot2_output = widgets.Output()

button2 = widgets.Button(description='Display Plot')
dropdown_param1 = widgets.Dropdown(description='Parameter: X', options=labels)
dropdown_param2 = widgets.Dropdown(description='Parameter: Y', options=labels)

def common_filtering6(param1, param2):
    """Draw a regression plot of *param2* against *param1* for the station.

    Uses the module globals ``df_comb_concat`` and ``ts`` created by
    ``common_filtering1``. The plot and the Pearson correlation coefficient
    with its p-value are written to ``plot2_output``; a message is printed
    instead when no station has been selected yet or when either parameter
    is not a column of the station's table.

    Args:
        param1: column label used for the X axis.
        param2: column label used for the Y axis.
    """
    plot2_output.clear_output()

    # df_comb_concat (and ts) only exist after a station has been selected;
    # look the frame up defensively instead of raising NameError.
    data = globals().get('df_comb_concat')

    with plot2_output:
        if data is None:
            print('NO TREND STATION HAS BEEN SELECTED YET! PLEASE SELECT A TREND STATION FIRST.')
            return

        if param1 not in data.columns or param2 not in data.columns:
            print('NO DATA EXISTS FOR THE CHOSEN STATION/PARAMETER COMBINATION! PLEASE SELECT ANOTHER PARAMETER.')
            return

        width = 20
        height = 8
        plt.figure(figsize=(width, height))
        plt.ylabel(param2, fontsize=15)
        plt.xlabel(param1, fontsize=15)
        # x and y are passed by keyword: recent seaborn releases no longer
        # accept them positionally.
        sns.regplot(x=data[param1], y=data[param2], color='green').set_title(str(ts), fontsize=20)
        plt.show()
        pearson_coef, p_value = stats.pearsonr(data[param1], data[param2])
        print("The Pearson Correlation Coefficient is", pearson_coef, " with a P-value of P =", p_value, " for parameters", param1, "(X) and", param2, "(Y)")
            
            
def dropdown_param1_eventhandler(change):
    """Redraw the regression with the new X parameter and the current Y."""
    common_filtering6(change.new, dropdown_param2.value)


def dropdown_param2_eventhandler(change):
    """Redraw the regression with the current X parameter and the new Y."""
    common_filtering6(dropdown_param1.value, change.new)


def button2_eventhandler(button):
    """Draw the regression for the currently selected X/Y pair on click.

    The dropdown observers only fire on a *change* of value, so without this
    handler the initially selected pair could never be plotted and the
    button did nothing.
    """
    common_filtering6(dropdown_param1.value, dropdown_param2.value)


dropdown_param1.observe(dropdown_param1_eventhandler, names='value')
dropdown_param2.observe(dropdown_param2_eventhandler, names='value')
button2.on_click(button2_eventhandler)

display(dropdown_param1)
display(dropdown_param2)
display(button2)

Dropdown(description='Parameter: X', options=('FIELD ph', 'FIELD CONDUCTIVITY', 'TEMPERATURE', 'FLOW', 'LAB CO…

Dropdown(description='Parameter: Y', options=('FIELD ph', 'FIELD CONDUCTIVITY', 'TEMPERATURE', 'FLOW', 'LAB CO…

Button(description='Display Plot', style=ButtonStyle())

In [36]:
#HIDDEN

# Output area that common_filtering6 draws the regression plot and prints
# the Pearson correlation summary into.
display(plot2_output)

Output()