In [81]:
'''
GEOG5790 - Programming for Geographical Information Analysis: Advanced Skills
Independent Project - EA WIMS Water Quality Data Analyser/Viewer

Anne Harding (200754573)
05/05/2019

interactive_plot.ipynb

Jupyter notebook for user to view and analyse data for user-specified .csv file
containing WQ data (using Text widget for user input) and for a specified determinand
(using Dropdown widget for user input).
'''
# -----------------------------------------------------------------------------------
# STEP ONE: IMPORT MODULES AND LOAD INPUT FILE FOR ANALYSIS.
# -----------------------------------------------------------------------------------

%matplotlib inline

# Import modules:
import os
import pandas as pd
import numpy as np
import folium
from matplotlib import pyplot
import ipywidgets as widgets
from IPython.display import display
from plotly.offline import download_plotlyjs, init_notebook_mode, plot
from convertbng.util import convert_lonlat

# Prompt the user, then show a Text widget in which to type or paste the path
# of the .csv data file. Later cells read the path from file_input.value.
print("Please select .csv file for analysis:")
file_input = widgets.Text(description='Data file:', value='...', disabled=False)
display(file_input)

# Note: Would like a file browser window here, but I don't think a widget exists to do so in Jupyter.

Please select .csv file for analysis:


Text(value='...', description='Data file:')

In [4]:
# -----------------------------------------------------------------------------------
# STEP TWO: CONFIRM INPUT FILE IS APPROPRIATE FOR ANALYSIS AND DISPLAY DROPDOWN
# WIDGET FOR USER TO SELECT DETERMINAND FOR ANALYSIS.
# -----------------------------------------------------------------------------------

# Get path for input data file from Text widget. Surrounding whitespace and
# quote characters are stripped because pasted paths often carry them:
datafile = file_input.value.strip().strip('"\'')

# Name of the column holding the determinand descriptions:
det_column = "determinand.definition"

# Guard clauses: reject unsuitable input before any data is read.
if not os.path.isfile(datafile):
    raise ValueError("File does not exist.")

# Compare the extension case-insensitively so that e.g. "DATA.CSV" is accepted:
filename, extension = os.path.splitext(datafile)
if extension.lower() != '.csv':
    raise ValueError("File must be .csv format.")

print("Input data file accepted.")

# Read .csv file into a pandas dataframe:
print("Reading data.")
df = pd.read_csv(datafile)

# Fail with an explicit message (rather than a bare KeyError) if the file does
# not have the column this notebook relies on:
if det_column not in df.columns:
    raise ValueError(
        "Input file has no '{}' column.".format(det_column)
    )

# Get list of unique determinands from dataframe. Missing values are dropped so
# that NaN cannot be offered as a choice, and the result is converted to a plain
# list, which is a safe type to hand to the Dropdown's options:
dets = df[det_column].dropna().unique().tolist()
if not dets:
    raise ValueError("Input file contains no determinands.")

# Set up Dropdown widget for user to choose determinand of interest
# (the first determinand found is pre-selected):
det_dd = widgets.Dropdown(
    options=dets,
    value=dets[0],
    description='Determinand:',
    disabled=False,
)
# Display widget:
print("Please select determinand of interest:")
display(det_dd)

Input data file accepted.
Reading data.
Please select determinand of interest:


Dropdown(description='Determinand:', options=('Solids, Suspended at 105 C', 'BOD : 5 Day ATU', 'Iron', 'pH', '…

In [80]:
# -----------------------------------------------------------------------------------
# STEP THREE: PREPARE DATA FOR PLOTTING, PRODUCE PLOT AND CALCULATE STATISTICS.
# -----------------------------------------------------------------------------------
# PREPARE DATA:

# Read the determinand picked in the Dropdown widget:
chosen_det = det_dd.value

# Keep only the rows recorded for that determinand:
df_filtered = df.loc[df['determinand.definition'] == chosen_det]

# Units label for the chosen determinand (first distinct value found):
units = pd.unique(df_filtered["determinand.unit.label"])[0]

# Distinct sampling point notations present in the filtered rows:
locs = pd.unique(df_filtered["sample.samplingPoint.notation"])

# Filtered rows ordered by their sample date/time column:
df_ordered = df_filtered.sort_values('sample.sampleDateTime')

# -----------------------------------------------------------------------------------
# DIRECTORY AND FILENAMES FOR SAVING PLOTS:

# Outputs are saved in a "plots" subdirectory next to the input file.
# (The variable is called input_dir, not dir, so the built-in dir() is not shadowed.)
input_dir = os.path.dirname(datafile)
# Define path for subdirectory to save plots:
plots_dir = os.path.join(input_dir, "plots")

# Create the output directory if it is not there yet. exist_ok=True keeps this
# safe if the directory appears between the check and the call:
if not os.path.isdir(plots_dir):
    os.makedirs(plots_dir, exist_ok=True)
    print("Directory {} created.".format(plots_dir))

# Determinand names are free text and may contain characters that are not valid
# in filenames (e.g. ':' on Windows) or that would be read as path separators
# ('/'). Replace anything outside a conservative whitelist with '_':
safe_det = "".join(
    ch if (ch.isalnum() or ch in " ._-()") else "_" for ch in str(chosen_det)
).strip()

# Create filename for saving plot:
plot_filename = os.path.join(plots_dir, safe_det + "_plot.html")
# Create filename for saving map:
map_filename = os.path.join(plots_dir, safe_det + "_map.html")

# -----------------------------------------------------------------------------------
# DATA PLOTTING:

# Build one trace per sampling point: sample date/time on the x-axis and the
# result value on the y-axis, drawn as markers joined by lines.
traces = []
for loc in locs:
    # Rows belonging to this sampling point (already in date order):
    loc_rows = df_ordered[df_ordered['sample.samplingPoint.notation'] == loc]
    traces.append({
        'x': loc_rows['sample.sampleDateTime'],
        'y': loc_rows['resultQualified'],
        'name': loc,
        'mode': 'markers+lines',
    })

# The y-axis label combines the determinand name with its units:
y_axis_title = chosen_det + " (" + str(units) + ")"

fig = {
    'data': traces,
    'layout': {
        'xaxis': {'title': 'Date'},
        'yaxis': {'title': y_axis_title},
    },
}

# Write the figure to the .html file chosen for this determinand:
plot(fig, filename=plot_filename)

# -----------------------------------------------------------------------------------
# DESCRIPTIVE STATISTICS:

# Descriptive statistics for chosen determinand for each sampling point.
# The table is passed to display() explicitly: Jupyter only renders a bare
# expression when it is the last statement of a cell, and this one is followed
# by the mapping code, so without display() the table is never shown.
print("Descriptive statistics table for {}.".format(chosen_det + " (" + str(units) + ")"))
display(
    df_ordered['resultQualified']
    .groupby(df_ordered['sample.samplingPoint.notation'])
    .describe()
)

# -----------------------------------------------------------------------------------
# MAPPING:

# Columns used to locate and label the sampling points:
notation_col = "sample.samplingPoint.notation"
easting_col = "sample.samplingPoint.easting"
northing_col = "sample.samplingPoint.northing"

# One row per sampling point, keeping notation, easting and northing together.
# De-duplicating the three columns independently would misalign them whenever
# two sampling points share an easting or a northing. Rows with a missing
# notation or coordinate are dropped because they cannot be placed on the map.
points = (
    df_ordered[[notation_col, easting_col, northing_col]]
    .dropna()
    .drop_duplicates(subset=notation_col)
)

# Convert eastings, northings to longitude, latitude. convert_lonlat returns
# the longitudes first and the latitudes second. The call is skipped when
# there are no points to convert.
if len(points) > 0:
    lons, lats = convert_lonlat(points[easting_col].values,
                                points[northing_col].values)
else:
    lons, lats = [], []

# Build [longitude, latitude, notation] entries, one per sampling point.
# Points whose conversion did not yield a usable number are reported and
# skipped rather than being passed on to the map.
coords = []
for lon, lat, notation in zip(lons, lats, points[notation_col]):
    if pd.isna(lon) or pd.isna(lat):
        print("Skipping sampling point {}: coordinates could not be converted."
              .format(notation))
        continue
    coords.append([lon, lat, notation])

# Make empty map centered on London using OpenStreetMap background:
m = folium.Map(location=[51.4772, 0], tiles="openstreetmap", zoom_start=5)

# Add a marker per sampling point. folium expects [latitude, longitude];
# the popup shows the sampling point notation.
for lon, lat, notation in coords:
    folium.Marker([lat, lon], popup=str(notation)).add_to(m)

# Save the map as an .html file:
m.save(map_filename)

# -----------------------------------------------------------------------------------

Descriptive statistics table for Conductivity at 25 C (us/cm).
