In [1]:
import json

from pymongo import MongoClient

import numpy as np
import matplotlib.pyplot as plt
import line_profiler

from helpers import *
from imputation import *

from bokeh.io import output_notebook, push_notebook
from bokeh.plotting import figure, show
from bokeh.layouts import column
from bokeh.models import *

from ipywidgets.widgets import Button, HBox
from IPython.display import display

client = MongoClient("localhost", 27017)
db = client['usgs']
output_notebook()
%load_ext Cython

In [2]:
# Read the 'to_review' entry of the revision file; later cells index it by
# position (sid[si]). The context manager closes the file handle as soon as
# parsing is done instead of leaving it open until garbage collection.
with open('revision_list_d.json') as revision_file:
    sid = json.load(revision_file)['to_review']

In [25]:
# Choose one entry of the review list by position and echo its id.
si = 11
site_id = sid[si]
print(site_id)

# Fetch the two (T, Z) series for the site -- the m/c suffixes presumably
# stand for measured/computed, matching the plot legends -- and convert
# both Z arrays with feet_to_meters.
Tm, Zm, Tc, Zc = get_data(db, site_id)
Zm, Zc = feet_to_meters(Zm), feet_to_meters(Zc)

# Align the two series; dt is the step returned by align_measurements
# (presumably seconds, since it is later divided by 3600 to get hours).
dt, Ym, Yc = align_measurements(Tm, Zm, Tc, Zc)
print(Ym.size, Yc.size, dt)

4185000
65015 65015 1800


In [26]:
# Overwrite every non-positive reading with the sentinel -1 (in place)
# before handing the series to fill_gaps.
for series in (Ym, Yc):
    series[series <= 0] = -1

# Both series are gap-filled with identical settings.
gap_settings = dict(max_gap = 4, spike_size = 2, window_size = 10)
Ym = fill_gaps(Ym, **gap_settings)
Yc = fill_gaps(Yc, **gap_settings)

# Anomaly labels derived from the two filled series.
# NOTE(review): 0.02 is passed as the third argument -- presumably a
# tolerance/threshold; confirm against mark_anomaly.
y_true = mark_anomaly(Ym, Yc, 0.02)

# Anything still non-positive after filling is replaced by NaN.
for series in (Ym, Yc):
    series[series <= 0] = np.nan

# Scratch state: two size constants, a cursor that starts at k, a counter
# and an empty index list. Nothing in the visible cells reads these.
n, k = 1000, 50
i = k
count = 0
idx = []

# Time axis: sample index times the step dt, divided by 3600
# (hours, assuming dt is in seconds -- TODO confirm).
hours = np.arange(Ym.size) * dt / 3600

# Columns consumed by the figures: x (time), m and c (the two gage-height
# series), a (the anomaly labels).
source = ColumnDataSource(data = {'x': hours, 'm': Ym, 'c': Yc, 'a': y_true})

# Values shared by both figures.
PLOT_WIDTH = 950
site_number = sid[si]

# Toolbar of the gage-height figure: box zoom, one wheel zoom per axis,
# pan and save.
gh_tools = [
    BoxZoomTool(),
    WheelZoomTool(dimensions=['width']),
    WheelZoomTool(dimensions=['height']),
    PanTool(),
    SaveTool()
]

# Upper figure: both gage-height series over time. The upper y bound is
# taken from Zc (the converted, not yet aligned series), not from Yc.
gh_plot = figure(
    title = 'Site %i' % site_number,
    plot_height = 300,
    plot_width = PLOT_WIDTH,
    tools = gh_tools,
    toolbar_location = "above",
    x_axis_label = "Time, hours",
    y_axis_label = "Gage Height, meters",
    y_range = (-1, Zc.max())
)

# Lower figure: the anomaly labels. It reuses gh_plot's x_range object.
anomaly_plot = figure(
    title = 'Site %i. Is Anomaly' % site_number,
    plot_height = 200,
    plot_width = PLOT_WIDTH,
    y_range = (-0.5, 1.5),
    x_range = gh_plot.x_range,
    toolbar_location = "above"
)

# One line per gage-height column: (column, colour, legend entry).
for column_name, line_color, label in (
    ("m", '#ff0000', "GH Measured"),
    ("c", '#0000ff', "GH Computed")
):
    gh_plot.line("x", column_name, source = source, color = line_color,
                 line_width = 2, legend = label)

# Faint navy minor grid lines on both axes of the upper figure.
for grid in (gh_plot.ygrid, gh_plot.xgrid):
    grid.minor_grid_line_color = 'navy'
    grid.minor_grid_line_alpha = 0.1

anomaly_plot.line("x", "a", source = source, color = '#000000', line_width = 2)

# Stack the two figures vertically and render them.
show(column(gh_plot, anomaly_plot))



In [13]:
# Choose a different entry of the review list and echo its id.
si = 6
station = sid[si]
print(station)

# Fetch the two (T, Z) series and convert both Z arrays with feet_to_meters.
Tm, Zm, Tc, Zc = get_data(db, station)
Zm, Zc = map(feet_to_meters, (Zm, Zc))

# Align the series; dt is the step returned by align_measurements.
dt, Ym, Yc = align_measurements(Tm, Zm, Tc, Zc)

# Overwrite non-positive readings with the sentinel -1 (in place).
for series in (Ym, Yc):
    series[series <= 0] = -1

# Gap-fill both series with the same settings (max_gap is 15 here).
fill_options = {'max_gap': 15, 'spike_size': 2, 'window_size': 10}
Ym = fill_gaps(Ym, **fill_options)
Yc = fill_gaps(Yc, **fill_options)

8095300


In [14]:
# Number of elements in the aligned, gap-filled Ym series.
np.size(Ym)

130028

# Peek at the first 100 entries of Ym.
Ym[:100]

In [12]:
# Tabulate the distinct differences between consecutive Tm entries together
# with how many times each one occurs.
np.unique(np.diff(Tm), return_counts = True)

(array([   300,    600,    900,   1800,   2400,   2700,   3300,   3600,
          4500,   6000,  95400, 645900], dtype=int32),
 array([ 38825,   1849, 114931,      1,      1,      3,      1,      6,
             6,      1,      1,      1]))