In [27]:
# Imports
import math
import datetime
import pandas as pd
import numpy as np

from IPython.display import display
from ipywidgets import Button, HBox, VBox, widgets

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import cm

from scipy.stats import johnsonsu  # ,johnsonsb
import Outliers as grubbs

import statsmodels.api as sm

In [28]:
## Globals
# Module-level settings shared by the filtering and charting functions.

# Quantile of the lag distribution used as the "short lag" threshold when the
# repeated-test filter is enabled (passed to np.nanquantile in GetDataSets).
LAG_LOWER_PERCENTILE = 0.25
# percentile list (not referenced by the cells visible in this notebook)
perc = [.05, .25, .75, .95]
# Lower / upper bounds used to filter values (shown in the 'LVL:' / 'UVL:' widgets).
LVL = -1E11
UVL = 1E11
# Lower / upper control limits. The +/-1E9 defaults act as "not set" sentinels:
# doFilter only draws a limit line when it lies inside (-1E9, 1E9).
LCL = -1E9
UCL = 1E9
# Centre line of the chart.
CL = 0
# Limits algorithm; must be one of the dropdown options
# ('None', '3s', 'Johnson fit', 'Percentiles'). The comparison is
# case-sensitive, so the spelling has to match the option exactly.
limitsAlgo = "Johnson fit"
# Enable the repeated-test (lag) filter.
lagFilter = False
# Enable the Grubbs outlier filter.
GrubbsFilter = True
# assign widget values to globals
def getWidgetValues():
    """Copy the current widget values into the module-level settings.

    Reads the limit, algorithm and filter widgets and stores their values in
    the corresponding globals, then clears the output area so the next run
    starts from an empty panel.
    """
    # Without this declaration every assignment below would create a local
    # variable and the module-level settings would never change.
    global LVL, UVL, LCL, UCL, CL, limitsAlgo, lagFilter, GrubbsFilter
    LVL = wLVL.value
    UVL = wUVL.value
    LCL = wLCL.value
    UCL = wUCL.value
    CL = wCL.value
    limitsAlgo = wAutoLimits.value
    lagFilter = wLag.value
    GrubbsFilter = wGrubbs.value
    wOut.clear_output()

In [29]:
## Read data
# Load the two-column CSV; the header row is replaced by the names below and
# the first column is parsed as datetimes.
Data = pd.read_csv(
    r'test.csv',
    sep=',',
    header=0,
    names=['Date', 'Value'],
    parse_dates=['Date'],
)
# Report how many rows carry a non-missing value.
validPointCount = Data.Value.dropna().count()
print(f"Number of valid points:{validPointCount}")

Number of valid points:103


In [30]:
def GetDataSets(oData):
    """Return the rows of ``oData`` that survive the configured filters.

    Reads the module-level settings LVL, UVL, LCL, UCL, GrubbsFilter,
    lagFilter and LAG_LOWER_PERCENTILE.

    Parameters
    ----------
    oData : pandas.DataFrame
        Frame with a datetime ``Date`` column and a numeric ``Value`` column.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        Independent copy of the retained rows, in their original order, with
        the helper columns ``OOC``, ``Grubbs``, ``Lag`` and ``LagFilter``.
    """
    # Keep values strictly inside (LVL, UVL); NaN compares False and is dropped.
    # .copy() so the column assignments below never touch the caller's frame.
    insideWindow = (oData.Value > LVL) & (oData.Value < UVL)
    fData = oData.loc[insideWindow].copy()

    # Out of control = outside the closed interval [LCL, UCL].
    fData['OOC'] = ~fData.Value.between(LCL, UCL)

    fData['Grubbs'] = False
    # Grubbs' statistic is undefined for fewer than three observations.
    if GrubbsFilter and len(fData) > 2:
        # NOTE(review): assumes two_sided_test returns the values that remain
        # after outlier removal (as in outlier_utils' smirnov_grubbs) --
        # confirm against the local Outliers module.
        retained = grubbs.two_sided_test(fData.Value.to_numpy(), alpha=0.05)
        fData.loc[~fData.Value.isin(retained), "Grubbs"] = True

    # Negated gap to the previous row, in fractional hours. total_seconds()
    # is used because astype('timedelta64[h]') is rejected by pandas >= 2.0.
    # NOTE(review): rows are not sorted here, so the sign of Lag depends on
    # the order of the input file -- confirm the intended direction.
    fData['Lag'] = -(fData.Date.diff().dt.total_seconds() / 3600.0)
    if lagFilter:
        lag_min = np.nanquantile(fData.Lag, LAG_LOWER_PERCENTILE)
        # Flag a row when it and the following row are both out of control
        # and its lag does not exceed the lower-quantile threshold.
        nextOOC = fData.OOC.shift(-1, fill_value=False)
        fData['LagFilter'] = fData.OOC & nextOOC & (fData.Lag <= lag_min)
    else:
        fData['LagFilter'] = False

    # Copy so callers can add columns without a SettingWithCopyWarning.
    return fData[~fData.LagFilter & ~fData.Grubbs].copy()

In [31]:
## Functions
# Control-chart constants for moving ranges of two consecutive points.
_D2_N2 = 1.128    # mean of the range of 2 points, in units of sigma
_D3_N2 = 0.8525   # standard deviation of that range, in units of sigma


def _computeLimits(values, mrMean, algo):
    """Return ``(LCL, CL, UCL, johnsonParams)`` for ``algo``, or None if ``algo`` is not a known method."""
    if algo == "Johnson fit":
        # Location and scale are pinned, so only the two shape parameters are fitted.
        params = johnsonsu.fit(values.to_numpy(), floc=0, fscale=1)
        lower, centre, upper = (float(johnsonsu.ppf(q, *params)) for q in (0.01, 0.5, 0.99))
        return lower, centre, upper, params
    if algo == "3s":
        centre = float(values.mean())
        halfWidth = 3.0 * mrMean / _D2_N2
        return centre - halfWidth, centre, centre + halfWidth, None
    if algo == "Percentiles":
        lower, centre, upper = (float(p) for p in np.percentile(values, [5.0, 50.0, 95.0]))
        return lower, centre, upper, None
    return None


def doFilter(b):
    """Button callback: refresh settings, filter the data and draw the charts.

    Draws three stacked panels inside ``wOut``: the individual values with the
    control limits, the moving range, and a density histogram with either the
    fitted Johnson SU curve or a Gaussian KDE. When a limits algorithm is
    selected, the computed LCL / CL / UCL are written back to their widgets.

    Parameters
    ----------
    b : object
        The argument supplied by ``button.on_click``; not used.
    """
    getWidgetValues()
    vData = GetDataSets(Data).copy()
    # Work on local copies: assigning to the global names inside this function
    # would turn them into locals and raise UnboundLocalError on first read.
    algo = limitsAlgo
    lcl, cl, ucl = LCL, CL, UCL
    with wOut:
        validCount = vData.Value.count()
        print(f"Valid points:{validCount}")
        if validCount < 2:
            # A moving range needs at least two points.
            print("Not enough valid points to build the charts.")
            return
        vData["MovingRange"] = vData["Value"].diff().abs()
        mrMean = vData.MovingRange.mean()

        johnsonParams = None
        try:
            limits = _computeLimits(vData.Value, mrMean, algo)
        except Exception as exc:
            # Report the failure instead of hiding it, then fall back to the
            # limits currently entered in the widgets.
            print(f"Limits computation failed ({exc}); keeping the current limits.")
            limits = None
        if limits is None:
            algo = "None"
        else:
            lcl, cl, ucl, johnsonParams = limits
            wLCL.value = lcl
            wUCL.value = ucl
            wCL.value = cl

        #chart setup
        fig, axs = plt.subplots(3, 1, sharex=False, figsize=(12, 15))
        fig.tight_layout(pad=0)

        # Individual chart
        ax = axs[0]
        ax.set(xlabel='', ylabel='Value')
        ax.plot(vData.Date, vData.Value, linestyle='-', marker='o', markersize=2)
        # Limits at or beyond +/-1E9 are the "not set" defaults and are not drawn.
        if ucl < 1E9:
            ax.axhline(ucl, color='blue', linestyle='dashed')
        ax.axhline(cl, color='blue', linestyle='solid')
        if lcl > -1E9:
            ax.axhline(lcl, color='blue', linestyle='dashed')

        # mR chart
        ax = axs[1]
        ax.set(xlabel='', ylabel='Range')
        ax.plot(vData.Date, vData.MovingRange, linestyle='-', marker='o', markersize=2)
        ax.axhline(mrMean, color='blue', linestyle='solid')
        # Upper limit = mean + 3 * sigma_mR with sigma_mR = d3 * mean / d2
        # (i.e. about 3.267 * mean). The matching lower limit is always
        # negative for span-2 ranges, so only the zero floor is shown.
        mrUpper = mrMean * (1.0 + 3.0 * _D3_N2 / _D2_N2)
        ax.axhline(mrUpper, color='blue', linestyle='dashed')
        ax.set_ylim(bottom=0)

        # Histogram
        ax = axs[2]
        ax.hist(x=vData.Value, bins='auto', alpha=0.5, rwidth=1.0, edgecolor="k", zorder=5, density=True)
        if johnsonParams is not None:
            rv = johnsonsu(*johnsonParams)
            jx = np.linspace(min(lcl, vData.Value.min()), max(ucl, vData.Value.max()), num=200)
            ax.plot(jx, rv.pdf(jx), lw=2, label="Johnson fit")
        else:
            kde = sm.nonparametric.KDEUnivariate(vData.Value)
            # Estimate the densities silverman normal_reference
            kde.fit(bw='scott', kernel='gau')
            ax.plot(kde.support, kde.density, lw=2, label="KDE from samples")
        if ucl < 1E9:
            ax.axvline(ucl, color='blue', linestyle='dashed')
        ax.axvline(cl, color='blue', linestyle='solid')
        if lcl > -1E9:
            ax.axvline(lcl, color='blue', linestyle='dashed')
        ax.legend()
        plt.show()

In [32]:
## Widgets
def _boundedFloat(value, description, **extra):
    """Create an enabled BoundedFloatText covering the range -1E11 .. 1E11."""
    return widgets.BoundedFloatText(
        value=value,
        min=-1E11,
        max=1E11,
        description=description,
        disabled=False,
        **extra
    )


# Layout shared by the controls that shrink to fit their content.
_FIT_CONTENT = {'width': 'max-content'}

# Value bounds used to filter the data.
wUVL = _boundedFloat(UVL, 'UVL:')
wLVL = _boundedFloat(LVL, 'LVL:')

# Control limits and centre line.
wUCL = _boundedFloat(UCL, 'UCL:', layout=dict(_FIT_CONTENT))
wLCL = _boundedFloat(LCL, 'LCL:', layout=dict(_FIT_CONTENT))
wCL = _boundedFloat(CL, 'CL:', layout=dict(_FIT_CONTENT))

# Selector for the method used to compute the limits.
wAutoLimits = widgets.Dropdown(
    options=['None', '3s', 'Johnson fit', 'Percentiles'],
    layout=dict(_FIT_CONTENT),
    description='Limits computation',
    disabled=False
)

# Filter switches.
wGrubbs = widgets.Checkbox(value=False, description='Grubbs OOC', disabled=False, indent=False)
wLag = widgets.Checkbox(value=False, description='Filter repeated tests', disabled=False, indent=False)
wLagDays = widgets.BoundedFloatText(value=1, min=0.01, max=100, description='Days:', disabled=False)

# Run button and the output area it draws into.
button = widgets.Button(
    description='Run',
    disabled=False,
    button_style='',  # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Filter data'
)
wOut = widgets.Output(layout={'border': '1px solid black'})
button.on_click(doFilter)

In [33]:
# Assemble the control panel top to bottom; the VBox is the cell's last
# expression so the notebook renders it.
_panelRows = [
    widgets.HTML(value="<b>Filters</b>:"),
    HBox([wLVL, wUVL]),
    wAutoLimits,
    HBox([wLCL, wCL, wUCL]),
    wGrubbs,
    HBox([wLag, wLagDays]),
    button,
    wOut,
]
VBox(_panelRows)

VBox(children=(HTML(value='<b>Filters</b>:'), HBox(children=(BoundedFloatText(value=-100000000000.0, descripti…