In [None]:
import math
import numpy as np
import scipy
import scipy.stats
from scipy.stats import gamma
import pandas as pd
import altair as alt
from detail.altairdf import altairDF
# Switch Altair to its "notebook" renderer before any chart is displayed below.
alt.renderers.enable("notebook")

In [None]:
# CDF of gamma(1.0) evaluated at 20 evenly spaced points on [0, 20], layered
# with a red rule mark positioned at x = 10.
# NOTE(review): the "x" and "color" column names are presumably altairDF
# defaults -- confirm against detail.altairdf.
plotdf = altairDF(
    np.linspace(0, 20, num=20).tolist(),
    [lambda x: gamma.cdf(x, 1.)],
    labels=["gamma(1.0)"],
    ycol="CDF",
)
chart = alt.Chart().mark_line().encode(x="x", y="CDF", color="color")
rule = alt.Chart().mark_rule(color="red").encode(x='maximum:Q')
# The rule reads its position from the calculated field `maximum` (constant 10).
alt.layer(chart, rule, data=plotdf).transform_calculate(maximum="10")

In [None]:
# Mean, over 10 independent trials, of the largest of 100 gamma(1.0) draws.
sum(gamma.rvs(1.0, size=100).max() for _ in range(10)) / 10

In [None]:
# Mean, over 10 independent trials, of the largest of 1000 gamma(1.0) draws.
sum(gamma.rvs(1.0, size=1000).max() for _ in range(10)) / 10

In [None]:
# Mean, over 10 independent trials, of the largest of 10000 gamma(1.0) draws.
sum(gamma.rvs(1.0, size=10000).max() for _ in range(10)) / 10

In [None]:
def extremeCDF(x, n, cdf):
    """Return ``cdf(x)`` raised to the power ``n``.

    For ``n`` independent draws from a distribution whose CDF is ``cdf``,
    this is the probability that the largest draw is at most ``x``.

    Arguments:
        x:   point at which the CDF is evaluated.
        n:   exponent, i.e. the number of draws.
        cdf: callable taking ``x`` and returning a probability.

    Returns:
        A Python float (the result of ``math.pow``).
    """
    single_draw_probability = cdf(x)
    return math.pow(single_draw_probability, n)

In [None]:
# For each sample size, tabulate extremeCDF at x = 10 under gamma(1.0), i.e.
# the gamma(1.0) CDF at 10 raised to that sample size.
ssv = [100, 1000, 10000, 100000]
pd.DataFrame(
    {
        'ss': ssv,
        'evCDF': [
            extremeCDF(10, sample_size, lambda point: gamma.cdf(point, 1.))
            for sample_size in ssv
        ],
    }
)

In [None]:
from detail.tdigest import TDigest

class ExtremeValueAnomalyDetector(object):
    """Score how surprising an observed sample maximum is under a reference CDF.

    The reference distribution is supplied as ``td``: any object exposing a
    ``cdf(x)`` method (the notebook passes a ``TDigest`` sketch).
    """

    def __init__(self, td):
        """Keep a reference to ``td``; its ``cdf`` is queried by ``anomaly``."""
        self.td = td

    def anomaly(self, xmax, n):
        """Return ``-log(p)`` with ``p = 1 - td.cdf(xmax) ** n``.

        Arguments:
            xmax: the observed maximum to score.
            n:    the number of observations the maximum was taken over.

        ``p`` is floored at 1e-100 before the logarithm, so the score never
        exceeds ``-log(1e-100)`` (about 230.26).
        """
        prob_all_at_most_xmax = math.pow(self.td.cdf(xmax), n)
        exceedance_prob = 1 - prob_all_at_most_xmax
        # Floor the probability so math.log below never receives zero.
        exceedance_prob = max(exceedance_prob, 1e-100)
        return -math.log(exceedance_prob)

In [None]:
# Build the reference sketch from 100000 gamma(1.0) draws and wrap it in the
# anomaly detector.
# The draws use a fixed random_state so that the sketch -- and therefore every
# anomaly score computed from `detector` -- is identical on each
# Restart & Run All; without it the results change from run to run.
sketch = TDigest(compression = 0.05)
for x in gamma.rvs(1.0, size = 100000, random_state = 12345):
    sketch.update(x)
detector = ExtremeValueAnomalyDetector(sketch)

In [None]:
# Anomaly score as a function of the observed maximum (xmax = 0..12), with one
# series per assumed sample size (100, 1000, 10000).
plotdf = altairDF(
    range(13),
    [
        lambda x: detector.anomaly(x, 100),
        lambda x: detector.anomaly(x, 1000),
        lambda x: detector.anomaly(x, 10000),
    ],
    labels=["100", "1000", "10000"],
    xcol="xmax",
    ycol="anomaly-score",
    ccol="sample-size",
)
# The y axis is fixed to [0, 7]; clip=True is set on the line mark so that
# larger scores are presumably cut off at the plot edge -- confirm in the
# rendered chart.
alt.Chart(plotdf).mark_line(point=True, clip=True).encode(
    alt.Y('anomaly-score', scale=alt.Scale(domain=(0, 7))),
    x="xmax",
    color="sample-size",
)