In [None]:
import sys
sys.path.append('../../code/python')
import numpy as np
from collections import Counter
import data
import matplotlib.pyplot as plt
%matplotlib inline

from collections import Counter
class Irregularities(object):
    """Tag every sample of a time-indexed object with its neighbouring timesteps.

    Attributes
    ----------
    x : the object passed in; it must expose ``.index`` (convertible to
        ``datetime64[m]``), accept a boolean mask in ``x[mask]`` and be
        plottable by ``plt.plot``.
    c : ``Counter`` of the timesteps between consecutive index entries,
        expressed in minutes as floats.
    d : float array with 2 rows and one column per index entry. Row 0 is the
        timestep leading up to a sample, row 1 the timestep following it; the
        missing neighbour at either end is NaN.
    """

    def __init__(self, x):
        self.x = x
        stamps = np.array(x.index, dtype='datetime64[m]')
        steps = np.diff(stamps).astype(float)
        self.c = Counter(steps)

        # one column per sample: (step before, step after), NaN where absent
        step_before = np.r_[np.nan, steps]
        step_after = np.r_[steps, np.nan]
        self.d = np.vstack((step_before, step_after))

    def dt(self, dt):
        """Return a boolean mask of samples having a timestep of ``dt`` minutes
        on at least one side."""
        hits = self.d == dt
        return hits.any(axis=0)

    def plot(self, dts):
        """Plot ``x`` and overlay an 'x' marker series for every timestep in ``dts``."""
        plt.figure(figsize=(15, 5))
        plt.plot(self.x)
        for step in dts:
            flagged = self.x[self.dt(step)]
            plt.plot(flagged, 'x', label='{}'.format(step))
        plt.legend()

In [None]:
from sklearn import tree

# Module-level classifier that cut() refits on every call.
# min_samples_leaf=1000 forces each leaf to hold at least 1000 samples, so a
# sampling regime shorter than that cannot be separated out by the tree.
tr = tree.DecisionTreeClassifier(min_samples_leaf = 1000)
def cut(df):
    """Find and plot the places where the sampling interval of ``df`` changes.

    Every index entry is converted to integer minutes and labelled with the
    smaller of its two adjacent timesteps. The module-level decision tree
    ``tr`` is fitted on (timestamp -> label); rows after which the predicted
    label changes are the cut points. One subplot is drawn per cut point,
    showing the rows around it with 'x' markers up to and including the cut
    row and 'o' markers after it, each labelled with the predicted timestep.

    Parameters
    ----------
    df : pandas object whose index is convertible to ``datetime64[m]`` and
        whose entries support ``strftime``.

    Returns
    -------
    None. Nothing is plotted (and no figure is created) when ``df`` has fewer
    than two rows or when the tree predicts a single timestep throughout.
    """
    t = np.array(df.index, dtype='datetime64[m]').astype(int)
    if len(t) < 2:
        # no timestep exists; dt[0] below would raise IndexError
        return
    dt = np.diff(t)
    # use min of dt on either side of timestamp as label
    d = np.r_[dt[0], dt, dt, dt[-1]].reshape((2, -1)).min(0)
    x = t.reshape((-1, 1))
    tr.fit(x, d)
    cl = tr.predict(x)

    # positions j where the predicted label differs between rows j and j+1
    ix = np.where(np.diff(cl))[0]
    if len(ix) == 0:
        # a single regime: avoid creating a zero-width, empty figure
        return

    last = len(t) - 1
    plt.figure(figsize=(min(15, len(ix) * 6), 4))
    for i, j in enumerate(ix):
        # clamp the window start: a negative start would wrap to the end
        lo = max(j - 9, 0)
        plt.subplot(1, len(ix), i + 1)
        # .iloc makes the row slicing explicitly positional
        plt.plot(df.iloc[lo:j + 11])
        plt.plot(df.iloc[lo:j + 1], 'x', label='{}'.format(cl[j]))
        plt.plot(df.iloc[j + 1:j + 11], 'o', label='{}'.format(cl[j + 1]))
        # two ticks, roughly centred on each half of the window, kept in range
        ticks = [df.index[max(j - 4, 0)], df.index[min(j + 6, last)]]
        ax = plt.gca()
        ax.set_xticks(ticks)
        ax.set_xticklabels([tick.strftime('%Y-%m') for tick in ticks])
        ax.set_title(df.index[j].strftime('%Y-%m-%d %Hh'))
        plt.legend()

In [None]:
# Build the project's Data object from its config file and open it with the
# arguments ('r', '_raw'). Later cells read the per-key frames from D.r.
# NOTE(review): presumably 'r' is read-only mode and '_raw' selects the raw
# dataset -- confirm against data.Data.open.
D = data.Data('../python/data.cfg')
D.open('r', '_raw')

In [None]:
Irregularities(D.r['5'].xs('avg', 1, 'aggr')).plot([5, 15, 30, 45, 60])

In [None]:
cut(D.r['5'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['6'].xs('avg', 1, 'aggr')).plot([5, 10, 15, 30, 45, 60])

In [None]:
cut(D.r['6'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['8'].xs('avg', 1, 'aggr')).plot([15, 30])

In [None]:
cut(D.r['8'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['BTG'].xs('avg', 1, 'aggr')).plot([10, 15, 30])

In [None]:
cut(D.r['BTG'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['MARPCH'].xs('avg', 1, 'aggr')).plot([5, 15])

In [None]:
cut(D.r['MARPCH'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['ANDA'].xs('avg', 1, 'aggr')).plot([5, 15])

In [None]:
cut(D.r['ANDA'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['COMB'].xs('avg', 1, 'aggr')).plot([5, 10, 15])

In [None]:
cut(D.r['COMB'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['CT'].xs('avg', 1, 'aggr')).plot([10, 15])

In [None]:
cut(D.r['CT'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['INILLA'].xs('avg', 1, 'aggr')).plot([5, 15])

In [None]:
cut(D.r['INILLA'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['LSC'].xs('avg', 1, 'aggr')).plot([5, 15])

In [None]:
cut(D.r['LSC'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['MET3CH'].xs('avg', 1, 'aggr')).plot([5, 10])

In [None]:
cut(D.r['MET3CH'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['MET3LP'].xs('avg', 1, 'aggr')).plot([5, 10])

In [None]:
cut(D.r['MET3LP'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['MIN'].xs('avg', 1, 'aggr')).plot([5, 15])

In [None]:
cut(D.r['MIN'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['PC'].xs('avg', 1, 'aggr')).plot([5, 15])

In [None]:
cut(D.r['PC'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['PLMCN'].xs('avg', 1, 'aggr')).plot([15, 60])

In [None]:
cut(D.r['PLMCN'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['PLV'].xs('avg', 1, 'aggr')).plot([ 15, 60])

In [None]:
cut(D.r['PLV'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['RPL'].xs('avg', 1, 'aggr')).plot([15, 60, 120])

In [None]:
cut(D.r['RPL'].xs('avg', 1, 'aggr'))

In [None]:
Irregularities(D.r['TLH'].xs('avg', 1, 'aggr')).plot([5, 15])

In [None]:
cut(D.r['TLH'].xs('avg', 1, 'aggr'))