In [None]:
# eBay Atlas

## Imports

In [1]:
# iPython-specific
import IPython

# standard libs
import pandas as pd
from pprint import pprint
from datetime import datetime, timedelta
from influxdb import DataFrameClient
import collections
import numpy as np
import math
# custom libs
import library.date_manipulation as date_manipulation
import library.stats.atlas as atlas
import library.stats.variation as stats_variation
import library.influx.query as influx_query

# dataset import
import library.dataset as dataset

## Global Variables

In [3]:
# --- Atlas configuration shared by the cells below ---

# Surprise windows: length of one window, and the shift between two
# consecutive windows when running in 'sliding' mode.
atlas_interval_duration = timedelta(hours=1)
atlas_sliding_shift = timedelta(minutes=10)

# Window mode, either 'stacked' or 'sliding'.
#atlas_mode = 'sliding'
atlas_mode = 'stacked'

# Prediction method handed to the atlas helpers.
# Alternatives that have been tried: 'mean', 'least_squares'.
prediction_method = 'polyfit'

# Whether we consider the current point when doing a prediction
# (alternative: keep = False).
keep = True

# Date boundaries (YYYY-MM-DD strings).
boundary_from = '2018-05-29'
boundary_to = '2018-06-06'


## Querying a Time Series

In [4]:
# TODO: get from DEV w/ specific time boundaries (fixed days)
# Load 'timer_request_by_business' for 2018-05-28 .. 2018-05-30 through the
# prod low-precision helper (argument meaning inferred from the helper's name).
df_dict = dataset.get_data_from_prod_interval_low_precision(
    '2018-05-28',
    '2018-05-30',
    'timer_request_by_business'
)

# Picking the first series out of the dict (currently disabled):
#df = df_dict.values()[0]

# Field against which we run Atlas; other candidates are 'mean' and 'sum'.
field = 'count'

## Calculate surprise on TS

In [None]:
# Compute the Atlas surprise over time windows of one raw series, store it in
# the dev DB, then run the relative Grubbs test on those surprises and store
# that result as well.
#
# Reads (from the cells above): df_dict, field, prediction_method, keep,
# atlas_mode, atlas_interval_duration, atlas_sliding_shift.
# df_dict is only read here and never rebound, so this cell can be re-run and
# the cells below still receive the raw series.

# --- output series definition ---
output_table = 'atlas'
output_table_test = 'Grubbs'
output_tags = {
    'prediction_method': prediction_method,
    'mode': atlas_mode,
    'interval_duration': str(atlas_interval_duration),
    'keep': keep
}
if atlas_mode == 'sliding':
    output_tags['sliding_shift'] = str(atlas_sliding_shift)
series_name = influx_query.construct_influx_series_name(output_table, output_tags)
series_name_test = influx_query.construct_influx_series_name(output_table_test, output_tags)

# --- mode-dependent settings ---
# Each mode has its own cursor step and its own atlas helper.
if atlas_mode == 'stacked':
    window_step = atlas_interval_duration
    compute_surprise = atlas.compute_largest_surprise
elif atlas_mode == 'sliding':
    window_step = atlas_sliding_shift
    compute_surprise = atlas.compute_last_surprise
else:
    # Without this guard an unknown mode never advances the cursor and the
    # loop below never terminates.
    raise ValueError("atlas_mode must be 'stacked' or 'sliding', got %r" % (atlas_mode,))

# Smallest window that is evaluated: 2 points when `keep` is set, 3 otherwise.
min_points = 2 if keep else 3

# --- input series ---
# We run on the first series of the dict; list() keeps this working whether
# .values() returns a list or a view.
if not df_dict:
    raise ValueError('df_dict is empty: there is no series to run Atlas on')
raw_df = list(df_dict.values())[0]
if len(raw_df.index) == 0:
    raise ValueError('the selected series has no rows')

# --- windowed surprise computation ---
surprise_by_ts = {}
window_start = raw_df.index[0]
last_ts = raw_df.index[-1]
while window_start < last_ts:
    # Label slicing with .loc includes both end points of the window.
    window_end = window_start + atlas_interval_duration
    window = raw_df[field].loc[window_start:window_end]

    # The result is keyed by the cursor AFTER it has been advanced: in
    # 'stacked' mode that is the end of the window, in 'sliding' mode it is
    # window start + shift.
    # NOTE(review): confirm the sliding-mode key is the intended timestamp.
    window_start = window_start + window_step

    # Positional values of the window, independent of the index labels.
    values = list(window.values)
    if len(values) < min_points:
        continue
    surprise_by_ts[window_start] = compute_surprise(values, prediction_method=prediction_method, keep=keep)

# --- surprise frame, ordered by timestamp ---
sorted_ts = sorted(surprise_by_ts)
surprise_df = pd.DataFrame(
    data=[surprise_by_ts[ts] for ts in sorted_ts],
    index=sorted_ts,
    columns=['surprise']
)

# save
dataset.insert_data_in_dev_mydb({series_name: surprise_df})

#### Grubbs test
grubbs_df = pd.DataFrame(
    data=stats_variation.compute_grubbs_test_relative(surprise_df['surprise']),
    index=surprise_df.index,
    columns=['test']
)
# Only the Grubbs series is written here; the surprise series was saved above.
dataset.insert_data_in_dev_mydb({series_name_test: grubbs_df})

In [None]:
# Show the Grafana panel for the configured date boundaries as an iframe.

# config
grafana_host = 'localhost:8300'
graph_key = 'atlas?orgId=1&panelId=2'

# boundaries -> normalized datetimes -> timestamps (unit argument: 'ms')
dt_from = date_manipulation.get_normalized_dt(boundary_from)
dt_to = date_manipulation.get_normalized_dt(boundary_to)

from_timestamp = date_manipulation.datetime_to_timestamp(dt_from, 'ms')
to_timestamp = date_manipulation.datetime_to_timestamp(dt_to, 'ms')

print(from_timestamp)
print(to_timestamp)

# Panel URL with the time range appended as from/to query parameters.
url = 'http://{host}/d-solo/nbMyMEMmk/{panel}&from={start}&to={end}'.format(
    host=grafana_host,
    panel=graph_key,
    start=str(from_timestamp),
    end=str(to_timestamp)
)
iframe = '<iframe src="{src}" width="700" height="400" frameborder="0"></iframe>'.format(src=url)

# Last expression of the cell, so the notebook renders the iframe.
IPython.display.HTML(iframe)

In [None]:
dict=atlas.compute_surprises_for_data(df_dict) 

In [None]:
atlas.insert_surprises_in_mydb(dict, prediction_method='mean', keep=False,
                               atlas_interval_duration=timedelta(hours=1), atlas_sliding_shift=timedelta(minutes=10),
                               atlas_mode='stacked')

In [None]:
val=atlas.compute_grubbs_for_surprises(dict)

In [None]:
atlas.insert_grubbs_in_mydb(val, prediction_method='mean', keep=False,atlas_interval_duration=timedelta(hours=1), atlas_sliding_shift=timedelta(minutes=10),atlas_mode='stacked')

In [5]:
# Single-call Atlas run on df_dict: 'mean' prediction, sliding windows of one
# hour shifted by ten minutes, current point not kept.
atlas.atlas_test(
    df_dict,
    prediction_method='mean',
    keep=False,
    atlas_interval_duration=timedelta(hours=1),
    atlas_sliding_shift=timedelta(minutes=10),
    atlas_mode='sliding'
)

begin of calculating of surprise
OrderedDict([((u'timer_request_by_business', ((u'application', u''), (u'env', u''), (u'interface', u''), (u'method', u''), (u'metric_type', u'timing'), (u'mno', u''), (u'mvno', u''), (u'requestType', u''), (u'server', u''), (u'service', u''))), {Timestamp('2018-05-29 14:50:00+0000', tz='UTC'): 3.51, Timestamp('2018-05-29 04:50:00+0000', tz='UTC'): 2.4099999999999997, Timestamp('2018-05-28 04:20:00+0000', tz='UTC'): 1.01, Timestamp('2018-05-29 07:30:00+0000', tz='UTC'): 36.67666666666667, Timestamp('2018-05-29 18:30:00+0000', tz='UTC'): 18.343333333333334, Timestamp('2018-05-29 21:00:00+0000', tz='UTC'): 2.26, Timestamp('2018-05-29 12:40:00+0000', tz='UTC'): 1.51, Timestamp('2018-05-29 00:00:00+0000', tz='UTC'): 1.6766666666666665, Timestamp('2018-05-28 17:10:00+0000', tz='UTC'): 12.843333333333332, Timestamp('2018-05-28 08:30:00+0000', tz='UTC'): 8.176666666666668, Timestamp('2018-05-28 14:30:00+0000', tz='UTC'): 10.676666666666664, Timestamp('2018-05-2

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


finish of calculating of grubbs
begin insert of grubbs
finish insert of grubbs


In [None]:
import library.dataset as dataset
import library.stats.atlas as atlas
# iPython-specific
import IPython

# standard libs
import pandas as pd
from pprint import pprint
from datetime import datetime, timedelta
from influxdb import DataFrameClient
import collections
import numpy as np
import math
# custom libs
import library.date_manipulation as date_manipulation
import library.stats.atlas as atlas
import library.stats.variation as stats_variation
import library.influx.query as influx_query

# dataset import
import library.dataset as dataset
# Standalone run over a wider date range (2018-04-28 .. 2018-05-30) using the
# 'polyfit' prediction in sliding mode.
# NOTE: this rebinds the notebook-level boundary_from, boundary_to and df_dict.
boundary_from = '2018-04-28'
boundary_to = '2018-05-30'
df_dict = dataset.get_data_from_prod_interval_low_precision(
    boundary_from,
    boundary_to,
    'timer_request_by_business'
)
# Boundary trimming, currently disabled:
#df_dict=date_manipulation.remove_boundry(boundary_from,boundary_to,df_dict,10,"min")
atlas.atlas_test(
    df_dict,
    prediction_method='polyfit',
    keep=False,
    atlas_interval_duration=timedelta(hours=1),
    atlas_sliding_shift=timedelta(minutes=10),
    atlas_mode='sliding'
)