In [1]:
import os
import pickle
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.graph_objects import Figure
from plotly.subplots import make_subplots
from sklearn.cluster import DBSCAN
from sklearn.cluster import OPTICS
from sklearn.datasets import load_iris
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import Lars
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoLars
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import Ridge
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import r2_score #https://scikit-learn.org/stable/modules/classes.html#regression-metrics
from sklearn.model_selection import train_test_split
from sklearn.neighbors import LocalOutlierFactor
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler

In [2]:
class record:
    """Tracks a state sequence and the elapsed time before each entry into 'W'.

    Attributes:
        last_state: most recently accepted state.
        times: one entry per accepted transition into 'W', holding the
            difference between that transition's time and the stored timestamp.
        timestamp: time (as float) of the last accepted transition, or the
            initial time if none has been accepted yet.
        counter: number of accepted transitions into 'W'.
    """

    def __init__(self, init_state, time):
        self.times = []
        self.counter = 0
        self.last_state = init_state
        self.timestamp = float(time)

    def update(self, state, time):
        """Feed one (state, time) observation; unrecognised transitions are ignored."""
        previous = self.last_state

        if state == 'A':
            # Only W -> A is accepted; it restarts the clock without recording.
            if previous == 'W':
                self.last_state = 'A'
                self.timestamp = float(time)
            return

        if state != 'W' or previous not in ('B', 'A'):
            return

        # B -> W or A -> W: record how long it took since the stored timestamp.
        self.last_state = 'W'
        now = float(time)
        self.times.append(now - self.timestamp)
        self.timestamp = now
        self.counter += 1

In [3]:
# Load the previously saved dataset from the working directory.
# SECURITY: pickle.load can execute arbitrary code; only open files you created yourself.
# NOTE(review): later cells read `.times` / `.counter` from element [1] of each entry, so the
# pickle presumably contains `record` instances -- if so, the `record` class cell must be run
# before this one for unpickling to succeed.
with open('save_sorted.pickle', 'rb') as f:
    data = pickle.load(f)
# Drop the first entry. NOTE(review): the reason is not visible in this notebook -- confirm.
data = data[1:]

In [38]:
# build the scaling + estimator pipeline for a model
def get_pipeline(model):
    """Return a pipeline that applies RobustScaler, then MinMaxScaler, then `model`.

    New scaler instances are created on every call; `model` is used as passed.
    """
    return make_pipeline(RobustScaler(), MinMaxScaler(), model)
# prepare a dict of ml models
def get_models(models=None):
    """Populate and return a dict of scikit-learn estimators keyed by short name.

    Parameters
    ----------
    models : dict, optional
        Dict to add the estimators to. It is mutated in place and returned;
        entries under the keys set below are overwritten, other keys are kept.
        When omitted, a new dict is created for this call.

    Returns
    -------
    dict
        Mapping of short name -> newly constructed estimator instance.
    """
    # A `dict()` default is evaluated once at definition time and would be
    # shared (and mutated) by every call, so use a None sentinel instead.
    if models is None:
        models = {}
    # regressors from sklearn.linear_model
    models['lr'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor(max_iter = 10000)
    models['lars'] = Lars()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=10000, tol=1e-3)
    # NOTE: the key is spelled 'ranscac' (not 'ransac'); kept because callers look models up by key.
    models['ranscac'] = RANSACRegressor()
    models['sgd'] = SGDRegressor(max_iter=10000, tol=1e-3)
    # neural-network regressor
    models['MLPR'] = MLPRegressor(max_iter = 100000)
    # clustering / outlier-detection estimators (used via fit_predict, not as regressors)
    models['DBSCAN'] = DBSCAN(min_samples = 20, n_jobs = -1, eps=0.15)
    models['forest'] = IsolationForest(n_jobs = -1, max_samples = 1.0)
    models['LOF'] = LocalOutlierFactor(n_jobs = -1)
    models['OPTICS'] = OPTICS(n_jobs = -1, min_samples=20, cluster_method='dbscan', eps=0.15)
    return models

In [40]:
# For every entry in `data`: drop LOF outliers from its recorded durations, cluster the
# remaining values with DBSCAN, print the number of distinct labels and save a scatter plot.
FIG_DIR = './figs'
os.makedirs(FIG_DIR, exist_ok=True)  # savefig does not create a missing directory

models = get_models()
# fit_predict refits every step on each call, so the pipelines are built once and reused.
outlier_pipeline = make_pipeline(RobustScaler(), models['LOF'])
cluster_pipeline = get_pipeline(models['DBSCAN'])

for counter, each_pair in enumerate(data):
    df = pd.DataFrame({'Uptime': each_pair[1].times})
    # The scalers reject empty input and LOF needs at least one neighbour per point.
    if len(df) < 2:
        print('sample %d: skipped (%d samples)' % (counter, len(df)))
        continue

    # Stage 1: the code relies on LocalOutlierFactor labelling inliers 1 and outliers -1.
    df['Outlier'] = outlier_pipeline.fit_predict(df[['Uptime']].to_numpy())
    inliers = df[df['Outlier'] == 1].copy()
    if inliers.empty:
        print('sample %d: skipped (no inliers)' % counter)
        continue

    # Stage 2: cluster the inliers on the scaled values.
    inliers['result'] = cluster_pipeline.fit_predict(inliers[['Uptime']].to_numpy())
    # The count is over distinct labels, so DBSCAN's noise label (-1) is included when present.
    print('sample %d:' % counter, inliers['result'].nunique())

    fig, ax = plt.subplots()
    # groupby iterates labels in sorted order, so colours are stable between runs.
    for label, group in inliers.groupby('result'):
        ax.scatter(
            x=group.index,
            y=group['Uptime'],
            label='noise' if label == -1 else 'cluster %d' % label,
        )
    ax.set(title='sample %d' % counter, xlabel='index', ylabel='Uptime')
    ax.legend()
    fig.savefig(os.path.join(FIG_DIR, '%d.png' % counter), format='png')
    plt.close(fig)  # release the figure; otherwise one figure stays open after the loop

sample 0: 2
sample 1: 1
sample 2: 2
sample 3: 2
sample 4: 2
sample 5: 3
sample 6: 1
sample 7: 2
sample 8: 3
sample 9: 2
sample 10: 2
sample 11: 2
sample 12: 2
sample 13: 2
sample 14: 2
sample 15: 2
sample 16: 2
sample 17: 2
sample 18: 2
sample 19: 2
sample 20: 2
sample 21: 2
sample 22: 2
sample 23: 2
sample 24: 2
sample 25: 2
sample 26: 2
sample 27: 2
sample 28: 2
sample 29: 2
sample 30: 2
sample 31: 2
sample 32: 2
sample 33: 2
sample 34: 2
sample 35: 2
sample 36: 2
sample 37: 2
sample 38: 2
sample 39: 2
sample 40: 2
sample 41: 2
sample 42: 1
sample 43: 1
sample 44: 2
sample 45: 2
sample 46: 2
sample 47: 2
sample 48: 2
sample 49: 2
sample 50: 2
sample 51: 2
sample 52: 2
sample 53: 2
sample 54: 2
sample 55: 1
sample 56: 2
sample 57: 2
sample 58: 2
sample 59: 2
sample 60: 2
sample 61: 3
sample 62: 2
sample 63: 2
sample 64: 4
sample 65: 2
sample 66: 2
sample 67: 2
sample 68: 2
sample 69: 2
sample 70: 2
sample 71: 2
sample 72: 2
sample 73: 2
sample 74: 2
sample 75: 2
sample 76: 2
sample 77

<Figure size 432x288 with 0 Axes>

In [9]:
# Show the `counter` attribute of the object stored at index 1 of the first entry in `data`.
print(data[0][1].counter)

4757
