In this Jupyter notebook, we extract calendar features (month/day/hour/minute/second) from Unix timestamps and use them for supervised machine learning to predict the uptime, defined as uptime = (withdraw time) - (announce time).

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score #https://scikit-learn.org/stable/modules/classes.html#regression-metrics
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import Lars
from sklearn.linear_model import LassoLars
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.cluster import DBSCAN
from sklearn.cluster import OPTICS
from plotly.graph_objects import Figure
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pickle
import plotly.express as px
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt

In [3]:
# datetime.utcfromtimestamp(start)
class record_new:
    """Track state changes and collect [start, end] timestamp intervals.

    Attributes:
        last_state: most recently accepted state ('A', 'W', or the initial state).
        timestamps: list of ``[start, end]`` float pairs, one per closed interval.
        timestamp: float start time of the interval currently open.
    """

    def __init__(self, init_state, time):
        self.last_state = init_state
        self.timestamps = []
        self.timestamp = float(time)

    def update(self, state, time):
        """Apply a state change observed at ``time``.

        A 'W' following 'A' or 'B' closes the open interval and stores it.
        An 'A' following 'W' opens a new interval. Every other transition
        is ignored.
        """
        previous = self.last_state
        if state == 'W':
            if previous in ('B', 'A'):
                self.last_state = 'W'
                self.timestamps.append([self.timestamp, float(time)])
            return
        if state == 'A' and previous == 'W':
            self.last_state = 'A'
            self.timestamp = float(time)

In [4]:
# Load the full pickled dataset from 'save.pickle'.
# The object is later accessed through `.items()` and `.timestamps`, so it is
# presumably a dict of `record_new` objects; if so, that class must be defined
# before unpickling (TODO confirm).
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open('save.pickle', 'rb') as f:
    data = pickle.load(f)

In [5]:
# Turn the mapping into a list of (key, record) pairs ordered by how many
# intervals each record holds, largest first.
# `reverse=True` is an argument of sorted(); it was previously passed to len(),
# which raises TypeError because len() accepts no keyword arguments.
data = sorted(data.items(), key=lambda item: len(item[1].timestamps), reverse=True)

In [9]:
# Subsample: keep every 500th entry among the first 90000 (at most 180 entries).
sample = data[0:90000:500]

In [11]:
# Write the subsample to 'multi_sample.pickle'.
with open('multi_sample.pickle', 'wb') as f:
    pickle.dump(sample, f)

In [30]:
# Read 'multi_sample.pickle' back in. This rebinds `data`, so from here on
# `data` refers to the object stored in that file rather than the full dataset.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open('multi_sample.pickle', 'rb') as f:
    data = pickle.load(f)

In [12]:
# build the feature preparation pipeline that feeds a model
def get_pipeline(model):
    """Return a pipeline that scales the inputs and then applies ``model``.

    The steps run in order: RobustScaler, MinMaxScaler, then ``model``.
    """
    steps = (RobustScaler(), MinMaxScaler(), model)
    return make_pipeline(*steps)
# prepare a dict of ml models
def get_models(models=None):
    """Return a dict mapping short names to freshly constructed estimators.

    Args:
        models: optional dict to add the estimators to. When omitted, a new
            dict is created on every call. The previous ``dict()`` default was
            evaluated once and shared between all calls.

    Returns:
        The dict (the one passed in, or a new one) with all entries set.
    """
    if models is None:
        models = {}
    # linear models
    models['lr'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor(max_iter=10000)
    models['lars'] = Lars()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=10000, tol=1e-3)
    # NOTE(review): key is spelled 'ranscac' (not 'ransac'); kept unchanged so
    # existing lookups keep working.
    models['ranscac'] = RANSACRegressor()
    models['sgd'] = SGDRegressor(max_iter=10000, tol=1e-3)
    # neural network regressor
    models['MLPR'] = MLPRegressor(max_iter=100000)
    # clustering / outlier-detection estimators (these are not regressors)
    models['DBSCAN'] = DBSCAN(min_samples=20, n_jobs=-1, eps=0.15)
    models['forest'] = IsolationForest(n_jobs=-1, max_samples=1.0)
    models['LOF'] = LocalOutlierFactor(n_jobs=-1)
    models['OPTICS'] = OPTICS(n_jobs=-1, min_samples=20, cluster_method='dbscan', eps=0.15)
    return models
# turn a history sequence into (input window, next value) training pairs
def to_supervised(history, n_input):
    """Slide a window of length ``n_input`` over ``history``.

    Each window ``history[s:s + n_input]`` becomes one input row and the
    element right after it, ``history[s + n_input]``, becomes its target.
    The window advances one step at a time.

    Returns:
        A tuple ``(X, y)`` of numpy arrays built from the windows and targets.
    """
    total = len(history)
    # a start position is usable only if the element following its window exists
    starts = [s for s in range(total) if s + n_input < total]
    windows = [history[s:s + n_input] for s in starts]
    targets = [history[s + n_input] for s in starts]
    return np.asarray(windows), np.asarray(targets)

In [None]:
# Fit and score a baseline linear regression on one sampled record.
# Each row of `input_data` is one [start, end] timestamp pair.
input_data = np.asarray(data[4][1].timestamps)
# Feature matrix with one row per interval.
# NOTE(review): the original cell allocated four columns but never filled them
# and never defined X / y. Using month/day/hour/minute of the start time as the
# features is an assumption - confirm the intended feature set.
month = np.zeros((len(input_data), 4))
# range() needs an integer; the original passed the array itself (TypeError).
for i in range(len(input_data)):
    date_obj = datetime.utcfromtimestamp(input_data[i, 0])
    month[i] = (date_obj.month, date_obj.day, date_obj.hour, date_obj.minute)
X = month
# Target: interval length, end time minus start time.
y = input_data[:, 1] - input_data[:, 0]
X_train, X_test, y_train, y_test = train_test_split(X, y)
models = get_models()
pipeline = get_pipeline(models['lr'])
pipeline.fit(X_train, y_train)
y_hat = pipeline.predict(X_test)
# The printed value is the R^2 score of the predictions on the test split.
print('Accuracy score:', r2_score(y_pred=y_hat, y_true = y_test))

In [36]:
# Intervals of the second sampled record as a float array, one [start, end] row each.
input_data = np.asarray(data[1][1].timestamps).astype(float)

In [41]:
# Allocate three independent zero-filled column vectors, one row per interval.
hours, days, seconds = (np.zeros((input_data.shape[0], 1)) for _ in range(3))

In [48]:
# Fill the hour / day-of-month / second features from each interval's start
# time (column 0), interpreted as a UTC Unix timestamp.
# Iterate over the rows of `input_data`, which the feature arrays are sized
# from. The previous bound, `month.shape[0]`, came from a different record and
# could overrun or under-fill these arrays.
for i in range(input_data.shape[0]):
    date_obj = datetime.utcfromtimestamp(input_data[i, 0])
    hours[i] = date_obj.hour
    days[i] = date_obj.day
    seconds[i] = date_obj.second

In [49]:
# Uptime of each interval: end time (column 1) minus start time (column 0).
uptime = np.subtract(input_data[:, 1], input_data[:, 0])