# Imports


In [1]:
import sys
import logging

import numpy as np
import pandas as pd
import dill as pickle

# Put the parent directory on sys.path BEFORE importing project-local code.
# Presumably the `src` package lives one level above this notebook (TODO confirm);
# importing it first would only work if the path had already been set up some
# other way (installed package, leftover kernel state, matching working dir).
module_path = '..'
if module_path not in sys.path:
    sys.path.insert(1, module_path)

# Project-local import; intentionally placed after the sys.path adjustment above.
from src.outlier_model import OutlierModel  # noqa: E402

# Increase the number of columns/rows pandas displays in the notebook.
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 300)

In [2]:
# Silence DeprecationWarning for the rest of the session (temporary measure).
import warnings
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Root logging configuration: timestamped messages, 12-hour clock.
logging.basicConfig(
    format='%(asctime)s: %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
)

# Named logger for this notebook, emitting INFO and above.
logger = logging.getLogger("TimeSeries")
logger.setLevel(logging.INFO)

# Dataset

In [3]:
# Root directory that the CSV paths in the loading cell are built from.
base_path = '#datasets'
# Output directory name; not referenced by any of the cells visible here.
output_dir = 'hydraulic_output'

# Base names (without ".csv") of the simulation files to load.
files = [
    'Sim_Standard',
    'Sim_Signal',
]

# Column names that are each streamed through their own outlier model.
outlier_keys = [
    "force:1",
    "pA:1",
]


In [4]:
# Read every simulation CSV, tag its rows with the originating file name in a
# 'type' column, and collect the frames so they can be concatenated in one go.
# (Concatenating inside the loop re-copies all accumulated rows each iteration.)
frames = []
for file in files:
    data_df = pd.read_csv(f'{base_path}/crane-sim/{file}.csv', index_col=False)
    data_df['type'] = file
    frames.append(data_df)

# Single concat over all files; ignore_index gives a fresh 0..n-1 row index.
data_df_final = pd.concat(frames, ignore_index=True)

# Keep only the rows that came from the second file in `files`.
data = data_df_final.loc[data_df_final['type'] == files[1]]
# reset_index() (without drop=True) renumbers rows from 0 and keeps the previous
# row labels as an 'index' column.
data = data.reset_index()

In [5]:
# Not used in this cell.
height = 4
width = 6

# Parameters handed to OutlierModel below; their exact semantics are defined by
# src.outlier_model and are not visible here.
m = 15
std_dev_mult = 2
range_mult = 2
# Number of leading rows given to the model at construction time; the remaining
# rows are fed to it one at a time.
preload_size = 30

# One fitted model per monitored column, keyed by column name.
models = {}
for outlier_key in outlier_keys:
    data_test = data.copy()

    # First `preload_size` values of the column, cast to float64, seed the model.
    warmup_series = data_test[:preload_size][outlier_key].astype(np.float64)
    outlier_model = OutlierModel(
        m=m,
        std_dev_mult=std_dev_mult,
        range_mult=range_mult,
        time_series=warmup_series,
        egress=True,
    )

    # `fault` holds the most recent predict_one() result; it is not read here.
    fault = False
    streamed_rows = data_test[preload_size:]
    for index, row in streamed_rows.iterrows():
        # Order matters: feed the new observation first, then query the model.
        outlier_model.train_one(row[outlier_key])
        fault = outlier_model.predict_one(index)
        # Progress marker every 10000 rows.
        if index % 10000 == 0:
            print(f"Current Global index: {index}")

    models[outlier_key] = outlier_model

04/21/2022 03:08:31 PM:  Anomaly at Global index: 45
04/21/2022 03:08:31 PM: max_mp: 110.3887, metric:76.68690000000001: metric-max_mp: 33.70179999999999 range: 110.2186
04/21/2022 03:08:31 PM:  Anomaly at Global index: 112
04/21/2022 03:08:31 PM: max_mp: 16938.9959, metric:16769.4947: metric-max_mp: 169.50120000000243 range: 678.6430999999975
04/21/2022 03:08:31 PM:  Anomaly at Global index: 172
04/21/2022 03:08:31 PM: max_mp: 5365.5743, metric:4697.4591: metric-max_mp: 668.1152000000002 range: 3203.5456000000004
04/21/2022 03:08:31 PM:  Anomaly at Global index: 244
04/21/2022 03:08:31 PM: max_mp: 5956.6836, metric:5728.6473000000005: metric-max_mp: 228.03629999999976 range: 2003.353
04/21/2022 03:08:31 PM:  Anomaly at Global index: 304
04/21/2022 03:08:31 PM: max_mp: 10382.3909, metric:9533.137999999999: metric-max_mp: 849.2529000000013 range: 4010.2956000000004
04/21/2022 03:08:31 PM:  Anomaly at Global index: 45
04/21/2022 03:08:31 PM: max_mp: 39094.3979, metric:27155.7054: metric-

In [6]:
# Serialize the fitted models and the frame they were run on, one file each,
# using the highest pickle protocol available.
for target_path, payload in (
    ("../masters-thesis-graphing/_data/crane-sim/models.pkl", models),
    ("../masters-thesis-graphing/_data/crane-sim/data.pkl", data),
):
    with open(target_path, 'wb') as handle:
        pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)