# anomaly_score_unsupervised

Send arbitrary time-series data to the component and train an unsupervised LSTM-Autoencoder model. As soon as previously unseen patterns occur, the anomaly score rises.

Future work:

- reset / rollback model (for regular flushing or after a real anomaly (true positive) occurred)
- add check-pointing for service persistence and rollback

In [1]:
import os

#os.environ['create_image']='True'
#os.environ['repository']='romeokienzler'
#os.environ['version']='0.1'
#
os.environ['install_requirements']='True'

In [9]:
# Either build and push a container image (when the environment variable
# 'create_image' is set) or install the Python requirements (when
# 'install_requirements' is set). Lines starting with '!' are IPython shell
# escapes, so this cell only runs inside IPython/Jupyter.
# NOTE(review): bool() of any non-empty string is True, so create_image='False'
# still selects the image branch - confirm that this is intended.
# NOTE(review): the Dockerfile, start.sh and image name below all refer to
# 'ibm-sql-query-cpd' rather than to this notebook - confirm they are current.
if bool(os.environ.get('create_image',False)):
    # Dockerfile template, written to disk verbatim (leading newline and
    # indentation included).
    docker_file="""
    FROM registry.access.redhat.com/ubi8/python-39
    RUN pip install ipython nbformat numpy ibm-cos-sdk-core ibm-cos-sdk ibm-watson-machine-learning ibm-watson-studio-pipelines ibmcloudsql pyyaml
    ADD ibm-sql-query-cpd.py .
    ADD start.sh .

    """
    with open("Dockerfile", "w") as text_file:
        text_file.write(docker_file)

    # Launcher script template: echoes 20 positional parameters and passes
    # them pairwise concatenated ("$1$2", "$3$4", ...) to the Python script.
    # It is written verbatim, so '#!/bin/bash' is preceded by a newline and
    # four spaces in the generated file.
    start_file="""
    #!/bin/bash
    echo "Parameter 1: $1"
    echo "Parameter 2: $2"
    echo "Parameter 3: $3"
    echo "Parameter 4: $4"
    echo "Parameter 5: $5"
    echo "Parameter 6: $6"
    echo "Parameter 7: $7"
    echo "Parameter 8: $8"
    echo "Parameter 9: $9"
    echo "Parameter 10: ${10}"
    echo "Parameter 11: ${11}"
    echo "Parameter 12: ${12}"
    echo "Parameter 13: ${13}"
    echo "Parameter 14: ${14}"
    echo "Parameter 15: ${15}"
    echo "Parameter 16: ${16}"
    echo "Parameter 17: ${17}"
    echo "Parameter 18: ${18}"
    echo "Parameter 19: ${19}"
    echo "Parameter 20: ${20}"
    python /opt/app-root/src/ibm-sql-query-cpd.py "$1$2" "$3$4" "$5$6" "$7$8" "$9${10}" "${11}${12}" "${13}${14}" "${15}${16}" "${17}${18}" "${19}${20}"
    """
    with open("start.sh", "w") as text_file:
        text_file.write(start_file)

    # Convert the notebook to a script, build/tag/push the image using the
    # 'version' and 'repository' environment variables, then remove the
    # generated files again.
    !chmod 755 start.sh
    !jupyter nbconvert --to script ibm-sql-query-cpd.ipynb    
    !docker build -t ibm_sql_query_cpd:`echo $version` .
    !docker tag ibm_sql_query_cpd:`echo $version` `echo $repository`/ibm_sql_query_cpd:`echo $version`
    !docker push `echo $repository`/ibm_sql_query_cpd:`echo $version`
    !rm Dockerfile
    !rm ibm-sql-query-cpd.py
    !rm start.sh
elif bool(os.environ.get('install_requirements',False)):
    # Pinned versions of the libraries this notebook imports.
    !pip install tensorflow==2.9.1 numpy==1.23.2 scikit-learn==1.1.2  pandas==1.4.3

Collecting numpy==1.23.2
  Using cached numpy-1.23.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
Collecting scikit-learn==1.1.2
  Using cached scikit_learn-1.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30.8 MB)
Collecting scipy>=1.3.2
  Downloading scipy-1.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (43.9 MB)
     |████████████████████████████████| 43.9 MB 72.3 MB/s            2/s eta 0:00:01
[?25hCollecting joblib>=1.0.0
  Downloading joblib-1.1.0-py2.py3-none-any.whl (306 kB)
     |████████████████████████████████| 306 kB 54.1 MB/s            
[?25hCollecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Installing collected packages: numpy, threadpoolctl, scipy, joblib, scikit-learn
  Attempting uninstall: numpy
    Found existing installation: numpy 1.23.1
    Uninstalling numpy-1.23.1:
      Successfully uninstalled numpy-1.23.1
Successfully installed joblib-1.1.0 numpy-1.23.2 scikit-lear

In [15]:
import numpy as np
from numpy import concatenate
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Activation

import pickle

In [None]:
# Component parameters, each read from an environment variable of the same name.
# NOTE(review): these settings describe an SQL job and none of them is used by
# the model code visible in this notebook - confirm they are still required.

# COS URL under which the SQL job stores its results
target_dir_path = os.getenv('target_dir_path')

# Name of the asset to register for the results written by the SQL job
target_asset_name = os.getenv('target_asset_name')

# SQL statement to execute
sql = os.getenv('sql')

# (Unique) Cloud Resource Name (CRN) of the IBM SQL Query service
data_engine_crn = os.getenv('data_engine_crn')

# Result format, default CSV (becomes the STORED AS ... clause of the INTO clause)
format = os.getenv('format', 'CSV')

# Optional: columns used to partition the results
# (becomes the PARTITIONED BY (<columns>) clause of the INTO clause)
partition_columns = os.getenv('partition_columns', '')

# Optional: number of objects to store the results in
# (becomes the PARTITIONED INTO <num> OBJECTS clause of the INTO clause)
number_of_objects = int(os.getenv('number_of_objects', 0))

# Optional: number of rows stored per result object
# (becomes the PARTITIONED EVERY <num> ROWS clause of the INTO clause)
rows_per_object = int(os.getenv('rows_per_object', 0))

# Default 'False'; only valid when no partitioning option above is given.
# Produces exactly one object named as specified in target_dir_path
# (becomes sqlClient.rename_exact_result(jobid) after the SQL has run).
exact_name = os.getenv('exact_name', 'False')

# Default 'False'; becomes JOBPREFIX NONE in the INTO clause. Results of earlier
# runs with the same output URI are then overwritten, because no unique
# sub-folder is created for the result.
no_jobid_folder = os.getenv('no_jobid_folder', 'False')

# Output file name that receives the CPD path of the resulting asset
# (default: output.txt)
data_asset = os.getenv('data_asset', 'output.txt')


In [None]:
# Override the component parameters with `name=value` command-line arguments
# and normalise the types of the boolean / integer parameters.

# Standard-library modules this cell needs; no other cell imports them.
import ast
import logging
import re
import sys


def _parse_cli_parameters(argv):
    """Collect ``name=value`` command-line arguments into a dict.

    Only arguments whose name is a valid Python identifier are accepted. The
    value is everything after the first ``=`` and is always kept as a string
    (it may itself contain ``=`` or quote characters). If a name occurs more
    than once, the last occurrence wins. All other arguments are ignored.
    """
    pattern = re.compile(r'([A-Za-z_][A-Za-z0-9_]*)=(.*)', re.DOTALL)
    parsed = {}
    for argument in argv:
        matched = pattern.fullmatch(argument)
        if matched is not None:
            parsed[matched.group(1)] = matched.group(2)
    return parsed


def _as_bool(value):
    """Return ``value`` as a bool; strings such as 'true'/'False' are parsed."""
    if isinstance(value, bool):
        # Already converted, e.g. when the cell is executed a second time.
        return value
    return ast.literal_eval(value.capitalize())


def _as_int(value):
    """Return ``value`` as an int; an empty string counts as 0."""
    if isinstance(value, int):
        return value
    return int(value) if len(value) > 0 else 0


for element in sys.argv:
    logging.warning('argv raw ' + element)

_cli_parameters = _parse_cli_parameters(sys.argv)

# Bind the values directly instead of exec()-ing generated source code: a value
# containing a quote or '=' can then neither break the assignment nor inject code.
# NOTE(review): as before, any module-level name can be overridden this way.
globals().update(_cli_parameters)

# Kept as a list of 'name="value"' strings, the form this variable always had.
parameters = ['{}="{}"'.format(_name, _value) for _name, _value in _cli_parameters.items()]
for parameter in parameters:
    logging.warning('Parameter: ' + parameter)

exact_name = _as_bool(exact_name)
no_jobid_folder = _as_bool(no_jobid_folder)
number_of_objects = _as_int(number_of_objects)
rows_per_object = _as_int(rows_per_object)

# Report the final value and type of every parameter given on the command line.
for _name in _cli_parameters:
    logging.warning('final parameter: ' + str(globals()[_name]))
    logging.warning('final parameter type: ' + str(type(globals()[_name])))

In [13]:
# Remove any previously downloaded copies, then fetch the healthy and the
# broken recording (IPython shell escapes). rm prints an error when no file
# matches; the downloads still run.
!rm watsoniotp.*
!wget https://raw.githubusercontent.com/romeokienzler/developerWorks/master/lorenzattractor/watsoniotp.healthy.phase_aligned.pickle
!wget https://raw.githubusercontent.com/romeokienzler/developerWorks/master/lorenzattractor/watsoniotp.broken.phase_aligned.pickle


rm: cannot remove 'watsoniotp.*': No such file or directory
--2022-08-19 07:28:12--  https://raw.githubusercontent.com/romeokienzler/developerWorks/master/lorenzattractor/watsoniotp.healthy.phase_aligned.pickle
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 194639 (190K) [text/plain]
Saving to: ‘watsoniotp.healthy.phase_aligned.pickle’


2022-08-19 07:28:12 (37.4 MB/s) - ‘watsoniotp.healthy.phase_aligned.pickle’ saved [194639/194639]

--2022-08-19 07:28:13--  https://raw.githubusercontent.com/romeokienzler/developerWorks/master/lorenzattractor/watsoniotp.broken.phase_aligned.pickle
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubuserc

In [31]:
# Read each downloaded recording as raw bytes, close the file, then unpickle.
# encoding='latin1' is the setting pickle needs for byte strings written by Python 2.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
with open('watsoniotp.healthy.phase_aligned.pickle', 'rb') as file_object:
    raw_data = file_object.read()
data_healthy = pickle.loads(raw_data, encoding='latin1')

with open('watsoniotp.broken.phase_aligned.pickle', 'rb') as file_object:
    raw_data = file_object.read()
data_broken = pickle.loads(raw_data, encoding='latin1')

In [32]:
# Lay both recordings out as 3000 rows of 3 values each.
flat_shape = (3000, 3)
data_healthy = data_healthy.reshape(flat_shape)
data_broken = data_broken.reshape(flat_shape)


In [33]:
def scaleData(data):
    """Return ``data`` with every feature rescaled to the range [0, 1].

    A fresh ``MinMaxScaler`` is fitted on ``data`` itself for each call, so the
    scaling is specific to the array that is passed in.
    """
    return MinMaxScaler(feature_range=(0, 1)).fit_transform(data)


In [35]:
# Scale each recording on its own (healthy first, then broken).
data_healthy_scaled, data_broken_scaled = map(scaleData, (data_healthy, data_broken))


In [40]:
# Windowing configuration: each training sample is a window of `timesteps`
# consecutive rows with `dim` features; `samples` is the total number of rows.
timesteps = 10
dim = 3
samples = 3000
# reshape() yields a (300, 10, 3) array without touching the shape of
# data_healthy_scaled itself. Assigning to .shape through an alias, as was done
# before, silently reshaped data_healthy_scaled as well.
data_healthy_scaled_reshaped = data_healthy_scaled.reshape(samples // timesteps, timesteps, dim)


In [42]:
# Every loss value reported so far, in the order it was reported.
losses = []


def handleLoss(loss):
    """Append ``loss`` to the module-level ``losses`` list and print it."""
    losses.append(loss)
    print(loss)

class LossHistory(Callback):
    """Keras callback that records the loss reported at the end of every batch.

    The values are collected in ``self.losses`` and additionally passed to
    ``handleLoss``.
    """

    def on_train_begin(self, logs=None):
        """Start with an empty loss list for this training run."""
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        """Store the batch loss; ``None`` is recorded when no loss is reported."""
        # `logs` defaults to None instead of a shared mutable dict; treat a
        # missing dict as empty so the lookup below cannot fail.
        logs = logs or {}
        loss = logs.get('loss')
        self.losses.append(loss)
        handleLoss(loss)


In [43]:
# design network: eleven stacked 50-unit LSTM layers that all return the full
# sequence, followed by a 3-unit Dense layer; trained with mean absolute error
# and the Adam optimizer.
model = Sequential(
    [LSTM(50, input_shape=(timesteps, dim), return_sequences=True) for _ in range(11)]
    + [Dense(3)]
)
model.compile(loss='mae', optimizer='adam')

def train(data):
    """Fit the global ``model`` to reconstruct ``data`` (input == target).

    ``data`` is reshaped in place into windows of ``timesteps`` rows with
    ``dim`` features for the duration of the fit and is always handed back in
    flat ``(rows, dim)`` layout, even if fitting raises. The window count is
    inferred from the array size instead of being fixed to 300 windows.
    The same data is also passed as validation data, and the per-batch loss
    is reported through ``LossHistory``.
    """
    data.shape = (-1, timesteps, dim)
    try:
        model.fit(
            data,
            data,
            epochs=50,
            batch_size=72,
            validation_data=(data, data),
            verbose=0,
            shuffle=False,
            callbacks=[LossHistory()],
        )
    finally:
        # Restore the flat layout no matter how fit() ended.
        data.shape = (-1, dim)

def score(data):
    """Return the global ``model``'s reconstruction of ``data`` as ``(rows, dim)``.

    Side effect (unchanged from before): ``data`` is reshaped in place into
    windows of ``timesteps`` rows with ``dim`` features and is left in that
    3-D layout; callers that need the flat layout must reset it themselves.
    The window count is inferred from the array size instead of being fixed
    to 300 windows / 3000 rows.
    """
    data.shape = (-1, timesteps, dim)
    yhat = model.predict(data)
    yhat.shape = (-1, dim)
    return yhat


2022-08-19 07:48:51.611702: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-08-19 07:48:51.611823: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-08-19 07:48:51.611862: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (elyra): /proc/driver/nvidia/version does not exist
2022-08-19 07:48:51.616415: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Phase 1: train on the healthy recording (a single pass of the loop), then
# reconstruct both recordings with the trained model.
for i in range(1):

    print("----------------")
    train(data_healthy_scaled)
    yhat_healthy = score(data_healthy_scaled)
    yhat_broken = score(data_broken_scaled)
    # Put both arrays back into the flat (3000, 3) layout, whatever shape the
    # calls above left them in.
    data_healthy_scaled.shape = (3000, 3)
    data_broken_scaled.shape = (3000, 3)

# Phase 2: continue training the same model on the broken recording and
# reconstruct both recordings again; this overwrites yhat_healthy and
# yhat_broken from phase 1.
print("----------------broken")
train(data_broken_scaled)
yhat_healthy = score(data_healthy_scaled)
yhat_broken = score(data_broken_scaled)
data_healthy_scaled.shape = (3000, 3)
data_broken_scaled.shape = (3000, 3)


----------------
0.38889142870903015
0.42711207270622253
0.444160521030426
0.4445486068725586
0.44750458002090454
0.347119003534317
0.3821924924850464
0.39599481225013733
0.3923739790916443
0.39436835050582886
0.2589453458786011
0.2920001447200775
0.30200913548469543
0.2988244295120239
0.30028802156448364
0.2626585364341736
0.2849235534667969
0.29126226902008057
0.290312260389328
0.2908632159233093
0.224995955824852
0.24460551142692566
0.2526387870311737
0.253427654504776
0.2551236152648926
0.20142149925231934
0.22818347811698914
0.23976394534111023
0.2416950911283493
0.24323312938213348
0.20171914994716644
0.222492054104805
0.23051127791404724
0.23217719793319702
0.23300279676914215
0.20880062878131866
0.22260898351669312
0.2272585779428482
0.22873860597610474
0.22916366159915924
0.20833897590637207
0.2194531112909317
0.22285868227481842
0.223546102643013
0.22400808334350586
0.19624044001102448
0.20971210300922394
0.21469222009181976
0.21557730436325073
0.2163671851158142
0.1860225647