In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
from load_data import *
from flow_catlog import *
import flow_manager as fm
import autoencoder as ae
import classifier as clfr
from sampler import *
from utl import *
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from tensorflow.compat.v1 import logging
logging.set_verbosity(logging.ERROR)

2021-08-31 09:48:20.034548: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-08-31 09:48:20.034577: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Notebook-wide configuration: dtypes, flow/training hyper-parameters, input path.
DTYPE = tf.float32
NP_DTYPE = np.float32
num_bijectors = 6   # handed to the IAF constructor
steps = 2000        # handed to fm.train_dist_routine as `steps`
lr = 1e-3           # handed to fm.train_dist_routine as `learning_rate`
datafile = 'machine-1-1.csv'

# Seed NumPy and TensorFlow so that sampling and training start from the same
# random state on every Restart & Run All.
# NOTE(review): assumes `tf` is the TF2 module (tf.random.set_seed) - confirm.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Read the raw CSV; header=None makes pandas treat the first line as data
# rather than as column names.
X_data = pd.read_csv(filepath_or_buffer=datafile, header=None)

In [None]:
# Convert the loaded table to a float array and min-max scale it.
# np.asarray accepts both the DataFrame produced by read_csv and an ndarray,
# so re-running this cell no longer fails on the missing `.values` attribute.
# NP_DTYPE is the notebook-wide NumPy dtype constant (np.float32).
X_data = np.asarray(X_data, dtype=NP_DTYPE)
X_data = MinMaxScaler().fit_transform(X_data)

In [None]:
# Joint distribution of the first two columns of the scaled data.
first_feature, second_feature = X_data[:, 0], X_data[:, 1]
plt.scatter(first_feature, second_feature, color='red', s=10)

In [None]:
# Build the base distribution and the IAF flow on top of it
# (nothing is sampled in this cell).

input_dims = X_data.shape[1]
# Zero-mean diagonal Gaussian with one dimension per data column; no scale is
# given, so the library default applies. DTYPE is the notebook-wide
# TensorFlow dtype constant, used here instead of repeating tf.float32.
base_dist = tfd.MultivariateNormalDiag(loc=tf.zeros([input_dims], DTYPE))
activation = tf.nn.relu
flow = IAF(base_dist, num_bijectors, hidden_units=[256, 256], ndims=input_dims, activation=activation)

# Training an IAF Flow

In [None]:
# Fit the flow to the scaled data and plot what the training routine returned
# (presumably the per-step loss - confirm against flow_manager).
losses = fm.train_dist_routine(
    X_data,
    flow,
    steps=steps,
    learning_rate=lr,
)
plt.plot(losses)

In [None]:
# Request one sample per data row, then show the samples next to the data.
n_data_rows = X_data.shape[0]
names, samples = make_samples(base_dist, flow, n_samples=n_data_rows)
visualize_training_data(X_data, samples)

In [None]:
# Contour plot for the trained flow; the helper takes a list of models.
flows_to_plot = [flow]
plot_contour_prob(flows_to_plot, scale_fig=6)

In [None]:
# TODO: how to determine the threshold?
anomalies = detect_anomalies(data=X_data, model=flow, threshold=0.01)
fig, axes = plt.subplots(2, figsize=(30, 10))
x = np.arange(len(X_data[:, 0]))

# One panel per column (0 and 1): the series as a red line, with the points
# selected by `anomalies` drawn on top in blue.
for feature_idx, panel in enumerate(axes):
    panel.plot(x, X_data[:, feature_idx], color='red', zorder=0)
    panel.scatter(anomalies, X_data[anomalies, feature_idx], s=10, color='blue', zorder=1)

plt.show()

# Data Distribution Before/After Training

In [None]:
# Push the data through the inverse of the flow's bijector and keep the
# result as a NumPy array.
inverse_mapped = flow.bijector.inverse(X_data)
temp = inverse_mapped.numpy()

## Before

In [None]:
# Density histograms of columns 0 and 1 of the inverse-mapped data, one per panel.
fig, axes = plt.subplots(2)
for feature_idx, panel in enumerate(axes):
    panel.hist(temp[:, feature_idx], bins=100, density=True)
plt.show()

## After

In [None]:
# Density histograms of columns 0 and 1 of the scaled data, one per panel.
fig, axes = plt.subplots(2)
for feature_idx, panel in enumerate(axes):
    panel.hist(X_data[:, feature_idx], bins=100, density=True)
plt.show()

# Generate Data for the Classifier

In [None]:
# Draw synthetic anomalous and normal points from the flow; both helpers get
# the same `factor` (its meaning is defined in the sampler module).
factor = 2
n_anomalous, n_normal = 3000, 10000
anomalies = sample_anomalies(flow, factor=factor, n_samples=n_anomalous)
normals = sample_normals(flow, factor=factor, n_samples=n_normal)

In [None]:
# Wrap the sampled points in labelled frames: label 1 = anomaly, 0 = normal.
# Passing `columns=` at construction names the two feature columns of a fresh
# array and merely re-selects them when the input is already a frame, so
# re-running this cell no longer fails with a column-length mismatch.
# NOTE(review): assumes the samplers return 2-column array-likes - confirm.
feature_columns = ['v1', 'v2']
anomalies = pd.DataFrame(anomalies, columns=feature_columns).assign(label=1)
normals = pd.DataFrame(normals, columns=feature_columns).assign(label=0)

In [None]:
# Stack both classes, drop rows containing NaNs, then shuffle the rows.
# The fixed random_state (same value the train/test split uses) makes the
# shuffled order repeatable across runs.
clr_data = pd.concat([anomalies, normals], axis=0).dropna()
clr_data = shuffle(clr_data, random_state=42)

In [None]:
# Hold out 33% of the labelled samples for testing, then fit and score the
# classifier through the project helper.
clf_features = clr_data[['v1', 'v2']]
clf_labels = clr_data['label']
X_train, X_test, y_train, y_test = train_test_split(
    clf_features, clf_labels, test_size=0.33, random_state=42
)
clf, score = clfr.RFClassifier(X_train, y_train, X_test, y_test)

# Print the first four entries of `score` under their metric names.
report_templates = ('precision: {}', 'recall: {}', 'fscore: {}', 'support: {}')
for position, template in enumerate(report_templates):
    print(template.format(score[position]))

# Apply Classifier on Real Data

In [None]:
# Classify every real observation and collect the row positions predicted as
# anomalous (label 1).
y_data = clf.predict(X_data)
# np.flatnonzero yields a plain 1-D index array. Single-argument np.where
# returns a 1-tuple of arrays instead, which adds a spurious leading axis when
# combined with a column index such as X_data[idx, 0].
y_data_anomalies = np.flatnonzero(y_data == 1)

In [None]:
# One panel per column (0 and 1): the series as a red line, with the rows the
# classifier labelled 1 drawn on top in blue.
fig, axes = plt.subplots(2, figsize=(30, 10))
x = np.arange(len(X_data[:, 0]))

for feature_idx, panel in enumerate(axes):
    panel.plot(x, X_data[:, feature_idx], color='red', zorder=0)
    panel.scatter(y_data_anomalies, X_data[y_data_anomalies, feature_idx], s=10, color='blue', zorder=1)

plt.show()