# Metrics Fetcher

Data fetched using the metrics fetcher application is stored in an SQLite database. This notebook will help analyze the content of this database.


Firstly, we will import dependencies and set constants for this analysis:


In [896]:
import sqlite3
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split


db_path = "storage.db"
conn = sqlite3.connect(db_path)

Next would be to extract data from the database:

In [897]:
# region SQL queries to extract data
cpuMetrics = pd.read_sql_query("SELECT * FROM CPUMetricProvider WHERE 1=1 ORDER BY id", conn)
processMetrics = pd.read_sql_query("SELECT * FROM ProcessMetricProvider WHERE 1=1 ORDER BY id", conn)
memoryMetrics = pd.read_sql_query("SELECT * FROM RAMMetricProvider WHERE 1=1 ORDER BY id", conn)
storageMetrics = pd.read_sql_query("SELECT * FROM StorageMetricProvider WHERE 1=1 ORDER BY id", conn)
# networkMetrics = pd.read_sql_query("SELECT * FROM NetworkMetricProvider WHERE 1=1 ORDER BY id", conn)
scriptMetrics = pd.read_sql_query("SELECT * FROM ScriptData WHERE 1=1 ORDER BY id", conn)
# endregion

cpuMetrics.drop(columns="name", inplace=True)
cpuMetrics['timestamp'] = (cpuMetrics['timestamp'] // 100) * 100
cpuMetrics['counterU'] = cpuMetrics['timestamp'].astype(str) + cpuMetrics['counter'].astype(str)
cpuMetrics['counterU'] = cpuMetrics['counterU'].astype("int64")
# cpuMetrics = cpuMetrics.groupby(['counterU'], as_index=False).mean()
cpuMetrics.head()

Unnamed: 0,id,counter,usage,instructionsRetired,cycles,floatingPointOperations,temperature,timestamp,counterU
0,1,0,30.9732,0.0,0.0,0.0,0.0,1704935000,17049350000
1,2,1,5.12935,0.0,0.0,0.0,0.0,1704935000,17049350001
2,3,2,4.46969,0.0,0.0,0.0,0.0,1704935000,17049350002
3,4,3,5.77369,0.0,0.0,0.0,0.0,1704935000,17049350003
4,5,4,24.021,0.0,0.0,0.0,0.0,1704935000,17049350004


In [898]:
processMetrics.drop(columns="name", inplace=True)
processMetrics['timestamp'] = (processMetrics['timestamp'] // 100) * 100
processMetrics['counterU'] = processMetrics['timestamp'].astype(str) + processMetrics['counter'].astype(str)
processMetrics['counterU'] = processMetrics['counterU'].astype("int64")
processMetrics.head()

Unnamed: 0,id,counter,processCount,activeProcess,activeWindow,bytesReadPerSecond,bytesWrittenPerSecond,timestamp,counterU
0,1,0,236.0,WindowsTerminal.exe,ICE,533873.0,2431480.0,1704935000,17049350000
1,2,2,235.0,WindowsTerminal.exe,ICE,425455.0,185232.0,1704935000,17049350002
2,3,3,235.0,WindowsTerminal.exe,ICE,1143460.0,150020.0,1704935000,17049350003
3,4,4,234.0,WindowsTerminal.exe,ICE,157642.0,216894.0,1704935000,17049350004
4,5,5,237.0,chrome.exe,My Drive - Google Drive - Google Chrome,2422430.0,2428540.0,1704935000,17049350005


In [899]:
memoryMetrics.drop(columns="name", inplace=True)
memoryMetrics['timestamp'] = (memoryMetrics['timestamp'] // 100) * 100
memoryMetrics['counterU'] = memoryMetrics['timestamp'].astype(str) + memoryMetrics['counter'].astype(str)
memoryMetrics['counterU'] = memoryMetrics['counterU'].astype("int64")
memoryMetrics = memoryMetrics.groupby(['counter'], as_index=False).mean()
memoryMetrics.head()

Unnamed: 0,counter,id,available,committed,pageFaults,timestamp,counterU
0,1,1.5,10428700000.0,6614860000.0,20839.445,1704935000.0,17049350000.0
1,2,3.0,10365500000.0,6619230000.0,2705.25,1704935000.0,17049350000.0
2,3,4.0,10439500000.0,6535710000.0,2359.37,1704935000.0,17049350000.0
3,5,5.0,10164100000.0,6902400000.0,28631.7,1704935000.0,17049350000.0
4,6,6.0,9644950000.0,7780360000.0,58525.7,1704935000.0,17049350000.0


In [900]:
storageMetrics.drop(columns="name", inplace=True)
storageMetrics['timestamp'] = (storageMetrics['timestamp'] // 100) * 100
storageMetrics['counterU'] = storageMetrics['timestamp'].astype(str) + storageMetrics['counter'].astype(str)
storageMetrics['counterU'] = storageMetrics['counterU'].astype('Int64')
storageMetrics = storageMetrics.groupby(['counter'], as_index=False).mean()
storageMetrics.head()

Unnamed: 0,counter,id,read,write,transferRate,timestamp,counterU
0,1,1.0,464097.0,14108600.0,14572600.0,1704935000.0,17049350001.0
1,2,2.0,9395.95,580609.0,590005.0,1704935000.0,17049350002.0
2,3,3.0,0.0,142101.0,142101.0,1704935000.0,17049350003.0
3,4,4.5,617607.6,1239224.0,1856832.0,1704935000.0,17049350004.0
4,6,6.5,755052.45,1714380.0,2469430.0,1704935000.0,17049351006.0


In [901]:
scriptMetrics['timestamp'] = (scriptMetrics['timestamp'] // 100) * 100
scriptMetrics['counterU'] = scriptMetrics['timestamp'].astype(str) + scriptMetrics['counter'].astype(str)
scriptMetrics = scriptMetrics.groupby(['key', 'counterU'], as_index=False).mean()
scriptMetrics['counterU'] = scriptMetrics['counterU'].astype('Int64')
scriptMetrics.head()

Unnamed: 0,key,counterU,id,counter,value,timestamp
0,ProcessorPerformance,1704938300329,1.0,329.0,115.051,1704938000.0
1,ProcessorPerformance,1704938300330,2.0,330.0,123.516,1704938000.0
2,ProcessorPerformance,1704938300331,3.0,331.0,90.827,1704938000.0
3,ProcessorPerformance,1704938300332,4.0,332.0,121.996,1704938000.0
4,ProcessorPerformance,1704938300333,5.0,333.0,75.5344,1704938000.0


In [902]:
scriptMetrics = scriptMetrics.pivot(index='counterU', columns='key', values='value').reset_index()
scriptMetrics.head()

key,counterU,ProcessorPerformance
0,1704938300329,115.051
1,1704938300330,123.516
2,1704938300331,90.827
3,1704938300332,121.996
4,1704938300333,75.5344


Now that we have all the tables we require, we will create labels for them.
The final goal is to create one single table with all the features we have seen,
so we will use a common prefix for the labeling.

In [903]:
# cpuMetrics, processMetrics, memoryMetrics, storageMetrics, scriptMetrics

# This indicates whether for each reading the CPU is in good state or not.
# We will use "1" and "0" to represent bad state and good state in labels.
cpuMetrics["t_cpu_label"] = 0
cpuMetrics["t_cpu_label"] = np.where((cpuMetrics["usage"] > 80), 1, cpuMetrics["t_cpu_label"]);

processMetrics["t_proc_label"] = 0;
processMetrics["t_proc_label"] = np.where((processMetrics["bytesReadPerSecond"] < 2000000), 1, processMetrics["t_proc_label"]);

memoryMetrics["t_mem_label"] = 0;
memoryMetrics["t_mem_label"] = np.where((memoryMetrics["pageFaults"] > 500000), 1, memoryMetrics["t_mem_label"]);

storageMetrics["t_store_label"] = 0;
storageMetrics["t_store_label"] = np.where((storageMetrics["transferRate"] < 9999999), 1, storageMetrics["t_store_label"]);

scriptMetrics["t_script_label"] = 0;
scriptMetrics["t_script_label"] = np.where((scriptMetrics["ProcessorPerformance"] < 9999999), 1, scriptMetrics["t_script_label"]);

# Merge the tables
cpuMetrics.drop(columns=["counter", "id", "timestamp"], inplace=True)
processMetrics.drop(columns=["counter", "id", "timestamp"], inplace=True)
memoryMetrics.drop(columns=["counter", "id", "timestamp"], inplace=True)
storageMetrics.drop(columns=[ "counter", "id", "timestamp"], inplace=True)

combined_data = pd.merge(cpuMetrics, processMetrics, on='counterU', how='inner')
combined_data = pd.merge(combined_data, memoryMetrics, on='counterU', how='inner')
combined_data = pd.merge(combined_data, storageMetrics, on='counterU', how='inner')
combined_data = pd.merge(combined_data, scriptMetrics, on='counterU', how='inner')
combined_data = combined_data.dropna()
combined_data.head()

Unnamed: 0,usage,instructionsRetired,cycles,floatingPointOperations,temperature,counterU,t_cpu_label,processCount,activeProcess,activeWindow,...,available,committed,pageFaults,t_mem_label,read,write,transferRate,t_store_label,ProcessorPerformance,t_script_label
0,9.58457,0.0,0.0,0.0,0.0,1704938300332,0,260.0,chrome.exe,Metrics Fetcher - Google Chrome,...,9455550000.0,8379360000.0,1108.5,0,11830.5,309633.0,321463.0,1,121.996,1
1,9.58457,0.0,0.0,0.0,0.0,1704938300332,0,260.0,chrome.exe,Metrics Fetcher - Google Chrome,...,9455550000.0,8379360000.0,1108.5,0,11830.5,309633.0,321463.0,1,121.996,1
2,2.36571,0.0,0.0,0.0,0.0,1704938300334,0,260.0,chrome.exe,Metrics Fetcher - Google Chrome,...,9480595000.0,8341630000.0,1230.8855,0,16766.5,165211.0,181978.0,1,116.932,1
3,3.27768,0.0,0.0,0.0,0.0,1704938400337,0,260.0,chrome.exe,Winter Lofi ❄ Lofi Keep You Safe 🍂 Calm Your M...,...,9456330000.0,8355590000.0,2623.225,0,1641.57,111627.0,113268.0,1,112.802,1
4,1.88729,0.0,0.0,0.0,0.0,1704938400339,0,260.0,chrome.exe,Winter Lofi ❄ Lofi Keep You Safe 🍂 Calm Your M...,...,9473650000.0,8338820000.0,469.175,0,10230.7,104762.0,114993.0,1,66.594,1


In [904]:
colummns_to_encode = ['activeProcess', 'activeWindow']
combined_data = pd.get_dummies(combined_data, columns=colummns_to_encode, prefix=colummns_to_encode, drop_first=True)
# combined_data['counterU'] = combined_data['counterU'].astype('int64')
combined_data.fillna(0, inplace=True)
combined_data.drop(['counterU'], axis=1, inplace=True)

Now that we have our combined table, we can proceed with the steps to create a model. But we need to encode the string values we have.
One-hot encoder from `Pandas` is preferred over a `LabelEncoder` because the data strings identified are nominal.

In [905]:
label_columns = [col for col in combined_data.columns if '_label' in col.lower()]
feature_columns = [col for col in combined_data.columns if col not in label_columns]

num_timesteps = 1
num_features = len(feature_columns)

X = combined_data[feature_columns]
X = X.values.reshape((X.shape[0], num_timesteps, X.shape[1]))
y = combined_data[label_columns]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = tf.constant(X_train, dtype=tf.float32)
X_test = tf.constant(X_test, dtype=tf.float32)
y_train = tf.constant(y_train, dtype=tf.int32)
y_test = tf.constant(y_test, dtype=tf.int32)

In [906]:
X_train

<tf.Tensor: shape=(3274, 1, 172), dtype=float32, numpy=
array([[[ 6.55935,  0.     ,  0.     , ...,  0.     ,  0.     ,
          0.     ]],

       [[ 5.67742,  0.     ,  0.     , ...,  0.     ,  0.     ,
          0.     ]],

       [[11.5573 ,  0.     ,  0.     , ...,  0.     ,  0.     ,
          0.     ]],

       ...,

       [[ 5.04017,  0.     ,  0.     , ...,  0.     ,  0.     ,
          0.     ]],

       [[ 8.09318,  0.     ,  0.     , ...,  0.     ,  0.     ,
          0.     ]],

       [[ 8.59966,  0.     ,  0.     , ...,  0.     ,  0.     ,
          0.     ]]], dtype=float32)>

In [907]:
y_train

<tf.Tensor: shape=(3274, 5), dtype=int32, numpy=
array([[0, 1, 0, 1, 1],
       [0, 1, 0, 1, 1],
       [0, 1, 0, 1, 1],
       ...,
       [0, 1, 0, 1, 1],
       [0, 1, 0, 1, 1],
       [0, 1, 0, 1, 1]])>

In [908]:
# input_layer = tf.keras.layers.Input(shape=(num_timesteps, num_features))
input_layer = tf.keras.layers.Input(shape=(None, num_features))
lstm_layer = tf.keras.layers.LSTM(units=50, activation='tanh')(input_layer)

# Define separate output layers for each label
output_t_cpu = tf.keras.layers.Dense(1, activation='sigmoid', name='t_cpu_label')(lstm_layer)
output_t_proc = tf.keras.layers.Dense(1, activation='sigmoid', name='t_proc_label')(lstm_layer)
output_t_mem = tf.keras.layers.Dense(1, activation='sigmoid', name='t_mem_label')(lstm_layer)
output_t_store = tf.keras.layers.Dense(1, activation='sigmoid', name='t_store_label')(lstm_layer)
output_t_script = tf.keras.layers.Dense(1, activation='sigmoid', name='t_script_label')(lstm_layer)

model = tf.keras.models.Model(inputs=input_layer, outputs=[output_t_cpu, output_t_proc, output_t_mem, output_t_store, output_t_script])
# model.add(tf.keras.layers.Dense(32, activation='relu'))
# model.add(tf.keras.layers.Dense(16, activation='relu'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

Model: "model_13"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_14 (InputLayer)       [(None, None, 172)]          0         []                            
                                                                                                  
 lstm_24 (LSTM)              (None, 50)                   44600     ['input_14[0][0]']            
                                                                                                  
 t_cpu_label (Dense)         (None, 1)                    51        ['lstm_24[0][0]']             
                                                                                                  
 t_proc_label (Dense)        (None, 1)                    51        ['lstm_24[0][0]']             
                                                                                           

Now we train the model 🥷😍

In [909]:
%load_ext tensorboard
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs/fit/", histogram_freq=1)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

model.fit(X_train, [y_train[:, 0], y_train[:, 1], y_train[:, 2], y_train[:, 3], y_train[:, 4]], epochs=1000, batch_size=32, validation_split=0.2, callbacks=[early_stopping, tensorboard_callback])

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
Epoch 1/1000
 1/82 [..............................] - ETA: 6s - loss: 3.7173 - t_cpu_label_loss: 1.3200 - t_proc_label_loss: 0.4409 - t_mem_label_loss: 0.5644 - t_store_label_loss: 0.5296 - t_script_label_loss: 0.8624 - t_cpu_label_accuracy: 0.0000e+00 - t_proc_label_accuracy: 0.9062 - t_mem_label_accuracy: 0.7500 - t_store_label_accuracy: 0.9688 - t_script_label_accuracy: 0.0000e+00



Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000

Test the model accuracy

In [None]:
evaluation = model.evaluate(X_test, [y_test[:, i] for i in range(5)], verbose=2)
# predictions = model.predict(X_test)
# binary_predictions = tf.round(predictions).numpy()
# accuracy = accuracy_score(y_test, binary_predictions)

# accuracy
for name, value in zip(model.metrics_names, evaluation):
    print(name, ": ", value)

26/26 - 1s - loss: 0.4793 - t_cpu_label_loss: 0.1343 - t_proc_label_loss: 0.2374 - t_mem_label_loss: 2.8654e-05 - t_store_label_loss: 0.1076 - t_script_label_loss: 1.6019e-05 - t_cpu_label_accuracy: 0.9604 - t_proc_label_accuracy: 0.9332 - t_mem_label_accuracy: 1.0000 - t_store_label_accuracy: 0.9542 - t_script_label_accuracy: 1.0000 - 602ms/epoch - 23ms/step
loss :  0.4793359637260437
t_cpu_label_loss :  0.13429011404514313
t_proc_label_loss :  0.2374054342508316
t_mem_label_loss :  2.865408532670699e-05
t_store_label_loss :  0.10759570449590683
t_script_label_loss :  1.6019264876376837e-05
t_cpu_label_accuracy :  0.9603960514068604
t_proc_label_accuracy :  0.9331682920455933
t_mem_label_accuracy :  1.0
t_store_label_accuracy :  0.9542078971862793
t_script_label_accuracy :  1.0


In [None]:
# Save the model
model.save("trained.model", save_format="tf")

INFO:tensorflow:Assets written to: trained.model\assets


INFO:tensorflow:Assets written to: trained.model\assets


In [None]:
num_samples = 10
num_features = 172

example_data = np.random.rand(num_samples, num_features)

example_data_reshaped = example_data.reshape((num_samples, 1, num_features))

# Make predictions
predictions = model.predict(example_data_reshaped)

predictions





[array([[0.4785819 ],
        [0.44603148],
        [0.28424406],
        [0.53706396],
        [0.41176972],
        [0.38258395],
        [0.4183371 ],
        [0.25457302],
        [0.41240776],
        [0.36800995]], dtype=float32),
 array([[0.5383295 ],
        [0.60924715],
        [0.6695589 ],
        [0.557432  ],
        [0.6234349 ],
        [0.6724018 ],
        [0.61246085],
        [0.6707883 ],
        [0.5934156 ],
        [0.6233861 ]], dtype=float32),
 array([[0.19104344],
        [0.18166777],
        [0.15705737],
        [0.22233996],
        [0.26623252],
        [0.15941095],
        [0.1581104 ],
        [0.13894133],
        [0.23879799],
        [0.28085533]], dtype=float32),
 array([[0.41651618],
        [0.411501  ],
        [0.6029622 ],
        [0.454498  ],
        [0.5913472 ],
        [0.55606174],
        [0.4354481 ],
        [0.70667714],
        [0.5522105 ],
        [0.60464555]], dtype=float32),
 array([[0.8044146 ],
        [0.8377071 ],
        

In [None]:
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 36556), started 0:04:05 ago. (Use '!kill 36556' to kill it.)

In [None]:
conn.close()