Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

SPDX-License-Identifier: Apache-2.0

# Train and Predict Wifi Data Using GDN

This notebook contains the steps to:
   1. Find and load the preprocessed data for training with GDN (Graph Deviation Network)  
   2. Train the model  
   3. Run model inference: visualize the trained graph and results  

# Loading the Preprocessed Data For Training


In [1]:
# Make the project's source package importable from this notebook so that
# `model.GDN.GDNTrainer` can be imported in the next cell.
# NOTE(review): the path is relative, so it presumably assumes the kernel's
# working directory is the folder containing this notebook — confirm.
import sys 
sys.path.append('../../src/anomaly_detection_spatial_temporal_data/')

In [2]:
import pandas as pd
import numpy as np
import yaml
from model.GDN.GDNTrainer import GDNTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Directory (relative path) that holds the sensor list and the train/test CSV
# files read by the following cells.
data_dir = "../../data/03_primary/wifi/gdn"

In [4]:
# Read the sensor names, one per line. Blank lines are skipped so that a
# trailing newline at the end of the file (or an accidental empty line) does
# not add an empty-string entry to the sensor list.
with open(f"{data_dir}/wifi_sensor_list.txt", "r") as f:
    sensors = [name for name in f.read().splitlines() if name.strip()]

In [5]:
# Report how many sensor names were read from the sensor list file.
print(f"Number of sensors: {len(sensors)}")

Number of sensors: 29


In [6]:
# Show the full list of sensor names.
print(sensors)

['host_26', 'host_19', 'host_9', 'host_13', 'host_17', 'host_1', 'host_5', 'host_0', 'host_4', 'host_18', 'host_12', 'host_6', 'host_8', 'host_7', 'host_14', 'host_2', 'host_3', 'host_10', 'host_24', 'host_25', 'host_16', 'host_11', 'host_20', 'host_27', 'host_22', 'host_28', 'host_29', 'host_21', 'host_23']


In [7]:
# Load the train and test tables from the data directory.
train_df = pd.read_csv(f"{data_dir}/wifi_gdn_train.csv")
test_df = pd.read_csv(f"{data_dir}/wifi_gdn_test.csv")

# Print both (rows, columns) shapes, one per line.
print(train_df.shape, test_df.shape, sep="\n")

(1200, 29)
(1200, 30)


In [8]:
# Path (relative) to the YAML file holding the GDN parameters.
model_config_file = "../../conf/base/parameters/gdn.yml"

In [9]:
# Parse the GDN parameter file with yaml.safe_load.
# A malformed file is deliberately allowed to raise yaml.YAMLError here:
# printing the error and carrying on would leave `model_config` undefined
# (or stale from an earlier run), and the failure would only surface in a
# later cell as an unrelated NameError/KeyError.
with open(model_config_file, "r") as stream:
    model_config = yaml.safe_load(stream)

# Echo the parsed configuration so the parameters used are recorded in the output.
print(model_config)

{'env_config_iot': {'checkpoint_save_dir': 'data/07_model_output/iot/gdn', 'report': 'best', 'device': 'cpu', 'load_model_path': ''}, 'env_config_wifi': {'checkpoint_save_dir': 'data/07_model_output/wifi/gdn', 'report': 'best', 'device': 'cpu', 'load_model_path': ''}, 'train_config': {'seed': 5, 'batch': 32, 'slide_win': 5, 'dim': 64, 'out_layer_num': 1, 'slide_stride': 1, 'topk': 5, 'out_layer_inter_dim': 128, 'val_ratio': 0.2, 'decay': 0, 'epoch': 3, 'comment': ''}}


In [10]:
# Pick the training parameters and the WiFi environment settings out of the
# loaded configuration.
train_config, env_config = (
    model_config["train_config"],
    model_config["env_config_wifi"],
)

# Point checkpoints at a notebook-specific output directory; this updates the
# environment settings dict in place.
env_config.update(
    checkpoint_save_dir="../../data/07_model_output/gdn-wifi-notebook"
)

## Train Model

In [11]:
# Build the trainer from the sensor names, the train/test frames, and the
# training and environment configuration dicts (all passed positionally).
trainer = GDNTrainer(sensors, train_df, test_df, train_config, env_config)

In [12]:
# Run the trainer. The recorded output shows one loss line per epoch.
# NOTE(review): presumably this is where checkpoints are written to
# env_config["checkpoint_save_dir"] — confirm in GDNTrainer.
trainer.run()

epoch (0 / 3) (Loss:0.28815645, ACU_loss:8.64469342)
epoch (1 / 3) (Loss:0.10898511, ACU_loss:3.26955318)
epoch (2 / 3) (Loss:0.08161449, ACU_loss:2.44843475)


## Model Inference

In [13]:
# predict() returns two values, unpacked here as predictions and labels.
# NOTE(review): presumably both are computed on test_df — confirm in GDNTrainer.
pred, labels = trainer.predict()


In [14]:
# Display the predictions returned by the trainer.
pred

array([0., 0., 0., ..., 1., 1., 1.])

In [15]:
# Shape of the predictions.
# NOTE(review): the recorded (1195,) is presumably the 1200 test rows minus
# the slide_win of 5 — confirm against GDNTrainer.
pred.shape


(1195,)

In [16]:
# Shape of the labels, viewed as a NumPy array, for comparison with the
# shape of the predictions.
np.asarray(labels).shape


(1195,)

# References

Anisa Allahdadi and Ricardo Morla. 2017. 802.11 Wireless Access Point Usage Simulation and Anomaly Detection. CoRR abs/1707.02933, (2017). Retrieved from http://arxiv.org/abs/1707.02933 

Ailin Deng and Bryan Hooi. 2021. Graph Neural Network-Based Anomaly Detection in Multivariate Time Series. CoRR abs/2106.06947, (2021). Retrieved from https://arxiv.org/abs/2106.06947 