#### Imports

In [1]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa import stattools
from statsmodels.graphics import correlation
from context_printer import ContextPrinter as Ctp
from sklearn.decomposition import PCA

#### Local Imports

In [2]:
# Put the project's source directory first on the module search path so the
# project-local imports below can be resolved.
# NOTE(review): 'src/' is a relative path, so this depends on the kernel's
# working directory — presumably the notebook is run from the repository root; confirm.
sys.path.insert(0,'src/')
from data import read_all_data, all_devices
from supervised_data import get_dataset

### Counting the number of samples from each class for each device

In [3]:
# Load the data of every device once; count_class_samples later slices `data` by device position.
data = read_all_data()

[93m[1mReading data[0m
[93m█ [0m[1/9] Data from Danmini_Doorbell[0m
[93m█ [0m[2/9] Data from Ecobee_Thermostat[0m
[93m█ [0m[3/9] Data from Ennio_Doorbell[0m
[93m█ [0m[4/9] Data from Philips_B120N10_Baby_Monitor[0m
[93m█ [0m[5/9] Data from Provision_PT_737E_Security_Camera[0m
[93m█ [0m[6/9] Data from Provision_PT_838_Security_Camera[0m
[93m█ [0m[7/9] Data from Samsung_SNH_1011_N_Webcam[0m
[93m█ [0m[8/9] Data from SimpleHome_XCS7_1002_WHT_Security_Camera[0m
[93m█ [0m[9/9] Data from SimpleHome_XCS7_1003_WHT_Security_Camera[0m


In [16]:
def number_str(number: int):
    """Wrap ``number`` in a LaTeX ``\\numprint{...}`` macro call.

    The value is rendered with its default string formatting, so
    ``number_str(1234)`` produces the text ``\\numprint{1234}``.
    """
    return f"\\numprint{{{number}}}"

In [25]:
def count_class_samples(device_id: int):
    """Print and return the per-class sample counts of one device.

    Builds the binary (``multiclass=False``) dataset for the single device at
    position ``device_id`` in ``data`` and counts how often each label value
    occurs. The counts are printed as LaTeX-ready text (via ``number_str``)
    together with each class's share of the device's samples.

    Args:
        device_id: Zero-based position of the device in ``data``.

    Returns:
        A ``(benign, attack)`` tuple holding the two counts.

    Raises:
        ValueError: If fewer than two distinct label values are present, in
            which case the benign/attack split cannot be determined.
    """
    binary_dataset = get_dataset(data[device_id:device_id + 1], multiclass=False)
    # Only the labels are needed here, so the feature matrix is not converted.
    # NOTE(review): `binary_dataset[:]` is assumed to yield (features, labels)
    # with a `.numpy()` method on each — confirm against get_dataset.
    binary_labels = binary_dataset[:][1].numpy()

    # np.unique returns the distinct label values in ascending order, so
    # counts[0] belongs to the smaller label value.
    # NOTE(review): the smaller label is treated as "benign" — confirm the encoding.
    _, counts = np.unique(binary_labels, return_counts=True)
    if counts.size < 2:
        raise ValueError(
            "Expected two label classes for device {}, found {}".format(device_id, counts.size)
        )

    benign, attack = counts[0], counts[1]
    total = benign + attack
    percentage_benign = benign * 100 / total
    percentage_attack = attack * 100 / total
    print("Number of benign instances: " + number_str(benign) + " ({:.2f}\\%)".format(percentage_benign))
    print("Number of attack instances: " + number_str(attack) + " ({:.2f}\\%)".format(percentage_attack))
    print("Total number of instances: " + number_str(total))
    return benign, attack

In [28]:
# Per-device counts are collected so that the minima and totals can be derived
# afterwards with min()/sum() instead of a magic float sentinel.
benign_counts = []
attack_counts = []

# Iterate over every loaded device rather than a hard-coded device count.
# NOTE(review): assumes `all_devices` lists the devices in the same order as
# `data` and has at least as many entries — confirm against read_all_data.
for device_id in range(len(data)):
    print('Device {}'.format(device_id + 1) + ': ' + all_devices[device_id])
    benign, attack = count_class_samples(device_id)
    benign_counts.append(benign)
    attack_counts.append(attack)
    print()  # blank line separating the per-device reports

min_benign = min(benign_counts)
min_attack = min(attack_counts)
sum_benign = sum(benign_counts)
sum_attack = sum(attack_counts)

print("Minimum number of benign samples: " + number_str(min_benign))
print("Minimum number of attack samples: " + number_str(min_attack))

# Class shares over all devices combined.
total_samples = sum_benign + sum_attack
percentage_benign = 100 * sum_benign / total_samples
percentage_attack = 100 * sum_attack / total_samples

print("Total number of benign samples: " + number_str(sum_benign) + " ({:.2f}\\%)".format(percentage_benign))
print("Total number of attack samples: " + number_str(sum_attack) + " ({:.2f}\\%)".format(percentage_attack))
print("Total number of samples: " + number_str(total_samples))

Device 1: Danmini_Doorbell
Number of benign instances: \numprint{49548} (4.87\%)
Number of attack instances: \numprint{968750} (95.13\%)
Total number of instances: \numprint{1018298}

Device 2: Ecobee_Thermostat
Number of benign instances: \numprint{13113} (1.57\%)
Number of attack instances: \numprint{822763} (98.43\%)
Total number of instances: \numprint{835876}

Device 3: Ennio_Doorbell
Number of benign instances: \numprint{39100} (11.00\%)
Number of attack instances: \numprint{316400} (89.00\%)
Total number of instances: \numprint{355500}

Device 4: Philips_B120N10_Baby_Monitor
Number of benign instances: \numprint{175240} (15.95\%)
Number of attack instances: \numprint{923437} (84.05\%)
Total number of instances: \numprint{1098677}

Device 5: Provision_PT_737E_Security_Camera
Number of benign instances: \numprint{62154} (7.50\%)
Number of attack instances: \numprint{766106} (92.50\%)
Total number of instances: \numprint{828260}

Device 6: Provision_PT_838_Security_Camera
Number of

End.