In [1]:
from botdet.data.dataset_botnet import BotnetDataset
from botdet.data.dataloader import GraphDataLoader
import time
import numpy as np
from dgl.data.utils import save_graphs, load_graphs
from torch.utils.data import DataLoader
from data_utils import BotNetWrapper
import dgl
import os
from tqdm import tqdm


# Root directory that holds the botnet datasets. Defaults to the original
# cluster location; set the BOTNET_DATA_DIR environment variable to point the
# notebook at a different copy of the data without editing this cell.
BASE_DATA_DIR = os.environ.get(
    "BOTNET_DATA_DIR", "/p/adversarialml/as9rw/datasets/botnet")

Using backend: pytorch


In [None]:
# Custom dataloader to handle graphs our way

class GraphDataLoader(DataLoader):
    """
    DataLoader that merges every sampled group of graphs into one batched graph.

    Note: this definition shadows the ``GraphDataLoader`` imported from
    ``botdet.data.dataloader`` at the top of the notebook.

    Args:
        dataset: indexable collection of graph objects; each sampled group is
            handed to ``dgl.batch`` as a list
        batch_size (int, optional): number of graphs merged into each batch
        shuffle (bool, optional): forwarded to ``DataLoader`` as ``shuffle``
        num_workers (int, optional): number of workers for multiple subprocesses
    """

    def __init__(self, dataset, batch_size=1, shuffle=False, num_workers=0):

        # Collate function: combine the list of graphs picked for one batch.
        def _merge_graphs(graphs):
            return dgl.batch(graphs)

        loader_options = dict(
            batch_size=batch_size,
            shuffle=shuffle,
            collate_fn=_merge_graphs,
            num_workers=num_workers,
        )
        super().__init__(dataset, **loader_options)

In [None]:
# Dataset identifier: passed as `name=` to BotnetDataset below and used as the
# sub-directory under BASE_DATA_DIR where the adv/victim splits are written.
dataset_name = "chord"

In [None]:
# Build the in-memory test split in DGL graph format and report the load time.
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# wall-clock adjustments the way a difference of time.time() values can.
start = time.perf_counter()
botnet_dataset_test = BotnetDataset(
    name=dataset_name, root=BASE_DATA_DIR,
    add_nfeat_ones=True, in_memory=True,
    split='test', graph_format='dgl')
end = time.perf_counter()
print("Loaded test data")
print("Load time for test data", end - start)

In [None]:
# Build the in-memory validation split in DGL graph format and report the load
# time. perf_counter() is monotonic, so the measured duration cannot be skewed
# by wall-clock adjustments the way a difference of time.time() values can.
start = time.perf_counter()
botnet_dataset_val = BotnetDataset(
    name=dataset_name, root=BASE_DATA_DIR,
    add_nfeat_ones=True, in_memory=True,
    split='val', graph_format='dgl')
end = time.perf_counter()
print("Loaded val data")
print("Load time for val data", end - start)

In [None]:
# Build the in-memory training split in DGL graph format and report the load
# time. perf_counter() is monotonic, so the measured duration cannot be skewed
# by wall-clock adjustments the way a difference of time.time() values can.
start = time.perf_counter()
botnet_dataset_train = BotnetDataset(
    name=dataset_name, root=BASE_DATA_DIR,
    add_nfeat_ones=True, in_memory=True,
    split='train', graph_format='dgl')
end = time.perf_counter()
print("Loaded train data")
print("Load time for train data", end - start)

In [None]:
# Fraction of every split (train/val/test) assigned to the victim; the
# remaining graphs go to the adversary.
victim_ratio = 0.7

# Output directories for the two parties: <BASE_DATA_DIR>/<dataset_name>/adv
# and <BASE_DATA_DIR>/<dataset_name>/victim.
STORAGE_DIR_ADV = os.path.join(BASE_DATA_DIR, dataset_name, "adv")
STORAGE_DIR_VICTIM = os.path.join(BASE_DATA_DIR, dataset_name, "victim")

In [None]:
# Shuffle the graph indices of each split. A dedicated, seeded generator is
# used so the victim/adversary partition can be regenerated exactly after a
# kernel restart instead of changing on every run.
SPLIT_SEED = 0
split_rng = np.random.RandomState(SPLIT_SEED)

train_perm = split_rng.permutation(len(botnet_dataset_train))
val_perm = split_rng.permutation(len(botnet_dataset_val))
test_perm = split_rng.permutation(len(botnet_dataset_test))

In [None]:
# Cut every shuffled index array at the victim_ratio mark: indices before the
# cut belong to the victim, indices from the cut onwards to the adversary.
train_cut = int(victim_ratio * len(botnet_dataset_train))
victim_train, adv_train = train_perm[:train_cut], train_perm[train_cut:]

val_cut = int(victim_ratio * len(botnet_dataset_val))
victim_val, adv_val = val_perm[:val_cut], val_perm[val_cut:]

test_cut = int(victim_ratio * len(botnet_dataset_test))
victim_test, adv_test = test_perm[:test_cut], test_perm[test_cut:]

In [None]:
# Report how many graph indices each party received, in train/val/test order.
adv_sizes = tuple(len(indices) for indices in (adv_train, adv_val, adv_test))
victim_sizes = tuple(len(indices) for indices in (victim_train, victim_val, victim_test))
print("Adv: %d,%d,%d" % adv_sizes)
print("Victim: %d,%d,%d" % victim_sizes)

In [None]:
%%time

# Save test graphs for victim/adv
# Each party's stored "test" set is its share of the validation split followed
# by its share of the test split.
test_victim_graphs = [botnet_dataset_val[i] for i in victim_val] + [botnet_dataset_test[i] for i in victim_test]
test_adv_graphs = [botnet_dataset_val[i] for i in adv_val] + [botnet_dataset_test[i] for i in adv_test]

print("adv:%d | victim:%d" % (len(test_adv_graphs), len(test_victim_graphs)))

# Make sure the output directories exist before writing. Nothing else in this
# notebook creates them, and whether save_graphs creates missing parent
# directories itself is not visible here (TODO confirm for the installed DGL).
for storage_dir in (STORAGE_DIR_VICTIM, STORAGE_DIR_ADV):
    os.makedirs(storage_dir, exist_ok=True)

save_graphs(os.path.join(STORAGE_DIR_VICTIM, "test.bin"), test_victim_graphs)
save_graphs(os.path.join(STORAGE_DIR_ADV, "test.bin"), test_adv_graphs)

In [None]:
%%time

# Save train graphs for victim/adv
train_victim_graphs = [botnet_dataset_train[i] for i in victim_train]
train_adv_graphs = [botnet_dataset_train[i] for i in adv_train]

# Make sure the output directories exist before writing. Nothing else in this
# notebook creates them, and whether save_graphs creates missing parent
# directories itself is not visible here (TODO confirm for the installed DGL).
for storage_dir in (STORAGE_DIR_VICTIM, STORAGE_DIR_ADV):
    os.makedirs(storage_dir, exist_ok=True)

# Write the lists built above instead of indexing the dataset a second time.
save_graphs(os.path.join(STORAGE_DIR_VICTIM, "train.bin"), train_victim_graphs)
save_graphs(os.path.join(STORAGE_DIR_ADV, "train.bin"), train_adv_graphs)

print("adv:%d | victim:%d" % (len(train_adv_graphs), len(train_victim_graphs)))

In [None]:
%%time

# Read the victim's saved test graphs back from disk and print how many
# batches a batch-size-1 loader yields; the second value returned by
# load_graphs is discarded.
victim_test_path = os.path.join(STORAGE_DIR_VICTIM, "test.bin")
glist, _ = load_graphs(victim_test_path)
gdl = GraphDataLoader(glist, batch_size=1)
print(len(gdl))

In [None]:
%%time

# Read the adversary's saved test graphs back from disk and print how many
# batches a batch-size-1 loader yields; the second value returned by
# load_graphs is discarded.
adv_test_path = os.path.join(STORAGE_DIR_ADV, "test.bin")
glist, _ = load_graphs(adv_test_path)
gdl = GraphDataLoader(glist, batch_size=1)
print(len(gdl))

In [None]:
%%time

# Read the victim's saved train graphs back from disk and print how many
# batches a batch-size-1 loader yields; the second value returned by
# load_graphs is discarded.
victim_train_path = os.path.join(STORAGE_DIR_VICTIM, "train.bin")
glist, _ = load_graphs(victim_train_path)
gdl = GraphDataLoader(glist, batch_size=1)
print(len(gdl))

In [None]:
%%time

# Read the adversary's saved train graphs back from disk and print how many
# batches a batch-size-1 loader yields; the second value returned by
# load_graphs is discarded.
adv_train_path = os.path.join(STORAGE_DIR_ADV, "train.bin")
glist, _ = load_graphs(adv_train_path)
gdl = GraphDataLoader(glist, batch_size=1)
print(len(gdl))

In [3]:
%%time
# Time the construction of BotNetWrapper (from data_utils) with fast=True.
# NOTE(review): what "adv" and `fast` select is defined in data_utils and is
# not visible here; the recorded wall time (7.8 s) is nearly the same as the
# fast=False run in the next cell (8.07 s) -- confirm the flag has an effect.
br = BotNetWrapper("adv", fast=True)

CPU times: user 236 ms, sys: 7.56 s, total: 7.8 s
Wall time: 7.8 s


In [4]:
%%time
# Time the construction of BotNetWrapper (from data_utils) with fast=False,
# for comparison with the fast=True run in the previous cell.
# NOTE(review): the semantics of `fast` live in data_utils and are not visible
# here; this rebinds `br`, replacing the instance built with fast=True.
br = BotNetWrapper("adv", fast=False)

CPU times: user 193 ms, sys: 7.77 s, total: 7.96 s
Wall time: 8.07 s
