## Module Import & Data Load

In [1]:
import sys

from pathlib import Path
from datetime import timedelta

import dateutil
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import trange
from TaPR_pkg import etapr
from src import dataset
import random
import pickle
from torchinfo import summary
from collections import Counter


In [2]:
import transformers
from src.models.gpt2 import GPT2Model

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True

In [4]:
def set_seed(random_seed):
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(random_seed)
    random.seed(random_seed)

set_seed(72)

In [5]:
WINDOW_GIVEN = 254
WINDOW_SIZE = 255


class HaiDataset(Dataset):
    def __init__(self, timestamps, df, stride=1, attacks=None):
        self.ts = np.array(timestamps)
        self.tag_values = np.array(df, dtype=np.float32)
        self.valid_idxs = []
        for L in trange(len(self.ts) - WINDOW_SIZE + 1):
            R = L + WINDOW_SIZE - 1
            if dateutil.parser.parse(self.ts[R]) - dateutil.parser.parse(
                self.ts[L]
            ) == timedelta(seconds=WINDOW_SIZE - 1):
                self.valid_idxs.append(L)
        self.valid_idxs = np.array(self.valid_idxs, dtype=np.int32)[::stride]
        self.n_idxs = len(self.valid_idxs)
        print(f"# of valid windows: {self.n_idxs}")
        if attacks is not None:
            self.attacks = np.array(attacks, dtype=np.float32)
            self.with_attack = True
        else:
            self.with_attack = False

    def __len__(self):
        return self.n_idxs

    def __getitem__(self, idx):
        i = self.valid_idxs[idx]
        last = i + WINDOW_SIZE - 1
        item = {"attack": self.attacks[last]} if self.with_attack else {}
        item["ts"] = self.ts[i + WINDOW_SIZE - 1]
        item["given"] = torch.from_numpy(self.tag_values[i : i + WINDOW_GIVEN])
        item["answer"] = torch.from_numpy(self.tag_values[last])
        return item

In [6]:
def boundary_check(df):
    x = np.array(df, dtype=np.float32)
    return np.any(x > 1.0), np.any(x < 0), np.any(np.isnan(x))

In [7]:
TRAIN_DATASET = sorted([x for x in Path("HAICon2021_dataset/train/").glob("*.csv")])
TRAIN_DATASET

[PosixPath('HAICon2021_dataset/train/train1.csv'),
 PosixPath('HAICon2021_dataset/train/train2.csv'),
 PosixPath('HAICon2021_dataset/train/train3.csv'),
 PosixPath('HAICon2021_dataset/train/train4.csv'),
 PosixPath('HAICon2021_dataset/train/train5.csv'),
 PosixPath('HAICon2021_dataset/train/train6.csv')]

In [8]:
TEST_DATASET = sorted([x for x in Path("HAICon2021_dataset/test/").glob("*.csv")])
TEST_DATASET

[PosixPath('HAICon2021_dataset/test/test1.csv'),
 PosixPath('HAICon2021_dataset/test/test2.csv'),
 PosixPath('HAICon2021_dataset/test/test3.csv')]

In [9]:
VALIDATION_DATASET = sorted([x for x in Path("HAICon2021_dataset/validation/").glob("*.csv")])
VALIDATION_DATASET

[PosixPath('HAICon2021_dataset/validation/validation.csv')]

In [10]:
def dataframe_from_csv(target):
    return pd.read_csv(target).rename(columns=lambda x: x.strip())

def dataframe_from_csvs(targets):
    return pd.concat([dataframe_from_csv(x) for x in targets])

# LOAD TRAIN

In [11]:
TRAIN_DF_RAW = dataframe_from_csvs(TRAIN_DATASET)
TRAIN_DF_RAW

Unnamed: 0,timestamp,C01,C02,C03,C04,C05,C06,C07,C08,C09,...,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86
0,2021-07-11 10:00:00,-2.2642,0,12.26196,-0.00087,12.01019,0.2254,12.66931,70,1,...,0.92255,30.08042,7.08818,595.06104,276.40338,1,1014.79321,12.0,50,3506
1,2021-07-11 10:00:01,-2.4923,0,12.26196,0.00058,12.56714,-0.0711,12.66931,70,1,...,0.92255,30.08423,7.08818,531.50317,276.18634,1,1014.79321,12.0,50,3493
2,2021-07-11 10:00:02,-2.8460,0,12.26196,-0.00072,14.48975,0.0051,12.66931,70,1,...,0.91873,30.09148,7.08818,451.06253,279.85754,1,1014.79321,12.0,50,3490
3,2021-07-11 10:00:03,-2.1235,0,12.26196,0.00101,15.93170,0.1842,12.66931,70,1,...,0.91644,30.10407,7.08818,404.38739,281.50317,1,1014.79321,12.0,50,3525
4,2021-07-11 10:00:04,-2.9074,0,12.26196,0.00043,16.10718,0.1842,12.66931,70,1,...,0.91797,30.10331,7.08818,382.53925,281.34039,1,1014.79321,12.0,50,3503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259195,2021-08-09 08:59:56,-2.0065,0,12.26196,0.00051,100.28228,0.5977,12.53358,70,1,...,1.17355,35.05434,10.00000,3178.48877,370.02679,1,986.05908,12.0,50,136
259196,2021-08-09 08:59:57,-2.2101,0,12.26196,-0.00029,100.28228,0.5509,12.53358,70,1,...,1.16898,35.03488,10.00000,3175.36084,370.40656,1,986.05908,12.0,50,89
259197,2021-08-09 08:59:58,-2.3325,0,12.26196,0.00152,100.28228,0.5425,12.53358,70,1,...,1.16974,35.02840,10.00000,3175.73608,368.12787,1,986.05908,12.0,50,90
259198,2021-08-09 08:59:59,-2.3049,0,12.26196,0.00058,100.26703,0.6266,12.53358,70,1,...,1.16974,35.02420,10.00000,3176.61182,368.01941,1,986.05908,12.0,50,74


In [12]:
TIMESTAMP_FIELD = "timestamp"
IDSTAMP_FIELD = 'id'
ATTACK_FIELD = "attack"
VALID_COLUMNS_IN_TRAIN_DATASET = TRAIN_DF_RAW.columns.drop([TIMESTAMP_FIELD])
VALID_COLUMNS_IN_TRAIN_DATASET

Index(['C01', 'C02', 'C03', 'C04', 'C05', 'C06', 'C07', 'C08', 'C09', 'C10',
       'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20',
       'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28', 'C29', 'C30',
       'C31', 'C32', 'C33', 'C34', 'C35', 'C36', 'C37', 'C38', 'C39', 'C40',
       'C41', 'C42', 'C43', 'C44', 'C45', 'C46', 'C47', 'C48', 'C49', 'C50',
       'C51', 'C52', 'C53', 'C54', 'C55', 'C56', 'C57', 'C58', 'C59', 'C60',
       'C61', 'C62', 'C63', 'C64', 'C65', 'C66', 'C67', 'C68', 'C69', 'C70',
       'C71', 'C72', 'C73', 'C74', 'C75', 'C76', 'C77', 'C78', 'C79', 'C80',
       'C81', 'C82', 'C83', 'C84', 'C85', 'C86'],
      dtype='object')

In [13]:
TAG_MIN = TRAIN_DF_RAW[VALID_COLUMNS_IN_TRAIN_DATASET].min()
TAG_MAX = TRAIN_DF_RAW[VALID_COLUMNS_IN_TRAIN_DATASET].max()

In [14]:
def normalize(df):
    ndf = df.copy()
    for c in df.columns:
        if TAG_MIN[c] == TAG_MAX[c]:
            ndf[c] = df[c] - TAG_MIN[c]
        else:
            ndf[c] = (df[c] - TAG_MIN[c]) / (TAG_MAX[c] - TAG_MIN[c])
    return ndf

In [15]:
TRAIN_DF = normalize(TRAIN_DF_RAW[VALID_COLUMNS_IN_TRAIN_DATASET]).ewm(alpha=0.9).mean()
TRAIN_DF

Unnamed: 0,C01,C02,C03,C04,C05,C06,C07,C08,C09,C10,...,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86
0,0.433107,0.0,0.056338,0.501686,0.116706,0.447955,0.816071,0.0,0.0,0.0,...,0.240561,0.194185,0.708818,0.183693,0.169066,0.0,0.538147,0.0,0.0,0.276078
1,0.395098,0.0,0.056338,0.517560,0.121738,0.397053,0.816071,0.0,0.0,0.0,...,0.240561,0.194535,0.708818,0.165517,0.168332,0.0,0.538147,0.0,0.0,0.275176
2,0.333267,0.0,0.056338,0.504886,0.139408,0.405431,0.816071,0.0,0.0,0.0,...,0.238319,0.195226,0.708818,0.141083,0.180570,0.0,0.538147,0.0,0.0,0.274889
3,0.446345,0.0,0.056338,0.522383,0.154058,0.436703,0.816071,0.0,0.0,0.0,...,0.236754,0.196440,0.708818,0.125447,0.187292,0.0,0.538147,0.0,0.0,0.277264
4,0.328324,0.0,0.056338,0.517845,0.157091,0.439827,0.816071,0.0,0.0,0.0,...,0.237495,0.196492,0.708818,0.117700,0.187419,0.0,0.538147,0.0,0.0,0.275990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259195,0.485034,0.0,0.056340,0.515751,0.994085,0.520004,0.640361,0.0,0.0,0.0,...,0.403941,0.696846,1.000000,0.996289,0.517805,0.0,0.414192,0.0,0.0,0.018717
259196,0.447224,0.0,0.056338,0.509379,0.994085,0.510481,0.640361,0.0,0.0,0.0,...,0.401381,0.695028,1.000000,0.995484,0.518674,0.0,0.414192,0.0,0.0,0.015676
259197,0.423251,0.0,0.056338,0.528358,0.994085,0.508101,0.640361,0.0,0.0,0.0,...,0.401571,0.694256,1.000000,0.995510,0.511131,0.0,0.414192,0.0,0.0,0.015440
259198,0.425407,0.0,0.056338,0.520068,0.993949,0.522157,0.640361,0.0,0.0,0.0,...,0.401590,0.693797,1.000000,0.995761,0.510014,0.0,0.414192,0.0,0.0,0.014318


In [16]:
HAI_DATASET_TRAIN = HaiDataset(TRAIN_DF_RAW[TIMESTAMP_FIELD], TRAIN_DF, stride=10)
HAI_DATASET_TRAIN[0]

HBox(children=(FloatProgress(value=0.0, max=1004148.0), HTML(value='')))


# of valid windows: 100288


{'ts': '2021-07-11 10:04:14',
 'given': tensor([[0.4331, 0.0000, 0.0563,  ..., 0.0000, 0.0000, 0.2761],
         [0.3951, 0.0000, 0.0563,  ..., 0.0000, 0.0000, 0.2752],
         [0.3333, 0.0000, 0.0563,  ..., 0.0000, 0.0000, 0.2749],
         ...,
         [0.3126, 0.0000, 0.0563,  ..., 0.0000, 0.0000, 0.0091],
         [0.4151, 0.0000, 0.0563,  ..., 0.0000, 0.0000, 0.0091],
         [0.4197, 0.0000, 0.0563,  ..., 0.0000, 0.0000, 0.0091]]),
 'answer': tensor([0.4201, 0.0000, 0.0563, 0.4333, 0.1376, 0.4077, 0.8161, 0.0000, 0.0000,
         0.0000, 0.7641, 0.3990, 0.4475, 0.3553, 0.2482, 0.2243, 0.0000, 0.0000,
         0.0000, 0.3050, 0.3530, 0.0000, 0.2908, 0.6291, 0.0067, 0.0000, 0.2951,
         0.2689, 0.0000, 0.9763, 0.5345, 0.2550, 0.3436, 0.0000, 0.9009, 0.0000,
         0.2097, 0.0000, 0.0000, 0.0340, 0.2541, 0.8280, 0.4356, 0.3652, 1.0000,
         1.0000, 0.4115, 1.0000, 0.0000, 0.0297, 0.4960, 0.0000, 0.6607, 0.1375,
         0.0000, 0.2578, 0.7985, 0.1781, 0.2675, 0.3467, 1.

# LOAD VALID

In [17]:
VALIDATION_DF_RAW = dataframe_from_csvs(VALIDATION_DATASET)
VALIDATION_DF_RAW

Unnamed: 0,timestamp,C01,C02,C03,C04,C05,C06,C07,C08,C09,...,C78,C79,C80,C81,C82,C83,C84,C85,C86,attack
0,2021-07-10 00:00:01,-2.0395,0,11.78894,0.00058,16.32080,0.4010,12.903438,70,1,...,31.52100,0.716042,386.51172,356.64423,1,1034.712769,12.0,50,161,0
1,2021-07-10 00:00:02,-2.1750,0,11.78894,-0.00282,15.95459,0.4187,12.903438,70,1,...,31.51756,0.716042,411.33905,352.08698,1,1034.712769,12.0,50,155,0
2,2021-07-10 00:00:03,-2.5854,0,11.78131,-0.00398,14.79492,0.0691,12.903438,70,1,...,31.50269,0.716042,438.15259,347.63818,1,1034.712769,12.0,50,149,0
3,2021-07-10 00:00:04,-2.4722,0,11.78894,-0.00152,14.72626,0.3306,12.903438,70,1,...,31.47942,0.716042,465.95908,340.33203,1,1034.712769,12.0,50,148,0
4,2021-07-10 00:00:05,-2.2319,0,11.78131,0.00072,14.78729,0.2150,12.903438,70,1,...,31.47942,0.716042,452.05582,335.17798,1,1034.712769,12.0,50,148,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86395,2021-07-10 23:59:56,-2.8150,0,12.26196,0.00130,15.53497,-0.0507,12.669310,70,1,...,31.79814,0.716042,1193.48230,347.58386,1,1018.394040,12.0,50,7,0
86396,2021-07-10 23:59:57,-2.6272,0,12.26196,0.00109,15.53497,0.1829,12.669310,70,1,...,31.79738,0.716042,1179.76929,348.03607,1,1018.394040,12.0,50,7,0
86397,2021-07-10 23:59:58,-2.3564,0,12.26196,-0.00101,15.53497,0.2907,12.669310,70,1,...,31.78784,0.716042,1152.66187,348.81366,1,1018.394040,12.0,50,7,0
86398,2021-07-10 23:59:59,-2.1541,0,12.26196,-0.00166,15.53497,0.3280,12.669310,70,1,...,31.77029,0.716042,1130.65723,349.13916,1,1018.394040,12.0,50,7,0


In [18]:
VALIDATION_DF = normalize(VALIDATION_DF_RAW[VALID_COLUMNS_IN_TRAIN_DATASET]).ewm(alpha=0.9).mean()

# LOAD TEST

In [19]:
TEST_DF_RAW = dataframe_from_csvs(TEST_DATASET)
TEST_DF_RAW

Unnamed: 0,timestamp,C01,C02,C03,C04,C05,C06,C07,C08,C09,...,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86
0,2021-07-13 16:00:01,-2.1424,0,12.37640,-0.00275,8.02002,0.1524,12.58874,70,1,...,1.13464,29.09910,2.85153,227.61664,333.51416,1,981.36951,12.0,50,2100
1,2021-07-13 16:00:02,-2.0449,0,12.37640,-0.00087,8.02002,0.1281,12.58874,70,1,...,1.13312,29.14869,2.85153,231.58908,340.18738,1,981.36951,12.0,50,2165
2,2021-07-13 16:00:03,-2.1287,0,12.37640,-0.00260,8.02002,0.0148,12.58874,70,1,...,1.13388,29.18836,2.85153,229.60286,347.14990,1,981.36951,12.0,50,2209
3,2021-07-13 16:00:04,-2.1287,0,12.37640,-0.00282,8.02002,0.0148,12.58874,70,1,...,1.13541,29.24558,2.85153,229.60286,355.90277,1,981.36951,12.0,50,2233
4,2021-07-13 16:00:05,-2.5969,0,12.37640,0.00058,8.02002,-0.2225,12.58874,70,1,...,1.13541,29.24558,2.85153,222.65125,358.23566,1,981.36951,12.0,50,2290
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129595,2021-07-16 23:59:56,-1.8328,0,12.07123,0.00036,15.03143,0.8050,12.58870,70,1,...,0.92941,29.45263,2.40398,1056.03259,373.20966,1,1015.06964,12.0,50,22
129596,2021-07-16 23:59:57,-1.8328,0,12.07123,-0.00470,13.74206,0.8050,12.58870,70,1,...,0.92255,29.45758,2.40398,1026.05530,376.66376,1,1015.06964,12.0,50,22
129597,2021-07-16 23:59:58,-1.5833,0,12.07123,-0.00130,13.64288,0.9984,12.58870,70,1,...,0.92255,29.45034,2.40398,981.37659,377.76691,1,1015.06964,12.0,50,22
129598,2021-07-16 23:59:59,-1.4022,0,12.07123,0.00087,13.62762,1.0595,12.58870,70,1,...,0.92255,29.43088,2.40398,919.62439,378.99664,1,1015.06964,12.0,50,22


In [20]:
TEST_DF = normalize(TEST_DF_RAW[VALID_COLUMNS_IN_TRAIN_DATASET]).ewm(alpha=0.9).mean()
TEST_DF

Unnamed: 0,C01,C02,C03,C04,C05,C06,C07,C08,C09,C10,...,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86
0,0.455432,0.0,0.079812,0.479046,0.077046,0.434169,0.711769,0.0,0.0,0.0,...,0.378726,0.095024,0.285153,0.068104,0.381526,0.0,0.393961,0.0,0.0,0.168791
1,0.471678,0.0,0.079812,0.499628,0.077046,0.429998,0.711769,0.0,0.0,0.0,...,0.377826,0.099579,0.285153,0.069240,0.404094,0.0,0.393961,0.0,0.0,0.173300
2,0.459304,0.0,0.079812,0.482713,0.077046,0.410346,0.711769,0.0,0.0,0.0,...,0.378191,0.103601,0.285153,0.068779,0.429462,0.0,0.393961,0.0,0.0,0.176731
3,0.458079,0.0,0.079812,0.478654,0.077046,0.408400,0.711769,0.0,0.0,0.0,...,0.379124,0.109204,0.285153,0.068734,0.461282,0.0,0.393961,0.0,0.0,0.178719
4,0.380719,0.0,0.079812,0.515098,0.077046,0.367874,0.711769,0.0,0.0,0.0,...,0.379218,0.109763,0.285153,0.066761,0.472272,0.0,0.393961,0.0,0.0,0.182832
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129595,0.511523,0.0,0.017216,0.516825,0.147339,0.557125,0.711717,0.0,0.0,0.0,...,0.245764,0.130889,0.240398,0.327644,0.529605,0.0,0.539339,0.0,0.0,0.010225
129596,0.512114,0.0,0.017216,0.461690,0.135262,0.557380,0.711717,0.0,0.0,0.0,...,0.241081,0.131212,0.240398,0.320111,0.540804,0.0,0.539339,0.0,0.0,0.010225
129597,0.553332,0.0,0.017216,0.493026,0.133167,0.590276,0.711717,0.0,0.0,0.0,...,0.240613,0.130586,0.240398,0.306708,0.545617,0.0,0.539339,0.0,0.0,0.010225
129598,0.587329,0.0,0.017216,0.519678,0.132821,0.603950,0.711717,0.0,0.0,0.0,...,0.240566,0.128753,0.240398,0.287885,0.550216,0.0,0.539339,0.0,0.0,0.010225


In [21]:
boundary_check(TEST_DF)

(True, True, False)

In [22]:
params = {'batch_size': 512,
          'shuffle': True,
          'num_workers': 4,
          'pin_memory' : True}
trainloader = DataLoader(HAI_DATASET_TRAIN, **params)

In [23]:
train_shape = next(iter(trainloader))['given'].shape
print(train_shape)

torch.Size([512, 254, 86])


## Model

In [24]:
class HAIGPT(nn.Module):

    def __init__(
            self,
            input_dim,
            hidden_size,
            max_len=254,
            device='cuda',
            **kwargs
    ):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_size = hidden_size
        self.device = device
        config = transformers.GPT2Config(
            vocab_size=1,  # doesn't matter -- we don't use the vocab
            n_embd=hidden_size,
            **kwargs
        )

        # note: the only difference between this GPT2Model and the default Huggingface version
        # is that the positional embeddings are removed (since we'll add those ourselves)
        self.transformer = GPT2Model(config)

        self.pos_embedding = nn.Parameter(torch.randn(1, max_len, hidden_size))
        self.embed_token = nn.Linear(self.input_dim, hidden_size)

        self.embed_ln = nn.LayerNorm(hidden_size)

        self.predict = nn.Linear(hidden_size, self.input_dim)

    def forward(self, token, attention_mask=None):

        batch_size, seq_length = token.shape[0], token.shape[1]

        if attention_mask is None:
            # attention mask for GPT: 1 if can be attended to, 0 if not
            attention_mask = torch.ones((batch_size, seq_length), dtype=torch.long).to(self.device)

        # embed each modality with a different head
        token_embeddings = self.embed_token(token)

        # time embeddings are treated similar to positional embeddings
        token_embeddings = token_embeddings + self.pos_embedding

        # which works nice in an autoregressive sense since states predict actions
        inputs = self.embed_ln(token_embeddings)

        # we feed in the input embeddings (not word indices as in NLP) to the model
        transformer_outputs = self.transformer(
            inputs_embeds=inputs,
            attention_mask=attention_mask,
        )
        x = transformer_outputs['last_hidden_state']
        
        # get predictions
        preds = self.predict(x)  

        return preds[:,-1]

In [25]:
train_shape[1:]

torch.Size([254, 86])

In [26]:
input_shape = train_shape[1:]
model = HAIGPT(
    input_dim=input_shape[-1],
    hidden_size=128,
    max_len=254,
    n_layer=3,
    n_head=1,
    n_inner=4*128,
    activation_function='relu',
    n_positions=1024,
    resid_pdrop=0.1,
    attn_pdrop=0.1,
).to(device)

In [27]:
print(model)

HAIGPT(
  (transformer): GPT2Model(
    (wte): Embedding(1, 128)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): Block(
        (ln_1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): Block(
        (ln_1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((128,), eps=1e-05, elementwise_affine=T

In [28]:
optimizer = optim.AdamW(model.parameters())
loss_fn = nn.MSELoss()

In [29]:
print(model(next(iter(trainloader))['given'].to(device)).shape)

torch.Size([512, 86])


In [30]:
def train(model, train_data, optimizer, loss_fn, use_fp16=True, max_norm=None):
    
    epoch_loss = 0
    
    model.train() 

    for idx, batch in enumerate(train_data):
        
        optimizer.zero_grad(set_to_none=True)
        scaler = torch.cuda.amp.GradScaler()
                
        input = batch['given'].to(device)
        answer = batch["answer"].to(device)
        
        with torch.cuda.amp.autocast(enabled=use_fp16):
            predictions = model.forward(input)
            train_loss = loss_fn(predictions, answer)
        if use_fp16:
            scaler.scale(train_loss).backward()
            if max_norm is not None:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
            scaler.step(optimizer)
            scaler.update()
        else:
            train_loss.backward()
            optimizer.step()
        
        epoch_loss += train_loss.item()
        
    return epoch_loss

In [31]:
def validation(model, val_data, loss_fn):
    model.eval()
    val_loss = 0
    for idx, batch in enumerate(val_data):
        input = batch['given'].to(device)
        answer = batch["answer"].to(device)
        with torch.no_grad():
            predictions = model.forward(input)
            val_loss += loss_fn(predictions, answer).item()
        
    return val_loss

In [32]:
class EarlyStopping():
    def __init__(self, patience=0, verbose=0):
        self._step = 0
        self._loss = float('inf')
        self.patience = patience
        self.verbose = verbose

    def validate(self, loss):
        if self._loss < loss:
            self._step += 1
            if self._step > self.patience:
                if self.verbose:
                    print(f'\n Training process is stopped early....')
                return True
        else:
            self._step = 0
            self._loss = loss

        return False

In [33]:
# %%time
# epochs = 300
# history = dict()
# best = {"loss": sys.float_info.max}
# early_stopping = EarlyStopping(patience=30, verbose=1)
# for epoch in range(1, epochs+1):
#     epoch_loss = train(model, trainloader, optimizer, loss_fn, use_fp16=False)
#     val_loss = validation(model, validloader, loss_fn)
    
#     history.setdefault('loss', []).append(epoch_loss) 
#     history.setdefault('val_loss', []).append(val_loss) 
    
#     sys.stdout.write(
#         "\r" + f"[Train] Epoch : {epoch:^3}"\
#         f"  Train Loss: {epoch_loss:.4}"\
#         f"  Validation Loss: {val_loss:.4}"\
#                     )
# #     print(f"[Train] Epoch : {epoch:^3}"\
# #         f"  Train Loss: {epoch_loss:.4}"\
# #         f"  Validation Loss: {val_loss:.4}")
#     if epoch_loss < best["loss"]:
#             best["state"] = model.state_dict()
#             best["loss"] = epoch_loss
#             best["epoch"] = epoch + 1
#     if early_stopping.validate(epoch_loss):
#         break

In [34]:
# plt.plot(history["loss"], label="Training Loss")
# plt.plot(history["val_loss"], label="Validation Loss")
# plt.legend()
# plt.show()

In [35]:
# after training
# model.load_state_dict(best["state"])

In [36]:
# load chkpoint
# model.load_state_dict(torch.load('./checkpoints/GPT_89.tar')['model'])
model.load_state_dict(torch.load('GPT_254.tar')['model'])

<All keys matched successfully>

## Validation

In [37]:
VALIDATION_DF_RAW

Unnamed: 0,timestamp,C01,C02,C03,C04,C05,C06,C07,C08,C09,...,C78,C79,C80,C81,C82,C83,C84,C85,C86,attack
0,2021-07-10 00:00:01,-2.0395,0,11.78894,0.00058,16.32080,0.4010,12.903438,70,1,...,31.52100,0.716042,386.51172,356.64423,1,1034.712769,12.0,50,161,0
1,2021-07-10 00:00:02,-2.1750,0,11.78894,-0.00282,15.95459,0.4187,12.903438,70,1,...,31.51756,0.716042,411.33905,352.08698,1,1034.712769,12.0,50,155,0
2,2021-07-10 00:00:03,-2.5854,0,11.78131,-0.00398,14.79492,0.0691,12.903438,70,1,...,31.50269,0.716042,438.15259,347.63818,1,1034.712769,12.0,50,149,0
3,2021-07-10 00:00:04,-2.4722,0,11.78894,-0.00152,14.72626,0.3306,12.903438,70,1,...,31.47942,0.716042,465.95908,340.33203,1,1034.712769,12.0,50,148,0
4,2021-07-10 00:00:05,-2.2319,0,11.78131,0.00072,14.78729,0.2150,12.903438,70,1,...,31.47942,0.716042,452.05582,335.17798,1,1034.712769,12.0,50,148,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86395,2021-07-10 23:59:56,-2.8150,0,12.26196,0.00130,15.53497,-0.0507,12.669310,70,1,...,31.79814,0.716042,1193.48230,347.58386,1,1018.394040,12.0,50,7,0
86396,2021-07-10 23:59:57,-2.6272,0,12.26196,0.00109,15.53497,0.1829,12.669310,70,1,...,31.79738,0.716042,1179.76929,348.03607,1,1018.394040,12.0,50,7,0
86397,2021-07-10 23:59:58,-2.3564,0,12.26196,-0.00101,15.53497,0.2907,12.669310,70,1,...,31.78784,0.716042,1152.66187,348.81366,1,1018.394040,12.0,50,7,0
86398,2021-07-10 23:59:59,-2.1541,0,12.26196,-0.00166,15.53497,0.3280,12.669310,70,1,...,31.77029,0.716042,1130.65723,349.13916,1,1018.394040,12.0,50,7,0


In [38]:
ATTACK_LABELS = VALIDATION_DF_RAW['attack']
Counter(ATTACK_LABELS)

Counter({0: 85515, 1: 885})

In [39]:
VALIDATION_DF

Unnamed: 0,C01,C02,C03,C04,C05,C06,C07,C08,C09,C10,...,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86
0,0.474293,0.0,-0.040688,0.519147,0.159551,0.481116,1.119160,0.0,0.0,0.0,...,0.437871,0.339753,0.071604,0.118089,0.467573,0.0,0.624077,0.0,0.0,0.020832
1,0.451714,0.0,-0.040688,0.481925,0.156242,0.484154,1.119160,0.0,0.0,0.0,...,0.437871,0.339437,0.071604,0.125189,0.452160,0.0,0.624077,0.0,0.0,0.020416
2,0.381911,0.0,-0.042098,0.465987,0.145560,0.424951,1.119160,0.0,0.0,0.0,...,0.431603,0.338055,0.071604,0.133427,0.435862,0.0,0.624077,0.0,0.0,0.019966
3,0.393676,0.0,-0.040829,0.491074,0.143888,0.463538,1.119160,0.0,0.0,0.0,...,0.423829,0.335802,0.071604,0.142116,0.409784,0.0,0.624077,0.0,0.0,0.019852
4,0.434492,0.0,-0.042110,0.517858,0.144267,0.447745,1.119160,0.0,0.0,0.0,...,0.424389,0.335577,0.071604,0.139048,0.389922,0.0,0.624077,0.0,0.0,0.019841
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86395,0.330458,0.0,0.056340,0.526499,0.151740,0.395722,0.816071,0.0,0.0,0.0,...,0.459111,0.367795,0.071604,0.372276,0.434196,0.0,0.553681,0.0,0.0,0.009081
86396,0.362960,0.0,0.056338,0.525410,0.151740,0.435508,0.816071,0.0,0.0,0.0,...,0.458786,0.367692,0.071604,0.368093,0.435414,0.0,0.553681,0.0,0.0,0.009081
86397,0.410882,0.0,0.056338,0.502541,0.151740,0.457808,0.816071,0.0,0.0,0.0,...,0.459199,0.366815,0.071604,0.360001,0.438139,0.0,0.553681,0.0,0.0,0.009081
86398,0.449047,0.0,0.056338,0.493209,0.151740,0.466378,0.816071,0.0,0.0,0.0,...,0.458795,0.365131,0.071604,0.352961,0.439501,0.0,0.553681,0.0,0.0,0.009081


In [40]:
VALID_INFERENCE = VALIDATION_DF.copy()
VALID_INFERENCE=VALID_INFERENCE.reset_index(drop=True)
VALID_INFERENCE.head()

Unnamed: 0,C01,C02,C03,C04,C05,C06,C07,C08,C09,C10,...,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86
0,0.474293,0.0,-0.040688,0.519147,0.159551,0.481116,1.11916,0.0,0.0,0.0,...,0.437871,0.339753,0.071604,0.118089,0.467573,0.0,0.624077,0.0,0.0,0.020832
1,0.451714,0.0,-0.040688,0.481925,0.156242,0.484154,1.11916,0.0,0.0,0.0,...,0.437871,0.339437,0.071604,0.125189,0.45216,0.0,0.624077,0.0,0.0,0.020416
2,0.381911,0.0,-0.042098,0.465987,0.14556,0.424951,1.11916,0.0,0.0,0.0,...,0.431603,0.338055,0.071604,0.133427,0.435862,0.0,0.624077,0.0,0.0,0.019966
3,0.393676,0.0,-0.040829,0.491074,0.143888,0.463538,1.11916,0.0,0.0,0.0,...,0.423829,0.335802,0.071604,0.142116,0.409784,0.0,0.624077,0.0,0.0,0.019852
4,0.434492,0.0,-0.04211,0.517858,0.144267,0.447745,1.11916,0.0,0.0,0.0,...,0.424389,0.335577,0.071604,0.139048,0.389922,0.0,0.624077,0.0,0.0,0.019841


In [45]:
len(list(np.around(np.arange(0.010, 0.020, 0.00005),5)))

200

In [46]:
WINDOW_GIVEN = 254
WINDOW_SIZE = 255

VAL_LEN = VALID_INFERENCE.shape[0]

best_thres = 0
best_f1 = 0

for THRESHOLD in list(np.around(np.arange(0.010, 0.020, 0.00005),5)):
    print(f'----------THRESHOLD : {THRESHOLD}----------')
    val_att = []

    for i in range(0, VAL_LEN - WINDOW_GIVEN):
        #초기화
        attack = 0

        # model loads
        given = np.array(VALID_INFERENCE.iloc[i:i+WINDOW_GIVEN])
        answer = np.array(VALID_INFERENCE.iloc[i+WINDOW_GIVEN])

        # match shape
        given = torch.from_numpy(given).float().unsqueeze(0).to(device)
        answer = torch.from_numpy(answer).unsqueeze(0).to(device)

        guess = model(given) 

        # diff
        diff = torch.abs(answer - guess)
        diff = np.mean(diff.cpu().detach().numpy())
        #print(f'i : {i} & diff : {diff}')

        if diff >= THRESHOLD:
            attack = 1
            VALID_INFERENCE.iloc[i+WINDOW_GIVEN] = np.array(guess.squeeze(0).cpu().detach().numpy())

        val_att.append(attack)

    # fill blanks
    add_zeros = [0]*WINDOW_GIVEN
    FINAL_LABELS = np.concatenate((add_zeros, val_att), axis=None)
    
    # print
    print(f'>>>> ATTACK_LABELS : {Counter(ATTACK_LABELS)}')
    print(f'>>>> FINAL_LABELS : {Counter(FINAL_LABELS)}')
    
    #  check
    if ATTACK_LABELS.shape[0] == FINAL_LABELS.shape[0]:
        TaPR = etapr.evaluate_haicon(anomalies=ATTACK_LABELS, predictions=FINAL_LABELS)
        print(f">>> F1: {TaPR['f1']:.3f} (TaP: {TaPR['TaP']:.3f}, TaR: {TaPR['TaR']:.3f})")
        print(f">>> # of detected anomalies: {len(TaPR['Detected_Anomalies'])}")
        print(f">>> Detected anomalies: {TaPR['Detected_Anomalies']}")
        
        if TaPR['f1']>best_f1:
            best_thres = THRESHOLD
            best_f1 = TaPR['f1']
            print("***Best Threshold updated!!***")


----------THRESHOLD : 0.01----------
>>>> ATTACK_LABELS : Counter({0: 85515, 1: 885})
>>>> FINAL_LABELS : Counter({1: 44989, 0: 41411})
>>> F1: 0.000 (TaP: 0.000, TaR: 0.000)
>>> # of detected anomalies: 0
>>> Detected anomalies: []
----------THRESHOLD : 0.01005----------
>>>> ATTACK_LABELS : Counter({0: 85515, 1: 885})
>>>> FINAL_LABELS : Counter({0: 45488, 1: 40912})
>>> F1: 0.006 (TaP: 0.003, TaR: 0.161)
>>> # of detected anomalies: 2
>>> Detected anomalies: [<TaPR_pkg.DataManage.Range.Range object at 0x7f946445d828>, <TaPR_pkg.DataManage.Range.Range object at 0x7f946445d278>]
***Best Threshold updated!!***
----------THRESHOLD : 0.0101----------
>>>> ATTACK_LABELS : Counter({0: 85515, 1: 885})
>>>> FINAL_LABELS : Counter({0: 43382, 1: 43018})
>>> F1: 0.000 (TaP: 0.000, TaR: 0.000)
>>> # of detected anomalies: 0
>>> Detected anomalies: []
----------THRESHOLD : 0.01015----------
>>>> ATTACK_LABELS : Counter({0: 85515, 1: 885})
>>>> FINAL_LABELS : Counter({1: 50729, 0: 35671})
>>> F1: 

KeyboardInterrupt: 

In [47]:
best_thres = 0.01315

# Inference

In [48]:
TEST_INFERENCE = TEST_DF.copy()
TEST_INFERENCE=TEST_INFERENCE.reset_index(drop=True)
TEST_INFERENCE.head()

Unnamed: 0,C01,C02,C03,C04,C05,C06,C07,C08,C09,C10,...,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86
0,0.455432,0.0,0.079812,0.479046,0.077046,0.434169,0.711769,0.0,0.0,0.0,...,0.378726,0.095024,0.285153,0.068104,0.381526,0.0,0.393961,0.0,0.0,0.168791
1,0.471678,0.0,0.079812,0.499628,0.077046,0.429998,0.711769,0.0,0.0,0.0,...,0.377826,0.099579,0.285153,0.06924,0.404094,0.0,0.393961,0.0,0.0,0.1733
2,0.459304,0.0,0.079812,0.482713,0.077046,0.410346,0.711769,0.0,0.0,0.0,...,0.378191,0.103601,0.285153,0.068779,0.429462,0.0,0.393961,0.0,0.0,0.176731
3,0.458079,0.0,0.079812,0.478654,0.077046,0.4084,0.711769,0.0,0.0,0.0,...,0.379124,0.109204,0.285153,0.068734,0.461282,0.0,0.393961,0.0,0.0,0.178719
4,0.380719,0.0,0.079812,0.515098,0.077046,0.367874,0.711769,0.0,0.0,0.0,...,0.379218,0.109763,0.285153,0.066761,0.472272,0.0,0.393961,0.0,0.0,0.182832


In [49]:
WINDOW_GIVEN = 254
WINDOW_SIZE = 255
#THRESHOLD = 0.04
THRESHOLD = best_thres

TEST_LEN = TEST_INFERENCE.shape[0]
att = []

for i in trange(0, TEST_LEN - WINDOW_GIVEN):
    #초기화
    attack = 0

    # model loads
    given = np.array(TEST_INFERENCE.iloc[i:i+WINDOW_GIVEN])
    answer = np.array(TEST_INFERENCE.iloc[i+WINDOW_GIVEN])
    
    # match shape
    given = torch.from_numpy(given).float().unsqueeze(0).to(device)
    answer = torch.from_numpy(answer).unsqueeze(0).to(device)
    
    guess = model(given) 

    # diff
    diff = torch.abs(answer - guess)
    diff = np.mean(diff.cpu().detach().numpy())
    #print(f'i : {i} & diff : {diff}')

    if diff >= THRESHOLD:
        attack = 1
        TEST_INFERENCE.iloc[i+WINDOW_GIVEN] = np.array(guess.squeeze(0).cpu().detach().numpy())
    
    att.append(attack)
    

HBox(children=(FloatProgress(value=0.0, max=274546.0), HTML(value='')))




In [50]:
add_zeros = [0]*WINDOW_GIVEN
print(len(add_zeros))

254


In [51]:
from collections import Counter
Counter(att)

Counter({0: 1, 1: 274545})

In [52]:
concat_attack = np.concatenate((add_zeros, att), axis=None)
print(len(concat_attack))
print()
print(concat_attack)

274800

[0 0 0 ... 1 1 1]


In [53]:
print(TEST_LEN)
print(len(concat_attack))

274800
274800


In [54]:
Counter(concat_attack)

Counter({0: 255, 1: 274545})

In [55]:
submission = pd.read_csv('./HAICon2021_dataset/sample_submission.csv')
submission.shape

(274800, 2)

In [56]:
submission['attack'] = pd.Series(concat_attack)
submission

Unnamed: 0,timestamp,attack
0,2021-07-13 16:00:01,0
1,2021-07-13 16:00:02,0
2,2021-07-13 16:00:03,0
3,2021-07-13 16:00:04,0
4,2021-07-13 16:00:05,0
...,...,...
274795,2021-07-16 23:59:56,1
274796,2021-07-16 23:59:57,1
274797,2021-07-16 23:59:58,1
274798,2021-07-16 23:59:59,1


In [57]:
submission.to_csv(f'GPT_254_renewal_{THRESHOLD}.csv', index=False)

In [None]:
# torch.save({'model': model.state_dict(), 
#                 'Threshold': 0.025, 
#                }, './checkpoints/GPT_89.tar')

----