### Import

In [1]:
import copy
from typing import Optional, Any, Union, Callable
import torch
from torch import Tensor
import torch.nn as n
from torch.nn import functional as F

### Define 

In [2]:
#class Transformer(
#    d_model: int = 512,
#    nhead: int = 8,
#    num_encoder_layers: int = 6,
#    num_decoder_layers: int = 6,
#    dim_feedforward: int = 2048,
#    dropout: float = 0.1,
#    activation: str | ((Tensor) -> Tensor) = F.relu,
#    custom_encoder: Any | None = None,
#    custom_decoder: Any | None = None,
#    layer_norm_eps: float = 0.00001,
#    batch_first: bool = False,
#    norm_first: bool = False,
#    device: Any | None = None,
#    dtype: Any | None = None
#)
# Transformer_v1 =  n.Transformer(d_model= 63, nhead=7,dim_feedforward= 512 ) 

### In network traffic

In [3]:
import numpy as np
import pandas as pd

#### Load data

In [4]:
#input = np.load('../SHAP/Network/test/task1/sources/task1_ver_kai_traffic.npy')
#input = pd.read_csv('./sources/ml_data.csv')
en_input = np.load('./sources/ml_data(5000).npy')
de_input = np.load('./sources/ml_data_labels(5000).npy',allow_pickle=True) 

In [5]:
columns = [ 'srcip_mean', 'Dstip_mean','pro_mean','srcport_mean','dstport_mean','IPLen_mean','iat_mean',
                    'srcip_std', 'Dstip_std','pro_std','srcport_std','dstport_std','IPLen_std','iat_std',
                    'srcip_skew', 'Dstip_skew','pro_skew','srcport_skew','dstport_skew','IPLen_skew','iat_skew',
                    'srcip_kurt', 'Dstip_kurt','pro_kurt','srcport_kurt','dstport_kurt','IPLen_kurt','iat_kurt',
                    'srcip_entropy', 'Dstip_entropy','pro_entropy','srcport_entropy','dstport_entropy','IPLen_entropy','iat_entropy',
                    'srcip_distinct', 'Dstip_distinct','pro_distinct','srcport_distinct','dstport_distinct','IPLen_distinct','iat_distinct',
                    'srcip_q1', 'Dstip_q1','pro_q1','srcport_q1','dstport_q1','IPLen_q1','iat_q1',
                    'srcip_q2', 'Dstip_q2','pro_q2','srcport_q2','dstport_q2','IPLen_q2','iat_q2',
                    'srcip_q3', 'Dstip_q3','pro_q3','srcport_q3','dstport_q3','IPLen_q3','iat_q3']

#### Embedding 

#### Positional Encoding (PE)

In [7]:
## PE(pos, 2i) = sin(pos/10000^(2i/d_model))
## PE(pos, 2i+1) = cos(pos/10000^(2i/d_model))

## Encoder 
def pos_encoding_2d(pos, i, d_model):
    return np.sin(pos / (10000 ** ((2 * i)/d_model))) if i%2==0 else np.cos(pos / (10000 ** ((2 * i)/d_model)))

pos_encoding = np.zeros((en_input.shape[0], en_input.shape[1]))
for pos in range(en_input.shape[0]):
    for i in range(en_input.shape[1]):
        pos_encoding[pos, i] = pos_encoding_2d(pos, i, en_input.shape[1])

en_input_df = pd.DataFrame(en_input, columns=columns)
pos_encoding_df = pd.DataFrame(pos_encoding, columns=[f'{columns[i]}_i{i}' for i in range(pos_encoding.shape[1])])
input_and_pe_df = pd.concat([en_input_df, pos_encoding_df], axis=1)
# sort columns by name group with i
input_and_pe_df = input_and_pe_df.reindex(sorted(input_and_pe_df.columns), axis=1)

encoder_input = en_input + pos_encoding
encoder_input_df = pd.DataFrame(encoder_input, columns=columns)

In [77]:
pos_encoding[1,62] # Dstip_mean_i1

1.3396277245180169e-08

In [46]:
## Decoder
vocab = [de_input[i] for i in range(len(de_input))]
embedding = n.Embedding(num_embeddings=len(vocab), embedding_dim=63)
#out
em_input = torch.tensor(de_input)
de_embedding_out = embedding(em_input)

pos_decoding = np.zeros((de_input.shape[0],de_embedding_out.shape[2] ))
for pos in range(de_input.shape[0]):
    for i in range(de_embedding_out.shape[2]):
        pos_decoding[pos, i] = pos_encoding_2d(pos, i, de_embedding_out.shape[2])

de_input_df = pd.DataFrame(de_input, columns=['label'])
de_pos_encoding_df = pd.DataFrame(pos_decoding, columns=[f'embedding_dem{i}' for i in range(pos_decoding.shape[1])])
# de_input_and_pe_df = pd.concat([de_input_df, de_encoding_df], axis=1)

#de_embedding_out_df = pd.DataFrame(de_embedding_out[i] for i in range(de_embedding_out.shape[0]))

deo_df = [de_embedding_out[i].detach().numpy() for i in range(de_embedding_out.shape[0])]
de_embedding_out_df = pd.DataFrame(deo_df[0])
for i in range(1, len(deo_df)):
    de_embedding_out_df = pd.concat([de_embedding_out_df, pd.DataFrame(deo_df[i])], axis=0)
de_embedding_out_df = de_embedding_out_df.reset_index(drop=True)

rd_de_embedding_out = (de_embedding_out.squeeze(dim=1)).detach().numpy()
decoder_input =  rd_de_embedding_out + pos_decoding
decoder_input_df = pd.DataFrame(decoder_input)
decoder_input_df

##### Visualize

In [93]:
import  plotly.graph_objects as go
fig = go.Figure()

for i in range(pos_encoding_df.shape[1]):
    fig.add_trace(go.Scatter(y=pos_encoding_df.iloc[:,i], name=pos_encoding_df.columns[i]))
fig.show()

#### Encoder

In [10]:
encoder_layer_v1 = n.TransformerEncoderLayer(d_model=63, nhead = 3) # 63 features, 3 heads
Transformer_encoder_v1 = n.TransformerEncoder(encoder_layer_v1, num_layers=6) # 6 layers, 6 encoder layers
#encoder_output = Transformer_encoder_v1(encoder_input)
#encoder_input = np.array(encoder_input)
encoder_output = Transformer_encoder_v1(torch.tensor(encoder_input).float()) 

In [11]:
a_1 = [1,2,3,4,5]
print(a_1)
print(torch.tensor(a_1).float())

[1, 2, 3, 4, 5]
tensor([1., 2., 3., 4., 5.])


In [12]:
Transformer_v1 = n.Transformer(d_model= 63, nhead=3 ,dim_feedforward= 252 )

In [106]:
out = Transformer_v1(torch.tensor(encoder_input).float(),torch.tensor(decoder_input).float())

In [107]:
out

tensor([[-1.7866,  2.2009, -0.2182,  ...,  0.4818, -1.3183,  0.0574],
        [-0.8623,  2.1220, -0.1918,  ...,  0.7956, -1.9441, -0.9264],
        [-1.7747,  1.4219, -0.3720,  ...,  1.3772, -1.7812,  0.3375],
        ...,
        [-2.5892,  1.6828, -0.3970,  ...,  2.1217, -0.8771,  0.6092],
        [-1.7894,  2.0973, -0.4050,  ...,  1.6138, -1.9466,  0.1759],
        [-3.0093,  1.1291, -0.8117,  ...,  1.9376, -1.6703,  0.5880]],
       grad_fn=<NativeLayerNormBackward0>)

In [100]:
torch.tensor(decoder_input).float()

torch.Size([1800, 63])

### Torch gpu

In [17]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# torch.cuda.is_available()
# torch.cuda.get_device_name(0)