In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [2]:
# Location of the cleaned malware CSV. The MALWARE_DATA_CSV environment
# variable overrides it, so the notebook is not tied to one machine's
# directory layout; when the variable is unset the original path is used.
file_path = os.environ.get("MALWARE_DATA_CSV", r"C:\Users\moaya\CleanMalwareData.csv")
data = pd.read_csv(file_path)
# Show the first rows as the cell output.
data.head()

Unnamed: 0,FlowID0,FlowID1,FlowID2,SourceIP0,SourceIP1,SourceIP2,SourceIP3,DestinationIP0,DestinationIP1,DestinationIP2,...,SubflowBwdBytes,Init_Win_bytes_forward,Init_Win_bytes_backward,act_data_pkt_fwd,min_seg_size_forward,ActiveMax,ActiveMin,IdleMax,IdleMin,Label
0,57050,443,6,31,13,71,1,10,42,0,...,0,321,-1,0,32,0,0,0,0,1
1,57050,443,6,10,42,0,151,31,13,71,...,0,1601,-1,1,32,0,0,0,0,1
2,40019,50481,6,10,42,0,151,157,55,130,...,0,1402,33,0,32,0,0,0,0,1
3,60517,443,6,10,42,0,151,52,179,153,...,0,1550,510,0,32,0,0,0,0,1
4,54219,443,6,10,42,0,151,104,46,97,...,0,1550,510,0,32,0,0,0,0,1


In [3]:
# (rows, columns) of the loaded frame.
data.shape

(2616566, 58)

In [5]:
import random
import copy
from copy import deepcopy as dp
from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary

import warnings
# Install a blanket 'ignore' filter: no category or module is given, so every
# warning raised after this point is suppressed.
warnings.filterwarnings('ignore')

In [7]:
def norm_fit(df_1, saveM=True, sc_name='zsco'):
    """Fit one scikit-learn scaler on ``df_1`` and return the scaled frame.

    Parameters
    ----------
    df_1 : pandas.DataFrame
        Frame to fit on and transform. Its index and columns are copied onto
        the result.
    saveM : bool, default True
        When equal to ``False`` only the scaled frame is returned; otherwise
        the fitted scaler is returned together with it.
    sc_name : str, default 'zsco'
        Key selecting the scaler:
        'zsco' StandardScaler, 'mima' MinMaxScaler, 'maxb' MaxAbsScaler,
        'robu' RobustScaler, 'norm' Normalizer,
        'quan' QuantileTransformer(n_quantiles=100, random_state=0,
        output_distribution="normal"), 'powe' PowerTransformer.

    Returns
    -------
    pandas.DataFrame or (pandas.DataFrame, scaler)
        The transformed frame, plus the fitted scaler unless ``saveM`` is
        ``False``.

    Raises
    ------
    KeyError
        If ``sc_name`` is not one of the keys listed above.
    """
    # key -> (class name in sklearn.preprocessing, constructor kwargs)
    scaler_specs = {
        'zsco': ('StandardScaler', {}),
        'mima': ('MinMaxScaler', {}),
        'maxb': ('MaxAbsScaler', {}),
        'robu': ('RobustScaler', {}),
        'norm': ('Normalizer', {}),
        'quan': ('QuantileTransformer',
                 {'n_quantiles': 100, 'random_state': 0, 'output_distribution': "normal"}),
        'powe': ('PowerTransformer', {}),
    }
    # Reject unknown keys up front with a message that lists the valid ones.
    if sc_name not in scaler_specs:
        raise KeyError(
            "unknown sc_name %r; expected one of %s" % (sc_name, sorted(scaler_specs))
        )

    from sklearn import preprocessing as sk_pre

    # Only the requested scaler is instantiated.
    class_name, init_kwargs = scaler_specs[sc_name]
    ss_1 = getattr(sk_pre, class_name)(**init_kwargs)

    df_2 = pd.DataFrame(ss_1.fit_transform(df_1), index=df_1.index, columns=df_1.columns)

    # Equality (not truthiness) is deliberate: only values equal to False
    # select the frame-only return, exactly as callers have relied on so far.
    if saveM == False:
        return df_2
    return df_2, ss_1

def norm_tra(df_1, ss_x):
    """Transform ``df_1`` with ``ss_x`` and re-wrap the result as a DataFrame.

    ``ss_x`` is any object exposing ``transform``; it is not fitted here.
    The returned frame carries the same index and column labels as ``df_1``.
    """
    transformed = ss_x.transform(df_1)
    return pd.DataFrame(transformed, index=df_1.index, columns=df_1.columns)

def seed_everything(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Parameters
    ----------
    seed : int, default 42
        Value passed to every seeding call and written (as a string) to the
        ``PYTHONHASHSEED`` environment variable.
    """
    random.seed(seed)
    # Exported for processes started from here; the running interpreter's
    # hash randomization was already fixed when it started.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device, not just the current one. This is a no-op when
    # CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Disable cuDNN autotuning: with benchmarking on, the algorithm picked
    # can differ between runs even when `deterministic` is set.
    torch.backends.cudnn.benchmark = False
    
# Apply the seeding right away with seed 42.
seed_everything(seed=42)

In [8]:
# The integers 0 through 15, as a list (presumably one seed per run; no
# consumer of this list is visible in this part of the notebook).
SEED = list(range(16))

In [None]:
def dwise_conv(ch_in, stride=1):
    """Build a depthwise Conv1d -> BatchNorm1d -> ReLU6 stack.

    The convolution uses kernel size 3, padding 1, no bias, and
    ``groups=ch_in`` so each input channel is filtered on its own; the
    channel count is unchanged. ``stride`` is forwarded to the convolution.
    """
    depthwise = nn.Conv1d(
        in_channels=ch_in,
        out_channels=ch_in,
        kernel_size=3,
        stride=stride,
        padding=1,
        groups=ch_in,
        bias=False,
    )
    return nn.Sequential(depthwise, nn.BatchNorm1d(ch_in), nn.ReLU6(inplace=True))

def conv1x1(ch_in, ch_out):
    """Build a pointwise Conv1d -> BatchNorm1d -> ReLU6 stack.

    The convolution has kernel size 1, stride 1, no padding and no bias, so
    it only remaps ``ch_in`` channels to ``ch_out`` channels.
    """
    pointwise = nn.Conv1d(ch_in, ch_out, kernel_size=1, stride=1, padding=0, bias=False)
    stages = [pointwise, nn.BatchNorm1d(ch_out), nn.ReLU6(inplace=True)]
    return nn.Sequential(*stages)

def conv3x3(ch_in, ch_out, stride):
    """Build a kernel-size-3 Conv1d -> BatchNorm1d -> ReLU6 stack.

    The convolution maps ``ch_in`` to ``ch_out`` channels with padding 1,
    the given ``stride`` and no bias.
    """
    conv = nn.Conv1d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1, bias=False)
    stages = [conv, nn.BatchNorm1d(ch_out), nn.ReLU6(inplace=True)]
    return nn.Sequential(*stages)
class InvertedBlock(nn.Module):
    """Expand -> depthwise -> project block with an optional skip connection.

    Parameters
    ----------
    ch_in, ch_out : int
        Input and output channel counts.
    expand_ratio : int
        Multiplier giving the hidden width ``ch_in * expand_ratio``. When it
        is 1 the leading 1x1 expansion is omitted.
    stride : int
        Stride of the depthwise convolution; must be 1 or 2.

    The input is added to the output only when ``stride == 1`` and
    ``ch_in == ch_out``.

    NOTE(review): the projection is built with ``conv1x1``, which ends in
    ReLU6; the published MobileNetV2 design describes an activation-free
    projection -- confirm that the extra non-linearity is intended.
    """

    def __init__(self, ch_in, ch_out, expand_ratio, stride):
        super().__init__()

        self.stride = stride
        assert stride in (1, 2)

        hidden_dim = ch_in * expand_ratio

        # A residual add needs matching shapes: same channels, no downsampling.
        self.use_res_connect = (ch_in == ch_out) and (self.stride == 1)

        # Optional pointwise expansion, then depthwise conv, then pointwise projection.
        stages = [conv1x1(ch_in, hidden_dim)] if expand_ratio != 1 else []
        stages.append(dwise_conv(hidden_dim, stride=stride))
        stages.append(conv1x1(hidden_dim, ch_out))

        self.layers = nn.Sequential(*stages)

    def forward(self, x):
        """Run the block, adding the input back when the skip is enabled."""
        if not self.use_res_connect:
            return self.layers(x)
        return x + self.layers(x)

class MobileNetV2(nn.Module):
    """MobileNetV2-style network built from 1-D convolutions.

    Layout: a stride-2 kernel-3 stem with 32 channels, the stack of
    ``InvertedBlock`` stages listed in ``self.configs``, a pointwise
    convolution to ``last_channel`` channels, global average pooling over the
    length axis, then dropout and a linear classifier.

    Parameters
    ----------
    ch_in : int, default 3
        Number of input channels.
    n_classes : int, default 1000
        Size of the classifier output.
    last_channel : int, default 1200
        Width of the final pointwise convolution and of the classifier input.
        Previously a literal repeated in three places; the default keeps the
        earlier behaviour.
    """

    def __init__(self, ch_in=3, n_classes=1000, last_channel=1200):
        super(MobileNetV2, self).__init__()

        # One row per stage:
        #   t = expansion ratio, c = output channels,
        #   n = number of blocks, s = stride of the first block in the stage
        self.configs=[
            #t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1]
        ]
        self.last_channel = last_channel

        self.stem_conv = conv3x3(ch_in, 32, stride=2)

        layers = []
        input_channel = 32
        for t, c, n, s in self.configs:
            for i in range(n):
                # Only the first block of a stage applies the stage stride.
                stride = s if i==0 else 1
                layers.append(InvertedBlock(ch_in=input_channel, ch_out=c, expand_ratio=t, stride=stride))
                input_channel = c

        self.layers = nn.Sequential(*layers)

        self.last_conv = conv1x1(input_channel, last_channel)
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, n_classes)
        )
        self.avg_pool = nn.AdaptiveAvgPool1d(1)

    def forward(self, x):
        """Return classifier outputs for ``x``.

        assumes ``x`` is a batched ``(batch, ch_in, length)`` tensor -- TODO confirm
        """
        x = self.stem_conv(x)
        x = self.layers(x)
        x = self.last_conv(x)
        # Pooling leaves a trailing length-1 axis; flatten everything after
        # the batch axis instead of reshaping to a hard-coded width.
        x = self.avg_pool(x).flatten(1)
        x = self.classifier(x)

        return x

In [None]:
if __name__=="__main__":
    # Model check: build the network with 3 input channels and a single
    # output unit, then run torchsummary's `summary` on CPU for an input of
    # size (3, 224), i.e. without the batch dimension.
    model = MobileNetV2(ch_in=3, n_classes= 1)
    summary(model, (3,224), device='cpu')