In [1]:
import keras
import numpy
import scapy.plist
from scapy.all import *
import networkx as nx
import numpy as np
import scipy.sparse as sp
from scapy.layers.inet import TCP, Ether, IP, UDP
from scapy.layers.dns import DNS
import tensorflow as tf
import tensorflow.keras as tfk
from scipy.linalg import fractional_matrix_power
import pandas as pd
from sklearn.model_selection import train_test_split



In [2]:
class SgcLayer(tfk.layers.Layer):
    """Linear projection of the last input axis followed by a ReLU.

    The layer owns a single trainable matrix ``teta`` of shape
    ``(input_shape[-1], outputsNumber)`` and returns
    ``relu(inputs @ teta)``. There is no bias term.

    Args:
        outputsNumber: size of the last axis of the layer output.
        **kwargs: forwarded unchanged to ``tf.keras.layers.Layer``
            (for example ``name`` or ``dtype``).
    """

    def __init__(self, outputsNumber, **kwargs):
        super(SgcLayer, self).__init__(**kwargs)
        self.outputs = outputsNumber

    def build(self, input_shape):
        # The weight name is passed by keyword: the positional order of
        # ``add_weight`` differs between Keras releases (name-first vs.
        # shape-first), and a positional "teta" collides with ``shape=``
        # on the shape-first signature.
        self.teta = self.add_weight(name="teta",
                                    shape=[input_shape[-1], self.outputs],
                                    trainable=True,
                                    initializer="random_normal")

    def call(self, inputs):
        # Project the feature axis and apply the non-linearity.
        return tf.nn.relu(tf.matmul(inputs, self.teta))



In [3]:
class cgnn_model(tf.keras.Model):
    """Two stacked ``SgcLayer`` blocks, mean pooling over axis 1, and a softmax head.

    For an input of shape ``(batch, nodes, features)`` the output has shape
    ``(batch, 1, 6)``: the pooled axis is kept with length 1, so targets must
    be shaped ``(batch, 1, 6)`` as well.
    """

    def __init__(self):
        super(cgnn_model, self).__init__()
        # NOTE(review): 516 may have been meant as 512 - confirm before changing.
        self.sgc1 = SgcLayer(516)
        self.sgc2 = SgcLayer(256)
        self.dense = tf.keras.layers.Dense(6)

    def call(self, inputs):
        x = self.sgc1(inputs)
        x = self.sgc2(x)
        # Average over the whole node axis. This equals an average pool whose
        # window spans the full axis, but it does not build a new layer object
        # on every forward pass and does not need the static size of axis 1.
        x = tf.reduce_mean(x, axis=1, keepdims=True)
        x = self.dense(x)
        return tf.nn.softmax(x)

In [4]:
def get_diagonal_degree(A):
    """Return a ``len(A) x len(A)`` nested list whose diagonal holds the row sums of ``A``.

    Entry ``[i][i]`` is the sum of every element of ``A[i]``; all other
    entries are ``0``.
    """
    size = len(A)
    degree = [[0] * size for _ in range(size)]
    for node in range(size):
        row = A[node]
        degree[node][node] = sum(row[col] for col in range(len(row)))
    return degree


def sum_matrices(A, B):
    """Return the element-wise sum of two equally shaped matrices as an ndarray.

    Args:
        A: array-like matrix.
        B: array-like matrix with the same shape as ``A``.

    Returns:
        ``numpy.ndarray`` holding ``A + B``. The dtype follows numpy's usual
        promotion rules, so fractional values are kept.

    Raises:
        ValueError: if the two shapes differ (no silent broadcasting).
    """
    left = np.asarray(A)
    right = np.asarray(B)
    if left.shape != right.shape:
        raise ValueError(
            "sum_matrices expects equal shapes, got %s and %s" % (left.shape, right.shape)
        )
    return left + right


def getSX(session):
    """Propagate the node features of one session over its chain graph.

    Builds the adjacency ``A`` with ``calculate_A``, adds self-loops
    (``A_t = A + I``), and applies the symmetric normalisation
    ``S = D_t^-1/2 . A_t . D_t^-1/2`` where ``D_t`` is the diagonal degree
    matrix of ``A_t``. Returns ``S . X``.

    Args:
        session: sequence of equally long numeric rows, one row per node.

    Returns:
        ``numpy.ndarray`` of floats with the same shape as ``session``.
        An empty session is returned as an empty array.
    """
    X = np.asarray(session, dtype=float)
    if len(X) == 0:
        # Nothing to propagate; avoids indexing into an empty adjacency.
        return X

    A = calculate_A(session)
    A_t = sum_matrices(A, np.identity(len(A)))

    # Degrees are the diagonal of D_t. Each is at least 1 because of the
    # self-loop, so the inverse square root is always defined.
    degrees = np.diag(np.array(get_diagonal_degree(A_t), dtype=float))
    d_inv_sqrt = 1.0 / np.sqrt(degrees)

    # Scaling row i and column j by d_i^-1/2 and d_j^-1/2 is the same as
    # multiplying by the diagonal matrices on both sides.
    S = d_inv_sqrt[:, None] * A_t * d_inv_sqrt[None, :]
    return np.asarray(np.dot(S, X))

def complete(s, l=1500):
    """Return the items of ``s`` as a list of exactly ``l`` elements.

    Shorter inputs are right-padded with ``0``; longer inputs are truncated,
    so every row has the same width and can be stacked into a matrix.

    Args:
        s: iterable of values (iterating ``bytes`` yields ints).
        l: target length.
    """
    values = list(s)[:l]
    return values + [0] * (l - len(values))

def calculate_A(session):
    """Return the chain adjacency matrix for the items of ``session``.

    Entry ``[r][c]`` is 1 when ``r`` and ``c`` are neighbouring positions
    (their indices differ by exactly one) and 0 otherwise.
    """
    node_count = len(session)
    rows = [
        [1 if abs(r - c) == 1 else 0 for c in range(node_count)]
        for r in range(node_count)
    ]
    return np.array(rows)



In [5]:
def createGraphFromSession(pcapName, data_dir="/Users/yuvalmarmer/Files/Study/CyberAnalysis/Data-For-Project/filtered_raw_dataset_temu2016_first_10_sec/w_hi_chrome/"):
    """Read one pcap file and return its propagated feature matrix.

    Every packet is passed through ``preprocessing``; packets for which it
    returns nothing are skipped. The remaining byte strings are brought to a
    fixed width with ``complete`` and the rows are handed to ``getSX``.

    Args:
        pcapName: file name of the capture inside ``data_dir``.
        data_dir: directory holding the captures. Defaults to the location
            the notebook was written against; pass another directory to run
            on a different machine.

    Returns:
        Whatever ``getSX`` returns for the collected rows.
    """
    import os  # local import: only needed to build the capture path

    packets = rdpcap(os.path.join(data_dir, pcapName))
    rows = []
    for p in packets:
        packet_proc = preprocessing(p)
        if packet_proc:
            rows.append(complete(packet_proc))

    return getSX(rows)

In [6]:
def preprocessing(packet):
    """Turn a scapy packet into anonymised IP-level bytes, or ``None``.

    The source and destination addresses of the IP layer are overwritten with
    zeros (the passed packet object is modified in place). For TCP packets the
    bytes of the IP layer and everything above it are returned. For UDP
    packets 12 zero bytes are inserted directly after the 8-byte UDP header.

    Args:
        packet: scapy packet.

    Returns:
        ``bytes`` for IP packets carrying TCP or UDP, otherwise ``None``.
    """
    if not packet.haslayer(IP):
        return None

    # Mask the endpoints so the addresses cannot be used as features.
    packet[IP].src = "0.0.0.0"
    packet[IP].dst = "0.0.0.0"

    is_tcp = packet.haslayer(TCP)
    if not is_tcp and not packet.haslayer(UDP):
        return None

    # Serialise from the IP layer instead of cutting a fixed 14 bytes off the
    # frame, so captures with a different link-layer header length still work.
    ip_bytes = bytes(packet[IP])
    if is_tcp:
        return ip_bytes

    # The low nibble of the first IP byte is the header length in 32-bit
    # words; the UDP header is the 8 bytes that follow it.
    # Assumes UDP sits directly on top of this IP header - TODO confirm for
    # tunnelled traffic.
    udp_header_end = (ip_bytes[0] & 0x0F) * 4 + 8
    return ip_bytes[:udp_header_end] + bytes(12) + ip_bytes[udp_header_end:]


In [7]:
def getTrainTestGraphs(csv_path="/Users/yuvalmarmer/Files/Study/CyberAnalysis/Data-For-Project/filtered_raw_dataset_temu2016_first_10_sec/w_hi_chrome/id.csv",
                       test_size=0.98,
                       random_state=42,
                       max_test_graphs=50):
    """Split the sessions listed in the index CSV and build their feature matrices.

    The CSV must provide the columns ``fname`` and ``label``. Each file name
    is converted with ``createGraphFromSession``.

    Args:
        csv_path: path of the index CSV.
        test_size: fraction passed to ``train_test_split``.
        random_state: seed passed to ``train_test_split``.
        max_test_graphs: only this many test sessions are converted.

    Returns:
        Tuple ``(train_graphs, train_labels, test_graphs, test_labels)``.
        Graphs are nested Python lists, labels are Python ints, and the test
        labels line up one-to-one with the test graphs.
    """
    df = pd.read_csv(csv_path)
    train_name, test_name, train_label, test_label = train_test_split(df["fname"],
                                                                      df["label"],
                                                                      test_size=test_size,
                                                                      random_state=random_state)

    # Restrict names and labels together so they stay aligned.
    test_name = test_name.iloc[:max_test_graphs]
    test_label = test_label.iloc[:max_test_graphs]

    # Graphs are returned as nested lists for both splits, so callers can pad
    # them with list.append / list.extend.
    GraphsForTrain = [np.ndarray.tolist(createGraphFromSession(name)) for name in train_name]
    GraphsForTest = [np.ndarray.tolist(createGraphFromSession(name)) for name in test_name]

    LableForTrain = [int(label) for label in train_label]
    LableForTest = [int(label) for label in test_label]

    return GraphsForTrain, LableForTrain, GraphsForTest, LableForTest  # the graphs is SX


In [None]:
# Load the feature matrices and labels, then build and compile the classifier.
GraphsForTrain, LabelsForTrain, GraphsForTest, LabelsForTest = getTrainTestGraphs()
m = cgnn_model()
m.compile(loss='categorical_crossentropy', optimizer='adam')

# Pad every training graph with all-zero rows up to the largest node count so
# the batch is rectangular. Each padding row is a fresh list (no aliasing).
max_n = max(len(g) for g in GraphsForTrain)
for g in GraphsForTrain:
    g.extend([0] * 1500 for _ in range(max_n - len(g)))

graph_to_train = tf.convert_to_tensor(GraphsForTrain)
graph_to_train = tf.cast(graph_to_train, tf.float32)

# Map each label value seen in the training split to a class index. Sorting
# makes the mapping independent of set iteration order.
dict_label = {label: index for index, label in enumerate(sorted(set(LabelsForTrain)))}

# One-hot encode the training labels; 6 matches the width of the model output.
list_of_lables = []
for item in LabelsForTrain:
    list_temp = [0] * 6
    list_temp[dict_label[item]] = 1
    list_of_lables.append(list_temp)

label_graph_to_train = tf.convert_to_tensor(list_of_lables)

  X = np.array(session)


In [None]:
# The model output is (batch, 1, 6), so the one-hot labels get a matching
# middle axis. -1 lets the batch size follow the data instead of assuming 32.
m.fit(graph_to_train, np.reshape(label_graph_to_train, (-1, 1, 6)), epochs=30)

In [None]:
# One-hot encode the labels of the first 50 test sessions with the mapping
# built from the training labels.
# NOTE(review): a test label that never occurs in the training split raises
# KeyError here - confirm the split always covers every class.
list_of_lables_test = []
for item in LabelsForTest[:50]:
    list_temp = [0] * 6
    list_temp[dict_label[item]] = 1
    list_of_lables_test.append(list_temp)

# Bring every test graph to exactly max_n rows, the node count used for
# training: longer graphs are cut, shorter ones are padded with zero rows.
# Works for graphs stored as nested lists or as ndarrays, and leaves
# GraphsForTest itself unmodified so the cell can be re-run.
padded_test_graphs = []
for g in GraphsForTest[:50]:
    rows = np.asarray(g, dtype=np.float32).reshape(-1, 1500)[:max_n]
    zero_rows = np.zeros((max_n - len(rows), 1500), dtype=np.float32)
    padded_test_graphs.append(np.concatenate([rows, zero_rows], axis=0))

# Evaluate on the test graphs (not the training graphs).
graph_to_test = tf.convert_to_tensor(np.stack(padded_test_graphs))
graph_to_test = tf.cast(graph_to_test, tf.float32)

# Labels get the same (batch, 1, 6) layout as the model output.
label_graph_to_test = np.reshape(np.array(list_of_lables_test, dtype=np.float32), (-1, 1, 6))

m.evaluate(graph_to_test, label_graph_to_test)
# df = pd.read_csv("./data/w_hi_chrome/id.csv")
# print(len(df["label"].unique()))
