In [12]:
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt 

def readPacketHist(path, maxFiles=800):
    """Read packet-size files from a directory and convert each one to a histogram.

    Each file is read as text, split on single spaces, converted to floats
    (non-numeric tokens are dropped) and passed to createHistFromPkst.
    Files whose histogram is rejected (createHistFromPkst returns the string
    "error") are skipped.

    Args:
        path: Directory containing the packet files.
        maxFiles: Maximum number of directory entries to read (default 800).

    Returns:
        A list of 1-D numpy arrays, one per accepted file.
    """
    # os.listdir returns entries in arbitrary order. Callers split the result
    # into test/train by position, so sort to make the selection and the split
    # reproducible across runs and machines.
    files = sorted(os.listdir(path))[:maxFiles]
    X_data = []
    for f in files:
        # os.path.join also works when `path` has no trailing separator.
        packets = readFromFile(os.path.join(path, f)).split(" ")
        pkts = convert_stringArrays_to_floatArray(packets)
        hist = createHistFromPkst(pkts)
        if hist != "error":
            X_data.append(np.array(hist))
    return X_data


def readFromFile(path):
    """Return the full text content of the file at `path`."""
    with open(path, 'r') as handle:
        return handle.read()


def isfloat(value):
    """Tell whether float(value) succeeds without raising ValueError."""
    try:
        float(value)
    except ValueError:
        # float() rejected the text, so it is not a number literal.
        return False
    else:
        return True


def convert_stringArrays_to_floatArray(array):
    """Convert every token accepted by isfloat to a float, dropping the rest.

    The relative order of the surviving tokens is preserved.
    """
    return [float(token) for token in array if isfloat(token)]


def buil_db(array, value):
    """Pair every item of `array` with the same label `value`.

    Returns:
        A tuple (data_X, data_Y): data_X is a new list holding the items of
        `array` in order, data_Y is a list of the same length filled with
        `value`.
    """
    samples = list(array)
    labels = [value] * len(samples)
    return samples, labels


def convert_stringArrays_to_intArray(array):
    """Convert every numeric token to an int, dropping tokens that are not numbers.

    A token is considered numeric when float() accepts it. Integer literals
    are converted exactly with int(); other numeric text such as "1.5" or
    "1e3", which int() rejects, is converted through float() and truncated
    toward zero. NaN and infinities have no integer value and are skipped.

    Args:
        array: Iterable of tokens (typically strings).

    Returns:
        A list of ints in the order the tokens appeared.
    """
    intArray = []

    for k in array:
        try:
            asFloat = float(k)
        except ValueError:
            continue  # not a number at all

        try:
            # Exact path: keeps full precision for large integer literals.
            number = int(k)
        except (ValueError, OverflowError):
            # int() cannot parse float-style text such as "1.5"; go through
            # the float value instead.
            try:
                number = int(asFloat)
            except (ValueError, OverflowError):
                continue  # NaN or +/-inf
        intArray.append(number)
    return intArray


def createHistFromPkst(pkts, histSize=1515, ignoredSizes=(0, 40, 52), maxOversizeRatio=0.05):
    """Build a normalized packet-size histogram, or return "error" for a rejected trace.

    Args:
        pkts: Sequence of numeric packet sizes.
        histSize: Number of histogram bins; sizes >= histSize are counted as
            out of range and not binned (default 1515).
        ignoredSizes: Sizes that are skipped entirely (default 0, 40 and 52).
        maxOversizeRatio: The trace is rejected when the out-of-range share
            reaches this value (default 0.05).

    Returns:
        A list of `histSize` floats (bin counts divided by the packet counter),
        or the string "error" when too many packets were out of range.
    """
    # Initialised to 1 rather than 0, so the divisions below never divide by
    # zero. As a consequence bins are divided by (binned packets + 1) and do
    # not sum to exactly 1.
    numberOfPkts = 1
    hist = [0] * histSize
    largerThanMTU = 0

    for size in pkts:
        if size in ignoredSizes:
            continue
        if size < histSize:
            # Fractional sizes are truncated to pick the bin.
            # NOTE(review): a negative size also passes this check and indexes
            # the histogram from the end (or raises IndexError below -histSize).
            # Confirm whether negative sizes can occur in the input.
            hist[int(size)] += 1
            numberOfPkts += 1
        else:
            largerThanMTU += 1

    normalizer = float(numberOfPkts)
    hist = [count / normalizer for count in hist]

    oversizeRatio = largerThanMTU / float(largerThanMTU + numberOfPkts)
    if oversizeRatio < maxOversizeRatio:
        return hist
    return "error"

def readFalseData(datasetRoot='/home/fatemeh/Bitcoin/nov 13/hist/dataset/', nTestDataFromEach=200):
    """Load the label-0 categories (http multi-tab and voip) and split them.

    For every category the histograms returned by readPacketHist are split by
    position: the first `nTestDataFromEach` go to the test set, the remainder
    to the training set. A category with no more than `nTestDataFromEach`
    histograms therefore contributes test samples only.

    Args:
        datasetRoot: Directory that contains one sub-directory per category.
        nTestDataFromEach: Number of leading histograms per category reserved
            for the test set.

    Returns:
        ((x_train, y_train), (x_test, y_test)) as plain lists; every label is 0.
    """
    categories = ['httpMultipleTab/6','httpMultipleTab/7','httpMultipleTab/8','httpMultipleTab/5','httpMultipleTab/4'
                  ,'httpMultipleTab/3','httpMultipleTab/2','voip']

    x_train, y_train, x_test, y_test = [], [], [], []
    for category in categories:
        # The trailing "" makes join end the path with a separator, which
        # readPacketHist has historically been given.
        dataset = readPacketHist(path=os.path.join(datasetRoot, category, ""))
        print(category, "  ReadFalse Data", len(dataset))
        data_x, data_y = buil_db(dataset[nTestDataFromEach:], 0)
        x_train.extend(data_x)
        y_train.extend(data_y)
        x_t, y_t = buil_db(dataset[0:nTestDataFromEach], 0)
        x_test.extend(x_t)
        y_test.extend(y_t)
    return (x_train, y_train), (x_test, y_test)


def readTrueData(datasetRoot='/home/fatemeh/Bitcoin/nov 13/hist/dataset/', nTestDataFromEach=200):
    """Load the label-1 categories (noisy bitcoin traffic) and split them.

    For every category the histograms returned by readPacketHist are split by
    position: the first `nTestDataFromEach` go to the test set, the remainder
    to the training set. A category with no more than `nTestDataFromEach`
    histograms therefore contributes test samples only.

    Args:
        datasetRoot: Directory that contains one sub-directory per category.
        nTestDataFromEach: Number of leading histograms per category reserved
            for the test set.

    Returns:
        ((x_train, y_train), (x_test, y_test)) as plain lists; every label is 1.
    """
    categories = [ "noisyBitMultipleTab/6","noisyBitMultipleTab/7","noisyBitMultipleTab/8","noisyBitMultipleTab/5",
                 "noisyBitMultipleTab/4",
                 "noisyBitMultipleTab/3","noisyBitMultipleTab/2",'noisyBitcoinOverTor']
    x_train, y_train, x_test, y_test = [], [], [], []

    for category in categories:
        # The trailing "" makes join end the path with a separator, which
        # readPacketHist has historically been given.
        dataset = readPacketHist(path=os.path.join(datasetRoot, category, ""))
        print(category, "Read TrueData", len(dataset))
        data_x, data_y = buil_db(dataset[nTestDataFromEach:], 1)
        x_train.extend(data_x)
        y_train.extend(data_y)
        x_t, y_t = buil_db(dataset[0:nTestDataFromEach], 1)
        x_test.extend(x_t)
        y_test.extend(y_t)

    return (x_train, y_train), (x_test, y_test)


def loadLargeDataset():
    """Combine the label-0 and label-1 datasets into NumPy train/test arrays.

    The samples from readFalseData come first, followed by those from
    readTrueData, in both the training and the test split.

    Returns:
        ((x_train, y_train), (x_test, y_test)), each element a numpy array.
    """
    (neg_train_x, neg_train_y), (neg_test_x, neg_test_y) = readFalseData()
    (pos_train_x, pos_train_y), (pos_test_x, pos_test_y) = readTrueData()

    train_split = (np.array(neg_train_x + pos_train_x),
                   np.array(neg_train_y + pos_train_y))
    test_split = (np.array(neg_test_x + pos_test_x),
                  np.array(neg_test_y + pos_test_y))
    return train_split, test_split



'''
I need to decide what to give my model as training data.
If I want to recognize bitcoin in the presence of noise, should
I give noisyBitcoin as my training data? The problem here is that when I increase the noise,
the results improve, which makes
the approach fishy. I guess in these cases I should increase the noise in my http data too.
This makes it more sensible. When analyzing bitcoin with two noise tabs open,
I should compare it with http traffic that also has two noise tabs open.
'''


'\nI should realize what to give as training data to my mode.\nIf I want to recognize bitcoin in presence of noise, should\nI give noisyBitcoin as my training data? the problem here is when I increase the noise,\n results improves which makes\nthe approach fishy. I guess in these cases, I should increase the noise in my http too.\nThis makes it more sensible. When trying to analyze two tab\nopen noise + bitcoin, I should compare it with http which has 2 noise open.\n'

In [13]:

# Sample-count constants; nothing else in this cell reads them.
# NOTE(review): readFalseData/readTrueData split on their own nTestDataFromEach (200),
# not on these values -- confirm whether these constants are still needed.
nTrainingData = 600
nTestData = 100
# Build the NumPy train/test arrays for both classes
# (label 0 from readFalseData, label 1 from readTrueData).
(x_train, y_train), (x_test, y_test) = loadLargeDataset() 

httpMultipleTab/6   ReadFalse Data 800
httpMultipleTab/7   ReadFalse Data 800
httpMultipleTab/8   ReadFalse Data 800
httpMultipleTab/5   ReadFalse Data 800
httpMultipleTab/4   ReadFalse Data 800
httpMultipleTab/3   ReadFalse Data 800
httpMultipleTab/2   ReadFalse Data 800
voip   ReadFalse Data 160
noisyBitMultipleTab/6 Read TrueData 295
noisyBitMultipleTab/7 Read TrueData 295
noisyBitMultipleTab/8 Read TrueData 295
noisyBitMultipleTab/5 Read TrueData 295
noisyBitMultipleTab/4 Read TrueData 295
noisyBitMultipleTab/3 Read TrueData 295
noisyBitMultipleTab/2 Read TrueData 295
noisyBitcoinOverTor Read TrueData 295


In [9]:

# What to do for a countermeasure:
# we have 800 http samples, and the rest is bitcoin traffic.

In [11]:
# Binary classifier over the packet-size histograms: one 512-unit ReLU hidden
# layer with 20% dropout, and a single sigmoid output unit for the 0/1 labels.
# NOTE(review): this cell needs x_train, y_train, x_test and y_test in the kernel,
# but its execution count (In [11]) is lower than that of the cell that builds
# them (In [13]) -- confirm the notebook still runs top to bottom on a fresh kernel.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])
model.compile(optimizer='adam',
              loss='binary_crossentropy',  # 'binary_crossentropy',
              metrics=['accuracy'])  # categorical_accuracy
# model.compile(loss='categorical_crossentropy',
# optimizer='sgd', metrics=['accuracy'])
# Train for 100 epochs on the training split.
model.fit(x_train, y_train, epochs=100)
# Last expression of the cell: the evaluation result on the test split
# (loss followed by the accuracy metric) is shown as the cell output.
model.evaluate(x_test, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


[0.09591515600681305, 0.9725]