In [69]:
import numpy as np
import pandas as pd
import os
import csv, json
import math
from PIL import Image

from collections import Counter

In [64]:
import utils

In [None]:
# Window length and step constants. Neither is referenced by the cells visible
# in this notebook (presumably sliding-window parameters — TODO confirm usage and units).
WINDOW_SIZE = 1024
STEP_SIZE = 512

In [61]:
# Input CSV files; both are loaded with pd.read_csv in the data-loading cell.
DATASET_ATTACK = '../dataset/attack_decoded.csv'
DATASET_NORMAL = '../dataset/normalast.csv'

# Root directory under which the CSV and PNG exports are written.
EXPORT_PATH = '../export'

In [106]:
def get_byte_entropy(byte_array):
    """Weight the squared probability of each bin by the Shannon entropy.

    Args:
        byte_array: Sequence (list or array) of occurrence counts, one entry
            per bin.

    Returns:
        np.ndarray of floats with the same length as ``byte_array`` whose
        entries are ``P(x) * P(x) * H(X)``, where ``P(x)`` is the relative
        frequency of the bin and ``H(X)`` is the Shannon entropy (in bits) of
        the whole distribution. When the counts sum to zero, an all-zero
        array is returned instead of NaNs.
    """
    counts = np.asarray(byte_array, dtype=np.float64)
    total_count = np.sum(counts)

    # Nothing was counted: P(X) is undefined, so report zeros rather than
    # dividing by zero and propagating NaN to the callers.
    if total_count == 0:
        return np.zeros_like(counts)

    # P(X)
    P_X = counts / total_count

    # Compute the entropy H(X) with the Shannon formula.
    # Zero-probability bins are skipped because log2(0) is undefined.
    P_X_nonzero = P_X[P_X > 0]
    H_X = -np.sum(P_X_nonzero * np.log2(P_X_nonzero))

    P_H_given_X = P_X * H_X
    P_H_X = P_X * P_H_given_X

    return P_H_X

In [None]:
def export_image(filename:str, pixel1d, size=16):
    """Save the first ``size * size`` values of ``pixel1d`` as a grayscale image.

    The flat sequence is cut into ``size`` consecutive rows of ``size`` values,
    converted to ``uint8`` and written to ``filename`` in PIL mode ``'L'``.

    Note: a later cell in this notebook defines ``export_image`` again; after
    running all cells, that later definition is the one in effect.

    Args:
        filename: Destination path of the image file.
        pixel1d: Flat, sliceable sequence of pixel values.
        size: Edge length of the square image, in pixels.
    """
    rows = []
    for row_index in range(size):
        start = row_index * size
        rows.append(pixel1d[start:start + size])

    grid = np.array(rows, dtype=np.uint8)
    Image.fromarray(grid, 'L').save(filename)

In [203]:
SIZE = 16  # default edge length (in pixels) of the exported square images

def export_image(filename, pixel1d:np.ndarray, size=None):
    """Save the first ``size * size`` values of ``pixel1d`` as a grayscale image.

    This definition replaces the earlier ``export_image`` cell of the notebook.
    The optional ``size`` argument keeps calls written against that earlier
    signature (``size=...``) working.

    Args:
        filename: Destination path of the image file.
        pixel1d: Flat, sliceable sequence of pixel values (list or array).
        size: Edge length of the square image. ``None`` (the default) uses the
            module-level ``SIZE`` at call time.

    Raises:
        ValueError: If ``pixel1d`` holds fewer than ``size * size`` values.
    """
    if size is None:
        size = SIZE

    # Fail with a clear message instead of letting NumPy choke on ragged rows.
    if len(pixel1d) < size * size:
        raise ValueError(
            f'expected at least {size * size} pixel values, got {len(pixel1d)}'
        )

    pixel2d = [pixel1d[i * size:(i + 1) * size] for i in range(size)]

    final_image = Image.fromarray(np.array(pixel2d, dtype=np.uint8), 'L')
    final_image.save(filename)

In [192]:
def debug_minmax(target, message):
    """Print ``message`` followed by the minimum and maximum of ``target``.

    Args:
        target: Array-like accepted by ``np.min`` / ``np.max``.
        message: Heading printed before the two values.
    """
    print(message)
    for label, reducer in (('min:', np.min), ('max:', np.max)):
        print(label, reducer(target))

In [201]:
def normalize_minmax(arr:np.ndarray)->np.ndarray:
    """Linearly rescale ``arr`` so its minimum maps to 0 and its maximum to 1.

    Args:
        arr: Numeric array (or array-like) to rescale.

    Returns:
        Array of the same shape with values ``(arr - min) / (max - min)``.
        A constant array (``max == min``) yields all zeros instead of NaN,
        and an empty array yields an empty float array.
    """
    arr = np.asarray(arr)

    # np.min / np.max raise on empty input; there is nothing to rescale.
    if arr.size == 0:
        return arr.astype(float)

    min_val = np.min(arr)
    max_val = np.max(arr)
    shifted = arr - min_val
    value_range = max_val - min_val

    # Every value is identical: 0 / 0 would produce NaN, so return zeros.
    if value_range == 0:
        return np.zeros_like(shifted, dtype=float)

    normalized = shifted / value_range

    return normalized

In [212]:
# Load the two labelled datasets; the 'bytes' column of each is extracted below.
attacks = pd.read_csv(DATASET_ATTACK)
normals = pd.read_csv(DATASET_NORMAL)

attacks_bytes = attacks['bytes']
# Fixed: this previously read attacks['bytes'], so the "normal" series
# silently contained the attack samples.
normals_bytes = normals['bytes']

In [225]:
def extract_entropy_histogram(dataset, *, export_csvfile, export_directory):
    """Export one entropy-weighted byte histogram per sample as CSV row and PNG.

    For every sample, the JSON list stored in the ``bytes`` column is decoded
    and turned into a 256-bin histogram (values outside 0..255 are not
    counted). The histogram is passed through ``get_byte_entropy`` and
    ``normalize_minmax``, then scaled with ``log2(1 + x) * 255``. The scaled
    vector is written to the CSV file, and its rounded values are saved as an
    image via ``export_image``.

    Args:
        dataset: Table (e.g. a DataFrame) with ``bytes``, ``path`` and
            ``filename`` columns.
        export_csvfile: Name of the CSV file created inside
            ``export_directory``.
        export_directory: Directory receiving the CSV file and the
            ``dataset_{i}.png`` images; created if it does not exist.
    """
    os.makedirs(export_directory, exist_ok=True)

    with open(os.path.join(export_directory, export_csvfile), 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['index', 'path', 'filename', 'bytes'])

        # Walk the three columns together by position. Looking rows up with
        # dataset['path'][i] treats the enumerate counter as an index label
        # and breaks (or misaligns) on a non-default index.
        samples = zip(dataset['bytes'], dataset['path'], dataset['filename'])
        for i, (bytes_str, sample_path, sample_filename) in enumerate(samples):
            bs = json.loads(bytes_str)
            counter = Counter(bs)
            bc = [counter[value] for value in range(256)]

            entropy = get_byte_entropy(bc)
            normalized = normalize_minmax(entropy)
            # log2(1 + x) keeps 0 at 0 and 1 at 1; scale the result to 0..255.
            normalized = np.log(normalized + 1) / np.log(2) * 255.0

            writer.writerow([i, sample_path, sample_filename, json.dumps(normalized.tolist())])
            export_image(os.path.join(export_directory, f'dataset_{i}.png'), np.round(normalized, 0).tolist())

In [227]:
# Each class is exported into its own sub-directory of EXPORT_PATH.
EXPORT_ATTACK_PATH = os.path.join(EXPORT_PATH, 'attack')
EXPORT_NORMAL_PATH = os.path.join(EXPORT_PATH, 'normal')
for export_target_dir in (EXPORT_ATTACK_PATH, EXPORT_NORMAL_PATH):
    os.makedirs(export_target_dir, exist_ok=True)

# (dataset, CSV file name, destination directory), processed attack first.
export_jobs = (
    (attacks, 'attack_entropy.csv', EXPORT_ATTACK_PATH),
    (normals, 'normal_entropy.csv', EXPORT_NORMAL_PATH),
)
for export_frame, export_csv_name, export_target_dir in export_jobs:
    extract_entropy_histogram(
        export_frame,
        export_csvfile=export_csv_name,
        export_directory=export_target_dir,
    )


In [217]:

# Inline export of the attack dataset straight into EXPORT_PATH. It performs
# the same per-sample steps as extract_entropy_histogram, but writes
# 'attack_entropy.csv' and the images directly under the export root.
os.makedirs(EXPORT_PATH, exist_ok=True)

with open(os.path.join(EXPORT_PATH, 'attack_entropy.csv'), 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['index', 'path', 'filename', 'bytes'])

    # Walk the columns together by position; attacks['path'][i] would treat
    # the enumerate counter as an index label.
    attack_samples = zip(attacks['bytes'], attacks['path'], attacks['filename'])
    for i, (bytes_str, sample_path, sample_filename) in enumerate(attack_samples):
        bs = json.loads(bytes_str)
        counter = Counter(bs)
        bc = [counter[value] for value in range(256)]

        entropy = get_byte_entropy(bc)
        normalized = normalize_minmax(entropy)
        # log2(1 + x) keeps 0 at 0 and 1 at 1; scale the result to 0..255.
        normalized = np.log(normalized + 1) / np.log(2) * 255.0

        writer.writerow([i, sample_path, sample_filename, json.dumps(normalized.tolist())])
        # Fixed: these images come from the attack dataset but were saved as
        # 'normal{i}.png', mislabelling them.
        export_image(os.path.join(EXPORT_PATH, f'attack{i}.png'), np.round(normalized, 0).tolist())

In [169]:
# Sanity check of the scaling formula used in this notebook:
# np.log(x + 1) / np.log(2) is log2(x + 1), which evaluates to 0 for x == 0.
np.log(1) / np.log(2)

0.0