In [215]:
import pandas as pd
import scipy
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.tree import DecisionTreeRegressor

from des import des_enc, random_bytes, group_bytes
from saes import SimplifiedAES
import math

In [216]:
import struct

def convert_to_bytes(list):
    """Pack a sequence of small integers into a ``bytes`` object.

    Each item is written as one unsigned byte (struct format ``B``), so an
    item outside 0..255 makes ``struct.pack`` raise ``struct.error``.

    Note: the parameter name shadows the builtin ``list`` inside this function.
    """
    # A repeat count ("3B") is equivalent to spelling the code out ("BBB").
    layout = '%dB' % len(list)
    return struct.pack(layout, *list)

# Number of random bytes requested per key; generate_samples() passes this to random_bytes().
NUM_BYTES_KEY = 2
# Presumably the message size in bytes; not referenced by any code visible in this notebook.
NUM_BYTES_MSG = 2

def generate_samples(msg, N):
    """Encrypt one fixed message under N distinct random keys.

    Parameters
    ----------
    msg : the plaintext handed unchanged to ``SimplifiedAES.encrypt``.
    N : number of (ciphertext, key) samples to produce.

    Returns
    -------
    (inputs, labels) : tuple of numpy arrays
        ``inputs`` holds one single-element row ``[cipher]`` per sample and
        ``labels`` holds the matching integer key, in the same order.

    Raises
    ------
    ValueError
        If N is larger than the number of distinct keys, since the
        uniqueness requirement could then never be satisfied.
    """
    # Assumes random_bytes(n) yields n bytes, i.e. 256**n possible keys - TODO confirm.
    # Without this guard the rejection loop below spins forever once every
    # key has been used.
    key_space = 256 ** NUM_BYTES_KEY
    if N > key_space:
        raise ValueError(
            'cannot draw %d unique keys from a key space of %d' % (N, key_space)
        )

    inputs = []
    keys = set()
    labels = []

    for _ in range(N):
        # Rejection-sample until a key that has not been used yet comes up.
        while True:
            # Explicit byte order: the default argument only exists on Python >= 3.11,
            # where it is also 'big'.
            key_num = int.from_bytes(random_bytes(NUM_BYTES_KEY), 'big')
            if key_num not in keys:
                break
        keys.add(key_num)

        cipher = SimplifiedAES(key_num).encrypt(msg)

        inputs.append([cipher])
        labels.append(key_num)

    return np.array(inputs), np.array(labels)

def estimate_key(cipher, num_byte, inputs, keys):
    """Fit a regression tree on (inputs -> keys) and predict the key for ``cipher``.

    Parameters
    ----------
    cipher : the single feature value to predict for.
    num_byte : accepted but not used by this function.
    inputs : training features, passed straight to ``fit``.
    keys : training targets, passed straight to ``fit``.

    Returns
    -------
    The first (and only) element of the tree's prediction for ``[[cipher]]``.
    """
    regressor = DecisionTreeRegressor(min_samples_leaf=20)
    regressor.fit(inputs, keys)

    # predict() expects a 2-D batch, so wrap the lone value as one row with one column.
    prediction = regressor.predict([[cipher]])
    return prediction[0]

# Not referenced by any code visible in this notebook - presumably a planned round count (TODO confirm).
N_ROUNDS = 1
# Initial sample count; the evaluation cell further down rebinds N_SAMPLES before calling round().
N_SAMPLES = 1000

def round(msg, cipher, key, num_byte, N_SAMPLES):
    """Run one estimation round and return the signed key error.

    Generates ``N_SAMPLES`` fresh (ciphertext, key) pairs for ``msg``, fits a
    regressor on them via ``estimate_key``, and compares the floored
    prediction for ``cipher`` against the true ``key``.

    Note: this definition shadows the builtin ``round`` for the rest of the
    notebook, and the ``N_SAMPLES`` parameter shadows the module-level
    constant of the same name inside the function.

    Returns
    -------
    ``key - floor(predicted_key)``; positive when the estimate is too low.
    """
    sample_inputs, sample_keys = generate_samples(msg, N_SAMPLES)
    predicted = estimate_key(cipher, num_byte, sample_inputs, sample_keys)
    return key - math.floor(predicted)

In [217]:
# Load the train and test tables; embed() reads their 'Message', 'Ciphertext' and 'Key' columns.
df_train = pd.read_csv('SAES_TRAIN.csv')
df_test = pd.read_csv('SAES_TEST.csv')

In [218]:
def embed(df):
    """Split a sample table into a feature matrix and a label vector.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Key', 'Message' and 'Ciphertext'.

    Returns
    -------
    (inputs, key) : tuple of numpy arrays
        ``inputs`` has shape (len(df), 2) with the message in column 0 and
        the ciphertext in column 1; ``key`` is the 'Key' column as a 1-D array.
        An empty frame yields an empty (0, 2) feature matrix.
    """
    key = df['Key'].to_numpy()
    msg = df['Message'].to_numpy()
    cipher = df['Ciphertext'].to_numpy()

    # Place the two 1-D columns side by side in one vectorised call instead of
    # building a tiny array per row in a Python loop.
    inputs = np.column_stack((msg, cipher))
    return inputs, key

In [219]:
# Build the (message, ciphertext) feature matrix and the key labels from the training table.
inputs, labels = embed(df_train)
inputs

array([[19473, 35842],
       [34015, 30832],
       [21625, 29797],
       ...,
       [38848, 15454],
       [ 5530, 17859],
       [24578,  2369]], dtype=int64)

In [220]:
# Fit a regression tree mapping (message, ciphertext) rows to the key value.
# min_samples_leaf=50 is the minimum number of training rows allowed in a leaf.
model = DecisionTreeRegressor(min_samples_leaf=50)
model.fit(inputs, labels)

In [221]:
# Show the first rows of the training table.
df_train.head()

Unnamed: 0,Message,Ciphertext,Key
0,19473,35842,37502
1,34015,30832,58143
2,21625,29797,25874
3,45437,36127,65047
4,34282,7667,16235


In [222]:
# Predicted keys for the first five training rows, for comparison with their 'Key' column.
model.predict(inputs[:5])

array([32501.07291667, 33144.83870968, 38266.70422535, 39718.58241758,
       22841.84615385])

In [223]:
# Extract features and key labels from the test table in the same way as for training.
inputs_test, labels_test = embed(df_test)

In [224]:
# Show the first rows of the test table.
df_test.head()

Unnamed: 0,Message,Ciphertext,Key
0,40367,3434,59658
1,59940,43735,24553
2,57106,40470,3529
3,21998,1481,25023
4,58570,44633,31964


In [225]:
# Predicted keys for the first five test rows, for comparison with their 'Key' column.
model.predict(inputs_test[:5])

array([32619.68571429, 30614.29487179, 28913.21311475, 36901.85185185,
       30614.29487179])

In [226]:
# Absolute key-prediction error of the fitted tree over the whole test table.
predictions_test = model.predict(inputs_test)
error = np.abs(predictions_test - labels_test)
# Only strictly positive errors enter the average; exact hits are left out of the mean.
positive_error = error[error > 0]
print('Average error: ', np.mean(positive_error))
print('Max error: ', np.max(error))

Average error:  16609.593222398493
Max error:  43224.294117647056


In [227]:
# Evaluate the sample-and-regress estimator (round) on the first ten test rows.
error = []
N_SAMPLES = 50000

# The loop variable is named `sample`, not `input`, so the builtin input()
# is not overwritten at notebook scope.
for sample, label in zip(inputs_test[:10], labels_test[:10]):
    # sample[0] is the message and sample[1] the ciphertext (column order built by embed()).
    d = round(sample[0], sample[1], label, 0, N_SAMPLES)
    error.append(abs(d))
    print('count')

error = np.array(error)
print('Average error: ', np.mean(error))
print('Max error: ', np.max(error))
print('Min error: ', np.min(error))

count
count
count
count
count
count
count
count
count
count
Average error:  13502.7
Max error:  30295
Min error:  741
