In [1]:
import numpy as np
import tensorflow as tf
import random
import keras

import types
import pandas as pd

from sklearn.model_selection import train_test_split

from keras import Sequential
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dense
from keras.layers import Bidirectional
from keras import callbacks, optimizers
from numpy import concatenate, arange

from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler

Using TensorFlow backend.


READ AND PREPARE DATA

In [2]:
#Read data
#First row must have the title of the vector
dataset = pd.read_excel("train.xlsx")
dataset.head()

Unnamed: 0,MEMORY ADRESS
0,0xbfb22b18
1,0xbfb22b14
2,0xbfb22b10
3,0xbfb22b0c
4,0xbfb22b18


In [3]:
#Convert dataset to numpy
v = dataset.to_numpy()

#Reshape the input vector to a matrix with 400 rows and 1000 columns(address)
matrix = v.reshape(400,1000)

#convert the address from hex to decimal
for index, item in enumerate(v):
    for index_y, item_y in enumerate(item):
        v[index][index_y] = int(str(item_y),16)

#X consists of the first 999 address 
X = matrix[:,0:-1]

#y contains the 1000 address which must be predicted
y = matrix[:,-1:]

#Split data to train and validation set
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size = 0.10, random_state = 0)

Normalize the Data before the training stage 

In [4]:
# Robust Scaler for X. This method is selected since it is give better result when training.
scaler_for_X = RobustScaler()
scaler_for_X.fit(X)
X_train = scaler_for_X.transform(X_train)
X_val = scaler_for_X.transform(X_val)

#Robust Scaler for y
scaler_for_y = RobustScaler()
scaler_for_y.fit(y)

y_train = scaler_for_y.transform(y_train)
y_val = scaler_for_y.transform(y_val)

# Reshape the X_train and X_val
# Change the number of features for the LSTM (input for the LSTM units)
number_of_features = X.shape[1]
number_of_timesteps = 1

X_train = X_train.reshape(X_train.shape[0], number_of_timesteps, X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], number_of_timesteps, X_val.shape[1])
# The shape now is 3d, (#samples, #timesteps, #features/columns)

number_of_predictions = 1

print(X_train)

[[[-1.00000076e+00 -1.00002645e+00 -1.00001416e+00 ...  8.57266950e-08
    1.96130641e-07  8.11803946e-08]]

 [[-1.45474565e-07 -1.69506303e-07 -1.54567068e-07 ...  1.16900039e-07
   -1.00001109e+00 -1.00001726e+00]]

 [[-1.00010506e+00 -1.00011958e+00 -1.00010728e+00 ... -4.40323479e-07
   -3.29915118e-07 -4.54610210e-09]]

 ...

 [[ 1.02611524e-07  8.63767753e-08 -1.00000026e+00 ... -2.98744543e-08
    6.23461639e-08 -5.13060094e-08]]

 [[ 1.03910404e-07 -1.00002357e+00 -1.00001128e+00 ...  1.29888932e-07
    2.26004844e-07  1.12353666e-07]]

 [[ 6.49440023e-09 -1.94834831e-09  1.42877122e-08 ...  1.06508924e-07
   -9.99993070e-01  1.00663689e-07]]]


In [5]:
def bidi_lstm(units, nepochs, dropout):
    model = Sequential()
    input_shape=(None, number_of_features)
    model.add(Bidirectional(LSTM(units[0], activation='relu', dropout=dropout[0], bias_initializer= 'glorot_uniform'),input_shape=input_shape)) 
    model.add(Dense(number_of_predictions)) 
    
    # Cost Function & Optimization
    optimizer= optimizers.RMSprop(lr=0.001)
    model.compile(loss='mae', optimizer=optimizer)
    
    # Early Stopping to avoid overfitting
    early_stopping = callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0, patience=2)

    # Fit network with a sample of the dataset and validate using the the validation set
    history = model.fit(X_train, y_train, epochs = 100, batch_size = 1, validation_data = [X_val, y_val], verbose=1, shuffle=True, callbacks = [early_stopping])
    
    score = model.evaluate(X_val, y_val, batch_size=1)
    
    return(score, model)

In [6]:
def random_search(units, lr, nepochs, dropout, niter, units_l, lr_l, nepochs_l, dropout_l):
    
    param_grid = {
    'units': int(),
    'nepochs': int(),
    'dropout': float(),
    }
     
    unitsc = units
    lrc = lr
    nepochsc = nepochs
    dropoutc = dropout
    
    for i in range(niter):
        units = random.sample(range(unitsc,units_l), 1)
        nepochs = random.sample(range(nepochsc,nepochs_l), 1)
        dropout = random.sample(list(arange(dropoutc,dropout_l,0.05)), 1)

        results = bidi_lstm(units = units, nepochs = nepochs, dropout = dropout)
        results_loss = results[0]
        results_model = results[1]
        param_grid[i,] = {"units" : units, "nepochs" : nepochs, "dropout" : dropout}

        if (i == 0) :
            loss_rs = results_loss
            model_rs = results_model
        else :
            if (results_loss < loss_rs) :
                loss_rs = results_loss
                model_rs = results_model
                
    results_model.save('model.h5')
    return(model_rs, param_grid)
    
random_search(units=1, lr=0.001, nepochs=50, dropout=0.2, niter=100, units_l=50, lr_l=0.2, nepochs_l=100, dropout_l=0.5)

Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 

Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 360 

Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Train o

Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch

Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Train on 

Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10

Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Tr

Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/1

Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epo

Epoch 7/100
Train on 360 samples, validate on 40 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100


(<keras.engine.sequential.Sequential at 0x23671d6da48>,
 {'units': 0,
  'nepochs': 0,
  'dropout': 0.0,
  (0,): {'units': [30], 'nepochs': [87], 'dropout': [0.3]},
  (1,): {'units': [42], 'nepochs': [75], 'dropout': [0.35]},
  (2,): {'units': [1], 'nepochs': [69], 'dropout': [0.3]},
  (3,): {'units': [46], 'nepochs': [85], 'dropout': [0.3]},
  (4,): {'units': [6], 'nepochs': [67], 'dropout': [0.39999999999999997]},
  (5,): {'units': [7], 'nepochs': [96], 'dropout': [0.25]},
  (6,): {'units': [27], 'nepochs': [95], 'dropout': [0.25]},
  (7,): {'units': [36], 'nepochs': [59], 'dropout': [0.44999999999999996]},
  (8,): {'units': [12], 'nepochs': [75], 'dropout': [0.3]},
  (9,): {'units': [31], 'nepochs': [53], 'dropout': [0.35]},
  (10,): {'units': [45], 'nepochs': [80], 'dropout': [0.44999999999999996]},
  (11,): {'units': [19], 'nepochs': [96], 'dropout': [0.2]},
  (12,): {'units': [20], 'nepochs': [82], 'dropout': [0.3]},
  (13,): {'units': [7], 'nepochs': [57], 'dropout': [0.2]},
  (1

ONLY FOR TESTING

In [17]:
df_data_0 = pd.read_excel("memrefs_testing_Sept.xlsx")

df_data_0.head()

Unnamed: 0,Memory
0,0xbfb229b4
1,0xbfb229b8
2,0xbfb229bc
3,0xbfb22a74
4,0x8249704


In [18]:
y_target = {0}
v_test = df_data_0.to_numpy()

#Reshape vector to matrix with 50 rows and 999 cloumns since the 1000 element not given
matrix_test = v_test.reshape(50,999)

for index_test, item_test in enumerate(matrix_test):
    for index_y_test, item_y_test in enumerate(item_test):
        matrix_test[index_test][index_y_test] = int(str(item_y_test),16)


X_test = matrix_test[:,:]

In [None]:
#The Robust Scaler method is selected since seems to be the best normalizer technique
scaler_for_X = RobustScaler()
scaler_for_X.fit(X)
X_test = scaler_for_X.transform(X_test)

#The Robust Scaler method is selected since seems to be the best normalizer technique
scaler_y = RobustScaler() 
scaler_y.fit(y)

# Reshape the X_test
# Change the number of features for the LSTM (input for the LSTM units)
number_of_features = X_test.shape[1]
number_of_timesteps = 1

X_test = X_test.reshape(X_test.shape[0], number_of_timesteps, X_test.shape[1])
# The shape now is 3d, (#samples, #timesteps, #features/columns)

number_of_predictions = 1

reconstructed_model = keras.models.load_model('model.h5')

predictions = reconstructed_model.predict(X_test)

predictions = scaler_y.inverse_transform(predictions)

#Print the predicted address in decimal format
print(predictions)