In [1]:
import os 
import sys
import pandas as pd
import numpy as np

from sklearn import model_selection
from __future__ import absolute_import

In [2]:
# Default dataset location; the file is read as semicolon-separated text without a header row.
TRAIN_PATH = './dataset_files/hamming_small.txt'  # "/content/drive/Hamming/dataset_files/hamming.txt"

# Column names, in file order, assigned to the data by load_data().
COLUMN_NAMES = ['plainword', 'codeword',
                'id_error', 'bin_error', 'defective_codeword']

# Columns that hold bit strings and must be kept as text so that leading zeros
# and per-character indexing survive parsing.
# NOTE(review): 'bin_error' is presumed to be a bit mask like the others - confirm.
BIT_STRING_COLUMNS = ['plainword', 'codeword', 'bin_error', 'defective_codeword']


def load_data(path=None):
    """Read the Hamming dataset into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        File to read. When omitted, the module-level TRAIN_PATH is used
        (looked up at call time, as the original zero-argument version did).

    Returns
    -------
    pandas.DataFrame
        One row per line of the file, with columns named by COLUMN_NAMES.
        The columns listed in BIT_STRING_COLUMNS are always strings; the
        remaining column keeps the dtype pandas infers for it.
    """
    if path is None:
        path = TRAIN_PATH
    # Without an explicit dtype pandas would turn a word such as "0001011" into
    # the integer 1011 whenever every value in the column fits in int64.
    return pd.read_csv(path, sep=';', names=COLUMN_NAMES,
                       dtype={name: str for name in BIT_STRING_COLUMNS})

In [3]:
def make_features(frame=None):
    """Expand bit-string columns into one integer column per bit, in place.

    For 'codeword', 'defective_codeword' and 'plainword' (in that order) a
    column '<prefix><j>' is added for every character position j, holding
    int() of character j of each row's string. The prefixes are 'cod_',
    'def_' and 'pln_'. The number of positions is taken from the first row
    of each source column.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Frame to extend. When omitted, the module-level ``data`` frame is
        used, which matches the original zero-argument behaviour.

    Returns
    -------
    None
        The frame is modified in place.
    """
    if frame is None:
        frame = data

    expansions = (
        ('codeword', 'cod_'),
        ('defective_codeword', 'def_'),
        # ('bin_error', 'mask_'),  # expansion of the error mask is disabled
        ('plainword', 'pln_'),
    )
    for source, prefix in expansions:
        words = frame[source]
        # Positional access: a label lookup with [0] fails when the index
        # does not contain the label 0 (e.g. after filtering or shuffling).
        width = len(words.iloc[0])
        for j in range(width):
            # j is bound as a default so the lambda never sees a later value.
            frame[prefix + str(j)] = words.apply(lambda word, j=j: int(word[j]))

In [4]:
def split_data(test_size, random_state=None):
    """Split the global ``data`` frame into train/test feature and label arrays.

    Features are the columns from 'def_0' through 'def_30' and labels are the
    columns from 'cod_0' through 'cod_30' (label-based slices, both ends
    inclusive), so the per-bit columns must already exist in ``data``.

    Parameters
    ----------
    test_size : float or int
        Forwarded to sklearn's train_test_split.
    random_state : int, RandomState instance or None, optional
        Forwarded to train_test_split. The default None keeps the original
        unseeded behaviour; pass an integer to make the split reproducible.

    Returns
    -------
    tuple of numpy.ndarray
        (train_data, test_data, train_labels, test_labels).
    """
    features = data.loc[:, 'def_0':'def_30']
    targets = data.loc[:, 'cod_0':'cod_30']  # 'mask_0':'pln_25'
    train_data, test_data, train_labels, test_labels = \
        model_selection.train_test_split(features, targets,
                                         test_size=test_size,
                                         random_state=random_state)
    return (np.array(train_data), np.array(test_data),
            np.array(train_labels), np.array(test_labels))

In [5]:
%%time
# Load the raw frame into the global `data`; its shape is printed before and
# after feature expansion so the added columns are visible in the output.
data = load_data()
print(data.shape)
# Adds the per-bit columns to the global `data` in place (nothing is returned).
make_features()
print(data.shape)
# test_size=0.3 is forwarded to the splitter; four NumPy arrays come back.
train_data, test_data, train_labels, test_labels = split_data(test_size=0.3)

(40960, 5)
(40960, 93)
CPU times: user 1.86 s, sys: 34.1 ms, total: 1.89 s
Wall time: 1.9 s


In [6]:
def probs_to_labels(predicted_probs):
    """Threshold an iterable of probabilities into a list of 0/1 labels.

    A value strictly greater than 0.5 becomes 1; everything else becomes 0.
    """
    labels = []
    for prob in predicted_probs:
        if prob > 0.5:
            labels.append(1)
        else:
            labels.append(0)
    return labels
def count_errors(y, y_pred):
    """Count the positions at which two bit sequences differ.

    Parameters
    ----------
    y, y_pred : sequence
        Label bits and predicted bits, compared element by element.

    Returns
    -------
    int
        Number of positions where the elements are not equal. Positions are
        compared pairwise up to the length of the shorter sequence, so words
        of any length are supported instead of exactly 31 bits.
    """
    return sum(1 for label_bit, result_bit in zip(y, y_pred)
               if label_bit != result_bit)

In [7]:
# errorStats = {'0': 0}
# for i in range(0, y_pred.shape[0]):
#   resultArray = probs_to_labels(y_pred[i])
#   errorNum = count_errors(test_labels[i], resultArray)
#   if errorStats.get(str(errorNum)) == None:
#     errorStats[str(errorNum)] = 0
#   errorStats[str(errorNum)] += 1
# print(errorStats)
def binary_accuracy(y, y_pred):
    """Fraction of rows whose thresholded prediction equals the label row.

    Each row of ``y_pred`` is converted to 0/1 labels with probs_to_labels and
    compared to the matching row of ``y`` with np.array_equal; the number of
    exact matches is divided by ``y.shape[0]``.
    """
    exact_matches = 0
    for target_row, prob_row in zip(y, y_pred):
        if np.array_equal(target_row, probs_to_labels(prob_row)):
            exact_matches += 1
    return exact_matches / y.shape[0]


In [8]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import roc_auc_score
import sys

# Short aliases read by the objective function: training pair, then validation pair.
X, y = train_data, train_labels
X_val, y_val = test_data, test_labels

# Hyperopt search space. 'choice' selects between a two-layer and a three-layer
# network; only the three-layer branch carries the extra units3/dropout3 entries.
space = dict(
    choice=hp.choice('num_layers', [
        dict(layers='two'),
        dict(layers='three',
             units3=hp.uniform('units3', 64, 1024),
             dropout3=hp.uniform('dropout3', .25, .75)),
    ]),

    # Layer widths are sampled as floats; the objective casts them with int().
    units1=hp.uniform('units1', 64, 1024),
    units2=hp.uniform('units2', 64, 1024),

    dropout1=hp.uniform('dropout1', .25, .75),
    dropout2=hp.uniform('dropout2', .25, .75),

    # Also sampled as a float and cast with int() by the objective.
    batch_size=hp.uniform('batch_size', 28, 128),

    # Fixed (not searched) settings.
    nb_epochs=5,
    optimizer=hp.choice('optimizer', ['adadelta', 'adam', 'rmsprop']),
    activation='tanh',
)



In [9]:
def f_nn(params):
    """Hyperopt objective: train one dense network and score it on the validation set.

    Builds a Sequential model with two hidden Dense layers (plus a third when
    ``params['choice']['layers'] == 'three'``), each followed by the configured
    activation and dropout, and a sigmoid output layer with one unit per label
    column. The model is fitted on the global ``X``/``y`` and evaluated on the
    global ``X_val``/``y_val`` with binary_accuracy.

    Parameters
    ----------
    params : dict
        One sample from the search space. Reads 'units1', 'units2',
        'dropout1', 'dropout2', 'activation', 'optimizer', 'nb_epochs',
        'batch_size' and the nested 'choice' dict ('layers', and for the
        three-layer branch 'units3' and 'dropout3').

    Returns
    -------
    dict
        ``{'loss': -accuracy, 'status': STATUS_OK}``; the accuracy is negated
        because fmin minimises the loss.
    """
    # Imported inside the function, as in the original cell, so Keras is only
    # loaded once the search actually runs. The unused optimizer imports were
    # dropped: the optimizer is passed to compile() by name.
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Activation

    print ('Params testing: ', params)
    model = Sequential()

    # Sampled widths are floats, hence the int() casts. `units` and
    # `kernel_initializer` are the Keras 2 names of `output_dim` and `init`.
    model.add(Dense(units=int(params['units1']), input_dim=X.shape[1]))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout1']))

    model.add(Dense(units=int(params['units2']), kernel_initializer="glorot_uniform"))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout2']))

    if params['choice']['layers'] == 'three':
        model.add(Dense(units=int(params['choice']['units3']), kernel_initializer="glorot_uniform"))
        model.add(Activation(params['activation']))
        model.add(Dropout(params['choice']['dropout3']))

    # One sigmoid output per label column (31 for the dataset in this notebook).
    model.add(Dense(y.shape[1]))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=params['optimizer'])

    model.fit(X, y, epochs=params['nb_epochs'], batch_size=int(params['batch_size']), verbose=0)

    predicted_probs = model.predict(X_val)
    acc = binary_accuracy(y_val, predicted_probs)
    print('ACCURACY:', acc)
    sys.stdout.flush()
    return {'loss': -acc, 'status': STATUS_OK}


# Run 50 evaluations of f_nn over `space` with the TPE algorithm, recording
# every evaluation in `trials`, then print the best parameter assignment found.
trials = Trials()
best = fmin(fn=f_nn, space=space, algo=tpe.suggest, max_evals=50, trials=trials)
print('best: ', best, sep='\n')


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Params testing:  {'activation': 'tanh', 'batch_size': 40.804463406763844, 'choice': {'layers': 'two'}, 'dropout1': 0.36142866251128597, 'dropout2': 0.4550319947835244, 'nb_epochs': 5, 'optimizer': 'adam', 'units1': 957.0339145567489, 'units2': 486.9762568522933}


  import sys
  # This is added back by InteractiveShellApp.init_path()


ACCURACY: 0.06486002604166667
Params testing:  {'activation': 'tanh', 'batch_size': 126.86705904899611, 'choice': {'layers': 'two'}, 'dropout1': 0.2643986828727314, 'dropout2': 0.6622881654902004, 'nb_epochs': 5, 'optimizer': 'rmsprop', 'units1': 623.4445842014196, 'units2': 130.04549172273707}


  import sys
  # This is added back by InteractiveShellApp.init_path()


ACCURACY: 0.06486002604166667
Params testing:  {'activation': 'tanh', 'batch_size': 41.749506592295276, 'choice': {'layers': 'two'}, 'dropout1': 0.5981348350660398, 'dropout2': 0.5186502555642409, 'nb_epochs': 5, 'optimizer': 'adam', 'units1': 503.7394302027022, 'units2': 946.0932799407137}


  import sys
  # This is added back by InteractiveShellApp.init_path()


KeyboardInterrupt: 