In [1]:
import pickle
import numpy as np
from sklearn.datasets.samples_generator import make_blobs
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from keras.models import load_model
from keras.utils import to_categorical
from numpy import dstack

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# load models from file
def load_all_models(n_models, model_dir):
    all_models = list()
    for i in range(n_models):
        # define filename for this ensemble
        filename = model_dir + '/model_' + str(i + 1) + '.nn'
        # load model from file
        model = load_model(filename)
        # add to list of members
        all_models.append(model)
        print('loaded %s' % filename)
    return all_models

In [3]:
scaler = MinMaxScaler()
def get_x_data(fields):
    x_train = train[fields]
    x_train = scaler.fit_transform(x_train)
    x_valid = valid[fields]
    x_valid = scaler.fit_transform(x_valid)
    x_test = test[fields]
    x_test = scaler.fit_transform(x_test)
    return (x_train, x_valid, x_test)

In [19]:
# create stacked model input dataset as outputs from the ensemble
# n: 0=train 1=valid 2=test
def stacked_dataset(members, n):
    stackX = None
    for i in range(n_members):
        x_data = get_x_data(target_fields[i])
        inputX = x_data[n]
        # make prediction
        yhat = members[i].predict(inputX, verbose=1)
        # stack predictions into [rows, members, probabilities]
        if stackX is None:
            stackX = yhat
        else:
            stackX = dstack((stackX, yhat))
    # flatten predictions to [rows, members x probabilities]
    stackX = stackX.reshape((stackX.shape[0], stackX.shape[1]*stackX.shape[2]))
    stackX[np.isnan(stackX)] = 0
    return stackX

In [6]:
# fit a model based on the outputs from the ensemble members
def fit_stacked_model(members, inputy):
    # create dataset using ensemble
    stackedX = stacked_dataset(members, 0)
    # fit standalone model
    model = LogisticRegression()
    model.fit(stackedX, inputy)
    return model

In [7]:
# make a prediction with the stacked model
def stacked_prediction(members, model):
    # create dataset using ensemble
    stackedX = stacked_dataset(members, 2)
    # make a prediction
    yhat = model.predict(stackedX)
    return yhat

In [8]:
# load all models
n_members = 11
members = load_all_models(n_members, 'nn_ensemble')
print('Loaded %d models' % len(members))

loaded nn_ensemble/model_1.nn
loaded nn_ensemble/model_2.nn
loaded nn_ensemble/model_3.nn
loaded nn_ensemble/model_4.nn
loaded nn_ensemble/model_5.nn
loaded nn_ensemble/model_6.nn
loaded nn_ensemble/model_7.nn
loaded nn_ensemble/model_8.nn
loaded nn_ensemble/model_9.nn
loaded nn_ensemble/model_10.nn
loaded nn_ensemble/model_11.nn
Loaded 11 models


In [9]:
train, valid, test = pickle.load(open('dataset.pickle', 'rb'))
train = train.fillna(0)
valid = valid.fillna(0)
test = test.fillna(0)

y_train = np.asarray(train.iloc[:,-8:])
y_valid = valid.iloc[:,-8:]
y_test = np.asarray(test.iloc[:,-8:])


In [11]:
# evaluate standalone models on test dataset
target_fields = ['raw_acc','proc_gyro','raw_magnet','watch_acceleration','watch_heading', 'location', 'location_quick_features', 'audio_naive', 'audio_properties', 'discrete', 'lf_measurements']
for i in range(n_members):
    x_data = get_x_data(target_fields[i])
    x_test = x_data[2]
    _, acc = members[i].evaluate(x_test, y_test, verbose=0)
    print('Model Accuracy: %.3f' % acc)


Model Accuracy: 0.361
Model Accuracy: 0.414


  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Model Accuracy: 0.308
Model Accuracy: 0.509


  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Model Accuracy: 0.308


  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Model Accuracy: 0.308


  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Model Accuracy: 0.308
Model Accuracy: 0.472


  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Model Accuracy: 0.308
Model Accuracy: 0.574


  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Model Accuracy: 0.308


In [21]:
# fit stacked model using the ensemble
y_train_decode = np.argmax(y_train, axis=1)
model = fit_stacked_model(members, y_train_decode)

  1024/297773 [..............................] - ETA: 16s 

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


  2080/297773 [..............................] - ETA: 15s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


  2144/297773 [..............................] - ETA: 15s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


  3232/297773 [..............................] - ETA: 14s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


  3264/297773 [..............................] - ETA: 14s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


  2112/297773 [..............................] - ETA: 16s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)






In [22]:
# evaluate model on test set
y_test_decode = np.argmax(y_test, axis=1)
yhat = stacked_prediction(members, model)
acc = accuracy_score(y_test_decode, yhat)
print('Stacked Test Accuracy: %.3f' % acc)

 1024/45385 [..............................] - ETA: 2s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


 3232/45385 [=>............................] - ETA: 2s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


 3296/45385 [=>............................] - ETA: 2s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


 3232/45385 [=>............................] - ETA: 2s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


 3360/45385 [=>............................] - ETA: 1s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


 3264/45385 [=>............................] - ETA: 2s

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Stacked Test Accuracy: 0.448


In [16]:
np.isnan(valid).any().sum()

0