In [10]:
import pandas as pd
import numpy as np
import struct
import codecs
import matplotlib.pyplot as plt
import sys #only needed to determine Python version number
import matplotlib #only needed to determine Matplotlib version number
from scipy import signal
import h5py
from keras.models import Sequential
from keras.layers import Dropout
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.models import load_model
from sklearn import svm
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.externals import joblib
import xgboost as xgb
import gcforest.gcforest
from gcforest.utils.config_utils import load_json
from phase_utils import print_cm
from phase_features_loader import PhaseFeaturesLoader
from imblearn.metrics import classification_report_imbalanced
from imblearn.combine import SMOTETomek, SMOTEENN
from imblearn.under_sampling import EditedNearestNeighbours
import autosklearn.classification
from collections import Counter
    
# Enable inline plotting
%matplotlib inline

In [46]:
max_length = 100
FEATURES_TINY = "data/phase/ml_features_tiny.csv"
FEATURES = "data/phase/ml_features.csv"
dataset_train = "data/phase/ml_features_train.csv"
dataset_test = "data/phase/ml_features_test.csv"
STA = "URZ"
phases = ["regP", "regS", "tele", "N"]
channels = ["BHE", "BHZ", "BHN"]
validation_split = 0.1
seed = 10
file_stack1 = "results/phase_stack_1.hdf5"

# parameters for NN:
batch_size = 1024
epochs = 500
dropout = 0.25
layers = [64, 64]
phase_length = {"URZ": {"regP": 6840, "regS": 6840, "tele": 6840, "N": 6840*10}}
model_file_path_nn = "results/phase_nn.hdf5"
verbose = 0
cross_validation = False

In [47]:
print('Python version ' + sys.version)
print('Pandas version ' + pd.__version__)
print('Matplotlib version ' + matplotlib.__version__)

Python version 3.5.2 (default, Nov 23 2017, 16:37:01) 
[GCC 5.4.0 20160609]
Pandas version 0.22.0
Matplotlib version 2.1.2


In [13]:
def print_report(model, x_test, y_test):
    print("Best parameters set found on development set:")
    print()
    print(model.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = model.cv_results_['mean_test_score']
    stds = model.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, model.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()
    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, model.predict(x_test)
    print(classification_report(y_true, y_pred))
    print()

In [14]:
def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    """pretty print for confusion matrixes"""
    column_width = max([len(x) for x in labels] + [5])  # 5 is value length
    empty_cell = " " * column_width
    # Print header
    print("    " + empty_cell, end=" ")
    for label in labels:
        print("%{0}s".format(column_width) % label, end=" ")
    print()
    # Print rows
    for i, label1 in enumerate(labels):
        print("    %{0}s".format(column_width) % label1, end=" ")
        for j in range(len(labels)):
            cell = "%{0}.1f".format(column_width) % cm[i, j]
            if hide_zeroes:
                cell = cell if float(cm[i, j]) != 0 else empty_cell
            if hide_diagonal:
                cell = cell if i != j else empty_cell
            if hide_threshold:
                cell = cell if cm[i, j] > hide_threshold else empty_cell
            print(cell, end=" ")
        print()

In [15]:
class Stacking():
    def __init__(self, filename, classifiers, n_classes, data_length):
        self.h5f = h5py.File(filename, "w")
        try:
            dset_classifier = self.h5f['/classifier']
        except KeyError:
            dset_classifier = self.h5f.create_dataset("classifier", data = [c.encode() for c in classifiers])
        probability = np.zeros(shape=(data_length, len(classifiers), n_classes))
        try:
            dset_probability = self.h5f['/probability']
        except KeyError:
            dset_probability = self.h5f.create_dataset("/probability", data = probability)
        try:
            dset_y = self.h5f['/y']
        except KeyError:
            dset_y = self.h5f.create_dataset("/y", data = np.zeros(data_length, dtype="int16"))
    
    def save_prob(self, offset, classifier_index, probability):
        dset_probability = self.h5f['/probability']
        for i, prob in enumerate(probability):
            dset_probability[offset+i, classifier_index, :] = prob
            
    def save_y(self, offset, y):
        dset_y = self.h5f['/y']
        dset_y[offset:offset+len(y)] = y
    
    def close(self):
        self.h5f.close()

In [16]:
def sparsify(y, n_classes=4):
        'Returns labels in binary NumPy array'
        return np.array([[1 if y[i] == j else 0 for j in range(n_classes)]
                         for i in range(len(y))])

In [17]:
def resample(x, y, sampling_type=None):
    if sampling_type == "smoteenn":
        sme = SMOTEENN(random_state=1)
        x_out, y_out = sme.fit_sample(x, y)
    else:
        if sampling_type == "enn":
            enn = EditedNearestNeighbours(random_state=1)
            x_out, y_out = enn.fit_sample(x, y)
        else:
            if sampling_type is None or sampling_type == "nosampling":
                x_out, y_out = x, y
        
    print("Bevor reduction:", sorted(Counter(y).items()))
    print("After reduction:", sorted(Counter(y_out).items()))
    return x_out, y_out
    

In [18]:
from abc import ABCMeta, abstractmethod

ABC = ABCMeta('ABC', (object,), {})

class Classifier(ABC):
    __instances__ = dict()

    def __init__(self):
        Classifier.__instances__[self.__class__.__name__] = self
    
    def class_name(self):
        return self.__class__.__name__

    @staticmethod
    def create_model():
        pass
        
    @abstractmethod
    def fit(self, x_train, y_train, verbose=0, sampling_type=None):
        pass

    @abstractmethod
    def predict(self, x_test, y_test=None, sampling_type=None):
        pass

    @abstractmethod
    def load(self, filename):
        pass

    @abstractmethod
    def save(self, filename):
        pass
    
    @staticmethod
    def resample(x, y, sampling_type=None):        
        if sampling_type == "smoteenn":
            sme = SMOTEENN(random_state=1)
            x_out, y_out = sme.fit_sample(x, y)
        else:
            if sampling_type == "enn":
                enn = EditedNearestNeighbours(random_state=1)
                x_out, y_out = enn.fit_sample(x, y)
            else:
                if sampling_type is None or sampling_type == "nosampling":
                    x_out, y_out = x, y
        
        print("Bevor reduction:", sorted(Counter(y).items()))
        print("After reduction:", sorted(Counter(y_out).items()))
        return x_out, y_out

In [55]:
class CNN(Classifier):
    def __init__(self):
        super().__init__()
        self.model = None
       
    @staticmethod
    def create_model(layers, dropout=0.1, n_features=16, input_height=4, input_width=4):
        model = Sequential()
        model.add(Conv2D(64, (3, 3), input_shape=(1, input_height, input_width), padding='same', activation='relu', kernel_constraint=maxnorm(3)))
        model.add(Dropout(0.2))
        model.add(Conv2D(128, (3, 3), activation='relu', padding='same', kernel_constraint=maxnorm(3)))
        model.add(Dropout(0.3))
        model.add(Conv2D(256, (3, 3), activation='relu', padding='same', kernel_constraint=maxnorm(3)))
        model.add(Flatten())
        model.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
        model.add(Dropout(0.5))
        model.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
        model.add(Dropout(0.5))
        model.add(Dense(4, activation='softmax'))
        
        #lrate = 0.01
        #decay = lrate/epochs
        #sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=False)
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model
    
    def fit(self, x_train, y_train, layers=[32, 64], verbose=0):
        input_height = x_train.shape[1]
        input_width = x_train.shape[2]
        x_train = np.expand_dims(x_train, axis=1)
        y_train = sparsify(y_train)
        #y_train = np.expand_dims(y_train, axis=1)   
        tensorboard = TensorBoard(log_dir='graph', histogram_freq=0, write_graph=True, write_images=True)
        checkpoint = ModelCheckpoint(model_file_path_nn, monitor='acc', verbose=verbose,
                                     save_best_only=True, mode='max')
        self.model = CNN.create_model(layers=layers, dropout=dropout, input_height=5, 
                                     input_height=input_height, input_width=input_width)            
        history = self.model.fit(x=x_train, y=y_train, batch_size=batch_size, epochs=epochs, verbose=verbose,
                  validation_split=0.1, callbacks=[checkpoint, tensorboard])

        print("Max of acc: {}, val_acc: {}".
              format(max(history.history["acc"]), max(history.history["val_acc"])))
        print("Min of loss: {}, val_loss: {}".
              format(min(history.history["loss"]), min(history.history["val_loss"])))
    
    def predict(self, x_test, y_test=None):
        x_test = np.expand_dims(x_test, axis=1) 
        if y_test is not None:
            y_test = sparsify(y_test)
            #y_test = np.expand_dims(y_test, axis=1)
            score = self.model.evaluate(x_test, y_test, verbose=0)
            print("Accuracy: {}".format(score[1]*100))
        probability = self.model.predict(x_test, verbose=0)
        return probability  
        
    def load(self, model_file_path):
        self.model = load_model(model_file_path)
        
    def save(self, model_file_path):        
        # save model to file
        self.model.save(model_file_path)

SyntaxError: keyword argument repeated (<ipython-input-55-9b7b2749714a>, line 37)

In [52]:
class NN(Classifier):
    def __init__(self):
        super().__init__()
        self.model = None
        self.layers = [32, 32]
       
    @staticmethod
    def create_model(layers, dropout=0.1, n_features=16):
        # create model
        model = Sequential()
        model.add(Dense(layers[0], input_shape=(1, n_features), activation='relu'))
        model.add(Dropout(dropout))
        for units in layers[1:]:
            model.add(Dense(units, activation='relu'))
            model.add(Dropout(dropout))
        model.add(Dense(4, activation='softmax'))

        # Compile model
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model
    
    def set_layers(self, layers):
        self.layers = layers
    
    def fit(self, x_train, y_train, verbose=0, sampling_type=None):
        x_train, y_train = Classifier.resample(x_train, y_train, sampling_type)
        x_train = np.expand_dims(x_train, axis=1)
        y_train = sparsify(y_train)
        y_train = np.expand_dims(y_train, axis=1)   
        tensorboard = TensorBoard(log_dir='graph', histogram_freq=0, write_graph=True, write_images=True)
        checkpoint = ModelCheckpoint(model_file_path_nn, monitor='acc', verbose=verbose,
                                     save_best_only=True, mode='max')
        self.model = NN.create_model(layers=self.layers, dropout=dropout)            
        history = self.model.fit(x=x_train, y=y_train, batch_size=batch_size, epochs=epochs, verbose=verbose,
                  validation_split=0.1, callbacks=[checkpoint, tensorboard])

        print("Max of acc: {}, val_acc: {}".
              format(max(history.history["acc"]), max(history.history["val_acc"])))
        print("Min of loss: {}, val_loss: {}".
              format(min(history.history["loss"]), min(history.history["val_loss"])))
    
    def predict(self, x_test, y_test=None, sampling_type=None):
        x_test, y_test = Classifier.resample(x_test, y_test, sampling_type)
        x_test = np.expand_dims(x_test, axis=1) 
        if y_test is not None:
            y_test = sparsify(y_test)
            y_test = np.expand_dims(y_test, axis=1)
            score = self.model.evaluate(x_test, y_test, verbose=0)
            print("Accuracy: {}".format(score[1]*100))
        probability = self.model.predict(x_test, verbose=0)
        return probability  
        
    def load(self, model_file_path):
        self.model = load_model(model_file_path)
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        
    def save(self, model_file_path):        
        # save model to file
        self.model.save(model_file_path)
        

In [21]:
class SVM(Classifier):
    def __init__(self):
        super().__init__()
        self.model = None
     
    @staticmethod
    def create_model():
        params_grid = [
            #{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
            #{'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
            {'C': [1000], 'gamma': [0.001], 'kernel': ['rbf'], 'probability': [True]}
        ]

        model = GridSearchCV(svm.SVC(), params_grid, cv=5, scoring='accuracy', n_jobs=-1)
        return model
    
    def fit(self, x_train, y_train, verbose=0, sampling_type=None):
        x_train, y_train = Classifier.resample(x_train, y_train, sampling_type)
        self.model = SVM.create_model()
        print(self.model)
        self.model.fit(x_train, y_train)

    def predict(self, x_test, y_test=None, sampling_type=None):
        x_test, y_test = Classifier.resample(x_test, y_test, sampling_type)
        probability = self.model.predict_proba(x_test)
        if y_test is not None:
            y_pred = self.model.predict(x_test)
            prediction = [np.round(value) for value in y_pred]
            accuracy = accuracy_score(y_test, prediction)
            print("Accuracy: %.2f%%" % (accuracy * 100.0))
        return probability
        
    def load(self, model_file_path):
        self.model = joblib.load(model_file_path)
        
    def save(self, model_file_path):     
        # save model to file
        joblib.dump(self.model, model_file_path)
        

In [22]:
class XGBoost(Classifier):
    def __init__(self):
        super().__init__()
        self.model = None
    
    @staticmethod
    def create_model():
        seed = 10
        cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
        # set xgboost params
        params_grid = {
            'max_depth': [5, 6, 7, 8],
            'n_estimators': [i for i in range(88, 92, 1)],
            'learning_rate': np.linspace(0.1, 1, 20),
            #'max_depth': [6],
            #'n_estimators': [i for i in range(90, 91, 1)],
            #'learning_rate': np.linspace(0.1, 1, 2),
        }

        params_fixed = {
            'objective': 'multi:softprob',
            'silent': 1,
            'n_jobs': -1,
            'verbose_eval': True
        }

        num_round = 30  # the number of training iterations

        model = GridSearchCV(
            estimator=xgb.XGBClassifier(**params_fixed, seed=seed),
            param_grid=params_grid,
            cv=cv,
            scoring='accuracy'
        )
        return model
    
    def fit(self, x_train, y_train, verbose=0, sampling_type=None):
        x_train, y_train = Classifier.resample(x_train, y_train, sampling_type)
        self.model = XGBoost.create_model()
        print(self.model)
        self.model.fit(x_train, y_train)
        
    def predict(self, x_test, y_test=None, sampling_type=None):
        x_test, y_test = Classifier.resample(x_test, y_test, sampling_type)
        probability = self.model.predict_proba(x_test)
        print(y_test.shape)
        y_list = np.zeros(4, dtype=int)
        if y_test is not None:
            for i in range(10):
                print(y_test[len(y_test)-i-1], probability[len(y_test)-i-1])
                print(x_test[len(y_test)-i-1])
            for i in range(len(y_test)):
                y_list[y_test[i]] += 1
            print(y_list)
            y_pred = self.model.predict(x_test)
            prediction = [np.round(value) for value in y_pred]
            # evaluate predictions
            accuracy = accuracy_score(y_test, prediction)
            print("Accuracy: {}".format(accuracy * 100.0))
        return probability
        
    def load(self, model_file_path):
        self.model = joblib.load(model_file_path)
        
    def save(self, model_file_path):     
        # save model to file
        joblib.dump(self.model, model_file_path)

In [23]:
class GCForest(Classifier):
    def __init__(self):
        super().__init__()
        self.model = None
     
    @staticmethod
    def create_model():
        config = {
              "cascade": {
                  "random_state": 0,
                  "max_layers": 100,
                  "early_stopping_rounds": 3,
                  "n_classes": 4,
                  "estimators": [
                      {"n_folds":5,"type":"RandomForestClassifier","n_estimators":10,"max_depth":None,"n_jobs":-1},
                      {"n_folds":5,"type":"XGBClassifier","n_estimators":10,"max_depth":5,
                           "objective":"multi:softprob", "silent":True, "nthread":-1, 
                           "learning_rate":0.1},
                      {"n_folds":5,"type":"ExtraTreesClassifier","n_estimators":10,"max_depth":None,"n_jobs":-1},
                      {"n_folds":5,"type":"LogisticRegression"}
                  ]
              }
            }

        model = gcforest.gcforest.GCForest(config)
        return model
    
    def fit(self, x_train, y_train, verbose=0, sampling_type=None):
        x_train, y_train = Classifier.resample(x_train, y_train, sampling_type)
        self.model = GCForest.create_model()
        print(self.model)
        self.model.fit_transform(x_train, y_train)
        
    def predict(self, x_test, y_test=None, sampling_type=None):
        x_test, y_test = Classifier.resample(x_test, y_test, sampling_type)
        probability = self.model.predict_proba(x_test)
        if y_test is not None:
            y_pred = self.model.predict(x_test)
            prediction = [np.round(value) for value in y_pred]
            accuracy = accuracy_score(y_test, prediction)
            print("Accuracy: %.2f%%" % (accuracy * 100.0))
        return probability
        
    def load(self, model_file_path):
        self.model = joblib.load(model_file_path)
        
    def save(self, model_file_path):     
        # save model to file
        joblib.dump(self.model, model_file_path)

In [24]:
class AutoML(Classifier):
    def __init__(self):
        super().__init__()
        self.model = None
     
    @staticmethod
    def create_model():
        model = autosklearn.classification.AutoSklearnClassifier()
        return model
    
    def fit(self, x_train, y_train, verbose=0, sampling_type=None):
        x_train, y_train = Classifier.resample(x_train, y_train, sampling_type)
        self.model = AutoML.create_model()
        print(self.model)
        self.model.fit(x_train, y_train)
        
    def predict(self, x_test, y_test=None, sampling_type=None):
        x_test, y_test = Classifier.resample(x_test, y_test, sampling_type)
        probability = self.model.predict_proba(x_test)
        if y_test is not None:
            y_pred = self.model.predict(x_test)
            prediction = [np.round(value) for value in y_pred]
            accuracy = accuracy_score(y_test, prediction)
            print("Accuracy: %.2f%%" % (accuracy * 100.0))
        return probability
        
    def load(self, model_file_path):
        self.model = joblib.load(model_file_path)
        
    def save(self, model_file_path):     
        # save model to file
        joblib.dump(self.model, model_file_path)

In [25]:
aml=AutoML()
print(aml.class_name())

AutoML


In [26]:
# load train dataset
pd_train = PhaseFeaturesLoader(filename=dataset_train, validation_split=validation_split,
                         phase_length=phase_length, batch_size=batch_size)

x_train, y_train = pd_train.get_dataset(expand_dim=False, y_onehot=False)

length regP:6840
length regS:6840
length tele:6840
length N:68400


In [27]:
# load test dataset
pd_test = PhaseFeaturesLoader(filename=dataset_test, phase_length=phase_length, batch_size=batch_size)
x_test, y_test = pd_test.get_dataset(expand_dim=False, y_onehot=False)
print(pd_test.get_phase_index(100089180))

length regP:2280
length regS:2280
length tele:2280
length N:6840
0


In [28]:
print(x_train.shape)
print(x_test.shape)
print(x_train[0:5])
print(x_test[0:5])

(88920, 16)
(13680, 16)
[[ 2.24905878e-01  8.39121044e-01  2.51626980e-01 -4.57737841e-01
  -2.44708634e-01 -4.58808887e-01 -1.60656316e-01 -1.12517864e-01
  -2.37847791e-01 -8.44534544e-01  3.33333330e-01  7.24465430e-01
   8.37405500e-01  1.00000000e-01  1.00000000e-01  2.20000000e+01]
 [ 5.98809844e-02  8.79091967e-01  1.85321354e-01 -1.70179184e+00
  -9.99861612e-01  1.07490406e+00 -8.75815353e-01 -3.96305040e-01
  -7.87249962e-01 -1.22670668e+00  3.33333330e-01  9.84692670e-01
   9.86643400e-01  3.00000000e-01  3.96666670e-01  3.00000000e+00]
 [ 7.67638933e-01  6.75791133e-02  2.32719394e-01  2.03857427e-01
   1.09118566e+00 -1.18806115e-01  6.12849642e-01  4.12443285e-02
   8.72938755e-01 -6.11274756e-03  4.44444440e-01  7.24069300e-01
   9.29103640e-01  1.00000000e-01  1.30000000e-01  1.90000000e+01]
 [ 2.35439667e-01  8.42885800e-01  4.27079100e-01 -1.06165944e+00
  -1.33441403e+00  5.42280076e-01 -5.83749568e-01 -7.27541576e-01
  -3.36668052e-01 -5.83395519e-01  1.00000000e+00

In [29]:
classifiers = ["NN", "SVM", "XGBoost", "GCForest", "AutoML"]
classifier_index = {classifier: i for i, classifier in enumerate(classifiers)}
functions = globals().copy()
classifier_class = {c: functions.get(c) for c in classifiers}
print(classifier_class)
print(classifier_index)

{'SVM': <class '__main__.SVM'>, 'NN': <class '__main__.NN'>, 'XGBoost': <class '__main__.XGBoost'>, 'AutoML': <class '__main__.AutoML'>, 'GCForest': <class '__main__.GCForest'>}
{'SVM': 1, 'NN': 0, 'XGBoost': 2, 'AutoML': 4, 'GCForest': 3}


In [30]:
print(x_train.shape)

(88920, 16)


In [None]:
def run_model_fit():
    model = NN()
    sampling_type="smoteenn"
    model.fit(x_train, y_train, verbose=0, sampling_type=sampling_type)
    model.save("results/phase_train_{}_{}.mdl".format(model.class_name().lower(), sampling_type))
    model.predict(x_test, y_test, sampling_type="smoteenn")

run_model_fit()

In [32]:
def run_model_predict():
    model = NN()
    model.load("results/phase_train_{}.mdl".format(model.class_name().lower()))
    print(y_test.shape)
    model.predict(x_test, y_test, sampling_type="smoteenn")

run_model_predict()

(13680,)
Bevor reduction: [(0, 2280), (1, 2280), (2, 2280), (3, 6840)]
After reduction: [(0, 5852), (1, 5856), (2, 5372), (3, 3247)]
Accuracy: 83.43090470861027


In [34]:
def run_model_all_fit():
    for name in classifier_class:
        print(classifier_class[name])
        model = classifier_class[name]()
        sampling_type="enn"
        model.fit(x_train, y_train, verbose=0, sampling_type=sampling_type)
        model.save("results/phase_train_{}_{}.mdl".format(model.class_name().lower(), sampling_type))
        model.predict(x_test, y_test, sampling_type=sampling_type)

run_model_all_fit()

<class '__main__.SVM'>


KeyboardInterrupt: 

In [73]:
def run_layer1_prediction():
    sampling_type = "enn"
    x_test_resampled, y_test_resampled = resample(x_test, y_test, sampling_type="nosampling")
    stacking_file = "results/phase_test.hdf5"
    stacking = Stacking(stacking_file, classifiers, 4, len(x_test_resampled))
    offset = 0
    stacking.save_y(offset, y_test_resampled)
    for index, classifier in enumerate(classifiers):
        print(classifier)
        model = classifier_class[classifier]()
        model.load("results/phase_train_{}_{}.mdl".format(classifier.lower(), sampling_type))
        probability = model.predict(x_test_resampled, y_test_resampled, sampling_type=None)
        stacking.save_prob(offset, index, probability)
    stacking.close()
    
run_layer1_prediction()

Bevor reduction: [(0, 2280), (1, 2280), (2, 2280), (3, 6840)]
After reduction: [(0, 2280), (1, 2280), (2, 2280), (3, 6840)]
NN
Bevor reduction: [(0, 2280), (1, 2280), (2, 2280), (3, 6840)]
After reduction: [(0, 2280), (1, 2280), (2, 2280), (3, 6840)]
Accuracy: 69.27631578947368
SVM
Bevor reduction: [(0, 2280), (1, 2280), (2, 2280), (3, 6840)]
After reduction: [(0, 2280), (1, 2280), (2, 2280), (3, 6840)]
Accuracy: 64.29%
XGBoost
Bevor reduction: [(0, 2280), (1, 2280), (2, 2280), (3, 6840)]
After reduction: [(0, 2280), (1, 2280), (2, 2280), (3, 6840)]
(13680,)
1 [0.00289707 0.646213   0.01742636 0.33346358]
[ 0.99100299  0.07024807  0.54329155  1.50837247  1.15566701 -0.69228997
  0.19244289  0.53289275  1.11287722  0.51339931  0.33333333  0.94810108
  0.9442341   0.1        -0.04        1.        ]
3 [4.6110526e-03 1.2545275e-04 1.2481477e-04 9.9513865e-01]
[ 0.99999908  0.85813236  3.73638315  8.37489032  8.37489032  4.92857758
  5.48635213  6.51366594  7.86660195  8.4639867   0.166666

KeyboardInterrupt: 

In [38]:
def run_stacking_train():
    kf = KFold(n_splits=5, shuffle=True)
    split_counter = 0
    sampling_type="enn"
    x_train_resampled, y_train_resampled = resample(x_train, y_train, sampling_type=sampling_type)
    print(x_train.shape, x_train_resampled.shape)
    for train_index, test_index in kf.split(x_train_resampled):
        print("Fold", split_counter)
        x_train_train = x_train_resampled[train_index]
        y_train_train = y_train_resampled[train_index]
        x_train_test = x_train_resampled[test_index]
        y_train_test = y_train_resampled[test_index]
        for classifier in classifiers:
            print("Training of", classifier)
            model = classifier_class[classifier]()
            model.fit(x_train_train, y_train_train)
            model.save("results/phase_train_train_{}_{}.mdl".format(classifier.lower(), split_counter))
            probability = model.predict(x_train_test, y_train_test)
            #print("results/phase_train_train_{}_{}.mdl".format(classifier.lower(), split_counter))
        split_counter += 1
    
run_stacking_train()

Bevor reduction: [(0, 6840), (1, 6840), (2, 6840), (3, 68400)]
After reduction: [(0, 6840), (1, 411), (2, 779), (3, 52277)]
(88920, 16) (60307, 16)
Fold 0
Training of NN
Bevor reduction: [(0, 5466), (1, 332), (2, 631), (3, 41816)]
After reduction: [(0, 5466), (1, 332), (2, 631), (3, 41816)]
Max of acc: 0.958728696206153, val_acc: 1.0
Min of loss: 0.11707239984463899, val_loss: 0.032452982666810556
Bevor reduction: [(0, 1374), (1, 79), (2, 148), (3, 10461)]
After reduction: [(0, 1374), (1, 79), (2, 148), (3, 10461)]
Accuracy: 96.0122699386503
Training of SVM
Bevor reduction: [(0, 5466), (1, 332), (2, 631), (3, 41816)]
After reduction: [(0, 5466), (1, 332), (2, 631), (3, 41816)]
GridSearchCV(cv=5, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=Tru

[ 2018-04-10 16:24:39,140][cascade_classifier.fit_transform] X_groups_train.shape=[(48245, 16)],y_train.shape=(48245,),X_groups_test.shape=no_test,y_test.shape=no_test
[ 2018-04-10 16:24:39,143][cascade_classifier.fit_transform] group_dims=[16]
[ 2018-04-10 16:24:39,143][cascade_classifier.fit_transform] group_starts=[0]
[ 2018-04-10 16:24:39,144][cascade_classifier.fit_transform] group_ends=[16]
[ 2018-04-10 16:24:39,144][cascade_classifier.fit_transform] X_train.shape=(48245, 16),X_test.shape=(0, 16)
[ 2018-04-10 16:24:39,147][cascade_classifier.fit_transform] [layer=0] look_indexs=[0], X_cur_train.shape=(48245, 16), X_cur_test.shape=(0, 16)


Bevor reduction: [(0, 1374), (1, 79), (2, 148), (3, 10461)]
After reduction: [(0, 1374), (1, 79), (2, 148), (3, 10461)]
(12062,)
3 [2.0056302e-03 7.7784113e-03 4.4803430e-05 9.9017113e-01]
[ 0.45329179  0.46690254  0.35767553 -0.20217528  0.32505494  0.50021904
  0.12980171  0.04890782  0.15086452  0.33752555  0.44444444  0.77106119
  0.83174534  0.          0.1        23.        ]
3 [3.6125857e-04 3.9190785e-05 4.7291305e-06 9.9959487e-01]
[ 0.81922789  0.29733404  0.64078448  0.60670014  0.70891086 -0.30428109
 -0.72216119 -0.13073366 -0.06405375 -0.23134637  1.          0.94612648
  0.96313463  0.          0.         13.        ]
3 [3.9348871e-04 5.8215005e-06 5.2200867e-06 9.9959546e-01]
[ 0.27863799  0.7821791   0.4285989  -0.73011568 -0.4690811  -0.01051113
 -0.34130233 -0.23076488  0.11594554 -0.15242078  1.          0.92731593
  0.99014002  0.          0.         10.        ]
3 [4.5068486e-04 1.7706216e-05 4.3356572e-06 9.9952734e-01]
[ 0.18659694  0.80907161  0.89047921 -0.928

[ 2018-04-10 16:24:39,582][kfold_wrapper.log_eval_metrics] Accuracy(layer_0 - estimator_0 - 5_folds.train_0.predict)=95.50%
[ 2018-04-10 16:24:40,006][kfold_wrapper.log_eval_metrics] Accuracy(layer_0 - estimator_0 - 5_folds.train_1.predict)=95.82%
[ 2018-04-10 16:24:40,426][kfold_wrapper.log_eval_metrics] Accuracy(layer_0 - estimator_0 - 5_folds.train_2.predict)=95.66%
[ 2018-04-10 16:24:40,848][kfold_wrapper.log_eval_metrics] Accuracy(layer_0 - estimator_0 - 5_folds.train_3.predict)=95.62%
[ 2018-04-10 16:24:41,271][kfold_wrapper.log_eval_metrics] Accuracy(layer_0 - estimator_0 - 5_folds.train_4.predict)=95.82%
[ 2018-04-10 16:24:41,273][kfold_wrapper.log_eval_metrics] Accuracy(layer_0 - estimator_0 - 5_folds.train_cv.predict)=95.68%
[ 2018-04-10 16:24:42,856][kfold_wrapper.log_eval_metrics] Accuracy(layer_0 - estimator_1 - 5_folds.train_0.predict)=95.32%
[ 2018-04-10 16:24:44,431][kfold_wrapper.log_eval_metrics] Accuracy(layer_0 - estimator_1 - 5_folds.train_1.predict)=95.08%
[ 2018-

[ 2018-04-10 16:25:39,647][kfold_wrapper.log_eval_metrics] Accuracy(layer_2 - estimator_2 - 5_folds.train_2.predict)=95.95%
[ 2018-04-10 16:25:39,870][kfold_wrapper.log_eval_metrics] Accuracy(layer_2 - estimator_2 - 5_folds.train_3.predict)=95.94%
[ 2018-04-10 16:25:40,093][kfold_wrapper.log_eval_metrics] Accuracy(layer_2 - estimator_2 - 5_folds.train_4.predict)=95.68%
[ 2018-04-10 16:25:40,095][kfold_wrapper.log_eval_metrics] Accuracy(layer_2 - estimator_2 - 5_folds.train_cv.predict)=95.89%
[ 2018-04-10 16:25:42,132][kfold_wrapper.log_eval_metrics] Accuracy(layer_2 - estimator_3 - 5_folds.train_0.predict)=96.27%
[ 2018-04-10 16:25:44,153][kfold_wrapper.log_eval_metrics] Accuracy(layer_2 - estimator_3 - 5_folds.train_1.predict)=96.16%
[ 2018-04-10 16:25:46,175][kfold_wrapper.log_eval_metrics] Accuracy(layer_2 - estimator_3 - 5_folds.train_2.predict)=96.21%
[ 2018-04-10 16:25:48,071][kfold_wrapper.log_eval_metrics] Accuracy(layer_2 - estimator_3 - 5_folds.train_3.predict)=96.49%
[ 2018-

[ 2018-04-10 16:26:41,579][kfold_wrapper.log_eval_metrics] Accuracy(layer_5 - estimator_0 - 5_folds.train_2.predict)=95.70%
[ 2018-04-10 16:26:41,902][kfold_wrapper.log_eval_metrics] Accuracy(layer_5 - estimator_0 - 5_folds.train_3.predict)=95.80%
[ 2018-04-10 16:26:42,327][kfold_wrapper.log_eval_metrics] Accuracy(layer_5 - estimator_0 - 5_folds.train_4.predict)=96.03%
[ 2018-04-10 16:26:42,329][kfold_wrapper.log_eval_metrics] Accuracy(layer_5 - estimator_0 - 5_folds.train_cv.predict)=95.89%
[ 2018-04-10 16:26:44,776][kfold_wrapper.log_eval_metrics] Accuracy(layer_5 - estimator_1 - 5_folds.train_0.predict)=96.54%
[ 2018-04-10 16:26:47,219][kfold_wrapper.log_eval_metrics] Accuracy(layer_5 - estimator_1 - 5_folds.train_1.predict)=95.93%
[ 2018-04-10 16:26:49,600][kfold_wrapper.log_eval_metrics] Accuracy(layer_5 - estimator_1 - 5_folds.train_2.predict)=96.05%
[ 2018-04-10 16:26:51,998][kfold_wrapper.log_eval_metrics] Accuracy(layer_5 - estimator_1 - 5_folds.train_3.predict)=95.97%
[ 2018-

Bevor reduction: [(0, 1374), (1, 79), (2, 148), (3, 10461)]
After reduction: [(0, 1374), (1, 79), (2, 148), (3, 10461)]


[ 2018-04-10 16:27:06,492][cascade_classifier.transform] [layer=1] look_indexs=[0], X_cur_test.shape=(12062, 32)
[ 2018-04-10 16:27:07,611][cascade_classifier.transform] [layer=2] look_indexs=[0], X_cur_test.shape=(12062, 32)
[ 2018-04-10 16:27:08,727][cascade_classifier.transform] X_groups_test.shape=[(12062, 16)]
[ 2018-04-10 16:27:08,728][cascade_classifier.transform] group_dims=[16]
[ 2018-04-10 16:27:08,728][cascade_classifier.transform] X_test.shape=(12062, 16)
[ 2018-04-10 16:27:08,729][cascade_classifier.transform] [layer=0] look_indexs=[0], X_cur_test.shape=(12062, 16)
[ 2018-04-10 16:27:09,840][cascade_classifier.transform] [layer=1] look_indexs=[0], X_cur_test.shape=(12062, 32)
[ 2018-04-10 16:27:10,958][cascade_classifier.transform] [layer=2] look_indexs=[0], X_cur_test.shape=(12062, 32)


Accuracy: 96.34%
Training of AutoML
Bevor reduction: [(0, 5466), (1, 332), (2, 631), (3, 41816)]
After reduction: [(0, 5466), (1, 332), (2, 631), (3, 41816)]
AutoSklearnClassifier(delete_output_folder_after_terminate=True,
           delete_tmp_folder_after_terminate=True,
           disable_evaluator_output=False, ensemble_nbest=50,
           ensemble_size=50, exclude_estimators=None,
           exclude_preprocessors=None, get_smac_object_callback=None,
           include_estimators=None, include_preprocessors=None,
           initial_configurations_via_metalearning=25,
           ml_memory_limit=3072, output_folder=None,
           per_run_time_limit=360, resampling_strategy='holdout',
           resampling_strategy_arguments=None, seed=1, shared_mode=False,
           smac_scenario_args=None, time_left_for_this_task=3600,
           tmp_folder=None)
Bevor reduction: [(0, 1374), (1, 79), (2, 148), (3, 10461)]
After reduction: [(0, 1374), (1, 79), (2, 148), (3, 10461)]
Accuracy: 96.4

KeyboardInterrupt: 

In [42]:
#
def run_stacking_save(x_train, y_train):
    print(x_train.shape)
    stacking_file = "results/phase_train_train.hdf5"
    stacking = Stacking(stacking_file, classifiers, 4, len(x_train))
    kf = KFold(n_splits=5, shuffle=True)
    split_counter = 0
    offset = 0
    sampling_type="enn"
    x_train_resampled, y_train_resampled = resample(x_train, y_train, sampling_type=sampling_type)
    print(x_train.shape, x_train_resampled.shape)
    for train_index, test_index in kf.split(x_train_resampled):
        print("Fold", split_counter)
        x_train_train = x_train_resampled[train_index]
        y_train_train = y_train_resampled[train_index]
        x_train_test = x_train_resampled[test_index]
        y_train_test = y_train_resampled[test_index]
        stacking.save_y(offset, y_train_test)
        for index, classifier in enumerate(classifiers):
            model = classifier_class[classifier]()
            model.load("results/phase_train_train_{}_{}.mdl".format(classifier.lower(), split_counter))
            probability = model.predict(x_train_test, y_train_test)
            stacking.save_prob(offset, index, probability)
        offset += len(y_train_test)
        split_counter += 1
    stacking.close()
    
run_stacking_save(x_train, y_train)

(88920, 16)
Bevor reduction: [(0, 6840), (1, 6840), (2, 6840), (3, 68400)]
After reduction: [(0, 6840), (1, 411), (2, 779), (3, 52277)]
(88920, 16) (60307, 16)
Fold 0
Bevor reduction: [(0, 1371), (1, 77), (2, 145), (3, 10469)]
After reduction: [(0, 1371), (1, 77), (2, 145), (3, 10469)]
Accuracy: 96.38534239860064
Bevor reduction: [(0, 1371), (1, 77), (2, 145), (3, 10469)]
After reduction: [(0, 1371), (1, 77), (2, 145), (3, 10469)]
Accuracy: 95.66%
Bevor reduction: [(0, 1371), (1, 77), (2, 145), (3, 10469)]
After reduction: [(0, 1371), (1, 77), (2, 145), (3, 10469)]
(12062,)
3 [2.0056302e-03 7.7784113e-03 4.4803430e-05 9.9017113e-01]
[ 0.45329179  0.46690254  0.35767553 -0.20217528  0.32505494  0.50021904
  0.12980171  0.04890782  0.15086452  0.33752555  0.44444444  0.77106119
  0.83174534  0.          0.1        23.        ]
3 [2.4783371e-03 8.0803479e-04 1.4124614e-05 9.9669951e-01]
[ 0.77894914  0.7317228   0.61671509  0.22924141  0.13544126 -0.31145177
 -0.98353675 -0.15852907  0.39

KeyboardInterrupt: 

In [53]:
stacking_file = "results/phase_train_train.hdf5"
h5f = h5py.File(stacking_file, "r")
dset_probability = h5f['/probability']
print(dset_probability[0:3,:,:])
print(dset_probability.shape)
dset_y = h5f['/y']
print(dset_y[0:3])
print(dset_y.shape)
y_counter = [0,0,0,0]
for i, y in enumerate(dset_y):
    y_counter[int(y)] += 1
print(y_counter)
h5f.close()

[[[9.97790456e-01 3.21820172e-32 2.20952998e-03 4.91114562e-08]
  [9.53856562e-01 3.15069537e-05 4.59801149e-02 1.31816064e-04]
  [9.99387860e-01 8.13680890e-06 4.58136608e-04 1.45875223e-04]
  [9.06184733e-01 2.79739648e-02 3.31049338e-02 3.27363797e-02]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]]

 [[9.95055676e-01 1.26542756e-25 4.93491767e-03 9.45326519e-06]
  [9.70011668e-01 2.42403309e-04 2.76831996e-02 2.06272875e-03]
  [9.99254763e-01 7.94698190e-06 4.17023723e-04 3.20245192e-04]
  [9.05653298e-01 2.81162709e-02 3.23107913e-02 3.39196436e-02]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]]

 [[9.73188221e-01 9.08296535e-36 2.68082228e-02 3.54328949e-06]
  [9.50109935e-01 3.13789410e-05 4.95802368e-02 2.78448818e-04]
  [9.90337253e-01 2.06483801e-05 9.31871869e-03 3.23410815e-04]
  [8.97602081e-01 2.81416588e-02 4.24370393e-02 3.18192579e-02]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]]]
(88920, 5, 4)
[0 0 0]
(88920,)
[78

In [56]:

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.optimizers import SGD
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.constraints import maxnorm

def run_train_stacking():
    stacking_file = "results/phase_train_train.hdf5"
    h5f = h5py.File(stacking_file, "r")
    dset_probability = h5f['/probability']
    dset_y = h5f['/y']
    x_train_stacking = dset_probability[:]
    # x_train_stacking = np.reshape(x_train_stacking, (x_train_stacking.shape[0], 16))
    y_train_stacking = dset_y[:]
    print(x_train_stacking.shape)
    print(x_train_stacking.shape)
    model = CNN()
    model.fit(x_train_stacking, y_train_stacking, layers=[64, 128, 256], verbose=1)
    #model.fit(x_train_stacking, y_train_stacking)
    model.save("results/phase_stacking.mdl")

run_train_stacking()

(88920, 5, 4)
(88920, 5, 4)
Train on 80028 samples, validate on 8892 samples
Epoch 1/500
Epoch 00001: acc improved from -inf to 0.94538, saving model to results/phase_nn.hdf5
Epoch 2/500
Epoch 00002: acc improved from 0.94538 to 0.99703, saving model to results/phase_nn.hdf5
Epoch 3/500
Epoch 00003: acc improved from 0.99703 to 0.99775, saving model to results/phase_nn.hdf5
Epoch 4/500
Epoch 00004: acc improved from 0.99775 to 0.99860, saving model to results/phase_nn.hdf5
Epoch 5/500
Epoch 00005: acc improved from 0.99860 to 0.99888, saving model to results/phase_nn.hdf5
Epoch 6/500
Epoch 00006: acc improved from 0.99888 to 0.99894, saving model to results/phase_nn.hdf5
Epoch 7/500
Epoch 00007: acc improved from 0.99894 to 0.99905, saving model to results/phase_nn.hdf5
Epoch 8/500
Epoch 00008: acc did not improve
Epoch 9/500
Epoch 00009: acc did not improve
Epoch 10/500
Epoch 00010: acc improved from 0.99905 to 0.99909, saving model to results/phase_nn.hdf5
Epoch 11/500
Epoch 00011: a

Epoch 00061: acc did not improve
Epoch 62/500
Epoch 00062: acc did not improve
Epoch 63/500
Epoch 00063: acc did not improve
Epoch 64/500
Epoch 00064: acc did not improve
Epoch 65/500
Epoch 00065: acc did not improve
Epoch 66/500
Epoch 00066: acc did not improve
Epoch 67/500
Epoch 00067: acc improved from 0.99919 to 0.99923, saving model to results/phase_nn.hdf5
Epoch 68/500
Epoch 00068: acc did not improve
Epoch 69/500
Epoch 00069: acc did not improve
Epoch 70/500
Epoch 00070: acc did not improve
Epoch 71/500
Epoch 00071: acc did not improve
Epoch 72/500
Epoch 00072: acc did not improve
Epoch 73/500
Epoch 00073: acc did not improve
Epoch 74/500
Epoch 00074: acc did not improve
Epoch 75/500
Epoch 00075: acc did not improve
Epoch 76/500
Epoch 00076: acc did not improve
Epoch 77/500
Epoch 00077: acc did not improve
Epoch 78/500
Epoch 00078: acc did not improve
Epoch 79/500
Epoch 00079: acc did not improve
Epoch 80/500
Epoch 00080: acc did not improve
Epoch 81/500
Epoch 00081: acc did not

Epoch 00123: acc did not improve
Epoch 124/500
Epoch 00124: acc did not improve
Epoch 125/500
Epoch 00125: acc did not improve
Epoch 126/500
Epoch 00126: acc did not improve
Epoch 127/500
Epoch 00127: acc did not improve
Epoch 128/500
Epoch 00128: acc improved from 0.99926 to 0.99933, saving model to results/phase_nn.hdf5
Epoch 129/500
Epoch 00129: acc did not improve
Epoch 130/500
Epoch 00130: acc did not improve
Epoch 131/500
Epoch 00131: acc did not improve
Epoch 132/500
Epoch 00132: acc did not improve
Epoch 133/500
Epoch 00133: acc did not improve
Epoch 134/500
Epoch 00134: acc did not improve
Epoch 135/500
Epoch 00135: acc did not improve
Epoch 136/500
Epoch 00136: acc did not improve
Epoch 137/500
Epoch 00137: acc did not improve
Epoch 138/500
Epoch 00138: acc did not improve
Epoch 139/500
Epoch 00139: acc did not improve
Epoch 140/500
Epoch 00140: acc did not improve
Epoch 141/500
Epoch 00141: acc did not improve
Epoch 142/500
Epoch 00142: acc did not improve
Epoch 143/500
Epoc

Epoch 186/500
Epoch 00186: acc did not improve
Epoch 187/500
Epoch 00187: acc did not improve
Epoch 188/500
Epoch 00188: acc did not improve
Epoch 189/500
Epoch 00189: acc did not improve
Epoch 190/500
Epoch 00190: acc did not improve
Epoch 191/500
Epoch 00191: acc improved from 0.99933 to 0.99935, saving model to results/phase_nn.hdf5
Epoch 192/500
Epoch 00192: acc did not improve
Epoch 193/500
Epoch 00193: acc did not improve
Epoch 194/500
Epoch 00194: acc did not improve
Epoch 195/500
Epoch 00195: acc did not improve
Epoch 196/500
Epoch 00196: acc did not improve
Epoch 197/500
Epoch 00197: acc did not improve
Epoch 198/500
Epoch 00198: acc did not improve
Epoch 199/500
Epoch 00199: acc did not improve
Epoch 200/500
Epoch 00200: acc did not improve
Epoch 201/500
Epoch 00201: acc did not improve
Epoch 202/500
Epoch 00202: acc did not improve
Epoch 203/500
Epoch 00203: acc did not improve
Epoch 204/500
Epoch 00204: acc did not improve
Epoch 205/500
Epoch 00205: acc did not improve
Epoc

Epoch 249/500
Epoch 00249: acc did not improve
Epoch 250/500
Epoch 00250: acc did not improve
Epoch 251/500
Epoch 00251: acc did not improve
Epoch 252/500
Epoch 00252: acc did not improve
Epoch 253/500
Epoch 00253: acc improved from 0.99935 to 0.99936, saving model to results/phase_nn.hdf5
Epoch 254/500
Epoch 00254: acc did not improve
Epoch 255/500
Epoch 00255: acc did not improve
Epoch 256/500
Epoch 00256: acc did not improve
Epoch 257/500
Epoch 00257: acc did not improve
Epoch 258/500
Epoch 00258: acc did not improve
Epoch 259/500
Epoch 00259: acc did not improve
Epoch 260/500
Epoch 00260: acc did not improve
Epoch 261/500
Epoch 00261: acc did not improve
Epoch 262/500
Epoch 00262: acc did not improve
Epoch 263/500
Epoch 00263: acc did not improve
Epoch 264/500
Epoch 00264: acc did not improve
Epoch 265/500
Epoch 00265: acc did not improve
Epoch 266/500
Epoch 00266: acc did not improve
Epoch 267/500
Epoch 00267: acc did not improve
Epoch 268/500
Epoch 00268: acc did not improve
Epoc

Epoch 00311: acc did not improve
Epoch 312/500
Epoch 00312: acc did not improve
Epoch 313/500
Epoch 00313: acc did not improve
Epoch 314/500
Epoch 00314: acc did not improve
Epoch 315/500
Epoch 00315: acc did not improve
Epoch 316/500
Epoch 00316: acc did not improve
Epoch 317/500
Epoch 00317: acc did not improve
Epoch 318/500
Epoch 00318: acc did not improve
Epoch 319/500
Epoch 00319: acc did not improve
Epoch 320/500
Epoch 00320: acc did not improve
Epoch 321/500
Epoch 00321: acc did not improve
Epoch 322/500
Epoch 00322: acc did not improve
Epoch 323/500
Epoch 00323: acc did not improve
Epoch 324/500
Epoch 00324: acc did not improve
Epoch 325/500
Epoch 00325: acc did not improve
Epoch 326/500
Epoch 00326: acc did not improve
Epoch 327/500
Epoch 00327: acc did not improve
Epoch 328/500
Epoch 00328: acc did not improve
Epoch 329/500
Epoch 00329: acc did not improve
Epoch 330/500
Epoch 00330: acc did not improve
Epoch 331/500
Epoch 00331: acc did not improve
Epoch 332/500
Epoch 00332: 

Epoch 343/500
Epoch 00343: acc did not improve
Epoch 344/500
Epoch 00344: acc improved from 0.99939 to 0.99944, saving model to results/phase_nn.hdf5
Epoch 345/500
Epoch 00345: acc did not improve
Epoch 346/500
Epoch 00346: acc did not improve
Epoch 347/500
Epoch 00347: acc did not improve
Epoch 348/500
Epoch 00348: acc did not improve
Epoch 349/500
Epoch 00349: acc did not improve
Epoch 350/500
Epoch 00350: acc did not improve
Epoch 351/500
Epoch 00351: acc did not improve
Epoch 352/500
Epoch 00352: acc did not improve
Epoch 353/500
Epoch 00353: acc did not improve
Epoch 354/500
Epoch 00354: acc did not improve
Epoch 355/500
Epoch 00355: acc did not improve
Epoch 356/500
Epoch 00356: acc did not improve
Epoch 357/500
Epoch 00357: acc did not improve
Epoch 358/500
Epoch 00358: acc did not improve
Epoch 359/500
Epoch 00359: acc did not improve
Epoch 360/500
Epoch 00360: acc did not improve
Epoch 361/500
Epoch 00361: acc did not improve
Epoch 362/500
Epoch 00362: acc did not improve
Epoc

Epoch 406/500
Epoch 00406: acc did not improve
Epoch 407/500
Epoch 00407: acc did not improve
Epoch 408/500
Epoch 00408: acc did not improve
Epoch 409/500
Epoch 00409: acc did not improve
Epoch 410/500
Epoch 00410: acc did not improve
Epoch 411/500
Epoch 00411: acc did not improve
Epoch 412/500
Epoch 00412: acc did not improve
Epoch 413/500
Epoch 00413: acc did not improve
Epoch 414/500
Epoch 00414: acc did not improve
Epoch 415/500
Epoch 00415: acc did not improve
Epoch 416/500
Epoch 00416: acc did not improve
Epoch 417/500
Epoch 00417: acc did not improve
Epoch 418/500
Epoch 00418: acc did not improve
Epoch 419/500
Epoch 00419: acc did not improve
Epoch 420/500
Epoch 00420: acc did not improve
Epoch 421/500
Epoch 00421: acc did not improve
Epoch 422/500
Epoch 00422: acc did not improve
Epoch 423/500
Epoch 00423: acc did not improve
Epoch 424/500
Epoch 00424: acc did not improve
Epoch 425/500
Epoch 00425: acc did not improve
Epoch 426/500
Epoch 00426: acc did not improve
Epoch 427/500

Epoch 438/500
Epoch 00438: acc did not improve
Epoch 439/500
Epoch 00439: acc did not improve
Epoch 440/500
Epoch 00440: acc did not improve
Epoch 441/500
Epoch 00441: acc did not improve
Epoch 442/500
Epoch 00442: acc did not improve
Epoch 443/500
Epoch 00443: acc did not improve
Epoch 444/500
Epoch 00444: acc did not improve
Epoch 445/500
Epoch 00445: acc did not improve
Epoch 446/500
Epoch 00446: acc did not improve
Epoch 447/500
Epoch 00447: acc did not improve
Epoch 448/500
Epoch 00448: acc did not improve
Epoch 449/500
Epoch 00449: acc did not improve
Epoch 450/500
Epoch 00450: acc did not improve
Epoch 451/500
Epoch 00451: acc did not improve
Epoch 452/500
Epoch 00452: acc did not improve
Epoch 453/500
Epoch 00453: acc did not improve
Epoch 454/500
Epoch 00454: acc did not improve
Epoch 455/500
Epoch 00455: acc did not improve
Epoch 456/500
Epoch 00456: acc did not improve
Epoch 457/500
Epoch 00457: acc did not improve
Epoch 458/500
Epoch 00458: acc did not improve
Epoch 459/500

Epoch 470/500
Epoch 00470: acc did not improve
Epoch 471/500
Epoch 00471: acc did not improve
Epoch 472/500
Epoch 00472: acc did not improve
Epoch 473/500
Epoch 00473: acc did not improve
Epoch 474/500
Epoch 00474: acc did not improve
Epoch 475/500
Epoch 00475: acc did not improve
Epoch 476/500
Epoch 00476: acc did not improve
Epoch 477/500
Epoch 00477: acc improved from 0.99944 to 0.99946, saving model to results/phase_nn.hdf5
Epoch 478/500
Epoch 00478: acc did not improve
Epoch 479/500
Epoch 00479: acc did not improve
Epoch 480/500
Epoch 00480: acc did not improve
Epoch 481/500
Epoch 00481: acc did not improve
Epoch 482/500
Epoch 00482: acc did not improve
Epoch 483/500
Epoch 00483: acc did not improve
Epoch 484/500
Epoch 00484: acc did not improve
Epoch 485/500
Epoch 00485: acc did not improve
Epoch 486/500
Epoch 00486: acc did not improve
Epoch 487/500
Epoch 00487: acc did not improve
Epoch 488/500
Epoch 00488: acc did not improve
Epoch 489/500
Epoch 00489: acc did not improve
Epoc

In [75]:
def run_final_prediction():
    stacking_file = "results/phase_test.hdf5"
    h5f = h5py.File(stacking_file, "r")
    dset_probability = h5f['/probability']
    dset_y = h5f['/y']
    x_test_stacking = dset_probability[:]
    #x_test_stacking = np.reshape(x_test_stacking, (x_test_stacking.shape[0], 16))
    y_test_stacking = dset_y[:]
    print(x_test_stacking.shape)
    print("Shape:", sorted(Counter(y_test_stacking).items()))
    model = CNN()
    model.load("results/phase_stacking.mdl")
    model.predict(x_test_stacking, y_test_stacking)

run_final_prediction()

(13680, 5, 4)
Shape: [(0, 2280), (1, 2280), (2, 2280), (3, 6840)]
Accuracy: 69.67105263157895


In [74]:
# [(0, 2280), (1, 323), (2, 226), (3, 3738)]
2280+323+226+3738

6567