# Use tensorflow.keras

In [1]:
import pickle
from string import Template

from tensorflow.contrib import keras as keras
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io
import scipy.io
from tensorflow.contrib.keras.python.keras.layers import Conv1D
from tensorflow.contrib.keras.python.keras.layers import Dense, Activation, Flatten
from tensorflow.contrib.keras.python.keras.layers import MaxPooling1D
from tensorflow.contrib.keras.python.keras.models import Sequential
from tensorflow.contrib.keras.python.keras.regularizers import l2
from tensorflow.contrib.keras.python.keras.utils import to_categorical
from sklearn.metrics.classification import confusion_matrix

In [2]:
all_defect_types = ("HB","HH")
sensors = tuple([u'BL_AE', u'BR_AE', u'BL_Y', u'BR_Mic', u'BL_Mic', u'MR_Y', u'MR_Z', u'BR_Y', u'BR_X', u'BR_Z', u'MR_X', u'BL_Z'])
br_sensors = tuple([u'BR_Mic'])
speeds = tuple([u'300', u'2460', u'2580', u'1620', u'2700', u'1740', u'1860', u'1020', u'1980', u'1140', u'2100', u'1260', u'2220', u'1380', u'2340'])
mass_locations = tuple([u'LM'])

In [3]:
def load_data(sensors, mass_locations, speeds, defect_types, runs=(1, 2), domain='AD', window_size=360):
    def load_mat(file_name, columns):
        # the way the data is encoded in the matlab file is a bit cryptic and the following
        # lines have been derived by reverse engineering the data structure
        mat = scipy.io.loadmat(file_name)['data'][0][0]
        data = {c[0]: a.flatten() for c, a in zip(mat.dtype.descr, mat) if c[0] in columns}
        return np.stack([data[c] for c in columns]).T
    data_path = 'C:/Users/ty8btn/Desktop/Python Notebook/LMS_predictive_data/dataset/'
    xs = []
    ys = []
    # run through the matlab files to load accring to arguments
    for defect_type_num, defect_type in enumerate(defect_types):
        for mass_location in mass_locations:
            for speed in speeds:
                for run in runs:
                    file_name = '%s_%s_S%s_%s_%s.mat' % (str(defect_type), mass_location, speed, run, domain)
                    ##print("loading " + file_name)
                    x = load_mat(data_path + file_name, columns=sensors)
                    x = x[:int(len(x) / window_size) * window_size]
                    xs.append(x)
                    ys.append(np.array([defect_type_num] * int(len(x) / window_size)))
    # concatenate and normalize data
    x = np.concatenate(xs)
    x = x / x.var(axis=0)
    x = x - x.mean(axis=0)
    x = x.reshape(-1, window_size, x.shape[-1])
    y = to_categorical(np.concatenate(ys).flatten())
    # shuffle patterns to remove temporal correlation
    p = np.random.permutation(len(x))
    x = x[p]
    y = y[p]
    return x, y

In [4]:
def train_model(train_data, val_data, model_name="model"):
    x, y = train_data
    print("creating model")
    model = Sequential([
#        Conv1D(50, 30, input_shape=x.shape[1:], kernel_regularizer=l2()),
        Conv1D(50, 30, padding='same', input_shape=x.shape[1:], kernel_regularizer=l2()),
#        MaxPooling1D(pool_size=10, strides=None),
        MaxPooling1D(pool_size=10, strides=None, padding='valid'),
        Activation('relu'),
#        Conv1D(10, 15, kernel_regularizer=l2()),
        Conv1D(10, 15, padding='same', kernel_regularizer=l2()),
#        MaxPooling1D(pool_size=3, strides=None),
        MaxPooling1D(pool_size=3, strides=None, padding='valid'),
        Activation('relu'),
        Flatten(),
        Dense(25, kernel_regularizer=l2()),
        Activation('tanh'),
        Dense(y.shape[1]),
        Activation('softmax')
    ])

    print("compiling model")
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

    print("fitting model")
    callback_history = model.fit(x, y, validation_data=val_data, epochs=50, batch_size=64,
                                 callbacks=[keras.callbacks.BaseLogger(),
                                            keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, mode='max'),
                                            keras.callbacks.ModelCheckpoint("C:/Users/ty8btn/Desktop/Python Notebook/lms/results/models/%s_weights.hdf5" % model_name,
                                                                            monitor='val_acc', verbose=2,
                                                                            save_best_only=True, mode='max')])

    print("saving model")
    model.save("C:/Users/ty8btn/Desktop/Python Notebook/lms/results/models/%s_architecture.config" % model_name)

    # restore best model
    model.load_weights("C:/Users/ty8btn/Desktop/Python Notebook/lms/results/models/%s_weights.hdf5" % model_name)

    # calculate confusion matrix
    yt = callback_history.validation_data[1].argmax(axis=1)
    yp = model.predict_classes(callback_history.validation_data[0])
    cm = confusion_matrix(yt, yp)
    cm = ((100.0 * cm.T) / cm.sum(axis=1)).T
    cm_df = pd.DataFrame(cm)
    cm_df.columns = all_defect_types
    cm_df.index = all_defect_types

    num_val = len(val_data[0])
    max_val_acc = max(callback_history.history['val_acc'])
    num_train = len(x)
    confusion = cm_df.to_html()
    learning_curve_acc = callback_history.history['acc']
    learning_curve_val_acc = callback_history.history['val_acc']

    # package all information needed for reporting
    info = {'model_name': model_name, 'learning_curve_val_acc': learning_curve_val_acc,
            'learning_curve_acc': learning_curve_acc, 'num_train': num_train, 'confusion': confusion,
            'num_val': num_val, 'max_val_acc': max_val_acc}

    return info


In [5]:
def run_experiment(model_name, train_data_spec, val_data_spec=None):
    train_data = load_data(**train_data_spec)

    # if no validation data spec, use same spec as training data and split training data accordingly
    if val_data_spec is None:
        l = len(train_data[0])
        val_data = (train_data[0][int(l * 0.9):], train_data[1][int(l * 0.9):])
        train_data = (train_data[0][:int(l * 0.9)], train_data[1][:int(l * 0.9)])
    else:
        val_data = load_data(**val_data_spec)

    #train and get info for report
    info = train_model(train_data, val_data, model_name=model_name)
    info.update({'train_data_spec': train_data_spec, 'val_data_spec': val_data_spec})

    # save info just in case we want to change report format later (without rerunning al experiments)
    with open('C:/Users/ty8btn/Desktop/Python Notebook/lms/models/' + model_name + ".pkl", 'wb') as f:
        pickle.dump(info, f)

In [6]:
run_experiment(model_name="test", train_data_spec = {'sensors': sensors, 'speeds': speeds, 'defect_types': all_defect_types, 'mass_locations': mass_locations})

creating model
compiling model
fitting model
Train on 48628 samples, validate on 5404 samples
Epoch 1/50


OSError: Unable to create file (Unable to open file: name = 'c:/users/ty8btn/desktop/python notebook/lms/results/models/test_weights.hdf5', errno = 2, error message = 'no such file or directory', flags = 13, o_flags = 302)