In [1]:
import os
import sys
import gc
import psutil

from os.path import isdir, join
from time import time, sleep
from pathlib import Path
import pandas as pd

import numpy as np
from scipy import signal
from sklearn.decomposition import PCA

from IPython.display import display

from time import time
import gc
import tensorflow as tf
from random import randint
from tensorflow import keras
from tensorflow.keras import activations, models, regularizers
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, Dense, Input, Dropout, Flatten
from tensorflow.python.keras.callbacks import TensorBoard, ReduceLROnPlateau
print('Finish import model library')

Finish import model library


In [30]:
class Data:
    def __init__(self, path):
        self.path = path
        self.channels = ['ch1', 'ch2', 'ch3', 'ch4']
        self.labels = ['lights-on', 'turn-off']
        
        self.df = self.get_data()
        self.data_split() # Split df to x_train, y_train, x_valid, y_valid
        print("Finish getting data, gc.collect = %d" % gc.collect())
        
    def get_data(self):
        files = [(str(file), file.parts[-2]) for file in Path(self.path).glob("**/*.txt") if file]
        
        label = []
        arr = []
        for fname in files[:None]:
            file = pd.read_csv(fname[0], index_col=0)
            tmp_arr = []
            for ch in self.channels:
                tmp = self.log_specgram(file[ch])
                tmp = tmp.reshape(len(tmp), len(tmp[0]), -1)
                tmp_arr.append(tmp)
                del tmp
            tmp = np.concatenate(tmp_arr, axis=2)
            arr.append(tmp)
            del tmp, tmp_arr
            label.append(fname[1])
        df = pd.DataFrame({'data':arr})
        return pd.concat([df, pd.get_dummies(label, dtype=bool)], axis=1)
    
    def data_split(self):
        ''' My X has to be numpy array, and Y has to be pandas dataframe'''
        train_set = self.df.sample(frac=0.8, replace=False, random_state=60)
        valid_set = self.df.loc[set(self.df.index) - set(train_set.index)]
        
        predict_set = valid_set.sample(frac=0.5, replace=False, random_state=60)
        valid_set = valid_set.loc[set(valid_set.index) - set(predict_set.index)]

        self.y_train = train_set.drop('data', axis=1)
        self.x_train = np.array([data for data in train_set.data])

        self.y_valid = valid_set.drop('data', axis=1)
        self.x_valid = np.array([data for data in valid_set.data])
        
        self.y_predict = predict_set.drop('data', axis=1)
        self.y_predict.reindex([i for i in range(len(self.y_predict))])
        self.x_predict = np.array([data for data in predict_set.data])
        
    def log_specgram(self, audio, sample_rate=400, window_size=20,
                 step_size=10, eps=1e-10):
        _, _, spec = signal.spectrogram(audio,
                                        fs=sample_rate,
                                        window='hann',
                                        nperseg=25,
                                        noverlap=20,
                                        detrend=False)
        return np.log(spec.T.astype(np.float32) + eps)

In [43]:
class Model:
    def __init__(self, data, shape):
        self.shape = shape
        self.x_train = data.x_train
        self.y_train = data.y_train
        self.x_valid = data.x_valid
        self.y_valid = data.y_valid
        self.x_predict = data.x_predict
        self.y_predict = data.y_predict
        self.model = self.get_model()

    def get_model(self):
        '''Create a keras functional model'''

        inputlayer = Input(shape=self.shape)

        # Nornal model
        nclass = 2

        norm_input = BatchNormalization()(inputlayer)
        model = Conv2D(16, kernel_size=2, padding='same', activation=activations.relu)(norm_input)
        model = Conv2D(16, kernel_size=2, padding='same', activation=activations.relu)(model)
        model = MaxPool2D(pool_size=(2, 2))(model)
        model = Dropout(rate=0.2)(model)
        model = Conv2D(32, kernel_size=3, padding='same', activation=activations.relu)(model)
        model = Conv2D(32, kernel_size=3, padding='same', activation=activations.relu)(model)
        model = MaxPool2D(pool_size=(2, 2))(model)
        model = Dropout(rate=0.2)(model)
        model = Conv2D(64, kernel_size=3, padding='same', activation=activations.relu)(model)
        model = MaxPool2D(pool_size=(2, 2))(model)
        model = Dropout(rate=0.2)(model)
        model = Flatten()(model) 

        dense_1 = BatchNormalization()(Dense(128, kernel_regularizer=regularizers.l2(0.01), activation=activations.relu)(model))
        dense_1 = BatchNormalization()(Dense(128, kernel_regularizer=regularizers.l2(0.01), activation=activations.relu)(dense_1))
        dense_1 = Dense(nclass, activation=activations.softmax)(dense_1)

        model = models.Model(inputs=inputlayer, outputs=dense_1)
        model.compile(optimizer='adam', loss=tf.losses.binary_crossentropy, metrics=['accuracy'])

        return model
    
    def compile_model(self, batch_size=16, epochs=12):
        batch_size = batch_size
        epochs = epochs
        path = './tensorboard/keras_' + str(time())
        
        print("Start training")
        self.history = self.model.fit(
            x=self.x_train,
            y=self.y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_data=(self.x_valid, self.y_valid),
            callbacks=[TensorBoard(log_dir=path)],
            verbose=1,
        )
    
    def predict_model(self):
        prediction = np.argmax(self.model.predict(self.x_predict), axis=1)
        
        count = 0
        index = 0
        length = len(prediction)
        for i in self.y_predict.index:
            if not prediction[index] and self.y_predict['lights-on'].loc[i]:
                count = count + 1
            elif prediction[index] and not self.y_predict['lights-on'].loc[i]:
                count = count + 1
            index = index + 1
        print("Accuracy of prediction: %.4f%%, (%d, %d)" % (count / length * 100, count, length))
    
    def save_model(self):
        path = './model/model_' + str(time()) + '.h5'
        self.model.save(path)
    
    def load_model(self, path):
        self.model = load_model(path)

In [44]:
# Get data
path = './dataSet/'
data = Data(path)

# Model stuff
model = Model(data, data.x_train.shape[1:])
model.compile_model(epochs=20)
model.predict_model()

Finish getting data, gc.collect = 77174
Start training
Train on 801 samples, validate on 100 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Accuracy of prediction: 87.0000%, (87, 100)


In [32]:
# data.y_predict

Unnamed: 0,lights-on,turn-off
717,False,True
114,True,False
379,True,False
444,True,False
88,True,False
6,True,False
116,True,False
188,True,False
799,False,True
322,True,False


In [35]:
data.y_predict.reindex([i for i in range(100)])

Unnamed: 0,lights-on,turn-off
0,,
1,,
2,,
3,,
4,,
5,,
6,True,False
7,True,False
8,True,False
9,True,False
