In [180]:
import tensorflow as tf
import numpy as np
import functools
from tqdm import tqdm 
import pandas as pd
import sklearn

In [181]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Bidirectional
from tensorflow.keras import metrics, losses
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [182]:
df = pd.read_csv('./data/bubble_detection.csv')

In [183]:
scaler = MinMaxScaler(feature_range=(0, 1))
features = scaler.fit_transform(df.iloc[:,1:-1])
output = np.array(df.iloc[:,-1], dtype='float64')

In [184]:
time_steps = 36
x = []
y = []
for row in range(len(features) - time_steps):
    x.append(features[row : row + time_steps])
    y.append(output[row + time_steps - 1])

In [185]:
x = np.array(x, dtype = 'float64')
y = np.array(y, dtype = 'float64')

In [186]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=202144)

In [187]:
1 - sum(y_train) / len(y_train)

0.9460431654676259

In [188]:
model = Sequential([
    Bidirectional(LSTM(units=50,
                    input_shape=(x_train.shape[1], x_train.shape[2]),
                    recurrent_activation='sigmoid',
                    recurrent_initializer='glorot_uniform')),
    Dense(units=1, activation='sigmoid')
])

In [189]:
model.compile(optimizer=Adam(learning_rate=1e-2),
            loss=binary_focal_loss(alpha=0.75),
            metrics=[metrics.BinaryAccuracy(), metrics.Precision(), metrics.Recall()])

In [190]:
model.fit(x_train, y_train)

Train on 556 samples


<tensorflow.python.keras.callbacks.History at 0x7fca0cb193d0>

In [191]:
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_9 (Bidirection multiple                  25200     
_________________________________________________________________
dense_10 (Dense)             multiple                  101       
Total params: 25,301
Trainable params: 25,301
Non-trainable params: 0
_________________________________________________________________


In [192]:
yhat = model.predict_classes(x_test)

In [193]:
model.predict(x_test)

array([[0.37766057],
       [0.33260363],
       [0.37189746],
       [0.35877505],
       [0.3638599 ],
       [0.37044907],
       [0.3518231 ],
       [0.3715133 ],
       [0.3398953 ],
       [0.36495477],
       [0.36285537],
       [0.34865397],
       [0.36037254],
       [0.34839475],
       [0.37029207],
       [0.3518267 ],
       [0.34826645],
       [0.36696815],
       [0.35709763],
       [0.3407362 ],
       [0.36130857],
       [0.36293617],
       [0.34074622],
       [0.3493533 ],
       [0.3359724 ],
       [0.34984785],
       [0.36220753],
       [0.34523058],
       [0.32218894],
       [0.39254412],
       [0.33526725],
       [0.3589903 ],
       [0.36786404],
       [0.37393075],
       [0.33414018],
       [0.37377554],
       [0.37522924],
       [0.38861397],
       [0.366619  ],
       [0.33519566],
       [0.36803883],
       [0.35374326],
       [0.34463346],
       [0.3626184 ],
       [0.37787366],
       [0.3568322 ],
       [0.3629298 ],
       [0.375

In [194]:
sklearn.metrics.confusion_matrix(y_test, yhat)

array([[134,   0],
       [  6,   0]])

In [42]:
# imported from: https://github.com/aldi-dimara/keras-focal-loss/blob/master/focal_loss.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 19 08:20:58 2018
@OS: Ubuntu 18.04
@IDE: Spyder3
@author: Aldi Faizal Dimara (Steam ID: phenomos)
"""

import tensorflow.keras.backend as K
import tensorflow as tf

def binary_focal_loss(gamma=2.0, alpha=0.25):
    """
    Implementation of Focal Loss from the paper in multiclass classification
    Formula:
        loss = -alpha_t*((1-p_t)^gamma)*log(p_t)
        
        p_t = y_pred, if y_true = 1
        p_t = 1-y_pred, otherwise
        
        alpha_t = alpha, if y_true=1
        alpha_t = 1-alpha, otherwise
        
        cross_entropy = -log(p_t)
    Parameters:
        alpha -- the same as wighting factor in balanced cross entropy
        gamma -- focusing parameter for modulating factor (1-p)
    Default value:
        gamma -- 2.0 as mentioned in the paper
        alpha -- 0.25 as mentioned in the paper
    """
    def focal_loss(y_true, y_pred):
        # Define epsilon so that the backpropagation will not result in NaN
        # for 0 divisor case
        epsilon = K.epsilon()
        # Add the epsilon to prediction value
        #y_pred = y_pred + epsilon
        # Clip the prediciton value
        y_pred = K.clip(y_pred, epsilon, 1.0-epsilon)
        # Calculate p_t
        p_t = tf.where(K.equal(y_true, 1), y_pred, 1-y_pred)
        # Calculate alpha_t
        alpha_factor = K.ones_like(y_true)*alpha
        alpha_t = tf.where(K.equal(y_true, 1), alpha_factor, 1-alpha_factor)
        # Calculate cross entropy
        cross_entropy = -K.log(p_t)
        weight = alpha_t * K.pow((1-p_t), gamma)
        # Calculate focal loss
        loss = weight * cross_entropy
        # Sum the losses in mini_batch
        loss = K.sum(loss, axis=1)
        return loss
    
    return focal_loss