In [2]:
import tensorflow as tf
import numpy as np
import functools
from tqdm import tqdm 
import pandas as pd
import sklearn

In [33]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Bidirectional
from tensorflow.keras import metrics, losses
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [5]:
df = pd.read_csv('./data/bubble_detection.csv')

In [6]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit_transform(df[['real_gdp_growth', 'inflation']])

array([[0.62753999, 0.40808162],
       [0.62753999, 0.48350019],
       [0.62753999, 0.44548298],
       ...,
       [0.54724631, 0.4790387 ],
       [0.54724631, 0.49543418],
       [0.54724631, 0.51360007]])

In [7]:
time_steps = 36
batch_size = 16

In [8]:
x = []
y = []
for row in range(len(df) - time_steps):
    x.append(df.iloc[row : row + time_steps, 1:8])
    y.append(df.iloc[row + time_steps - 1, 9])

In [10]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=202144)

In [11]:
df

Unnamed: 0,Date,real_gdp_growth,inflation,tbill_yield,shiller_pe,consumer_confidence,mktcap_gdp_ratio,sp500_return,crash,bubble
0,1960-01-01,9.296777,-1.619944,4.72,18.34,101.702825,0.106939,-1.743989,0,0
1,1960-02-01,9.296777,1.646619,4.49,17.55,101.414946,0.102792,-3.877305,0,0
2,1960-03-01,9.296777,0.000000,4.25,17.29,101.166242,0.101392,-1.362496,0,0
3,1960-04-01,-2.140153,5.435193,4.28,17.43,101.029054,0.102998,1.290440,0,0
4,1960-05-01,-2.140153,1.225517,4.35,17.26,101.063690,0.102055,-0.915127,0,0
...,...,...,...,...,...,...,...,...,...,...
727,2020-08-01,33.441306,4.290896,0.65,31.16,98.551025,0.160211,5.739146,0,0
728,2020-09-01,33.441306,2.990384,0.68,30.84,98.895809,0.158974,-0.772177,0,0
729,2020-10-01,4.091780,1.453380,0.79,31.28,99.069467,0.159099,1.580142,0,0
730,2020-11-01,4.091780,2.163509,0.87,32.44,99.043532,0.165162,3.811098,0,0


In [12]:
x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

In [13]:
x_train.shape
y_train.shape

(556,)

In [14]:
1 - sum(y_train) / len(y_train)

0.9460431654676259

In [15]:
model = Sequential([
    Bidirectional(LSTM(units=50,
                    input_shape=(x_train.shape[1], x_train.shape[2]),
                    recurrent_activation='sigmoid',
                    recurrent_initializer='glorot_uniform')),
    Dense(units=1, activation='sigmoid')
])

In [40]:
model.compile(optimizer=Adam(learning_rate=1e-2),
            loss=binary_focal_loss(),
            metrics=[metrics.BinaryAccuracy(), metrics.Precision(), metrics.Recall()])

In [41]:
model.fit(x_train, y_train)

Train on 556 samples
 32/556 [>.............................] - ETA: 7s

TypeError: Input 'y' of 'Mul' Op has type float32 that does not match type int64 of argument 'x'.

In [23]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional multiple                  23200     
_________________________________________________________________
dense (Dense)                multiple                  101       
Total params: 23,301
Trainable params: 23,301
Non-trainable params: 0
_________________________________________________________________


In [24]:
yhat = model.predict_classes(x_test)

In [25]:
sklearn.metrics.confusion_matrix(y_test, yhat)

array([[134,   0],
       [  6,   0]])

In [39]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 19 08:20:58 2018
@OS: Ubuntu 18.04
@IDE: Spyder3
@author: Aldi Faizal Dimara (Steam ID: phenomos)
"""

import tensorflow.keras.backend as K
import tensorflow as tf

def categorical_focal_loss(gamma=2.0, alpha=0.25):
    """
    Implementation of Focal Loss from the paper in multiclass classification
    Formula:
        loss = -alpha*((1-p)^gamma)*log(p)
    Parameters:
        alpha -- the same as wighting factor in balanced cross entropy
        gamma -- focusing parameter for modulating factor (1-p)
    Default value:
        gamma -- 2.0 as mentioned in the paper
        alpha -- 0.25 as mentioned in the paper
    """
    def focal_loss(y_true, y_pred):
        # Define epsilon so that the backpropagation will not result in NaN
        # for 0 divisor case
        epsilon = K.epsilon()
        # Add the epsilon to prediction value
        #y_pred = y_pred + epsilon
        # Clip the prediction value
        y_pred = K.clip(y_pred, epsilon, 1.0-epsilon)
        # Calculate cross entropy
        cross_entropy = -y_true*K.log(y_pred)
        # Calculate weight that consists of  modulating factor and weighting factor
        weight = alpha * y_true * K.pow((1-y_pred), gamma)
        # Calculate focal loss
        loss = weight * cross_entropy
        # Sum the losses in mini_batch
        loss = K.sum(loss, axis=1)
        return loss
    
    return focal_loss

def binary_focal_loss(gamma=2.0, alpha=0.25):
    """
    Implementation of Focal Loss from the paper in multiclass classification
    Formula:
        loss = -alpha_t*((1-p_t)^gamma)*log(p_t)
        
        p_t = y_pred, if y_true = 1
        p_t = 1-y_pred, otherwise
        
        alpha_t = alpha, if y_true=1
        alpha_t = 1-alpha, otherwise
        
        cross_entropy = -log(p_t)
    Parameters:
        alpha -- the same as wighting factor in balanced cross entropy
        gamma -- focusing parameter for modulating factor (1-p)
    Default value:
        gamma -- 2.0 as mentioned in the paper
        alpha -- 0.25 as mentioned in the paper
    """
    def focal_loss(y_true, y_pred):
        # Define epsilon so that the backpropagation will not result in NaN
        # for 0 divisor case
        epsilon = K.epsilon()
        # Add the epsilon to prediction value
        #y_pred = y_pred + epsilon
        # Clip the prediciton value
        y_pred = K.clip(y_pred, epsilon, 1.0-epsilon)
        # Calculate p_t
        p_t = tf.where(K.equal(y_true, 1), y_pred, 1-y_pred)
        # Calculate alpha_t
        alpha_factor = K.ones_like(y_true)*alpha
        alpha_t = tf.where(K.equal(y_true, 1), alpha_factor, 1-alpha_factor)
        # Calculate cross entropy
        cross_entropy = -K.log(p_t)
        weight = alpha_t * K.pow((1-p_t), gamma)
        # Calculate focal loss
        loss = weight * cross_entropy
        # Sum the losses in mini_batch
        loss = K.sum(loss, axis=1)
        return loss
    
    return focal_loss

## Below are all draft

In [27]:
def LSTM(rnn_units):
    return tf.keras.layers.LSTM(
        rnn_units,
        return_sequences=True,
        recurrent_initializer='glorot_uniform',
        recurrent_activation='sigmoid',
        stateful=True
    )

In [28]:
def build_model(rnn_units, batch_zize):
    return tf.keras.Sequential([
        LSTM(rnn_units),
        tf.keras.layers.Dense()
    ])


In [29]:
def compute_loss(labels, logits):
    return tf.keras.losses.categorical_crossentropy(labels, logits, from_logits=True)

In [30]:
model = build_model(rnn_units, batch_zize)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        y_hat = model(x)
        loss = compute_loss(y, y_hat)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

NameError: name 'rnn_units' is not defined

In [None]:
predication_days = 60


In [None]:
p = [0.036, 0.021]