In [1]:
from sklearn.datasets import make_classification

In [2]:
# Synthetic 4-class problem: 10,000 samples with 30 features, 10 of which
# are informative. `random_state` is pinned so the generated dataset is the
# same on every Restart & Run All instead of changing with each execution.
base_dataset = make_classification(
    n_samples=10_000,
    n_features=30,
    n_informative=10,
    n_clusters_per_class=2,
    n_classes=4,
    random_state=0,
)

# x: feature matrix, y: integer class label per row.
x, y = base_dataset

In [3]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the full feature matrix, then apply the fitted
# transformation to that same matrix. `ss` keeps the fitted scaler around.
ss = StandardScaler().fit(x)
x_standardized = ss.transform(x)

In [4]:
# base_classes[k] holds every standardized row whose label is k (k = 0..3).
base_classes = [x_standardized[y == label] for label in range(4)]

In [5]:
import numpy as np

# Number of sequences generated for each of the two sequence-level classes.
num_points = 5_000

# Initial probabilities over the four base classes (index = base class id)
# used when drawing the steps of a sequence.
class1_dist = np.array([0.5, 0.5, 0.0, 0.0])
class2_dist = np.array([0.0, 0.2, 0.6, 0.2])

def make_var_len_feature_point(dist, class_pools=None, max_len=10, min_len=3):
    """Build one zero-padded, variable-length sequence of cumulative feature sets.

    A sequence length is drawn uniformly from ``min_len..max_len`` (both
    inclusive). At every step a base class is sampled from the current class
    distribution, one row is drawn at random from that class's pool, and the
    row is added to a running total; the running total *at that step* is the
    step's feature set. After each draw the sampled class receives extra
    probability mass, so later steps tend to repeat earlier classes. Steps
    beyond the drawn length are filled with zeros.

    Parameters
    ----------
    dist : array-like of shape (n_classes,)
        Initial class probabilities (must sum to 1). Not modified.
    class_pools : sequence of 2-D arrays, optional
        ``class_pools[k]`` holds the candidate rows for base class ``k``.
        Defaults to the notebook-level ``base_classes``.
    max_len : int, optional
        Padded sequence length (default 10).
    min_len : int, optional
        Smallest number of real steps (default 3).

    Returns
    -------
    numpy.ndarray of shape (1, max_len, n_features)
    """
    if class_pools is None:
        class_pools = base_classes

    # Float working copy so the caller's array is never mutated.
    sequence_dist = np.array(dist, dtype=float)
    n_classes = sequence_dist.shape[0]
    n_features = class_pools[0].shape[1]

    feature_sets = []
    running_total = np.zeros((1, n_features))
    num_steps = np.random.randint(min_len, max_len + 1)
    for _ in range(num_steps):
        # choose which base class this step comes from
        base_class = np.random.choice(n_classes, 1, p=sequence_dist)
        pool = class_pools[base_class[0]]
        row_idx = np.random.choice(pool.shape[0], 1)

        # Allocate a NEW array for each step. Updating the total in place
        # (`+=`) would append the same object every time, so all steps
        # would end up equal to the final total.
        running_total = running_total + pool[row_idx]
        feature_sets.append(running_total)

        # now make it more likely to come from the same class again
        dist_update = np.zeros(n_classes)
        dist_update[base_class] = 1
        sequence_dist = sequence_dist + dist_update
        sequence_dist = sequence_dist / sequence_dist.sum()

    # Zero-pad up to the fixed sequence length.
    feature_sets.extend(
        np.zeros((1, n_features)) for _ in range(max_len - num_steps))

    return np.concatenate(feature_sets)[np.newaxis, :, :]


# Generate `num_points` sequences per sequence-level class and stack them
# along the first axis. Class 1 is generated in full before class 2.
class1_points = np.concatenate(
    [make_var_len_feature_point(class1_dist) for _ in range(num_points)])

class2_points = np.concatenate(
    [make_var_len_feature_point(class2_dist) for _ in range(num_points)])

In [6]:
# Sanity check on the dimensions of the generated class-2 array.
class2_points.shape

(5000, 10, 30)

In [7]:
def bootstrap_sample_generator(batch_size, negatives=None, positives=None):
    """Yield an endless stream of class-balanced batches drawn with replacement.

    Each batch stacks ``batch_size // 2`` sequences sampled from
    ``negatives`` (label 0) followed by ``batch_size // 2`` sequences sampled
    from ``positives`` (label 1). For an odd ``batch_size`` the batch
    therefore holds ``batch_size - 1`` rows.

    Indices are drawn independently for each class and bounded by that
    class's own length, so the two halves are not tied to the same row
    positions and the pools may differ in size.

    Parameters
    ----------
    batch_size : int
        Nominal number of sequences per batch.
    negatives : numpy.ndarray, optional
        Pool for label 0. Defaults to the notebook-level ``class1_points``.
    positives : numpy.ndarray, optional
        Pool for label 1. Defaults to the notebook-level ``class2_points``.

    Yields
    ------
    tuple of (dict, dict)
        ``({'numeric_inputs': batch_x}, {'output': batch_y})``.
    """
    if negatives is None:
        negatives = class1_points
    if positives is None:
        positives = class2_points

    half = batch_size // 2
    while True:
        neg_idx = np.random.choice(negatives.shape[0], half)
        pos_idx = np.random.choice(positives.shape[0], half)
        batch_x = np.concatenate([negatives[neg_idx], positives[pos_idx]])
        batch_y = np.concatenate([np.zeros(half), np.ones(half)])
        yield ({'numeric_inputs': batch_x},
               {'output': batch_y})

In [8]:
import tensorflow as tf

# Dropout rate shared by the Dropout layers of the model below.
p = 0.1

In [9]:
# Model input: sequences of 10 timesteps with 30 features per step. The name
# must match the key of the dict yielded by the batch generator.
inputs = tf.keras.layers.Input(shape=(10, 30), name='numeric_inputs')

In [10]:
# First encoder pass: input dropout -> bidirectional GRU (10 units per
# direction) that collapses the sequence to one vector -> batch norm.
dropped_inputs = tf.keras.layers.Dropout(p)(inputs)
sequence_summary = tf.keras.layers.Bidirectional(
    tf.keras.layers.GRU(10)
)(dropped_inputs)
x = tf.keras.layers.BatchNormalization()(sequence_summary)

In [11]:
# Bonus second pass: repeat the current summary vector `x` across all 10
# timesteps and concatenate it with the raw inputs, so the second recurrent
# encoder sees that summary at every step.
summary_per_step = tf.keras.layers.RepeatVector(10)(x)
augmented_sequence = tf.keras.layers.Concatenate()([inputs, summary_per_step])

augmented_sequence = tf.keras.layers.Dropout(p)(augmented_sequence)
second_summary = tf.keras.layers.Bidirectional(
    tf.keras.layers.GRU(10)
)(augmented_sequence)

x = tf.keras.layers.BatchNormalization()(second_summary)

In [12]:
# Classification head: three Dropout -> Dense(relu) -> BatchNorm stages of
# decreasing width, then a final dropout and a single sigmoid unit.
for units in (100, 20, 10):
    x = tf.keras.layers.Dropout(p)(x)
    x = tf.keras.layers.Dense(units, activation='relu')(x)
    x = tf.keras.layers.BatchNormalization()(x)

x = tf.keras.layers.Dropout(p)(x)
# The layer name must match the key of the target dict yielded by the
# batch generator.
out = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(x)

In [13]:
# Wrap the layer graph into a trainable model and configure it for binary
# classification.
model = tf.keras.Model(inputs=inputs, outputs=out)
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [14]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
numeric_inputs (InputLayer)     [(None, 10, 30)]     0                                            
__________________________________________________________________________________________________
dropout (Dropout)               (None, 10, 30)       0           numeric_inputs[0][0]             
__________________________________________________________________________________________________
bidirectional (Bidirectional)   (None, 20)           2520        dropout[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 20)           80          bidirectional[0][0]              
_______________________________________________________________________________________

In [15]:
batch_size = 32

# One "epoch" covers roughly as many samples as both generated class arrays
# hold together (the generator itself is endless, so the step count is what
# defines an epoch).
steps_per_epoch = (
    class1_points.shape[0] + class2_points.shape[0]) // batch_size

# `Model.fit` accepts Python generators directly; `fit_generator` is
# deprecated. `max_queue_size` is left at its default of 10, which is the
# value that was previously passed explicitly.
model.fit(
    bootstrap_sample_generator(batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=5,
)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fd5c92af6d0>