# Keras Functional API basics

## Functional Equivalent of Sequential model

In [1]:
from keras import models, layers, Input

# Sequential model: a plain stack of layers, handed to the constructor as a
# list. Only the first layer declares the input shape (64 features); each
# later layer takes its input size from the layer before it.
seq_model = models.Sequential([
    layers.Dense(units=32, activation='relu', input_shape=(64,)),
    layers.Dense(units=32, activation='relu'),
    layers.Dense(units=10, activation='softmax'),
])

seq_model.summary()

Using TensorFlow backend.


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 10)                330       
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


In [2]:
# Functional equivalent of the Sequential model above: layers are created
# first, then called on tensors to wire the graph from input to output.
input_tensor = Input(shape=(64,))

first_hidden = layers.Dense(units=32, activation='relu')
second_hidden = layers.Dense(units=32, activation='relu')
classifier = layers.Dense(units=10, activation='softmax')

# `x` ends up holding the output of the second hidden layer.
x = first_hidden(input_tensor)
x = second_hidden(x)
output_tensor = classifier(x)

# A Model is defined purely by its input and output tensors.
func_model = models.Model(inputs=input_tensor, outputs=output_tensor)
func_model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_5 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_6 (Dense)              (None, 10)                330       
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


## Multi-Input Model

In [4]:
## Example two-input question answering model
text_vocabulary_size = 10000
question_vocabulary_size = 10000
answer_vocabulary_size = 500

# Branch 1 - reference text: an integer sequence of unspecified length,
# embedded into 64-d vectors and encoded by an LSTM into a single 32-d vector.
text_input = Input(shape=(None,), dtype='int32', name='text')
embedded_text = layers.Embedding(input_dim=text_vocabulary_size,
                                 output_dim=64)(text_input)
encoded_text = layers.LSTM(units=32)(embedded_text)

# Branch 2 - question: same pattern with a smaller embedding (32-d) and a
# smaller LSTM (16 units).
question_input = Input(shape=(None,), dtype='int32', name='question')
embedded_question = layers.Embedding(input_dim=question_vocabulary_size,
                                     output_dim=32)(question_input)
encoded_question = layers.LSTM(units=16)(embedded_question)

# Merge the two encoded branches along the feature axis.
concatenated = layers.Concatenate(axis=1)([encoded_text, encoded_question])

# Output layer: softmax over the answer vocabulary (a one-word answer).
answer = layers.Dense(units=answer_vocabulary_size,
                      activation='softmax')(concatenated)

# A multi-input model takes its inputs as a list.
model = models.Model(inputs=[text_input, question_input], outputs=answer)

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['acc'])
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               (None, None)         0                                            
__________________________________________________________________________________________________
question (InputLayer)           (None, None)         0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 64)     640000      text[0][0]                       
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, None, 32)     320000      question[0][0]                   
____________________________________________________________________________________________

In [14]:
# Fit arbitrary data
import numpy as np

num_samples = 1000
max_length = 100

# Random integer sequences standing in for tokenised text and questions.
# Ids are drawn from [1, vocabulary_size); id 0 is never produced.
text = np.random.randint(1, text_vocabulary_size,
                         size=(num_samples, max_length))
question = np.random.randint(1, question_vocabulary_size,
                             size=(num_samples, max_length))

# One-hot answer targets: start from an explicit all-zero matrix and set
# exactly one randomly chosen class per sample.
# (np.random.randint(0, 1, ...) would also give all zeros, because `high` is
# exclusive, but that hides the intent behind a "random" call.)
answer_indices = np.random.randint(0, answer_vocabulary_size, size=num_samples)
answers = np.zeros((num_samples, answer_vocabulary_size), dtype=int)
answers[np.arange(num_samples), answer_indices] = 1

# Inputs are passed as a list in the same order as the model's inputs
# (text first, then question).
model.fit(
    [text, question],
    answers,
    epochs=2,
    batch_size=128
)

Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x22f6728b3c8>

## Multi-Output Model

In [23]:
## Example three-output model - predict age, income and gender from social media text posts
vocabulary_size = 50000
num_income_groups = 10

posts_input = Input(shape=(None,), dtype='int32', name='posts')
embedded_posts = layers.Embedding(input_dim=vocabulary_size,
                                  output_dim=256)(posts_input)

# Shared 1D-convolutional trunk; every output head below reads its result.
x = layers.Conv1D(filters=128, kernel_size=5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(pool_size=5)(x)
x = layers.Conv1D(filters=256, kernel_size=5, activation='relu')(x)
x = layers.Conv1D(filters=256, kernel_size=5, activation='relu')(x)
x = layers.MaxPooling1D(pool_size=2)(x)
x = layers.Conv1D(filters=256, kernel_size=2, activation='relu')(x)
x = layers.Conv1D(filters=256, kernel_size=2, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(units=128, activation='relu')(x)

# Branch off separate outputs; the layer names identify each head.
age_prediction = layers.Dense(units=1, name='age')(x)  # no activation: scalar regression
income_prediction = layers.Dense(units=num_income_groups,
                                 activation='softmax', name='income')(x)
gender_prediction = layers.Dense(units=1, activation='sigmoid', name='gender')(x)

model = models.Model(
    inputs=posts_input,
    outputs=[age_prediction, income_prediction, gender_prediction],
)

# One loss per output, listed in the same order as the model outputs
# (age, income, gender). Keras combines them into a single training loss as a
# weighted sum using `loss_weights` (optional).
model.compile(
    optimizer='rmsprop',
    loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'],
    loss_weights=[0.25, 1, 10],
)

model.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
posts (InputLayer)              (None, None)         0                                            
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, None, 256)    12800000    posts[0][0]                      
__________________________________________________________________________________________________
conv1d_16 (Conv1D)              (None, None, 128)    163968      embedding_7[0][0]                
__________________________________________________________________________________________________
max_pooling1d_7 (MaxPooling1D)  (None, None, 128)    0           conv1d_16[0][0]                  
____________________________________________________________________________________________

In [24]:
# Fit arbitrary (random) data to check that the three-output model trains.
num_samples = 1000
max_length = 100
max_age = 100

# Random integer sequences standing in for tokenised posts; ids are drawn
# from [1, vocabulary_size).
posts = np.random.randint(1, vocabulary_size,
                          size=(num_samples, max_length))
# Ages in [1, max_age] and binary gender labels in {0, 1}, one per sample.
age_targets = np.random.randint(1, max_age + 1,
                                size=(num_samples, 1))
gender_targets = np.random.randint(0, 2,
                                   size=(num_samples, 1))

# One-hot income targets: start from an explicit all-zero matrix and set
# exactly one randomly chosen income group per sample.
# (np.random.randint(0, 1, ...) would also give all zeros, because `high` is
# exclusive, but that hides the intent behind a "random" call.)
income_groups = np.random.randint(0, num_income_groups, size=num_samples)
income_targets = np.zeros((num_samples, num_income_groups), dtype=int)
income_targets[np.arange(num_samples), income_groups] = 1

# Targets are passed in the same order as the model's outputs
# (age, income, gender).
model.fit(
    posts,
    [age_targets, income_targets, gender_targets],
    epochs=3,
    batch_size=64
)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.callbacks.History at 0x22f8188d8d0>

## Directed Acyclic Graph Architectures

In [36]:
# Inception-style module (the building block used in Inception V3): four
# parallel branches read the same input, each reduces the spatial size with a
# single stride-2 step, and the results are concatenated along the channel
# axis.
x = Input(shape=(255, 255, 3), dtype='float32')

# Branch A: strided 1x1 convolution.
branch_a = layers.Conv2D(filters=128, kernel_size=1,
                         activation='relu', strides=2)(x)

# Branch B: 1x1 convolution, then a strided 3x3 convolution.
branch_b = layers.Conv2D(filters=128, kernel_size=1, activation='relu')(x)
branch_b = layers.Conv2D(filters=128, kernel_size=3, activation='relu',
                         strides=2, padding='same')(branch_b)

# Branch C: strided average pooling, then a 3x3 convolution.
branch_c = layers.AveragePooling2D(pool_size=3, strides=2, padding='same')(x)
branch_c = layers.Conv2D(filters=128, kernel_size=3, activation='relu',
                         padding='same')(branch_c)

# Branch D: 1x1 convolution, a 3x3 convolution, then a strided 3x3 convolution.
branch_d = layers.Conv2D(filters=128, kernel_size=1, activation='relu')(x)
branch_d = layers.Conv2D(filters=128, kernel_size=3, activation='relu',
                         padding='same')(branch_d)
branch_d = layers.Conv2D(filters=128, kernel_size=3, activation='relu',
                         strides=2, padding='same')(branch_d)

# Branch outputs must agree on every axis except the one being concatenated.
output = layers.Concatenate(axis=-1)([branch_a, branch_b, branch_c, branch_d])

model = models.Model(inputs=x, outputs=output)
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_13 (InputLayer)           (None, 255, 255, 3)  0                                            
__________________________________________________________________________________________________
conv2d_55 (Conv2D)              (None, 255, 255, 128 512         input_13[0][0]                   
__________________________________________________________________________________________________
conv2d_52 (Conv2D)              (None, 255, 255, 128 512         input_13[0][0]                   
__________________________________________________________________________________________________
average_pooling2d_8 (AveragePoo (None, 128, 128, 3)  0           input_13[0][0]                   
____________________________________________________________________________________________