In [1]:
# Use the Keras functional API to build arbitrary network architectures, such as:
# 1. Sequential model
# 2. A multi-input model
# 3. A multi-output(or multi-head) model
# 4. An Inception module

# 一、Introduction to the functional API

## 1. Calling layers as functions in the functional API

In [None]:
# Minimal functional-API example: a layer instance is called on a tensor
# like a function and returns a new tensor.
from keras import Input, layers  # fixed: 'Inputnput' does not exist in keras

# Symbolic input tensor; shape=(32,) excludes the batch dimension.
input_tensor = Input(shape=(32,))

# A layer instance is callable, just like a function.
dense = layers.Dense(32, activation='relu')

# Calling the layer on a tensor returns a new tensor.
output_tensor = dense(input_tensor)

## 2. The functional API equivalent to a Sequential model

In [None]:
from keras import Input, layers
from keras.models import Model, Sequential

# The Sequential version: the same three-layer stack, passed as a list
# to the constructor instead of being appended with .add().
seq_model = Sequential([
    layers.Dense(32, activation='relu', input_shape=(64,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

# The functional version: start from an input tensor and thread it
# through the layers one call at a time.
input_tensor = Input(shape=(64,))
x = layers.Dense(32, activation='relu')(input_tensor)
x = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')(x)

# Model ties the input tensor and the output tensor together.
model = Model(input_tensor, output_tensor)

# Print the layer-by-layer overview of the functional model.
model.summary()

# 二、 Multi-input models

## 1. Functional API implementation of a two-input question-answering model

In [None]:
# Two-input question-answering model: a reference text and a question are
# each embedded and encoded by their own LSTM, then merged to predict an answer.
from keras import Input, layers
from keras.models import Model

text_vocabulary_size = 10000
question_vocabulary_size = 10000
answer_vocabulary_size = 500

# The text input is a variable-length sequence of integers.
# Inputs can optionally be named. Note the trailing comma: shape must be a
# tuple, and a bare (None) is just None.
text_input = Input(shape=(None,), dtype='int32', name='text')

# Embed the integers into a sequence of 64-dimensional vectors.
# Embedding takes (input_dim, output_dim): vocabulary size first, vector size second.
embedded_text = layers.Embedding(text_vocabulary_size, 64)(text_input)

# Encode the embedded sequence into a single vector with an LSTM.
encoded_text = layers.LSTM(32)(embedded_text)

# Same process (with different layer instances) for the question.
question_input = Input(shape=(None,), dtype='int32', name='question')
embeded_question = layers.Embedding(question_vocabulary_size, 32)(question_input)
encoded_question = layers.LSTM(16)(embeded_question)

# Concatenate the encoded text and the encoded question along the feature axis.
concatenated = layers.concatenate([encoded_text, encoded_question], axis=-1)

# Softmax classifier over the answer vocabulary.
answer = layers.Dense(answer_vocabulary_size, activation='softmax')(concatenated)

# At model instantiation, specify the two inputs and the single output.
model = Model([text_input, question_input], answer)
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['acc'])  # fixed: keyword was misspelled 'metircs'

## 2. Feeding data to a multi-input model

In [None]:
import numpy as np

# Dimensions of the dummy dataset used below.
num_samples = 1000
max_length = 100

# Generate some dummy integer sequences for both inputs.
text = np.random.randint(1, text_vocabulary_size, size=(num_samples, max_length))
question = np.random.randint(1, question_vocabulary_size, size=(num_samples, max_length))

# Answers are one-hot encoded, not integers: draw one class index per sample
# and pick the matching row of the identity matrix.
# (randint(0, 1, ...) would always return 0 because the upper bound is exclusive.)
answers = np.eye(answer_vocabulary_size, dtype='float32')[
    np.random.randint(0, answer_vocabulary_size, size=num_samples)
]

# Fitting using a dictionary of inputs (only possible if the inputs were named).
model.fit({'text': text, 'question': question}, answers, epochs=10, batch_size=128)

# 三、Multi-output models

## 1. Functional API implementation of a three-output model

In [3]:
# Three-output model: a shared 1D-convnet encoder over a sequence of token ids,
# followed by three separate prediction heads (age, income group, gender).
from keras import Input, layers
from keras.models import Model

vocabulary_size = 50000
num_income_groups = 10

# Variable-length sequence of integer token ids.
posts_input = Input(shape=(None,), dtype='int32', name='posts')

# Embedding takes (input_dim, output_dim): vocabulary size first, vector size second.
embedded_posts = layers.Embedding(vocabulary_size, 256)(posts_input)

# Shared convolutional encoder; it must start from the embedded posts.
x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
# Instantiate the pooling layer first, then call it on the tensor.
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

# Note that the output layers are given names, so they can be referenced
# by name in compile() and fit().
age_prediction = layers.Dense(1, name='age')(x)
income_prediction = layers.Dense(num_income_groups, activation='softmax', name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input, [age_prediction, income_prediction, gender_prediction])

## 2. Compilation options of a multi-output model: multiple losses

In [None]:
# One loss per output, given as a list.
model.compile(
    optimizer='rmsprop',
    loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'],
)

# Same configuration keyed by output-layer name
# (only possible because the output layers were given names).
model.compile(
    optimizer='rmsprop',
    loss=dict(
        age='mse',
        income='categorical_crossentropy',
        gender='binary_crossentropy',
    ),
)

## 3. Compilation options of a multi-output model: loss weighting

In [None]:
# One loss and one loss weight per output, given as lists.
model.compile(
    optimizer='rmsprop',
    loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'],
    loss_weights=[0.25, 1., 10.],
)

# Same configuration keyed by output-layer name
# (only possible because the output layers were given names).
model.compile(
    optimizer='rmsprop',
    loss=dict(
        age='mse',
        income='categorical_crossentropy',
        gender='binary_crossentropy',
    ),
    loss_weights=dict(age=0.25, income=1., gender=10.),
)

## 4. Feeding data to a multi-output model

In [None]:
# age_targets, income_targets and gender_targets are assumed to be Numpy arrays
model.fit(posts, [age_targets, income_targets, gender_targets],
         epochs = 10, batch_size = 64)

# Equivalent (only possible if you gave names to the output layers)
model.fit(posts, {'age' : age_targets,
                 'income' : income_targets,
                 'gender' : gender_targets},
         epochs = 10, batch_size = 64)

# 四、Directed acyclic graphs of layers

## 1. Implementing an Inception module with the functional API

In [None]:
# Inception-style module: four parallel branches over the same input,
# concatenated along the channel axis.
from keras import Input, layers

# Example 4D input tensor (batch, height, width, channels) so the cell runs on
# its own; replace with the real feature map when embedding this in a model.
x = Input(shape=(32, 32, 64))

# Every branch has the same stride value (2) and uses padding='same', which is
# necessary to keep all branch outputs the same size so they can be concatenated.
branch_a = layers.Conv2D(128, 1, activation='relu', strides=2, padding='same')(x)

# In this branch, the striding occurs in the spatial (3x3) convolution layer.
branch_b = layers.Conv2D(128, 1, activation='relu', padding='same')(x)
branch_b = layers.Conv2D(128, 3, activation='relu', strides=2, padding='same')(branch_b)

# In this branch, the striding occurs in the average pooling layer.
# Pooling layers take no `activation` argument.
branch_c = layers.AveragePooling2D(3, strides=2, padding='same')(x)
branch_c = layers.Conv2D(128, 3, activation='relu', padding='same')(branch_c)

# In this branch, the striding occurs in the last convolution layer.
branch_d = layers.Conv2D(128, 1, activation='relu', padding='same')(x)
branch_d = layers.Conv2D(128, 3, activation='relu', padding='same')(branch_d)
branch_d = layers.Conv2D(128, 3, activation='relu', strides=2, padding='same')(branch_d)

# Finally, concatenate the branch outputs to obtain the module output.
output = layers.concatenate([branch_a, branch_b, branch_c, branch_d], axis=-1)

## 2. Implementing a residual connection when feature-map sizes are the same: using an identity residual connection

In [None]:
# Identity residual connection: the input is added back, unchanged, to the
# output of a stack of convolutions.
from keras import Input, layers

# Example 4D input tensor (batch, height, width, channels). It has 128 channels
# so that it matches the convolution output and can be added to it directly.
x = Input(shape=(32, 32, 128))

# Apply some transformation to 'x'. padding='same' keeps the spatial size
# unchanged, which the identity connection below requires.
y = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)

# Add the original 'x' back to the output features.
y = layers.add([y, x])

## 3. Implementing a residual connection when feature map sizes differ: using a linear residual connection

In [None]:
# Linear residual connection: the input is reshaped by a 1x1 convolution so it
# can be added to a transformed output whose shape differs from the input's.
from keras import Input, layers

# Example 4D input tensor (batch, height, width, channels) with even spatial
# dimensions, so that both paths below halve them to the same size.
x = Input(shape=(32, 32, 64))

# Apply some transformation to 'x'; the 2D max pooling halves height and width.
y = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
y = layers.MaxPooling2D(2, strides=2)(y)

# Use a 1x1 convolution (no activation) with stride 2 to linearly downsample
# the original 'x' tensor to the same shape as 'y'.
residual = layers.Conv2D(128, 1, strides=2, padding='same')(x)

# Add the downsampled residual back to the output features.
y = layers.add([y, residual])

In [None]:
# p228
# Layer weight sharing
# Model as layers
# TensorBoard