In [None]:
# Define a small classifier with the Keras functional API.
from keras import Input  # Input was used below but never imported (NameError on a fresh kernel)
from keras.models import Model
from keras.layers import Dense

# Symbolic input: one 64-dimensional feature vector per sample.
input_tensor = Input(shape=(64,))
# Layers are called like functions on tensors and return new tensors.
x = Dense(32, activation='relu')(input_tensor)
x = Dense(32, activation='relu')(x)
output_tensor = Dense(10, activation='softmax')(x)

# A Model is defined by the input tensor and the output tensor it connects.
model = Model(input_tensor, output_tensor)
model.summary()

In [None]:
# Two-input question-answering model: a reference text and a question go in,
# a softmax over the answer vocabulary comes out.
from keras import Input, layers
from keras.models import Model

text_vocabulary_size = 10000
question_vocabulary_size = 10000
answer_vocabulary_size = 500

# Text branch: variable-length int32 sequence -> 64-d embeddings -> LSTM(32).
text_input = Input(name='text', dtype='int32', shape=(None,))
embedded_text = layers.Embedding(
    input_dim=text_vocabulary_size, output_dim=64)(text_input)
encoded_text = layers.LSTM(units=32)(embedded_text)

# Question branch: same structure with its own (smaller) embedding and LSTM.
question_input = Input(name='question', dtype='int32', shape=(None,))
embedded_question = layers.Embedding(
    input_dim=question_vocabulary_size, output_dim=32)(question_input)
encoded_question = layers.LSTM(units=16)(embedded_question)

# Join both encodings along the feature axis and classify over the answers.
concatenated = layers.concatenate([encoded_text, encoded_question], axis=-1)
answer = layers.Dense(
    units=answer_vocabulary_size, activation='softmax')(concatenated)

model = Model(inputs=[text_input, question_input], outputs=answer)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

有两个可用的API可以用来训练上面的双输入模型:
- 向模型输入一个由Numpy数组组成的列表
- 输入一个将输入名称映射为Numpy数组的字典

In [None]:
import numpy as np
import keras

num_samples = 1000
max_length = 100

# Randomly generate dummy token ids for the text and question inputs
text = np.random.randint(1, text_vocabulary_size,
                         size=(num_samples, max_length))
question = np.random.randint(1, question_vocabulary_size,
                             size=(num_samples, max_length))

# Random integer answer labels, converted to one-hot rows for the softmax output
answers = np.random.randint(answer_vocabulary_size, size=(num_samples))
answers = keras.utils.to_categorical(answers, answer_vocabulary_size)

# Option 1: pass the inputs as a list of Numpy arrays
model.fit([text, question], answers, epochs=10, batch_size=128)
# Option 2: pass a dict that maps each Input name to its Numpy array
# model.fit({'text': text, 'question': question}, answers, epochs=10, batch_size=128)

In [None]:
# Define a multi-output model: one shared 1D-convnet trunk with three heads
# (age, income and gender).
from keras import layers, Input
from keras.models import Model

vocabulary_size = 5000
num_income_group = 10

posts_input = Input(shape=(None,), dtype='int32', name='posts')
# Embedding takes (input_dim, output_dim): the vocabulary size comes first and
# the embedding width second. The arguments were previously swapped, which
# limited valid token ids to < 256 and produced 5000-dimensional vectors.
embedded_posts = layers.Embedding(vocabulary_size, 256)(posts_input)
x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
# Collapse the time axis so the heads receive one fixed-size vector per sample.
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

# Output layers are named so that losses and targets can be given by name.
# 'age' has no activation (linear scalar output).
age_prediction = layers.Dense(1, name='age')(x)
income_prediction = layers.Dense(num_income_group,
                                 activation='softmax',
                                 name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input, [age_prediction, income_prediction, gender_prediction])

 多头(多输出)模型需要为各个头指定不同的损失函数
 梯度下降是将一个标量最小化, 所以为了能够训练模型, 必须将多个损失合并为一个标量
 最简单的合并方法就是求和
 keras中可以在编译时使用损失组成的列表或字典来为不同的输出指定不同的损失, 然后将得到的损失相加得到一个全局损失, 并在训练过程中将这个损失最小化

In [None]:
# Compile option 1: a list of losses, matched to the model outputs by position
model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'])
# Compile option 2: a dict of losses keyed by output-layer name.
# This must be a dict literal ({...}); the earlier parenthesised form was a
# SyntaxError. Either option alone is sufficient: a later compile() call
# replaces the earlier configuration.
model.compile(optimizer='rmsprop',
              loss={'age': 'mse',
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'})

严重不平衡的损失贡献会导致模型表示针对单个损失值最大的任务进行优先优化, 而不考虑其它任务的优化. 针对这一点, 可以为每个损失值对最终损失的贡献分配不同大小的重要性. 
比如, 用于年龄回归任务的均方误差(MSE)损失值约为3-5, 而用于性别分类任务的交叉熵损失值可能低至0.1. 为了平衡不同损失的贡献, 可以让交叉熵损失的权重为10, MSE损失的权重为0.25.
上面的编译方法更新为

In [None]:
# Compile option 1: losses and their weights as lists, matched to the model
# outputs by position
model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'],
              loss_weights=[0.25, 1., 10.])
# Compile option 2: losses and weights as dicts keyed by output-layer name.
# The loss mapping must be a dict literal ({...}); the earlier parenthesised
# form was a SyntaxError. Either option alone is sufficient: a later compile()
# call replaces the earlier configuration.
model.compile(optimizer='rmsprop',
              loss={'age': 'mse',
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'},
              loss_weights={'age': 0.25,
                            'income': 1.,
                            'gender': 10.})

In [None]:
# As with multi-input models, the training data for a multi-output model can be
# given either as a list of Numpy arrays or as a dict.
# NOTE(review): posts, age_targets, income_targets and gender_targets are not
# defined in the visible cells; presumably Numpy arrays prepared elsewhere. Confirm.
# Training option 1: list of target arrays, in the order of the model outputs
model.fit(posts, [age_targets, income_targets, gender_targets],
          epochs=10, batch_size=64)
# Training option 2: dict of target arrays keyed by output-layer name
model.fit(posts, {'age': age_targets,
                  'income': income_targets,
                  'gender': gender_targets},
          epochs=10, batch_size=64)

层组成的有向无环图:
- Inception模块  
  1x1卷积的作用: 有助于区分通道特征学习和空间特征学习. 如果假设每个通道在跨越空间时是高度自相关的, 但不同的通道之间可能并不高度相关, 那么使用1x1卷积是合理的.
- 残差连接(residual connection)  
  让前面某层的输出作为后面某层的输入, 从而在序列网络中有效地创造了一条捷径. 若前面层和后面层的两个激活形状相同, 则将前面层的输出直接与后面层的激活相加; 否则, 用一个线性变换将前面层的激活改变成目标形状.
  解决了大规模深度学习模型的两个共性问题:梯度消失和表示瓶颈. 向任何多于10层的模型中添加残差连接, 都可能会有帮助.

In [None]:
# Residual connection example (the main branch changes the feature-map size,
# so the shortcut needs a linear transformation before the addition).
from keras import layers

# Placeholder: replace with a real 4D feature-map tensor before running.
x = ...
y = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
# Stack the second convolution on y. It was previously applied to x, which
# silently discarded the first convolution.
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
y = layers.MaxPooling2D(2, strides=2)(y)

# Strided 1x1 convolution: linearly maps x to 128 channels at half resolution
# so it can be added to y (shapes match when the spatial size is even).
residual = layers.Conv2D(128, 1, strides=2, padding='same')(x)

# Add the shortcut back onto the main branch.
y = layers.add([y, residual])

#### 共享层权重:
函数式API能够多次重复使用一个层的实例. 对一个层实例调用两次, 那么每次调用可以重复使用相同的权重. 构建具有共享分支的模型.  
#### 将模型作为层:
函数式API中可以像使用层一样使用模型. 可以将模型看作"更大的层"

In [None]:
from keras import layers
from keras import Input
from keras.models import Model

# Instantiate one LSTM layer; left_input and right_input both go through this
# same instance, so the two branches share its weights.
lstm = layers.LSTM(32)

left_input = Input(shape=(None, 128))
left_output = lstm(left_input)

right_input = Input(shape=(None, 128))
right_output = lstm(right_input)

# Join both encodings and score the pair with a single sigmoid unit.
merged = layers.concatenate([left_output, right_output], axis=-1)
predictions = layers.Dense(1, activation='sigmoid')(merged)

model = Model([left_input, right_input], predictions)
# A model has to be compiled before fit() can be called; binary cross-entropy
# matches the single sigmoid output.
model.compile(optimizer='rmsprop', loss='binary_crossentropy')
# NOTE(review): left_data, right_data and targets are not defined in the visible
# cells; presumably Numpy arrays prepared elsewhere, with binary targets. Confirm.
# Training this model updates the shared LSTM weights from both inputs.
model.fit([left_data, right_data], targets)

In [None]:
from keras import Input, applications, layers

# Use an image-processing base model as if it were a single layer; calling the
# same instance on both inputs makes the two branches share its weights.
xception_base = applications.Xception(include_top=False, weights=None)

# Both inputs are 250x250 RGB images.
image_shape = (250, 250, 3)
left_input = Input(shape=image_shape)
right_input = Input(shape=image_shape)

left_features = xception_base(left_input)
right_features = xception_base(right_input)

# Merge the two feature maps along the channel (last) axis.
feature_pair = [left_features, right_features]
merged_features = layers.concatenate(feature_pair, axis=-1)