In [2]:
from tensorflow.keras import layers, Input

In [4]:
input_tensor = Input(shape=(32,))

In [5]:
dense = layers.Dense(32, activation='relu')

In [6]:
output_tensor = dense(input_tensor)

In [7]:
type(output_tensor)

tensorflow.python.framework.ops.Tensor

In [8]:
type(input_tensor)

tensorflow.python.framework.ops.Tensor

In [9]:
from tensorflow.keras.models import Sequential, Model


In [10]:
seq = Sequential()

In [11]:
seq.add(layers.Dense(32, activation='relu', input_shape=(32,)))

In [12]:
seq.add(layers.Dense(32, activation='relu'))

In [13]:
seq.add(layers.Dense(1, activation='sigmoid'))

In [14]:
seq.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 33        
Total params: 2,145
Trainable params: 2,145
Non-trainable params: 0
_________________________________________________________________


In [15]:
Input_tensor = Input(shape=(32,))

In [17]:
x = layers.Dense(32, activation='relu')(Input_tensor)

In [20]:
x = layers.Dense(32, activation='relu')(x)

In [24]:
output_tensor = layers.Dense(1, activation='sigmoid')(x)

In [25]:
model_fn = Model(Input_tensor, output_tensor) 

In [26]:
model_fn.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 32)]              0         
_________________________________________________________________
dense_4 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_6 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 33        
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 2         
Total params: 2,147
Trainable params: 2,147
Non-trainable params: 0
_________________________________________________________________


# Multi-input models

A typical question-answering model has two inputs: a natural-language question and a text snippet (such as a news article) providing information to be used for answering the question. The model must then produce an answer: in the simplest possible setup, this is a one-word answer obtained via a softmax over some predefined vocabulary

<img src='img/multi-model.jpg'>


In [29]:
from tensorflow.keras import layers, Input
from tensorflow.keras.models import Model

In [30]:
text_vocab_size = 10000
question_vocab_size = 10000
anser_vocab_size = 500


In [31]:
text_input = Input(shape=(None,), dtype='int32', name='text')

In [33]:
embedded_text = layers.Embedding(64, text_vocab_size)(text_input)

In [34]:
lstm_text = layers.LSTM(32)(embedded_text)

In [35]:
question_input = Input(shape=(None,), dtype='int32', name='question')

In [36]:
embedded_question = layers.Embedding(64, question_vocab_size)(question_input)

In [37]:
lstm_question = layers.LSTM(32)(embedded_question)

In [40]:
concatenated = layers.concatenate([lstm_text, lstm_question], axis=-1)

In [41]:
answer = layers.Dense(anser_vocab_size, activation='softmax')(concatenated)

In [42]:
multi_model = Model([text_input, question_input], answer)

In [43]:
multi_model.summary()

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               [(None, None)]       0                                            
__________________________________________________________________________________________________
question (InputLayer)           [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, None, 10000)  640000      text[0][0]                       
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 10000)  640000      question[0][0]                   
_______________________________________________________________________________________

In [45]:
multi_model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['acc'])

In [49]:
import numpy as np

num_samples = 1000
max_length = 100

text = np.random.randint(1, text_vocab_size,
                         size=(num_samples, max_length))
question = np.random.randint(1, question_vocab_size,
                             size=(num_samples, max_length))
answers = np.random.randint(0, 1,
                            size=(num_samples, anser_vocab_size))

#multi_model.fit([text, question], answers, epochs=10, batch_size=128)

#multi_model.fit({'text': text, 'question': question}, answers,          epochs=10, batch_size=128)




# 7.1.3. Multi-output models
In the same way, you can use the functional API to build models with multiple outputs (or multiple heads). A simple example is a network that attempts to simultaneously predict different properties of the data, such as a network that takes as input a series of social media posts from a single anonymous person and tries to predict attributes of that person, such as age, gender, and income level

<img src='img/multi-outout.png'>

In [54]:

vocabulary_size = 50000
num_income_groups = 10

post_input = Input(shape=(None,), dtype='int32', name='post_in')

embedding_post = layers.Embedding(256, vocabulary_size)(post_input)

x = layers.Conv1D(128, 5, activation='relu')(embedding_post)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)


In [55]:
age_prediction = layers.Dense(1, name='age')(x)

In [56]:
income_prediction = layers.Dense(num_income_groups, activation='softmax', name='income')(x)

In [59]:
gender_prediction = layers.Dense(1, activation='sigmoid',name='gender')(x)

In [61]:
model = Model(post_input,
              [age_prediction, income_prediction, gender_prediction])

In [62]:
model.summary()

Model: "functional_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
post_in (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, None, 50000)  12800000    post_in[0][0]                    
__________________________________________________________________________________________________
conv1d_6 (Conv1D)               (None, None, 128)    32000128    embedding_5[0][0]                
__________________________________________________________________________________________________
max_pooling1d_2 (MaxPooling1D)  (None, None, 128)    0           conv1d_6[0][0]                   
_______________________________________________________________________________________

Importantly, training such a model requires the ability to specify different loss functions for different heads of the network: for instance, age prediction is a scalar regression task, but gender prediction is a binary classification task, requiring a different training procedure. But because gradient descent requires you to minimize a scalar, you must combine these losses into a single value in order to train the model. The simplest way to combine different losses is to sum them all. In Keras, you can use either a list or a dictionary of losses in compile to specify different objects for different outputs; the resulting loss values are summed into a global loss, which is minimized during trainin

In [63]:
model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'])

model.compile(optimizer='rmsprop',
              loss={'age': 'mse',
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'})


Note that very imbalanced loss contributions will cause the model representations to be optimized preferentially for the task with the largest individual loss, at the expense of the other tasks. To remedy this, you can assign different levels of importance to the loss values in their contribution to the final loss. This is useful in particular if the losses’ values use different scales. For instance, the mean squared error (MSE) loss used for the age-regression task typically takes a value around 3–5, whereas the cross-entropy loss used for the gender-classification task can be as low as 0.1. In such a situation, to balance the contribution of the different losses, you can assign a weight of 10 to the crossentropy loss and a weight of 0.25 to the MSE loss.



In [64]:
model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'],
              loss_weights=[0.25, 1., 10.])

model.compile(optimizer='rmsprop',
              loss={'age': 'mse',
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'},
              loss_weights={'age': 0.25,
                            'income': 1.,
                            'gender': 10.})


Much as in the case of multi-input models, you can pass Numpy data to the model for training either via a list of arrays or via a dictionary of arrays.

In [65]:
model.fit(posts, [age_targets, income_targets, gender_targets],
          epochs=10, batch_size=64)

model.fit(posts, {'age': age_targets,
                  'income': income_targets,
                  'gender': gender_targets},
          epochs=10, batch_size=64)


NameError: name 'posts' is not defined

# Directed acyclic graphs of layers
