In [1]:
from keras import models, layers
from keras import Input

# Passed to the embedding layer as its input_dim (number of distinct token ids).
posts_vocabulary_size = 10000
# Number of income groups used for the income classification target.
n_income_groups = 10

# Input: batches of variable-length sequences of int32 token ids.
posts_input = Input(name='posts', shape=(None,), dtype='int32')

# Look up a 256-dimensional vector for every token id.
embedding_layer = layers.Embedding(posts_vocabulary_size, 256)
embedded_posts = embedding_layer(posts_input)

# Shared trunk: three ReLU Conv1D layers with kernel size 5. The first two are
# each followed by max pooling with pool size 3; the third is followed by
# global max pooling, which collapses the sequence axis.
x = embedded_posts
for n_filters in (128, 256):
    x = layers.Conv1D(n_filters, 5, activation='relu')(x)
    x = layers.MaxPooling1D(pool_size=3)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)

# Dense representation that the output heads are attached to.
x = layers.Dense(128, activation='relu')(x)

Using TensorFlow backend.


In [2]:
# Three output heads, all attached to the shared representation `x`.

# Age: regression head with a single linear unit (no activation).
age_prediction = layers.Dense(units=1, name='age')(x)

# Income: multi-class head, one softmax unit per income group. The width comes
# from n_income_groups so the head stays in sync with that constant instead of
# repeating the literal 10.
income_prediction = layers.Dense(units=n_income_groups,
                                 activation='softmax',
                                 name='income')(x)

# Gender: two sigmoid units.
# NOTE(review): the training targets for this head are one-hot encoded with
# to_categorical, which is why it has two units. The two sigmoid outputs are
# independent and need not sum to 1; a single sigmoid unit with 0/1 labels or a
# 2-way softmax would be the more conventional setup - confirm intent.
gender_prediction = layers.Dense(units=2,
                                 activation='sigmoid',
                                 name='gender')(x)

In [3]:
# Assemble the multi-output model: one posts input, three prediction heads.
# The order of the outputs list is age, income, gender.
prediction_heads = [age_prediction, income_prediction, gender_prediction]
model = models.Model(inputs=posts_input, outputs=prediction_heads)

# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
posts (InputLayer)              (None, None)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 256)    2560000     posts[0][0]                      
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, None, 128)    163968      embedding_1[0][0]                
__________________________________________________________________________________________________
max_pooling1d_1 (MaxPooling1D)  (None, None, 128)    0           conv1d_1[0][0]                   
__________________________________________________________________________________________________
conv1d_2 (

In [4]:
# Each head has its own loss, and the losses are combined as a weighted sum.
#
# Per the original author's notes: the MSE loss of the age regression is
# typically around 3-5, whereas a cross-entropy loss of a classifier can be as
# low as about 0.1. Left unweighted, the larger regression loss would dominate
# and the model would mostly fit the age task at the expense of the others.
#
# The loss weights counteract that imbalance:
#   age    (mse)                      -> 0.25
#   income (categorical_crossentropy) -> 1.0
#   gender (binary_crossentropy)      -> 10.0
head_losses = {
    'age': 'mse',
    'income': 'categorical_crossentropy',
    'gender': 'binary_crossentropy',
}
head_loss_weights = {
    'age': 0.25,
    'income': 1.,
    'gender': 10.,
}

model.compile(optimizer='rmsprop',
              loss=head_losses,
              loss_weights=head_loss_weights)

In [5]:
import numpy as np
from keras.utils import to_categorical

# Synthetic (random) training data, used only to exercise the multi-output model.
n_samples = 25000
maxlen = 140

# Local, seeded generator: the data is identical on every Restart & Run All
# without touching NumPy's global random state. This makes only the data
# reproducible; Keras weight initialisation is not seeded here.
rng = np.random.RandomState(42)

# Token ids are drawn from [1, posts_vocabulary_size); id 0 is never generated.
train_posts = rng.randint(low=1, high=posts_vocabulary_size,
                          size=(n_samples, maxlen))
# Integer ages in [0, 99).
target_age = rng.randint(low=0, high=99, size=n_samples)
# Integer class labels; the income range follows n_income_groups so the labels
# always match the width of the income head.
income_labels = rng.randint(low=0, high=n_income_groups, size=n_samples)
gender_labels = rng.randint(low=0, high=2, size=n_samples)

# One-hot encode the class labels. num_classes is passed explicitly because
# to_categorical otherwise infers the width from the largest label present,
# which would yield too few columns if the top class happened not to be drawn.
target_income = to_categorical(income_labels, num_classes=n_income_groups)
target_gender = to_categorical(gender_labels, num_classes=2)

print(train_posts.shape)
print(target_age.shape)
print(target_income.shape)
print(target_gender.shape)

(25000, 140)
(25000,)
(25000, 10)
(25000, 2)


In [6]:
# Map each target array to its output head by layer name, so the pairing does
# not depend on the order of the model's outputs list.
targets_by_head = {
    'age': target_age,
    'income': target_income,
    'gender': target_gender,
}

# Train for 10 epochs with mini-batches of 64 samples. The call is left as the
# cell's last expression, so the returned History object is displayed.
model.fit(x=train_posts,
          y=targets_by_head,
          batch_size=64,
          epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fee3b726b38>