In [1]:
import tensorflow as tf

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import RMSprop, SGD, Adam, Nadam

In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals

# TensorFlow and tf.keras

from tensorflow import keras

# Helper libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
from tqdm.notebook import tqdm

print(tf.__version__)

2.4.1


In [4]:
# A = tf.constant([[3, 2], 
#                  [5, 2]])
# B = tf.constant([[9, 5], 
#                  [1, 3]])

In [5]:
# tf.multiply(A, B) # поэлементно!

In [6]:
# tf.matmul(A, B) # матричное умножение

#### import dataset

In [7]:
boston_dataset = tf.keras.datasets.boston_housing

(train_data, train_targets), (test_data, test_targets) = boston_dataset.load_data(test_split = 0.2, seed = 42)

In [8]:
print(f'Training data : {train_data.shape}')
print(f'Test data : {test_data.shape}')
print(f'Training sample : {train_data[0]}')
print(f'Training target sample : {train_targets[0]}')

Training data : (404, 13)
Test data : (102, 13)
Training sample : [9.1780e-02 0.0000e+00 4.0500e+00 0.0000e+00 5.1000e-01 6.4160e+00
 8.4100e+01 2.6463e+00 5.0000e+00 2.9600e+02 1.6600e+01 3.9550e+02
 9.0400e+00]
Training target sample : 23.6


In [9]:
# feature normalization
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std

test_data -= mean
test_data /= std

### model

In [10]:
def create_model(layers, neurons, activator, optimizer, loss, metric):
    model = Sequential()
    model.add(Dense(neurons, activation = activator, input_shape=(train_data.shape[1],)))
    for k in range(layers - 2):
        model.add(Dense(neurons * (10-k) * 0.06, activation = activator, input_shape=(784,)))
    model.add(Dense(1))

    model.compile(optimizer = optimizer,
              loss = loss,
              metrics = [metric])
    return model

In [11]:
def get_score(layers, neurons, activator, optimizer, loss, metric):
    train_scores, test_scores = [], []
    for i in range(15):
        model = create_model(layers, neurons, activator, optimizer, loss, metric)
        model.fit(train_data,
                      train_targets,
                      epochs=100,
                      batch_size=100,
                      verbose=0)
        train_mse, train_mae = model.evaluate(train_data, train_targets, verbose=0)
        test_mse, test_mae = model.evaluate(test_data, test_targets, verbose=0)
        train_scores.append(train_mae)
        test_scores.append(test_mae)
    test_score = np.mean(test_scores)
    train_score = np.mean(train_scores)
    return train_score, test_score

In [12]:
get_score(3, 64, 'relu', 'rmsprop', 'mse', 'mae')

(2.3133161703745526, 2.7035226662953695)

### model params

#### neurons

In [13]:
%%time
res_n = {}
for n_num in [16, 32, 64, 128, 256, 512]:
    train_score, test_score = get_score(3, n_num, 'relu', 'rmsprop', 'mse', 'mae')
    res_n['3_' + str(n_num) + '_relu' + '_rmsprop'] = [train_score, test_score]

Wall time: 1min 18s


In [14]:
pd.DataFrame.from_dict(res_n, orient='index', columns = ['mae_train', 'mae_test'])

Unnamed: 0,mae_train,mae_test
3_16_relu_rmsprop,4.178839,4.258955
3_32_relu_rmsprop,2.826152,3.114621
3_64_relu_rmsprop,2.29943,2.740141
3_128_relu_rmsprop,2.137596,2.64623
3_256_relu_rmsprop,2.314604,2.869233
3_512_relu_rmsprop,2.141694,2.837103


С увеличением количества нейронов MAE сначала снижается, но после 128 нейронов начинает расти. В некоторых случаях после 128 нейронов MAE на трейне продолжает снижаться, но на тесте растет (проверено несколькими перезапусками). Также с увеличением количества нейронов разрыв между метрикой на трейне и тесте увеличивается

#### layers

In [15]:
%%time
res_l = {}
for l in [2, 3, 5, 10]:
    train_score, test_score = get_score(l, 128, 'relu', 'rmsprop', 'mse', 'mae')
    res_l[str(l) + '_128' + '_relu' + '_rmsprop'] = [train_score, test_score]

Wall time: 57.6 s


In [16]:
pd.DataFrame.from_dict(res_l, orient='index', columns = ['mae_train', 'mae_test'])

Unnamed: 0,mae_train,mae_test
2_128_relu_rmsprop,3.058808,3.440689
3_128_relu_rmsprop,2.144337,2.632035
5_128_relu_rmsprop,2.217191,2.807581
10_128_relu_rmsprop,2.735339,3.328983


Уменьшение (впрочем, как и увеличение) количества слоев приводит к росту ошибки. Оставляем 3 слоя и 128 нейронов

#### activation

In [17]:
%%time
res_a = {}
for f in ['relu', 'elu', 'exponential']:
    train_score, test_score = get_score(3, 128, f, 'rmsprop', 'mse', 'mae')
    res_a['3' + '_128_' + str(f) + '_rmsprop'] = [train_score, test_score]

Wall time: 35.8 s


In [18]:
pd.DataFrame.from_dict(res_a, orient='index', columns = ['mae_train', 'mae_test'])

Unnamed: 0,mae_train,mae_test
3_128_relu_rmsprop,2.179984,2.662947
3_128_elu_rmsprop,2.615568,2.867502
3_128_exponential_rmsprop,2.928225,3.316731


С функцией активации RELU ошибка меньше

#### optimizer

In [19]:
%%time
res_o = {}
for opt in ['rmsprop', 'adam', 'nadam']:
    train_score, test_score = get_score(3, 128, 'relu', opt, 'mse', 'mae')
    res_o['3' + '_128' + '_relu_' + opt] = [train_score, test_score]

Wall time: 37.4 s


In [20]:
pd.DataFrame.from_dict(res_o, orient='index', columns = ['mae_train', 'mae_test'])

Unnamed: 0,mae_train,mae_test
3_128_relu_rmsprop,2.157922,2.635941
3_128_relu_adam,2.310746,2.746766
3_128_relu_nadam,2.304336,2.77082


При нескольких перезапусках лучшие результаты получаем c оптимизатором RMSprop. В итоге лучшей оказалась модель 3/128/relu/rmsprop

In [21]:
%%time
res_o = {}
for opt in ['rmsprop', 'adam', 'nadam']:
    train_score, test_score = get_score(5, 256, 'relu', opt, 'mse', 'mae')
    res_o['5' + '_256' + '_relu_' + opt] = [train_score, test_score]

Wall time: 55.5 s


In [22]:
pd.DataFrame.from_dict(res_o, orient='index', columns = ['mae_train', 'mae_test'])

Unnamed: 0,mae_train,mae_test
5_256_relu_rmsprop,2.693233,3.27379
5_256_relu_adam,1.746313,2.495076
5_256_relu_nadam,1.84555,2.600522


Очень интересным оказался факт, что при изменении числа нейронов и слоев на значения, при которых ранее стабильно получали более низкий результат, мы получили меньшую ошибку как на трейне, так и на тесте.

#### gridsearch

In [23]:
# params = {
#     'neurons_num' : [16, 32, 64, 128, 256, 512],
#     'layers' : [2, 3, 5, 10],
#     'activator' : ['relu', 'elu', 'exponential'],
#     'optimizer' : ['rmsprop', 'adam', 'nadam'],
# }

In [24]:
params = {
    'neurons_num' : [64, 128, 256],
    'layers' : [3, 5],
    'activator' : ['relu', 'elu'],
    'optimizer' : ['rmsprop', 'adam'],
}

In [25]:
%%time
res_gridsearch = {}
with tqdm(total=len(list(itertools.product(params['neurons_num'], params['layers'], params['activator'], params['optimizer'])))) as pbar:
    for n, l, f, opt in itertools.product(params['neurons_num'], params['layers'], params['activator'], params['optimizer']):
        train_score, test_score = get_score(l, n, f, opt, 'mse', 'mae')
        res_gridsearch[str(n) + '_' + str(l) + '_' + str(f) + '_' + str(opt)] = [train_score, test_score]
        pbar.update(1)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=24.0), HTML(value='')))


Wall time: 5min 39s


In [26]:
pd.DataFrame.from_dict(res_gridsearch, orient='index', columns = ['mae_train', 'mae_test']).sort_values(by = 'mae_test').head(5)

Unnamed: 0,mae_train,mae_test
256_5_relu_adam,1.728091,2.507011
128_5_relu_adam,1.969093,2.608755
256_5_elu_adam,2.192612,2.634629
64_5_elu_adam,2.399467,2.635143
128_5_elu_adam,2.176619,2.636891


И мы получили результат, совершенно отличный от того, к которому пришли, последовательно подбирая параметры. 3/128/relu/adam даже отсутствует в топ-5

#### loss function / metric

In [27]:
%%time
res_gridsearch_l = {}
with tqdm(total=len(list(itertools.product(params['neurons_num'], params['layers'], params['activator'], params['optimizer'])))) as pbar:
    for n, l, f, opt in itertools.product(params['neurons_num'], params['layers'], params['activator'], params['optimizer']):
        train_score, test_score = get_score(l, n, f, opt, 'log_cosh', 'mse')
        res_gridsearch_l[str(l) + '_' + str(n) + '_' + str(f) + '_' + str(opt)] = [train_score, test_score]
        pbar.update(1)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=24.0), HTML(value='')))


Wall time: 5min 39s


In [28]:
pd.DataFrame.from_dict(res_gridsearch_l, orient='index', columns = ['mse_train', 'mse_test']).sort_values(by = 'mse_test').head(5)

Unnamed: 0,mse_train,mse_test
5_256_relu_adam,7.582439,12.269112
5_128_relu_adam,9.211209,14.067571
5_256_elu_adam,11.217912,14.649683
5_128_elu_adam,11.978361,15.014632
3_256_relu_adam,11.038289,16.038416


При смене функции потерь и метрики лидирует тот же набор параметров, что мы получили на предыдущем шаге, а вот остальные немного отличаются

### predictions

In [29]:
final_model = create_model(5, 256, 'relu', 'adam', 'mse', 'mae')
final_model.fit(train_data, train_targets, epochs=100, batch_size=100, verbose=0)

y_pred = final_model.predict(test_data)

In [30]:
y_pred.flatten()[:5]

array([22.86755 , 29.13674 , 17.357775, 25.066797, 23.238813],
      dtype=float32)

In [31]:
test_targets[:5]

array([22.4, 28.6, 19.5, 24.8, 24.5])

In [32]:
from sklearn.metrics import r2_score, mean_absolute_error
r2_score(test_targets, y_pred.flatten()), mean_absolute_error(test_targets, y_pred.flatten())

(0.8571744279034041, 2.3367486374050968)

В целом, довольно неплохие результаты

Сравним с предсказаниями модели, построенной на последовательно подобранных параметрах

In [33]:
final_model = create_model(3, 128, 'relu', 'rmsprop', 'mse', 'mae')
final_model.fit(train_data, train_targets, epochs=100, batch_size=100, verbose=0)

y_pred = final_model.predict(test_data)

r2_score(test_targets, y_pred.flatten()), mean_absolute_error(test_targets, y_pred.flatten())

(0.7859865003779233, 2.6524867113898773)

Результат заметно хуже