In [1]:
import os

import pandas as pd
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from keras.layers import Dense, Dropout, BatchNormalization
from keras.layers import PReLU
from keras.models import Sequential
from keras.optimizers import RMSprop, Adam
from sklearn.model_selection import KFold

from hkfold import HKFold

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the pre-processed data frame, filter it, and shuffle the rows.
dframe_path = 'data/cabauw/processed.csv.gz'

# Fixed seed so that Restart & Run All reproduces the same row order
# (and therefore the same train/test split further down).
SHUFFLE_SEED = 42

try:
    df = pd.read_csv(dframe_path, na_values='--')
except UnicodeDecodeError:
    # Retry while explicitly asking pandas to gunzip the file.
    df = pd.read_csv(dframe_path, na_values='--', compression='gzip')

# Keep only rows with ustar > 0.1, |H| > 10 and wind > 1.
df = df[(df.ustar > 0.1) & (abs(df.H) > 10) & (df.wind > 1)]

# Drop every row whose `ds` value is 201603.
df = df[df.ds != 201603]

# NOTE(review): sorting by ['ds', 'tt'] was disabled in favour of a full
# shuffle. If the later HKFold split is meant to discard rows that are
# *temporally* close to the test rows, shuffling first makes positional
# neighbours random -- TODO confirm this is intended.
#df = df.sort_values(['ds', 'tt'])

# '--' was parsed as NaN above; remove every row that has a missing value.
df = df.dropna()

# Shuffle all rows (frac=1 keeps every row); seeded for reproducibility.
df = df.sample(frac=1, random_state=SHUFFLE_SEED)

In [3]:
# Columns used as network inputs. The commented-out names are excluded.
features = [
    'dewpoint', 'spec_hum', 'rel_hum', 'press', 'rain',
    'air_dens', 'wind', 'temp', 'virtual_temp', 'soil_temp', 'z',
    #'ustar', 'H', 'LE', 'zL_m', 'lval'
]

# Column the network is trained to predict.
target = 'phi_m'

# Split configuration, named so the values can be tuned in one place.
N_FOLDS = 50                # number of KFold splits; only the first one is used
H_BLOCK = int(1 * 60 / 10)  # = 6, passed to HKFold as `h`
SPLIT_SEED = 42             # seeds KFold's shuffling so the split is reproducible

# Take only the first (train, test) pair produced by the splitter.
splitter = HKFold(
    KFold(N_FOLDS, shuffle=True, random_state=SPLIT_SEED),
    h=H_BLOCK,
)
train_idx, test_idx = next(splitter.split(df))

# Report how many rows ended up in each part; rows in neither part are "lost".
nr, ne, nn = len(train_idx), len(test_idx), len(df)
print('training with %d (%.1f%%) samples, testing with %d (%.1f%%), %d (%.1f%%) samples lost' % (
    nr, 100 * nr / nn, ne, 100 * ne / nn, nn - nr - ne, 100 * (nn - nr - ne) / nn
))

train_x, train_y = df[features].iloc[train_idx], df[target].iloc[train_idx]
test_x, test_y = df[features].iloc[test_idx], df[target].iloc[test_idx]

# Standardisation statistics come from the training rows only, so nothing
# about the test rows leaks into the scaling.
mean_x, mean_y = train_x.mean(), train_y.mean()
std_x, std_y = train_x.std(), train_y.std()

# A feature that is constant over the training rows has std == 0; dividing by
# it would yield NaN/inf inputs. Scale such columns by 1 instead.
std_x = std_x.replace(0, 1)

train_x = (train_x - mean_x) / std_x
test_x = (test_x - mean_x) / std_x

# Target standardisation is currently disabled: mean_y / std_y are computed
# above but not applied.
#train_y = (train_y - mean_y) / std_y
#test_y = (test_y - mean_y) / std_y

training with 1022889 (76.9%) samples, testing with 26602 (2.0%), 280601 (21.1%) samples lost


In [10]:
# Fully connected regression network: every hidden Dense layer is followed by
# its own PReLU activation, and a single linear unit produces the prediction.
hidden_units = (1024, 1024, 256, 256, 128, 128, 32)

model = Sequential()
for depth, units in enumerate(hidden_units):
    if depth == 0:
        # Only the first layer needs to be told the input width.
        model.add(Dense(units, input_shape=(len(features),)))
    else:
        model.add(Dense(units))
    model.add(PReLU())
model.add(Dense(1))

# Mean squared error loss, optimised with RMSprop at a learning rate of 0.001.
opt = RMSprop(lr=0.001)
model.compile(optimizer=opt, loss='mse')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             (None, 1024)              12288     
_________________________________________________________________
p_re_lu_15 (PReLU)           (None, 1024)              1024      
_________________________________________________________________
dense_18 (Dense)             (None, 1024)              1049600   
_________________________________________________________________
p_re_lu_16 (PReLU)           (None, 1024)              1024      
_________________________________________________________________
dense_19 (Dense)             (None, 256)               262400    
_________________________________________________________________
p_re_lu_17 (PReLU)           (None, 256)               256       
_________________________________________________________________
dense_20 (Dense)             (None, 256)               65792     
__________

In [11]:
# Train the network, checkpointing on improvement and logging to TensorBoard.

# Filename template filled in by ModelCheckpoint with the epoch number and the
# validation loss of that epoch.
checkpoint_path = 'dev/models/weights.{epoch:04d}-{val_loss:.4f}.hdf5'

# Make sure the checkpoint directory exists up front; otherwise the first save
# would fail only after a whole epoch has already been computed.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# NOTE(review): the test split doubles as validation data, so checkpoint
# selection and the learning-rate schedule are both driven by test rows.
# Scores on (test_x, test_y) are therefore optimistic -- consider a separate
# validation split.
hist = model.fit(
    train_x, train_y,
    batch_size=4096, epochs=250,
    callbacks=[
        # Multiply the learning rate by 0.2 on a plateau, never below 1e-6.
        ReduceLROnPlateau(factor=0.2, verbose=1, min_lr=1e-6),
        # Write a checkpoint only when the monitored value improves.
        ModelCheckpoint(checkpoint_path,
                        verbose=1, save_best_only=True),
        TensorBoard('dev/logs/', write_graph=False, write_grads=True),
    ],
    validation_data=(test_x, test_y)
)

Train on 1022889 samples, validate on 26602 samples
Epoch 1/250
Epoch 00001: val_loss improved from inf to 0.73347, saving model to weights.01-0.73.hdf5
Epoch 2/250
Epoch 00002: val_loss improved from 0.73347 to 0.71025, saving model to weights.02-0.71.hdf5
Epoch 3/250
Epoch 00003: val_loss improved from 0.71025 to 0.45091, saving model to weights.03-0.45.hdf5
Epoch 4/250
Epoch 00004: val_loss improved from 0.45091 to 0.38986, saving model to weights.04-0.39.hdf5
Epoch 5/250
Epoch 00005: val_loss did not improve
Epoch 6/250
Epoch 00006: val_loss did not improve
Epoch 7/250
Epoch 00007: val_loss improved from 0.38986 to 0.36172, saving model to weights.07-0.36.hdf5
Epoch 8/250
Epoch 00008: val_loss improved from 0.36172 to 0.31307, saving model to weights.08-0.31.hdf5
Epoch 9/250
Epoch 00009: val_loss did not improve
Epoch 10/250
Epoch 00010: val_loss did not improve
Epoch 11/250
Epoch 00011: val_loss improved from 0.31307 to 0.27564, saving model to weights.11-0.28.hdf5
Epoch 12/250
Ep

Epoch 00034: val_loss did not improve
Epoch 35/250
Epoch 00035: val_loss improved from 0.13492 to 0.12200, saving model to weights.35-0.12.hdf5
Epoch 36/250
Epoch 00036: val_loss did not improve
Epoch 37/250
Epoch 00037: val_loss did not improve
Epoch 38/250
Epoch 00038: val_loss did not improve
Epoch 39/250
Epoch 00039: val_loss did not improve
Epoch 40/250
Epoch 00040: val_loss did not improve
Epoch 41/250
Epoch 00041: val_loss did not improve
Epoch 42/250
Epoch 00042: val_loss improved from 0.12200 to 0.11690, saving model to weights.42-0.12.hdf5
Epoch 43/250
Epoch 00043: val_loss did not improve
Epoch 44/250
Epoch 00044: val_loss improved from 0.11690 to 0.10768, saving model to weights.44-0.11.hdf5
Epoch 45/250
Epoch 00045: val_loss did not improve
Epoch 46/250
Epoch 00046: val_loss did not improve
Epoch 47/250
Epoch 00047: val_loss did not improve
Epoch 48/250
Epoch 00048: val_loss did not improve
Epoch 49/250
Epoch 00049: val_loss did not improve
Epoch 50/250
Epoch 00050: val_lo

Epoch 69/250
Epoch 00069: val_loss did not improve
Epoch 70/250
Epoch 00070: val_loss improved from 0.07906 to 0.07045, saving model to weights.70-0.07.hdf5
Epoch 71/250
Epoch 00071: val_loss did not improve
Epoch 72/250
Epoch 00072: val_loss did not improve
Epoch 73/250
Epoch 00073: val_loss did not improve
Epoch 74/250
Epoch 00074: val_loss did not improve
Epoch 75/250
Epoch 00075: val_loss did not improve
Epoch 76/250
Epoch 00076: val_loss did not improve
Epoch 77/250
Epoch 00077: val_loss did not improve
Epoch 78/250
Epoch 00078: val_loss did not improve
Epoch 79/250
Epoch 00079: val_loss did not improve
Epoch 80/250
Epoch 00080: val_loss did not improve
Epoch 81/250
Epoch 00081: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.

Epoch 00081: val_loss did not improve
Epoch 82/250
Epoch 00082: val_loss improved from 0.07045 to 0.04134, saving model to weights.82-0.04.hdf5
Epoch 83/250
Epoch 00083: val_loss improved from 0.04134 to 0.03978, saving model to weights.8

Epoch 102/250
Epoch 00102: val_loss improved from 0.02406 to 0.02374, saving model to weights.102-0.02.hdf5
Epoch 103/250
Epoch 00103: val_loss improved from 0.02374 to 0.02325, saving model to weights.103-0.02.hdf5
Epoch 104/250
Epoch 00104: val_loss did not improve
Epoch 105/250
Epoch 00105: val_loss improved from 0.02325 to 0.02289, saving model to weights.105-0.02.hdf5
Epoch 106/250
Epoch 00106: val_loss improved from 0.02289 to 0.02227, saving model to weights.106-0.02.hdf5
Epoch 107/250
Epoch 00107: val_loss improved from 0.02227 to 0.02182, saving model to weights.107-0.02.hdf5
Epoch 108/250
Epoch 00108: val_loss did not improve
Epoch 109/250
Epoch 00109: val_loss improved from 0.02182 to 0.02143, saving model to weights.109-0.02.hdf5
Epoch 110/250
Epoch 00110: val_loss improved from 0.02143 to 0.02081, saving model to weights.110-0.02.hdf5
Epoch 111/250
Epoch 00111: val_loss did not improve
Epoch 112/250
Epoch 00112: val_loss improved from 0.02081 to 0.02023, saving model to we

Epoch 135/250
Epoch 00135: val_loss did not improve
Epoch 136/250
Epoch 00136: val_loss improved from 0.01702 to 0.01657, saving model to weights.136-0.02.hdf5
Epoch 137/250
Epoch 00137: val_loss did not improve
Epoch 138/250
Epoch 00138: val_loss improved from 0.01657 to 0.01533, saving model to weights.138-0.02.hdf5
Epoch 139/250
Epoch 00139: val_loss did not improve
Epoch 140/250
Epoch 00140: val_loss did not improve
Epoch 141/250
Epoch 00141: val_loss improved from 0.01533 to 0.01508, saving model to weights.141-0.02.hdf5
Epoch 142/250
Epoch 00142: val_loss improved from 0.01508 to 0.01502, saving model to weights.142-0.02.hdf5
Epoch 143/250
Epoch 00143: val_loss did not improve
Epoch 144/250
Epoch 00144: val_loss did not improve
Epoch 145/250
Epoch 00145: val_loss improved from 0.01502 to 0.01436, saving model to weights.145-0.01.hdf5
Epoch 146/250
Epoch 00146: val_loss did not improve
Epoch 147/250
Epoch 00147: val_loss improved from 0.01436 to 0.01414, saving model to weights.14

Epoch 169/250
Epoch 00169: val_loss did not improve
Epoch 170/250
Epoch 00170: val_loss did not improve
Epoch 171/250
Epoch 00171: val_loss did not improve
Epoch 172/250
Epoch 00172: val_loss improved from 0.01282 to 0.01274, saving model to weights.172-0.01.hdf5
Epoch 173/250
Epoch 00173: val_loss did not improve
Epoch 174/250
Epoch 00174: val_loss improved from 0.01274 to 0.01214, saving model to weights.174-0.01.hdf5
Epoch 175/250
Epoch 00175: val_loss did not improve
Epoch 176/250
Epoch 00176: val_loss did not improve
Epoch 177/250
Epoch 00177: val_loss did not improve
Epoch 178/250
Epoch 00178: val_loss did not improve
Epoch 179/250
Epoch 00179: val_loss improved from 0.01214 to 0.01194, saving model to weights.179-0.01.hdf5
Epoch 180/250
Epoch 00180: val_loss did not improve
Epoch 181/250
Epoch 00181: val_loss did not improve
Epoch 182/250
Epoch 00182: val_loss did not improve
Epoch 183/250
Epoch 00183: val_loss did not improve
Epoch 184/250
Epoch 00184: val_loss did not improve


Epoch 204/250
Epoch 00204: val_loss did not improve
Epoch 205/250
Epoch 00205: val_loss did not improve
Epoch 206/250
Epoch 00206: val_loss did not improve
Epoch 207/250
Epoch 00207: val_loss did not improve
Epoch 208/250
Epoch 00208: val_loss did not improve
Epoch 209/250
Epoch 00209: val_loss did not improve
Epoch 210/250
Epoch 00210: val_loss improved from 0.01048 to 0.01046, saving model to weights.210-0.01.hdf5
Epoch 211/250
Epoch 00211: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.

Epoch 00211: val_loss did not improve
Epoch 212/250
Epoch 00212: val_loss improved from 0.01046 to 0.00730, saving model to weights.212-0.01.hdf5
Epoch 213/250
Epoch 00213: val_loss improved from 0.00730 to 0.00720, saving model to weights.213-0.01.hdf5
Epoch 214/250
Epoch 00214: val_loss improved from 0.00720 to 0.00699, saving model to weights.214-0.01.hdf5
Epoch 215/250
Epoch 00215: val_loss did not improve
Epoch 216/250
Epoch 00216: val_loss improved from 0.00699 to 0.00694, 

Epoch 236/250
Epoch 00236: val_loss did not improve
Epoch 237/250
Epoch 00237: val_loss did not improve
Epoch 238/250
Epoch 00238: val_loss improved from 0.00620 to 0.00613, saving model to weights.238-0.01.hdf5
Epoch 239/250
Epoch 00239: val_loss did not improve
Epoch 240/250
Epoch 00240: val_loss improved from 0.00613 to 0.00610, saving model to weights.240-0.01.hdf5
Epoch 241/250
Epoch 00241: val_loss did not improve
Epoch 242/250
Epoch 00242: val_loss improved from 0.00610 to 0.00599, saving model to weights.242-0.01.hdf5
Epoch 243/250
Epoch 00243: val_loss did not improve
Epoch 244/250
Epoch 00244: val_loss did not improve
Epoch 245/250
Epoch 00245: val_loss did not improve
Epoch 246/250
Epoch 00246: val_loss did not improve
Epoch 247/250
Epoch 00247: val_loss improved from 0.00599 to 0.00593, saving model to weights.247-0.01.hdf5
Epoch 248/250
Epoch 00248: val_loss improved from 0.00593 to 0.00591, saving model to weights.248-0.01.hdf5
Epoch 249/250
Epoch 00249: val_loss did not 

<keras.callbacks.History at 0x7fe628f330b8>