In [None]:
import numpy as np
import pandas as pd
import os
import keras
from keras import models, layers

In [None]:
data_dir = 'F:\study\ml\DataSet\jena_climate'
fname = os.path.join(data_dir, 'jena_climate_2009_2016.csv')

f = open(fname)
data = f.read()
f.close()

In [3]:
lines = data.split('\n')
header = lines[0].split(',')
lines = lines[1:]
print(header)
print(len(lines))

['"Date Time"', '"p (mbar)"', '"T (degC)"', '"Tpot (K)"', '"Tdew (degC)"', '"rh (%)"', '"VPmax (mbar)"', '"VPact (mbar)"', '"VPdef (mbar)"', '"sh (g/kg)"', '"H2OC (mmol/mol)"', '"rho (g/m**3)"', '"wv (m/s)"', '"max. wv (m/s)"', '"wd (deg)"']
420551


In [4]:
float_data = np.zeros((len(lines), len(header) - 1))
for i, line in enumerate(lines):
    values = [float(x) for x in line.split(',')[1:]]
    float_data[i, :] = values

In [5]:
mean = float_data[:20000].mean(axis=0)
float_data -= mean
std = float_data[:20000].std(axis=0)
float_data /= std

In [6]:
float_data.shape

(420551, 14)

In [15]:
def generator(data,
              lookback,
              delay,
              min_index,
              max_index,
              shuffle=False,
              batch_size=128,
              step=6):
    if max_index is None:
        max_index = len(data) - delay - 1
    i = min_index + lookback

    while 1:
        if shuffle:
            rows = np.random.randint(min_index + lookback, max_index,
                                     batch_size)
        else:
            if i + batch_size > max_index:
                i = min_index + lookback
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)
#         print(rows)
#         print(len(rows))

        samples = np.zeros((len(rows), lookback // step, data.shape[-1]))
        targets = np.zeros((len(rows), ))
        #         print(samples.shape)

        for j, row in enumerate(rows):
            indices = range(rows[j] - lookback, rows[j], step)
            #             print(indices)
            samples[j] = data[indices]
            targets[j] = data[rows[j] + delay][1]


#             print(samples.shape)
        yield samples, targets

In [16]:
lookback = 1440
step = 6
delay = 144
batch_size = 128

In [17]:
train_gen = generator(float_data,
                      lookback=lookback,
                      delay=delay,
                      min_index=0,
                      max_index=200000,
                      shuffle=True,
                      step=step,
                      batch_size=batch_size)

In [18]:
# next(train_gen)

In [19]:
val_gen = generator(
    float_data,
    lookback=lookback,
    delay=delay,
    min_index=200001,
    max_index=300000,
    #                       shuffle=True,
    step=step,
    batch_size=batch_size)

In [20]:
val_steps = (300000 - 200001 - lookback) // batch_size

In [21]:
def evaluate_naive_method():
    batch_maes = []
    for step in range(val_steps):
        samples, targets = next(val_gen)
        preds = samples[:, -1, 1]
        mae = np.mean(np.abs(preds - targets))
        batch_maes.append(mae)
    print(np.mean(batch_maes))

In [22]:
evaluate_naive_method()

0.33551756286169604


In [23]:
from keras import models, layers, optimizers

In [24]:
model = models.Sequential()
model.add(layers.Flatten(input_shape=(lookback // step, float_data.shape[-1])))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1))

In [25]:
model.compile(optimizer=optimizers.rmsprop(), loss='mae')
history = model.fit_generator(train_gen,
                              steps_per_epoch=500,
                              epochs=20,
                              validation_data=val_gen,
                              validation_steps=val_steps)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [28]:
model2 = models.Sequential()
model2.add(layers.GRU(32, input_shape=(None, float_data.shape[-1])))
model2.add(layers.Dense(1))

In [29]:
model2.compile(optimizer=optimizers.rmsprop(), loss='mae')
history = model2.fit_generator(train_gen,
                               steps_per_epoch=500,
                               epochs=20,
                               validation_data=val_gen,
                               validation_steps=val_steps)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [31]:
model3 = models.Sequential()
model3.add(
    layers.GRU(32,
               dropout=0.2,
               recurrent_dropout=0.2,
               input_shape=(None, float_data.shape[-1])))
model3.add(layers.Dense(1))

In [32]:
model3.compile(optimizer=optimizers.rmsprop(), loss='mae')
history = model3.fit_generator(train_gen,
                               steps_per_epoch=500,
                               epochs=20,
                               validation_data=val_gen,
                               validation_steps=val_steps)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [33]:
model4 = models.Sequential()
model4.add(
    layers.GRU(32,
               dropout=0.1,
               recurrent_dropout=0.5,
               return_sequences=True,
               input_shape=(None, float_data.shape[-1])))
model4.add(
    layers.GRU(64, activation='relu', dropout=0.1, recurrent_dropout=0.5))
model4.add(layers.Dense(1))

In [34]:
model4.compile(optimizer=optimizers.rmsprop(), loss='mae')
history = model4.fit_generator(train_gen,
                               steps_per_epoch=500,
                               epochs=20,
                               validation_data=val_gen,
                               validation_steps=val_steps)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


reverse ltsm imdb

In [36]:
from keras.datasets import imdb

In [38]:
max_features = 10000
maxlen = 500

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

In [40]:
type(x_train[0])

list

In [39]:
x_train[0]

[1,
 14,
 22,
 16,
 43,
 530,
 973,
 1622,
 1385,
 65,
 458,
 4468,
 66,
 3941,
 4,
 173,
 36,
 256,
 5,
 25,
 100,
 43,
 838,
 112,
 50,
 670,
 2,
 9,
 35,
 480,
 284,
 5,
 150,
 4,
 172,
 112,
 167,
 2,
 336,
 385,
 39,
 4,
 172,
 4536,
 1111,
 17,
 546,
 38,
 13,
 447,
 4,
 192,
 50,
 16,
 6,
 147,
 2025,
 19,
 14,
 22,
 4,
 1920,
 4613,
 469,
 4,
 22,
 71,
 87,
 12,
 16,
 43,
 530,
 38,
 76,
 15,
 13,
 1247,
 4,
 22,
 17,
 515,
 17,
 12,
 16,
 626,
 18,
 2,
 5,
 62,
 386,
 12,
 8,
 316,
 8,
 106,
 5,
 4,
 2223,
 5244,
 16,
 480,
 66,
 3785,
 33,
 4,
 130,
 12,
 16,
 38,
 619,
 5,
 25,
 124,
 51,
 36,
 135,
 48,
 25,
 1415,
 33,
 6,
 22,
 12,
 215,
 28,
 77,
 52,
 5,
 14,
 407,
 16,
 82,
 2,
 8,
 4,
 107,
 117,
 5952,
 15,
 256,
 4,
 2,
 7,
 3766,
 5,
 723,
 36,
 71,
 43,
 530,
 476,
 26,
 400,
 317,
 46,
 7,
 4,
 2,
 1029,
 13,
 104,
 88,
 4,
 381,
 15,
 297,
 98,
 32,
 2071,
 56,
 26,
 141,
 6,
 194,
 7486,
 18,
 4,
 226,
 22,
 21,
 134,
 476,
 26,
 480,
 5,
 144,
 30,
 5535,
 18,

In [41]:
x_train = [x[::-1] for x in x_train]
x_test = [x[::-1] for x in x_test]

In [42]:
from keras.preprocessing import sequence

In [43]:
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

In [44]:
model5 = models.Sequential()
model5.add(layers.Embedding(max_features, 128))
model5.add(layers.LSTM(32))
model5.add(layers.Dense(1, activation='sigmoid'))

In [46]:
model5.compile(optimizer='rmsprop',
               loss='binary_crossentropy',
               metrics=['acc'])

In [47]:
history5 = model5.fit(x_train,
                      y_train,
                      epochs=10,
                      batch_size=128,
                      validation_split=0.2)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
BidiRNN

In [50]:
def reverse_order_generator(
                            data,
                            lookback,
                            delay,
                            min_index,
                            max_index,
                            shuffle=False,
                            batch_size=128,
                            step=6):
    if max_index is None:
        max_index=len(data)-delay-1
    i=min_index+lookback
    
    while 1:
        if shuffle:
            rows=np.random.randint(min_index+lookback,max_index,step)
        else:
            if i+batch_size>max_index:
                i=min_index+lookback
            rows=np.arange(i,min(i+lookback,max_index))
        
        samples=np.zeros((len(rows),lookback//step,data.shape[-1]))
        targets=np.zeros((len(rows),))
        
        for j,row in enumerate(rows):
            indices=range(rows[j]-lookback,rows[j],step)
            samples[j]=data[indices]
            targets[j]=data[rows[j]+delay][1]
        yield samples[:,::-1,:],targets
            

In [51]:
train_gen_reverse = reverse_order_generator(float_data,
                                            lookback=lookback,
                                            delay=delay,
                                            min_index=0,
                                            max_index=200000,
                                            shuffle=True,
                                            step=step,
                                            batch_size=batch_size)
val_gen_reverse = reverse_order_generator(float_data,
                                          lookback=lookback,
                                          delay=delay,
                                          min_index=200001,
                                          max_index=300000,
                                          step=step,
                                          batch_size=batch_size)

In [53]:
model6 = models.Sequential()
model6.add(layers.Bidirectional(layers.GRU(32),input_shape=(None,float_data.shape[-1])))
model6.add(layers.Dense(1))

model6.compile(optimizer=optimizers.RMSprop(), loss='mae')
history6 = model6.fit_generator(train_gen_reverse,
                               steps_per_epoch=500,
                               epochs=20,
                               validation_data=val_gen_reverse,
                               validation_steps=val_steps)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
