# MLP that predicts a single observation from univariate data

In [2]:
import pandas as pd
from numpy import array
import datetime
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


## Load Official Weather Data Observations

In [19]:

# Parse the DWD hourly dataset (MESS_DATUM is formatted yyyymmddhh), keep the
# temperature / humidity columns and aggregate them so they can be aligned
# with the home readings.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
df = pd.read_csv('/Users/stewarta/repos/iot-htsensor/data/produkt_tu_stunde_19510101_20171231_00662.txt', sep=';')

# parse the timestamp; values that do not match the format become NaT instead of raising
df.insert(2, 'datetime', pd.to_datetime(df['MESS_DATUM'], errors='coerce', format='%Y%m%d%H'))

# index the frame by timestamp so it can be filtered and resampled by date
df.set_index('datetime', inplace=True)

# give the measurement columns readable names
df.rename(columns={'TT_TU':'D_Temp','RF_TU':'D_Humi'}, inplace=True)

# keep only the study years and the two renamed columns
# (named masks instead of `filter`, which would shadow the Python builtin)
in_study_years = (df.index.year >= 2013) & (df.index.year <= 2016)
df = df.loc[in_study_years, ['D_Temp', 'D_Humi']]

# aggregate the hourly readings to one mean value per `step` ('D' = calendar day)
# NOTE(review): presumably the source file marks missing readings with a sentinel
# value (e.g. -999) -- confirm, and mask those before averaging if so.
step = 'D'
df = df.resample(step).mean()


# create a subset for analysis: April 2013, first 20 daily values
sensor = 'D_Temp'
in_april_2013 = (df.index.year == 2013) & (df.index.month == 4)
# `array` is the numpy constructor imported at the top of the notebook; the
# alias `np` is never imported, so `np.array` fails on a fresh kernel.
y = array(df.loc[in_april_2013, sensor])
raw_seq = y[:20]

df.head()

Unnamed: 0_level_0,D_Temp,D_Humi
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,6.658333,81.0
2013-01-02,5.354167,83.541667
2013-01-03,7.995833,92.083333
2013-01-04,8.958333,92.458333
2013-01-05,7.554167,97.458333


## Subset Data

In [21]:
# create a subset for analysis: the 2013 values of the chosen sensor column
sensor = 'D_Temp'
# NOTE(review): `filter` shadows the Python builtin; the name is kept here
# only because later cells may still read it -- rename once that is confirmed.
filter = (df.index.year == 2013)

# `array` is the numpy constructor imported at the top of the notebook; the
# alias `np` is never imported, so `np.array` fails on a fresh kernel.
# A single .loc lookup replaces the chained df[filter][sensor] indexing.
y = array(df.loc[filter, sensor])
# keep only the first 20 observations as the working sequence
raw_seq = y[:20]

## Split data into sequences 

In [22]:
# univariate data preparation
def split_sequence(sequence, n_steps):
    """Split a univariate sequence into supervised-learning samples.

    Each sample pairs a window of ``n_steps`` consecutive values with the
    value that immediately follows that window.

    Parameters
    ----------
    sequence : indexable, sliceable sequence of observations
    n_steps : number of consecutive values in each input window

    Returns
    -------
    (X, y) : tuple of numpy arrays
        ``X`` holds one window per row and ``y`` the value following each
        window. Both are empty when the sequence has no position left for
        a target after the first window.
    """
    total = len(sequence)
    # a window starting at `start` is usable only if its target index
    # (start + n_steps) is still inside the sequence
    starts = [start for start in range(total) if start + n_steps < total]
    windows = [sequence[start:start + n_steps] for start in starts]
    targets = [sequence[start + n_steps] for start in starts]
    return array(windows), array(targets)


## Build Train and Test Set

In [26]:
# window length: how many consecutive observations form one input sample
n_steps = 3
# turn the raw sequence into (input window, following value) samples
X, y = split_sequence(raw_seq, n_steps)
# print every input window next to the value it should predict
for window, target in zip(X, y):
    print(window, target)

[6.65833333 5.35416667 7.99583333] 8.958333333333332
[5.35416667 7.99583333 8.95833333] 7.554166666666668
[7.99583333 8.95833333 7.55416667] 6.929166666666666
[8.95833333 7.55416667 6.92916667] 5.737499999999998
[7.55416667 6.92916667 5.7375    ] 6.320833333333334
[6.92916667 5.7375     6.32083333] 6.279166666666666
[5.7375     6.32083333 6.27916667] 2.858333333333333
[6.32083333 6.27916667 2.85833333] -0.9625
[ 6.27916667  2.85833333 -0.9625    ] -2.8708333333333336
[ 2.85833333 -0.9625     -2.87083333] -2.5541666666666667
[-0.9625     -2.87083333 -2.55416667] -4.216666666666668
[-2.87083333 -2.55416667 -4.21666667] -5.512499999999999
[-2.55416667 -4.21666667 -5.5125    ] -3.766666666666666
[-4.21666667 -5.5125     -3.76666667] -2.0999999999999996
[-5.5125     -3.76666667 -2.1       ] -2.8166666666666664
[-3.76666667 -2.1        -2.81666667] -5.954166666666667
[-2.1        -2.81666667 -5.95416667] -5.7749999999999995


In [24]:
# hold out the last sample for testing; every earlier sample is training data
xtrain = X[:len(X) - 1]
xtest = X[-1]

# targets are sliced with the same bound so they stay aligned with the inputs
ytrain = y[:len(X) - 1]
ytest = y[-1]

# each banner is followed by its values on the next line
print('=======  x train ========', xtrain, sep='\n')
print('========= y train ======', ytrain, sep='\n')

print('=======  x test ========', xtest, sep='\n')

print('========= y test ======', ytest, sep='\n')

[[ 6.65833333  5.35416667  7.99583333]
 [ 5.35416667  7.99583333  8.95833333]
 [ 7.99583333  8.95833333  7.55416667]
 [ 8.95833333  7.55416667  6.92916667]
 [ 7.55416667  6.92916667  5.7375    ]
 [ 6.92916667  5.7375      6.32083333]
 [ 5.7375      6.32083333  6.27916667]
 [ 6.32083333  6.27916667  2.85833333]
 [ 6.27916667  2.85833333 -0.9625    ]
 [ 2.85833333 -0.9625     -2.87083333]
 [-0.9625     -2.87083333 -2.55416667]
 [-2.87083333 -2.55416667 -4.21666667]
 [-2.55416667 -4.21666667 -5.5125    ]
 [-4.21666667 -5.5125     -3.76666667]
 [-5.5125     -3.76666667 -2.1       ]
 [-3.76666667 -2.1        -2.81666667]]
[-2.1        -2.81666667 -5.95416667]
[ 8.95833333  7.55416667  6.92916667  5.7375      6.32083333  6.27916667
  2.85833333 -0.9625     -2.87083333 -2.55416667 -4.21666667 -5.5125
 -3.76666667 -2.1        -2.81666667 -5.95416667]
-5.7749999999999995


## Train and test a model: 
### First Results: Univariate MLP is not doing so badly; more epochs did not produce better results; running multiple times yields slightly different results 

In [25]:
# define model: one hidden ReLU layer of 100 units fed by the n_steps window,
# followed by a single linear output unit for the predicted value
model = Sequential([
    Dense(100, activation='relu', input_dim=n_steps),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')
# fit model silently on the training windows
model.fit(xtrain, ytrain, epochs=2000, verbose=0)
# demonstrate prediction: the held-out window is reshaped into a batch of one sample
x_input = xtest.reshape((1, n_steps))
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[-5.683259]]


## Systematic Evaluation 
### Parameters : step, epoc