In [77]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sklearn.metrics import mean_squared_error

In [2]:
# Read the training workbook; the path is resolved against the working directory.
df_train = pd.read_excel(io='Train.xlsx')

In [3]:
# Read the held-out workbook; the path is resolved against the working directory.
df_test = pd.read_excel(io='Test.xlsx')

In [4]:
def rename_column(x):
    """Normalise a raw column header into a short lowercase name.

    Everything from the first ``(`` onward is discarded, surrounding
    whitespace is trimmed and the remainder is lower-cased.

    Args:
        x: Raw column label. Non-string labels are converted with ``str``
            first so they do not raise on ``.split``.

    Returns:
        str: The cleaned column name.
    """
    # Trim after splitting so a header written as "Temp (C)" yields "temp"
    # instead of "temp " (trailing space), which would not match the plain
    # names used to select columns later.
    return str(x).split('(')[0].strip().lower()

In [5]:
# Replace the training headers with their cleaned names.
df_train.columns = list(map(rename_column, df_train.columns))

In [6]:
# Replace the test headers with their cleaned names.
df_test.columns = list(map(rename_column, df_test.columns))

In [7]:
def data_prep(df):
    """Fill in the ``-9.999`` placeholders of ``runid``, ``cdw`` and ``samplingvolume``.

    * ``runid``: placeholders are replaced by the last preceding value
      (forward fill).
    * ``cdw``: placeholders are linearly interpolated in the forward
      direction, so placeholders before the first real value stay NaN.
    * ``samplingvolume``: placeholders become ``0``.

    Args:
        df: DataFrame containing the columns ``runid``, ``cdw`` and
            ``samplingvolume``.

    Returns:
        pandas.DataFrame: A new frame with the three columns cleaned; the
        input frame is left unmodified.

    Note:
        The ``cdw`` interpolation runs over the whole column in row order,
        so it does not stop at the boundary between two ``runid`` values.
    """
    placeholder = -9.999
    # np.nan is used as the replacement value on purpose: passing None as the
    # value to Series.replace is interpreted differently across pandas
    # versions (older releases pad-fill instead of inserting a missing value).
    return df.assign(
        runid=df['runid'].replace(placeholder, np.nan).ffill(),
        cdw=(
            df['cdw']
            .replace(placeholder, np.nan)
            .astype('float64')
            .interpolate(method='linear', limit_direction='forward')
        ),
        samplingvolume=df['samplingvolume'].replace(placeholder, 0),
    )

In [8]:
# Run the same placeholder clean-up on both splits.
df_train, df_test = data_prep(df_train), data_prep(df_test)

In [9]:
# Input columns fed to the model (cleaned header names) and the column to predict.
features = [
    'temp',
    'feed',
    'inductormass',
    'inductor',
    'base',
    'reaktorvolumen',
    'samplingvolume',
]
target = 'cdw'

In [39]:
# Fit two independent 0-1 scalers on the training split only:
# one across the feature columns, one on the target as a single-column array.
scaler_features = MinMaxScaler(feature_range=(0, 1)).fit(df_train[features])
scaler_target = MinMaxScaler(feature_range=(0, 1))
scaler_target.fit(df_train[target].to_numpy().reshape(-1, 1))

MinMaxScaler()

In [28]:
# Rows of run 58 only, restricted to the model columns, re-indexed from 0.
df = df_train.loc[df_train.runid == 58, features + [target]].reset_index(drop=True)

In [29]:
# Window offset for the sequence builder below: each sample spans rows
# i - look_back .. i. The slice is taken with `.loc`, which includes both end
# labels, so a window holds look_back + 1 rows.
look_back = 10

In [46]:
# Build sliding windows for the single run held in `df`.
# Each sample is the scaled feature rows i - look_back .. i (inclusive label
# slice) paired with the scaled target of row i.
x_data = df[features]
y_data = scaler_target.transform(df[target].values.reshape(-1, 1))
X, y = [], []
for i in range(look_back, len(df)):
    point = x_data.loc[i - look_back:i]
    X.append(scaler_features.transform(point))
    y.append(y_data[i])

In [67]:
def transform_data(dataset, features, target, scaler_features, scaler_target, look_back=10, verbose=True):
    """Turn a multi-run table into scaled sliding-window samples.

    The rows of each distinct ``runid`` are processed separately, so a window
    never spans two runs. For every row position ``i >= look_back`` within a
    run, one sample is produced: the scaled feature rows
    ``i - look_back .. i`` (both ends included, i.e. ``look_back + 1`` rows)
    paired with the scaled target of row ``i``.

    Args:
        dataset: DataFrame with a ``runid`` column plus the feature and
            target columns.
        features: List of feature column names.
        target: Name of the target column.
        scaler_features: Object with a ``transform`` method applied to the
            feature columns.
        scaler_target: Object with a ``transform`` method applied to the
            target as an ``(n, 1)`` array.
        look_back: Offset of the first row of each window relative to the
            row whose target is emitted.
        verbose: When true (the default), print each run id as it is
            processed.

    Returns:
        tuple: ``(X, y)`` where ``X`` has shape
        ``(n_samples, look_back + 1, len(features))`` and ``y`` has shape
        ``(n_samples, 1)``. When no run is long enough to yield a window,
        correctly shaped empty arrays are returned.
    """
    window_len = look_back + 1
    windows = []
    targets = []
    for runid in dataset.runid.unique():
        if verbose:
            print(runid)
        run = dataset.loc[dataset.runid == runid, features + [target]]
        # A run with fewer than look_back + 1 rows yields no sample; skipping
        # it here also keeps empty selections (e.g. a NaN run id, which
        # matches no row) away from the scalers.
        if len(run) < window_len:
            continue
        # Scale once per run rather than once per window; the windows are
        # then plain positional slices of the scaled array.
        x_scaled = np.asarray(scaler_features.transform(run[features]))
        y_scaled = np.asarray(scaler_target.transform(run[target].values.reshape(-1, 1)))
        for i in range(look_back, len(run)):
            windows.append(x_scaled[i - look_back:i + 1])
            targets.append(y_scaled[i])
    if not windows:
        # Keep the rank stable so callers can still read X.shape[1] / X.shape[2].
        return np.empty((0, window_len, len(features))), np.empty((0, 1))
    return np.array(windows), np.array(targets)

In [68]:
# Windowed, scaled training tensors built from every run in the training split.
X_train, y_train = transform_data(
    dataset=df_train,
    features=features,
    target=target,
    scaler_features=scaler_features,
    scaler_target=scaler_target,
)

58.0
61.0
63.0
101.0
53.0
80.0
96.0


In [73]:
# Single LSTM layer (10 units) followed by a one-unit dense output, trained
# with MSE loss on the scaled target. The input shape (window rows, features)
# is read off the training tensor.
# NOTE(review): no random seed is set in this cell, so losses will presumably
# differ between runs - confirm whether reproducibility matters here.
model = Sequential([
    LSTM(10, input_shape=X_train.shape[1:]),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=50, batch_size=1, verbose=2)

Epoch 1/50
2717/2717 - 7s - loss: 0.0032 - 7s/epoch - 3ms/step
Epoch 2/50
2717/2717 - 6s - loss: 0.0018 - 6s/epoch - 2ms/step
Epoch 3/50
2717/2717 - 6s - loss: 0.0015 - 6s/epoch - 2ms/step
Epoch 4/50
2717/2717 - 6s - loss: 0.0014 - 6s/epoch - 2ms/step
Epoch 5/50
2717/2717 - 6s - loss: 0.0014 - 6s/epoch - 2ms/step
Epoch 6/50
2717/2717 - 6s - loss: 0.0013 - 6s/epoch - 2ms/step
Epoch 7/50
2717/2717 - 6s - loss: 0.0013 - 6s/epoch - 2ms/step
Epoch 8/50
2717/2717 - 6s - loss: 0.0012 - 6s/epoch - 2ms/step
Epoch 9/50
2717/2717 - 6s - loss: 0.0012 - 6s/epoch - 2ms/step
Epoch 10/50
2717/2717 - 6s - loss: 0.0012 - 6s/epoch - 2ms/step
Epoch 11/50
2717/2717 - 6s - loss: 0.0012 - 6s/epoch - 2ms/step
Epoch 12/50
2717/2717 - 6s - loss: 0.0012 - 6s/epoch - 2ms/step
Epoch 13/50
2717/2717 - 6s - loss: 0.0012 - 6s/epoch - 2ms/step
Epoch 14/50
2717/2717 - 6s - loss: 0.0011 - 6s/epoch - 2ms/step
Epoch 15/50
2717/2717 - 6s - loss: 0.0011 - 6s/epoch - 2ms/step
Epoch 16/50
2717/2717 - 6s - loss: 0.0012 - 6s/ep

<keras.callbacks.History at 0x22855a83580>

In [74]:
# Windowed test tensors, scaled with the scalers that were fitted on the training split.
X_test, y_test = transform_data(
    dataset=df_test,
    features=features,
    target=target,
    scaler_features=scaler_features,
    scaler_target=scaler_target,
)

66.0
68.0


In [75]:
# Predictions for the test windows; they are in the same scaled space as y_test.
y_pred = model.predict(x=X_test)



In [78]:
# Test MSE on the scaled target values (y_test comes from scaler_target.transform),
# not in the original cdw units.
mean_squared_error(y_true=y_test, y_pred=y_pred)

0.0020557751234059458