In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy import asarray
from numpy import save
from numpy import load
import tensorflow as tf
import tensorflow.keras as keras
import pickle
import joblib
from sklearn.metrics import mean_squared_error
import seaborn as sns

In [None]:
# Notebook display settings: never truncate columns, print at most 100 rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

In [None]:
# Use seaborn's "white" style for every figure drawn in this notebook.
sns.set_style(style="white")

### Load Data

In [None]:
# Column names of the model targets (labels) and model inputs (features).
# NOTE: unpickling executes arbitrary code -- only load files from a trusted source.
with open('../final_data/label_keys_ts.pkl', 'rb') as fp:
    label_keys = pickle.load(fp)
with open('../final_data/feature_keys_ts.pkl', 'rb') as fp:
    feature_keys = pickle.load(fp)

# Normalised arrays for the individual data splits.
train_x = np.load('../final_data/train_x_norm.npy')
train_y = np.load('../final_data/train_y_norm.npy')
valid_x = np.load('../final_data/valid_x_norm.npy')
valid_y = np.load('../final_data/valid_y_norm.npy')
test_x = np.load('../final_data/test_x_norm.npy')
test_y = np.load('../final_data/test_y_norm.npy')
all_train_x = np.load('../final_data/all_train_x_norm.npy')
all_train_y = np.load('../final_data/all_train_y_norm.npy')

# Fitted scalers; scaler_x / scaler_y are used below to (de-)normalise features and labels.
scaler_x = joblib.load("../final_data/scaler_x.save")
scaler_y = joblib.load("../final_data/scaler_y.save")

scaler_train_x = joblib.load("../final_data/scaler_train_x.save")
scaler_train_y = joblib.load("../final_data/scaler_train_y.save")

# Unscaled data frames per split (contain a 'seq_id' column used for filtering below).
train_unsc = pd.read_pickle("../final_data/train_unsc.pkl")
valid_unsc = pd.read_pickle("../final_data/valid_unsc.pkl")
test_unsc = pd.read_pickle("../final_data/test_unsc.pkl")
all_train_unsc = pd.read_pickle("../final_data/all_train_unsc.pkl")

# Normalised data frames per split.
train_norm = pd.read_pickle("../final_data/train_norm.pkl")
valid_norm = pd.read_pickle("../final_data/valid_norm.pkl")
test_norm = pd.read_pickle("../final_data/test_norm.pkl")
all_train_norm = pd.read_pickle("../final_data/all_train_norm.pkl")

# Reference data set; its column statistics (mean/min/max) drive the experiments below.
cc4_data = pd.read_pickle("../final_data/cc4_data.pkl")

### Load LSTM Dropout Model

In [None]:
# Restore the saved Keras model; the sensitivity experiments below call model.predict on it.
model = keras.models.load_model("../final_data/models/LSTMDropout_model")

### Prepare data to look at single sequences

In [None]:
## transform data so that for each sequence we have measured values and prediction separately

## transform into (samples, time steps, features) format
def lstm_format(data, seq_len=50, feature_cols=None, label_cols=None):
    """Cut every sequence in ``data`` into sliding windows for the LSTM.

    Rows are grouped by ``'seq_id'``. For each group, every window of
    ``seq_len`` consecutive rows becomes one sample; the label of a sample is
    the label row at the window's last time step.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'seq_id'`` column plus the feature and label columns.
    seq_len : int, default 50
        Number of time steps per window.
    feature_cols, label_cols : list of str, optional
        Columns used as inputs / targets. Default to the notebook-level
        ``feature_keys`` / ``label_keys``.

    Returns
    -------
    dict
        ``'x'``: array of shape (samples, seq_len, n_features),
        ``'y'``: array of shape (samples, n_labels).

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1 or no group has at least ``seq_len`` rows.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")
    if feature_cols is None:
        feature_cols = feature_keys
    if label_cols is None:
        label_cols = label_keys

    features = []
    labels = []
    for _, group in data.groupby('seq_id'):
        # Convert once per group instead of once per window.
        feature_values = group[feature_cols].to_numpy()
        label_values = group[label_cols].to_numpy()
        for start in range(len(group) - seq_len + 1):
            features.append(feature_values[start:start + seq_len])
            labels.append(label_values[start + seq_len - 1])

    if not features:
        # np.stack([]) would raise a far less helpful ValueError.
        raise ValueError("no sequence has at least seq_len=%d rows" % seq_len)

    return {'x': np.stack(features), 'y': np.stack(labels)}

# IDs of all sequences contained in the normalised test set.
test_seq = test_norm['seq_id'].drop_duplicates()

# Per sequence: windowed inputs/targets, de-normalised prediction,
# de-normalised measured values and the RMSE between the two.
x, y = {}, {}
prediction, real_values, rmse = {}, {}, {}
for seq in test_seq:
    df = lstm_format(test_norm[test_norm['seq_id'] == seq])
    x[seq] = df['x'].copy()
    y[seq] = df['y'].copy()

    prediction[seq] = scaler_y.inverse_transform(model.predict(x[seq]))
    real_values[seq] = scaler_y.inverse_transform(y[seq])
    rmse[seq] = np.sqrt(mean_squared_error(prediction[seq], real_values[seq]))

# (rmse, sequence id) pairs for every test sequence.
error = [(rmse[seq_name], seq_name) for seq_name in test_seq]

In [None]:
# Names of all cooling-water feature columns.
water_keys = [
    'WasserZ4FsInLproMin_integr',
    'WasserZ4LsInLproMin_integr',
    'WasserZ2bFsInLproMin_integr',
    'WasserZ2bLsInLproMin_integr',
    'WasserZ3bFsInLproMin_integr',
    'WasserZ1FsInLproMin_integr',
    'WasserZ3bLsInLproMin_integr',
    'WasserZ1LsInLproMin_integr',
    'WasserZ3aFsInLproMin_integr',
    'WasserZ3aLsInLproMin_integr',
    'WasserZ2aLsInLproMin_integr',
    'WasserZ2aFsInLproMin_integr',
    'WasserZ5LsInLproMin_integr',
    'WasserZ5FsInLproMin_integr',
]
# Every remaining feature, in the order given by feature_keys.
not_water_keys = [key for key in feature_keys if key not in water_keys]

### Change values of specific features

In [None]:
## Plot the measured temperature, the original prediction and two new predictions
def _predict_changed(frame_unsc):
    """Scale one modified (unscaled) frame, window it and return de-normalised predictions."""
    ids = frame_unsc['seq_id'].to_frame().reset_index(drop=True)
    scaled_x = pd.DataFrame(scaler_x.transform(frame_unsc[feature_keys]), columns=feature_keys)
    scaled_y = pd.DataFrame(scaler_y.transform(frame_unsc[label_keys]), columns=label_keys)
    scaled = pd.concat([scaled_y, scaled_x, ids], axis=1).reset_index(drop=True)
    return scaler_y.inverse_transform(model.predict(lstm_format(scaled)['x']))


def new_predicition_plot(test_change_unsc, test_change_unsc_2, descr_change, descr_change_2,
                         seq_id=None, ylim=(660, 800), hide_yticks=True):
    """Compare the model's predictions for two modified versions of one sequence.

    Both frames are normalised with ``scaler_x`` / ``scaler_y``, windowed with
    ``lstm_format`` and passed through ``model``. The RMSE of each new
    prediction against the measured values is printed, and the first label
    column of the measured values, the original prediction and both new
    predictions is plotted.

    Parameters
    ----------
    test_change_unsc, test_change_unsc_2 : pandas.DataFrame
        Unscaled rows of a single sequence with modified feature values.
    descr_change, descr_change_2 : str
        Legend labels (also used in the printed RMSE lines).
    seq_id : optional
        Key into ``real_values`` / ``prediction``. Defaults to the first
        ``'seq_id'`` value of ``test_change_unsc`` so that the function no
        longer depends on a notebook-level ``seq`` variable.
    ylim : tuple, default (660, 800)
        y-axis limits of the plot.
    hide_yticks : bool, default True
        Remove the y-axis ticks (the original behaviour).
    """
    if seq_id is None:
        seq_id = test_change_unsc['seq_id'].iloc[0]
    measured = real_values[seq_id]

    prediction_change = _predict_changed(test_change_unsc)
    rmse_change = np.sqrt(mean_squared_error(prediction_change, measured))
    print("new RMSE", descr_change, " :", rmse_change)

    prediction_change_2 = _predict_changed(test_change_unsc_2)
    rmse_change_2 = np.sqrt(mean_squared_error(prediction_change_2, measured))
    print("new RMSE", descr_change_2, " :", rmse_change_2)

    fig, ax1 = plt.subplots(1, 1, figsize=(6, 5))
    # [:, :1] selects the first label column only.
    ax1.plot(measured[:, :1], color='black', label='measured')
    ax1.plot(prediction[seq_id][:, :1], color='blue', label='original predicted')
    ax1.plot(prediction_change[:, :1], color='red', label=descr_change)
    ax1.plot(prediction_change_2[:, :1], color='green', label=descr_change_2)

    ax1.set_xlabel('Minutes', fontsize=15)
    ax1.set_ylabel('Temperature [°C]', fontsize=15)
    ax1.xaxis.grid(True)
    ax1.legend(loc='best', prop={'size': 13})
    if hide_yticks:
        ax1.set(yticks=[])
    ax1.set_ylim(*ylim)

### Change casting target temperature

Features that can be changed:

In [None]:
# Display the names of all model input features (the candidates for modification).
feature_keys

In [None]:
seq = '473562_str_1'  ## choose sequence
test_change_unsc = test_unsc[test_unsc['seq_id'] == seq].copy()  ## test data of chosen sequence
test_change_unsc_2 = test_change_unsc.copy()

features_changed = ['WasserZ4FsInLproMin_integr', 'GiessLaengeSequenzInM_delta',
                    'WasserZ4LsInLproMin_integr']  ## choose features to change
features_changed_2 = []  ## choose second features to change (not used below)

descr_change = "high casting target temp"  ## description of first new prediction in plot
descr_change_2 = "low casting target temp"  ## description of second new prediction in plot

# NOTE(review): the plot labels mention the casting target temperature, but the
# columns changed here are cooling water (zone 4) and 'GiessLaengeSequenzInM_delta'
# -- confirm which experiment is intended.
#
# Each column is set to a constant derived from ITS OWN mean in cc4_data.
# (Previously 'GiessLaengeSequenzInM_delta' was additionally overwritten with half
# the mean of whichever column the loop was currently at, i.e. finally a water column.)
# Whole-column assignment is used instead of writing through `.values[:]`, which
# fails on read-only arrays when pandas Copy-on-Write is active.
for col in features_changed:
    test_change_unsc[col] = cc4_data[col].mean() / 2
    test_change_unsc_2[col] = cc4_data[col].mean()

new_predicition_plot(test_change_unsc, test_change_unsc_2, descr_change, descr_change_2)

### Change cooling water quantities

All cooling water features:

In [None]:
# Display the list of cooling-water feature columns.
water_keys

In [None]:
# All cooling-water columns (same order as water_keys) followed by the
# 'GiessLaengeSequenzInM_delta' column; these are changed together below.
together_keys = [*water_keys, 'GiessLaengeSequenzInM_delta']

In [None]:
# Display the columns that are modified together in the next cells.
together_keys

In [None]:
seq = '473562_str_1'  ## choose sequence
test_change_unsc = test_unsc[test_unsc['seq_id'] == seq].copy()  ## test data of chosen sequence
test_change_unsc_2 = test_change_unsc.copy()

features_changed = together_keys  ## choose features to change
features_changed_2 = together_keys  ## choose second features to change

descr_change = "high cooling water all zones"  ## description of first new prediction in plot
descr_change_2 = "low cooling water all zones"  ## description of second new prediction in plot

# NOTE(review): besides the cooling water, 'GiessLaengeSequenzInM_delta' is changed
# as well (in the opposite direction) although the labels only mention cooling water.
speed_col = 'GiessLaengeSequenzInM_delta'

# Water columns: variant 1 gets 2.5x the cc4_data mean, variant 2 gets mean / 3.5.
# Whole-column assignment replaces the former `.values[:]` writes, which fail on
# read-only arrays when pandas Copy-on-Write is active.
for col in features_changed:
    if col == speed_col:
        continue  # scaled inversely, handled once below instead of on every iteration
    test_change_unsc[col] = cc4_data[col].mean() * 2.5  ## set value for each feature
    test_change_unsc_2[col] = cc4_data[col].mean() / 3.5

test_change_unsc[speed_col] = cc4_data[speed_col].mean() / 2.5
test_change_unsc_2[speed_col] = cc4_data[speed_col].mean() * 3.5

new_predicition_plot(test_change_unsc, test_change_unsc_2, descr_change, descr_change_2)

In [None]:
seq = '473562_str_1'  ## choose sequence
test_change_unsc = test_unsc[test_unsc['seq_id'] == seq].copy()  ## test data of chosen sequence
test_change_unsc_2 = test_change_unsc.copy()

features_changed = together_keys  ## choose features to change
features_changed_2 = together_keys  ## choose second features to change

descr_change = "high cooling water low speed"  ## description of first new prediction in plot
descr_change_2 = "low cooling water high speed"  ## description of second new prediction in plot

speed_col = 'GiessLaengeSequenzInM_delta'

# Water columns: variant 1 gets 2.5x the cc4_data mean, variant 2 gets mean / 3.5.
# Whole-column assignment replaces the former `.values[:]` writes, which fail on
# read-only arrays when pandas Copy-on-Write is active.
for col in features_changed:
    if col == speed_col:
        continue  # scaled inversely, handled once below instead of on every iteration
    test_change_unsc[col] = cc4_data[col].mean() * 2.5  ## set value for each feature
    test_change_unsc_2[col] = cc4_data[col].mean() / 3.5

test_change_unsc[speed_col] = cc4_data[speed_col].mean() / 2.5
test_change_unsc_2[speed_col] = cc4_data[speed_col].mean() * 3.5
    

# Sequence ids with a fresh 0..n-1 index so they align with the scaled frames built below.
id_test = test_change_unsc['seq_id'].to_frame().reset_index(drop=True)

# --- first variant: normalise, window, predict, de-normalise ---
test_change_x = pd.DataFrame(scaler_x.transform(test_change_unsc[feature_keys]),
                             columns=feature_keys)
test_change_y = pd.DataFrame(scaler_y.transform(test_change_unsc[label_keys]),
                             columns=label_keys)
test_change = pd.concat([test_change_y, test_change_x, id_test], axis=1).reset_index(drop=True)

df = lstm_format(test_change)
x_change = df['x'].copy()

prediction_change = scaler_y.inverse_transform(model.predict(x_change))
rmse_change = np.sqrt(mean_squared_error(prediction_change, real_values[seq]))
print("new RMSE",descr_change, " :" , rmse_change)

# --- second variant: same pipeline ---
test_change_x_2 = pd.DataFrame(scaler_x.transform(test_change_unsc_2[feature_keys]),
                               columns=feature_keys)
test_change_y_2 = pd.DataFrame(scaler_y.transform(test_change_unsc_2[label_keys]),
                               columns=label_keys)
test_change_2 = pd.concat([test_change_y_2, test_change_x_2, id_test], axis=1).reset_index(drop=True)

df_2 = lstm_format(test_change_2)
x_change_2 = df_2['x'].copy()

prediction_change_2 = scaler_y.inverse_transform(model.predict(x_change_2))
rmse_change_2 = np.sqrt(mean_squared_error(prediction_change_2, real_values[seq]))
print("new RMSE",descr_change_2, " :" , rmse_change_2)

# Measured values, original prediction and both new predictions (first label column only).
fig, ax1 = plt.subplots(1, 1, figsize=(6, 5), sharex=True, sharey=True)
ax1.plot(real_values[seq][:, :1], color='black', label='measured')
ax1.plot(prediction[seq][:, :1], color='blue', label='original predicted')
ax1.plot(prediction_change[:, :1], color='red', label=descr_change)
ax1.plot(prediction_change_2[:, :1], color='green', label=descr_change_2)

ax1.set_xlabel('Minutes', fontsize=15)
ax1.set_ylabel('Temperature [°C]', fontsize=15)
ax1.xaxis.grid(True)
ax1.legend(loc='best', prop={'size': 13})
# y ticks stay visible in this figure (unlike in new_predicition_plot).
ax1.set_ylim(660, 800)

In [None]:
# Write the current `fig` to disk as a 200 dpi PNG.
# NOTE(review): absolute, user-specific output path -- adjust before running on another machine.
fig.savefig('/home/di40438/bachelorarbeit/data/water_speed_dill.png', format='png', dpi=200)

In [None]:
# Inspect the first rows of the normalised frame of the second variant.
test_change_2.head()

In [None]:
# Inspect the first rows of the unscaled test data for comparison.
test_unsc.head()

In [None]:
# Inspect the first rows of the unscaled, modified frame of the second variant.
test_change_unsc_2.head()

### Changes for each Zone 
Here only for no water in each of the first 4 zones (Z1, Z2a, Z2b, Z3a)

In [None]:
seq = '473562_str_1'
test_change_3_unsc = test_unsc[test_unsc['seq_id'] == seq].copy()
features_changed_3 = ['WasserZ1LsInLproMin_integr',
                      'WasserZ1FsInLproMin_integr']

# Set the selected cooling-water columns to zero for the whole sequence.
# Whole-column assignment replaces the former `.values[:] = 0` write, which fails
# on read-only arrays when pandas Copy-on-Write is active; 0.0 keeps the column float.
for col in features_changed_3:
    test_change_3_unsc[col] = 0.0

Load Dataframes

In [None]:
# Previously saved predictions, one pickle per zone (z1, z2a, z2b, z3a);
# they are plotted with .iloc below.
# NOTE: read_pickle unpickles arbitrary objects -- only load trusted files.
prediction_change_low_1 = pd.read_pickle("../final_data/sensitivity/low_water_z1.pkl")
prediction_change_low_2a = pd.read_pickle("../final_data/sensitivity/low_water_z2a.pkl")
prediction_change_low_2b = pd.read_pickle("../final_data/sensitivity/low_water_z2b.pkl")
prediction_change_low_3a = pd.read_pickle("../final_data/sensitivity/low_water_z3a.pkl")

In [None]:
# Sequence ids with a fresh 0..n-1 index so they align with the scaled frames built below.
id_test = test_change_3_unsc['seq_id'].to_frame().reset_index(drop=True)

# Normalise the modified frame, window it, predict and de-normalise.
test_change_3_x = pd.DataFrame(scaler_x.transform(test_change_3_unsc[feature_keys]),
                               columns=feature_keys)
test_change_3_y = pd.DataFrame(scaler_y.transform(test_change_3_unsc[label_keys]),
                               columns=label_keys)
test_change_3 = pd.concat([test_change_3_y, test_change_3_x, id_test], axis=1).reset_index(drop=True)

df = lstm_format(test_change_3)
x_change_3 = df['x'].copy()

prediction_change_3 = scaler_y.inverse_transform(model.predict(x_change_3))
rmse_change_3 = np.sqrt(mean_squared_error(prediction_change_3, real_values[seq]))

# Measured values and original prediction next to the saved per-zone predictions
# (first column only). Note that prediction_change_3 itself is not drawn here.
fig, ax1 = plt.subplots(1, 1, figsize=(6, 5), sharex=True, sharey=True)
ax1.plot(real_values[seq][:, :1], color='black', label='measured')
ax1.plot(prediction[seq][:, :1], color='blue', label='original predicted')
for zone_prediction, zone_color, zone_label in (
    (prediction_change_low_1, 'red', 'Z1 low'),
    (prediction_change_low_2a, 'orange', 'Z2a low'),
    (prediction_change_low_2b, 'green', 'Z2b low'),
    (prediction_change_low_3a, 'm', 'Z3a low'),
):
    ax1.plot(zone_prediction.iloc[:, :1], zone_color, label=zone_label)

ax1.set_xlabel('Minutes', fontsize=15)
ax1.set_ylabel('Temperature [°C]', fontsize=15)
ax1.set_ylim(660, 800)
ax1.xaxis.grid(True)
ax1.yaxis.grid(True)
ax1.legend(loc='best', prop={'size': 13})

In [None]:
# Keep this run's prediction as the zone-1 curve. It is wrapped in a DataFrame
# because the plotting cell above accesses the zone predictions via `.iloc`;
# storing the bare ndarray would make that cell fail when it is re-run.
prediction_change_low_1 = pd.DataFrame(prediction_change_3.copy())

### Change casting speed

In [None]:
# Display the feature names again to pick the casting-speed columns.
feature_keys

### Planned Casting Speed

In [None]:
seq = '473562_str_1'  ## choose sequence
test_change_unsc = test_unsc[test_unsc['seq_id'] == seq].copy()  ## test data of chosen sequence
test_change_unsc_2 = test_change_unsc.copy()

features_changed = ['SollGiessGeschwInMproMin']  ## choose features to change
features_changed_2 = ['SollGiessGeschwInMproMin']  ## choose second features to change

descr_change = "high casting speed"  ## description of first new prediction in plot
descr_change_2 = "low casting speed"  ## description of second new prediction in plot

# Variant 1: 1.5x the maximum seen in cc4_data; variant 2: minimum / 1.5.
# Whole-column assignment replaces the former `.values[:]` writes: those fail on
# read-only arrays when pandas Copy-on-Write is active and would silently
# truncate the float value if the column had an integer dtype.
for col in features_changed:
    test_change_unsc[col] = cc4_data[col].max() * 1.5  ## set value for each feature
    test_change_unsc_2[col] = cc4_data[col].min() / 1.5

new_predicition_plot(test_change_unsc, test_change_unsc_2, descr_change, descr_change_2)

### Actual Casting Speed

In [None]:
seq = '473562_str_1'  ## choose sequence
test_change_unsc = test_unsc[test_unsc['seq_id'] == seq].copy()  ## test data of chosen sequence
test_change_unsc_2 = test_change_unsc.copy()

features_changed = ['GiessLaengeSequenzInM_delta']  ## choose features to change
features_changed_2 = ['GiessLaengeSequenzInM_delta']  ## choose second features to change

descr_change = "high casting speed"  ## description of first new prediction in plot
descr_change_2 = "low casting speed"  ## description of second new prediction in plot

# Variant 1: 1.5x the maximum seen in cc4_data; variant 2: the minimum.
# Whole-column assignment replaces the former `.values[:]` writes, which fail on
# read-only arrays when pandas Copy-on-Write is active.
for col in features_changed:
    test_change_unsc[col] = cc4_data[col].max() * 1.5  ## set value for each feature
    test_change_unsc_2[col] = cc4_data[col].min()

new_predicition_plot(test_change_unsc, test_change_unsc_2, descr_change, descr_change_2)

### Effect of changes to average predicted temperature

#### Mean of new predicted temperature for whole test set
Here for casting target temperature as an example:

In [None]:
## Set values to change
all_test_change_unsc = test_unsc.copy()
features_changed = ['ZielTempTreiberInC']
# Set the chosen column(s) to the minimum found in cc4_data for the whole test set.
# Whole-column assignment replaces the former `.values[:]` write, which fails on
# read-only arrays when pandas Copy-on-Write is active.
for col in features_changed:
    all_test_change_unsc[col] = cc4_data[col].min()

In [None]:
# Sequence ids with a fresh 0..n-1 index so they align with the scaled frames built below.
id_test = all_test_change_unsc['seq_id'].to_frame().reset_index(drop=True)

# Normalise the modified test set and rebuild one frame: labels, features, seq_id.
test_change_x = pd.DataFrame(scaler_x.transform(all_test_change_unsc[feature_keys]),
                             columns=feature_keys)
test_change_y = pd.DataFrame(scaler_y.transform(all_test_change_unsc[label_keys]),
                             columns=label_keys)
test_change = pd.concat([test_change_y, test_change_x, id_test], axis=1).reset_index(drop=True)

# Window all sequences, predict and convert back with scaler_y.
df = lstm_format(test_change)
x_change = df['x'].copy()

prediction_change = scaler_y.inverse_transform(model.predict(x_change))
print('Mean of new predicted temperature for whole test set:')
prediction_change.mean()

#### Mean of original predicted temperature for whole test set

In [None]:
# Baseline: predictions for the unmodified test inputs, de-normalised with scaler_y.
original_prediction_norm = model.predict(test_x)
original_prediction = scaler_y.inverse_transform(original_prediction_norm)
print('Mean of original predicted temperature for whole test set:')
np.mean(original_prediction)

#### Mean of new and original predicted temperature per sequence

In [None]:
test_seq = test_change['seq_id'].drop_duplicates()  ## all sequence ids in test set

x_all_changed = {}
y_all_changed = {}
prediction_all_changed = {}  ## new predictions for each sequence in test set
mean_temp = []
for seq in test_seq:
    # Window the modified data of this sequence, predict and de-normalise.
    df = lstm_format(test_change[test_change['seq_id'] == seq])
    x_all_changed[seq] = df['x'].copy()
    y_all_changed[seq] = df['y'].copy()

    prediction_all_changed[seq] = scaler_y.inverse_transform(model.predict(x_all_changed[seq]))
    mean_temp.append(prediction_all_changed[seq].mean())

print('Mean of new predicted temperature for each sequences:')
np.mean(mean_temp)  ## mean over all sequences in test set

In [None]:
# Mean of the original (unmodified) prediction of every test sequence.
mean_prediciton = [prediction[seq_name].mean() for seq_name in test_seq]

print('Mean of original predicted temperature for each sequences:')
np.mean(mean_prediciton)

### Looking at Properties of a single sequence after changes were made

In [None]:
## Choose the sequence whose (modified) input curves are plotted below
seq_number = '473562_str_1'

Cooling water curves:

In [None]:
fig, ax1 = plt.subplots(1, 1, figsize=(6, 5))

# Rows of the chosen sequence without its first 50 rows, re-indexed from 0;
# the index serves as the x axis.
water_frame = test_change_unsc[test_change_unsc['seq_id'] == seq_number][50:].reset_index(drop=True)

# One curve per zone, drawn in the same order as before (legend order is unchanged).
for water_column, curve_label in (
    ('WasserZ5LsInLproMin_integr', 'cooling water Z5'),
    ('WasserZ4LsInLproMin_integr', 'cooling water Z4'),
    ('WasserZ3bLsInLproMin_integr', 'cooling water Z3b'),
    ('WasserZ3aLsInLproMin_integr', 'cooling water Z3a'),
    ('WasserZ2bLsInLproMin_integr', 'cooling water Z2b'),
    ('WasserZ2aLsInLproMin_integr', 'cooling water Z2a'),
    ('WasserZ1LsInLproMin_integr', 'cooling water Z1'),
):
    sns.lineplot(data=water_frame, y=water_column, x=water_frame.index,
                 ax=ax1, label=curve_label)

ax1.set_ylabel('Cooling Water [l]', fontsize=15)
ax1.xaxis.grid(True)
ax1.set(yticks=[])
ax1.set_xlabel('Minutes', fontsize=15)
ax1.legend(loc='upper left', prop={'size': 13})

Casting speed curve:

In [None]:
fig, ax1 = plt.subplots(1, figsize=(6, 5))

# Rows of the chosen sequence without its first 5 rows, re-indexed from 0.
# NOTE(review): the cooling-water plot skips 50 rows, this one only 5 -- confirm
# whether both plots are meant to share the same x axis.
speed_frame = test_change_unsc[test_change_unsc['seq_id'] == seq_number][5:].reset_index(drop=True)
sns.lineplot(data=speed_frame, y='GiessLaengeSequenzInM_delta', x=speed_frame.index,
             ax=ax1, color='black')

ax1.set_xlabel('Minutes', fontsize=15)
ax1.set_ylabel('Casting Speed [m/min]', fontsize=15)
ax1.set(yticks=[])
ax1.xaxis.grid(True)
ax1.yaxis.grid(True)


## Train Model without specific features

In [None]:
# Display the feature names to choose which ones to exclude from training.
feature_keys

In [None]:
# Features left out of the reduced model, and the remaining inputs in feature_keys order.
not_used = ['ZielTempTreiberInC']  ## exclude feature
new_features = [key for key in feature_keys if key not in not_used]

Transform data in LSTM format:

In [None]:
def lstm_format_new_features(data, seq_len=50, feature_cols=None, label_cols=None):
    """Cut every sequence in ``data`` into sliding windows using the reduced feature set.

    Rows are grouped by ``'seq_id'``. For each group, every window of
    ``seq_len`` consecutive rows becomes one sample; the label of a sample is
    the label row at the window's last time step.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'seq_id'`` column plus the feature and label columns.
    seq_len : int, default 50
        Number of time steps per window.
    feature_cols, label_cols : list of str, optional
        Columns used as inputs / targets. Default to the notebook-level
        ``new_features`` / ``label_keys``.

    Returns
    -------
    dict
        ``'x'``: array of shape (samples, seq_len, n_features),
        ``'y'``: array of shape (samples, n_labels).

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1 or no group has at least ``seq_len`` rows.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")
    if feature_cols is None:
        feature_cols = new_features
    if label_cols is None:
        label_cols = label_keys

    features = []
    labels = []
    for _, group in data.groupby('seq_id'):
        # Convert once per group instead of once per window.
        feature_values = group[feature_cols].to_numpy()
        label_values = group[label_cols].to_numpy()
        for start in range(len(group) - seq_len + 1):
            features.append(feature_values[start:start + seq_len])
            labels.append(label_values[start + seq_len - 1])

    if not features:
        # np.stack([]) would raise a far less helpful ValueError.
        raise ValueError("no sequence has at least seq_len=%d rows" % seq_len)

    return {'x': np.stack(features), 'y': np.stack(labels)}

# Windowed test data restricted to the reduced feature set.
test_n = lstm_format_new_features(test_norm)
test_x_norm, test_y_norm = test_n['x'].copy(), test_n['y'].copy()

# Windowed full training data restricted to the reduced feature set.
all_train_n = lstm_format_new_features(all_train_norm)
all_train_x_norm, all_train_y_norm = all_train_n['x'].copy(), all_train_n['y'].copy()

Train the best LSTM model with new features:

In [None]:
# LSTMDropout_model = keras.models.Sequential([
#     keras.layers.LSTM(16,
#                       input_shape=(all_train_x_norm.shape[1],all_train_x_norm.shape[2]),
#                       return_sequences=True,
#                       dropout=0.3,
#                       recurrent_dropout=0.3,
#                       kernel_constraint=keras.constraints.max_norm(max_value=1),
#                       recurrent_constraint=keras.constraints.max_norm(max_value=1),
#                       ),
    
#     keras.layers.LSTM(16,
#                     return_sequences=True,
#                     dropout=0.3,
#                     recurrent_dropout=0.3,
#                     kernel_constraint=keras.constraints.max_norm(max_value=1),
#                     recurrent_constraint=keras.constraints.max_norm(max_value=1),
#                      ),
    
#     keras.layers.LSTM(16,
#                     return_sequences=False,
#                     dropout=0.3,
#                     recurrent_dropout=0.3,
#                     kernel_constraint=keras.constraints.max_norm(max_value=1),
#                     recurrent_constraint=keras.constraints.max_norm(max_value=1),
#                     ),
    
#     keras.layers.Dense(2, kernel_initializer='he_normal')
# ])

# LSTMDropout_model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam(lr=1e-3))
# LSTMDropout_model.summary()


# LSTMDropout_history = LSTMDropout_model.fit(all_train_x_norm, all_train_y_norm,
#                     epochs=99,
#                     batch_size=128, 
#                     verbose=2
#                    )

### Load Model

In [None]:
# Restore the saved Keras model that is evaluated on the reduced feature set below.
new_lstm = keras.models.load_model("../final_data/models/lstm_no_casting_target")

### RMSE of new Model:

In [None]:
# De-normalise the measured test targets and the new model's predictions, then compare.
y_test_unsc = scaler_y.inverse_transform(test_y_norm)
y_pred_test = new_lstm.predict(test_x_norm)
y_pred_test_unsc = scaler_y.inverse_transform(y_pred_test)
print('RMSE of new model:')
np.sqrt(mean_squared_error(y_pred_test_unsc, y_test_unsc))

### Effect on the prediction of a single sequence

Bring data in the LSTM format:

In [None]:
def lstm_format(data, seq_len=50, feature_cols=None, label_cols=None):
    """Cut every sequence in ``data`` into sliding windows using the reduced feature set.

    CAUTION: this re-definition replaces the earlier ``lstm_format`` and defaults
    to ``new_features`` instead of ``feature_keys``. Switch the keys back (or
    pass ``feature_cols=feature_keys``) before windowing data for the original
    ``model`` again.

    Rows are grouped by ``'seq_id'``. For each group, every window of
    ``seq_len`` consecutive rows becomes one sample; the label of a sample is
    the label row at the window's last time step.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'seq_id'`` column plus the feature and label columns.
    seq_len : int, default 50
        Number of time steps per window.
    feature_cols, label_cols : list of str, optional
        Columns used as inputs / targets. Default to the notebook-level
        ``new_features`` / ``label_keys``.

    Returns
    -------
    dict
        ``'x'``: array of shape (samples, seq_len, n_features),
        ``'y'``: array of shape (samples, n_labels).

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1 or no group has at least ``seq_len`` rows.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")
    if feature_cols is None:
        feature_cols = new_features
    if label_cols is None:
        label_cols = label_keys

    features = []
    labels = []
    for _, group in data.groupby('seq_id'):
        # Convert once per group instead of once per window.
        feature_values = group[feature_cols].to_numpy()
        label_values = group[label_cols].to_numpy()
        for start in range(len(group) - seq_len + 1):
            features.append(feature_values[start:start + seq_len])
            labels.append(label_values[start + seq_len - 1])

    if not features:
        # np.stack([]) would raise a far less helpful ValueError.
        raise ValueError("no sequence has at least seq_len=%d rows" % seq_len)

    return {'x': np.stack(features), 'y': np.stack(labels)}

# IDs of all sequences contained in the normalised test set.
test_seq = test_norm['seq_id'].drop_duplicates()

# Per sequence: windowed inputs/targets, de-normalised prediction of the new
# model, de-normalised measured values and the RMSE between the two.
x, y = {}, {}
prediction, real_values, rmse = {}, {}, {}
for seq in test_seq:
    df = lstm_format(test_norm[test_norm['seq_id'] == seq])
    x[seq] = df['x'].copy()
    y[seq] = df['y'].copy()

    prediction[seq] = scaler_y.inverse_transform(new_lstm.predict(x[seq]))
    real_values[seq] = scaler_y.inverse_transform(y[seq])
    rmse[seq] = np.sqrt(mean_squared_error(prediction[seq], real_values[seq]))

# (rmse, sequence id) pairs for every test sequence.
error = [(rmse[seq_name], seq_name) for seq_name in test_seq]

Look at new prediction:

In [None]:
sequences_name = '473562_str_1'

# Measured vs. predicted values (first label column) of one sequence for the new model.
fig, ax1 = plt.subplots(1, figsize=(6, 5), sharex=True, sharey=True)
ax1.plot(real_values[sequences_name][:, :1], color='black', label='Measured Temperature')
ax1.plot(prediction[sequences_name][:, :1], color='blue', label='Predicted Temperature')
ax1.legend(loc='best')
#ax1.set_title('Comparison Between Measured and Predicted Temperatures for Sequence '+str(sequences_name))
ax1.tick_params(axis='both', which='both', labelbottom=True)
ax1.grid()
ax1.set_xlabel('Minutes', fontsize=15)
# The y label is set once; an earlier set_ylabel('Temperature') call was
# immediately overwritten by this one and has been removed.
ax1.set_ylabel('Temperature [°C]', fontsize=15)
ax1.set(yticks=[])

print('RMSE:', rmse[sequences_name])