In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os.path
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing

In [2]:
filtered_data_dir="filtered_displacements" 
raw_sensors_dir = "raw_sensors"
raw_data_file = """/Cluster_Sim_{0}.txt"""
filtered_data_file = """/Cluster_Sim_{0}_filtered.npz"""

In [3]:
clustersim_lhs = pd.read_excel('clustersim_lhs.xlsx', sheet_name='Zuordnung_Messdaten')

In [4]:
def readAndCombineData(messDatei, n,fz,ae,R1,R2):
    if(os.path.isfile(filtered_data_dir+filtered_data_file.format(messDatei)) and os.path.isfile(f'{raw_sensors_dir}/{raw_data_file.format(messDatei)}')):
        data_filtered = np.load(filtered_data_dir+filtered_data_file.format(messDatei))
        data_raw = np.loadtxt(f'{raw_sensors_dir}/{raw_data_file.format(messDatei)}', skiprows=8, usecols=[1, 2, 3, 4, 5, 6, 7])
        X_df=pd.DataFrame({'Time':data_filtered["time"], 'n':n, 'fz':fz, 'ae':ae, 'R1':R1, 'R2':R2,'Fx': data_raw[:,0],'Fy': data_raw[:,1], 'Fz': data_raw[:,2]})
        Y_df=pd.DataFrame({'Dx':data_filtered["dx"], 'Dy': data_filtered["dy"]})
        return X_df,Y_df

def getData():
    frames=[]
    for i in range(len(clustersim_lhs)):
        n=clustersim_lhs['n'][i]
        fz=clustersim_lhs['fz'][i]
        ae=clustersim_lhs['ae'][i]
        R1=clustersim_lhs['R1'][i]
        R2=clustersim_lhs['R2'][i]
        Messdatei=clustersim_lhs['Messdatei'][i]
        frames.append(readAndCombineData(Messdatei,n,fz,ae,R1,R2))

    combined_data = pd.concat(frames)
    return combined_data

In [5]:
X_df,Y_df = readAndCombineData('V0_1001',13499,0.05,7,7,17)

In [6]:
X_df.head()

Unnamed: 0,Time,n,fz,ae,R1,R2,Fx,Fy,Fz
0,0.0,13499,0.05,7,7,17,-0.478649,-0.053215,0.367355
1,1e-05,13499,0.05,7,7,17,-0.505972,-0.083542,0.371647
2,2e-05,13499,0.05,7,7,17,-0.516701,-0.054645,0.492096
3,3e-05,13499,0.05,7,7,17,-0.548458,-0.050497,0.456905
4,4e-05,13499,0.05,7,7,17,-0.502682,-0.073385,0.356197


In [7]:
Y_df.head()

Unnamed: 0,Dx,Dy
0,9.177191e-07,-5.411851e-07
1,5.4399e-07,-9.584568e-07
2,-2.878134e-08,-1.477918e-06
3,-1.385935e-07,-1.711568e-06
4,4.465556e-07,-1.440406e-06


In [12]:
X_train, X_test, Y_train, Y_test = train_test_split(X_df, Y_df, test_size=0.2, random_state=42)

In [None]:
base_model = LinearRegression()
multi_output_model = MultiOutputRegressor(base_model)
multi_output_model.fit(X_train, Y_train)

In [14]:
Y_pred = multi_output_model.predict(X_test)
Y_pred_df = pd.DataFrame(Y_pred, columns=['Y1_Pred', 'Y2_Pred'])

In [None]:
comparison = pd.concat([Y_test.reset_index(drop=True), Y_pred_df], axis=1)
print(comparison.head())
comparison.to_excel("predictions.xlsx")

In [None]:
mse_Y1 = mean_squared_error(Y_test['Dx'], Y_pred[:, 0])
mse_Y2 = mean_squared_error(Y_test['Dy'], Y_pred[:, 1])

print(f'MSE for Y1: {mse_Y1}')
print(f'MSE for Y2: {mse_Y2}')

### CNN Approach

In [10]:
data_array= np.array(X_df.get(["Fx","Fy","Fz"]))
len(data_array)

1183744

In [11]:
min_max_scaler = preprocessing.MinMaxScaler()
data_normalized = min_max_scaler.fit_transform(data_array)
data_normalized

array([[0.49772308, 0.66256973, 0.76986487],
       [0.49765046, 0.66245795, 0.76991494],
       [0.49762194, 0.66256445, 0.7713201 ],
       ...,
       [0.49818239, 0.66349026, 0.77520513],
       [0.49821205, 0.66350608, 0.77569911],
       [0.49823486, 0.66370484, 0.77586932]])

In [16]:
data= pd.concat([pd.DataFrame(data_normalized,columns=["Fx","Fy","Fz"]),Y_df["Dx"]], axis=1)

In [17]:
def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [19]:
X,y = split_sequences(np.array(data),3)

In [20]:
print(X.shape, y.shape)
# summarize the data
# for i in range(len(X)):
# 	print(X[i], y[i])

(1183742, 3, 3) (1183742,)


In [21]:
from keras.models import Sequential
from keras.layers import Conv1D, Dense, GlobalAveragePooling1D, MaxPooling1D, Flatten
from sklearn.model_selection import train_test_split

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [23]:
n_steps=3
n_features = 3
# define model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps, n_features)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(50))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
model.fit(X_train, y_train, epochs=100, verbose=0)

<keras.src.callbacks.history.History at 0x15ee6401610>

In [25]:
X_test

array([[[0.4987204 , 0.66326777, 0.77184411],
        [0.49876451, 0.66306374, 0.77158044],
        [0.49864207, 0.66310117, 0.77199097]],

       [[0.49968084, 0.66372224, 0.77280535],
        [0.4996219 , 0.66397742, 0.77370319],
        [0.49960251, 0.66397373, 0.7728888 ]],

       [[0.4978151 , 0.66383876, 0.77692737],
        [0.49773031, 0.66389992, 0.77783186],
        [0.49761852, 0.66364843, 0.77782859]],

       ...,

       [[0.49857326, 0.66403594, 0.77225465],
        [0.49852459, 0.66395739, 0.77203102],
        [0.49838162, 0.66368375, 0.77207441]],

       [[0.49846793, 0.66308219, 0.76772878],
        [0.49855577, 0.66328992, 0.768166  ],
        [0.4985877 , 0.6633089 , 0.76779886]],

       [[0.49700333, 0.66254758, 0.77533197],
        [0.4971999 , 0.6629546 , 0.77662698],
        [0.49702081, 0.66285337, 0.7757158 ]]])

In [26]:
yhat = model.predict(X_test, verbose=0) 

In [27]:
print(yhat)

[[-6.9355774e-06]
 [-6.9355774e-06]
 [-6.9355774e-06]
 ...
 [-6.9355774e-06]
 [-6.9355774e-06]
 [-6.9355774e-06]]


In [101]:
test_vs_pred = pd.DataFrame({'actual':y_test, 'predicted':yhat.flatten()})

In [105]:
test_vs_pred.to_excel("predictions.xlsx");

In [None]:
test_vs_pred.plot(figsize=(12,5))