In [None]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime, timedelta
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D,UpSampling3D, Conv3DTranspose, Flatten, Concatenate, Dense, TimeDistributed, Bidirectional, Input, Reshape  
from keras.models import Model
from sklearn.model_selection import train_test_split
from keras.layers import LSTM
from keras.callbacks import EarlyStopping
from time import sleep
# Root folder of the DWD data. Defaults to the original local path but can be
# overridden with the DWD_DATA_DIR environment variable so the notebook is not
# tied to a single machine.
directory_path = os.environ.get("DWD_DATA_DIR", "/home/arman_abouali/Downloads/DWD")

In [None]:
# Upper bound for TensorFlow's allocation on the first GPU. The value is passed
# as `memory_limit`, which TensorFlow documents as megabytes (15000 MB ~= 15 GB).
GPU_MEMORY_LIMIT_MB = 15000

gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Expose the first physical GPU as one logical device with a capped memory budget.
  try:
    tf.config.set_logical_device_configuration(
        gpus[0],
        [tf.config.LogicalDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)])
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)



In [None]:
import os
# Optional switch, left disabled: uncomment the next line to set
# CUDA_VISIBLE_DEVICES to "-1" (presumably to hide the GPUs and run on CPU — confirm).
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Turn on TensorFlow's device-placement logging for subsequent operations.
tf.debugging.set_log_device_placement(True)
# Report how many physical GPUs TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [None]:
def create_sequences(array, sequence_length):
    """Cut ``array`` into overlapping windows along its first axis.

    Window ``i`` is ``array[i:i + sequence_length]`` for every ``i`` in
    ``range(len(array) - sequence_length)``. The window that would end on the
    very last row is therefore not produced.

    Parameters
    ----------
    array : array-like
        Input data; windows are taken along axis 0.
    sequence_length : int
        Number of consecutive rows per window. Must be at least 1.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, sequence_length, *array.shape[1:])``.
        When the input is too short for any window, the result has zero
        windows but keeps the trailing dimensions, so downstream indexing such
        as ``result[:, -1, :]`` still works.

    Raises
    ------
    ValueError
        If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")

    data = np.asarray(array)
    n_windows = len(data) - sequence_length
    if n_windows <= 0:
        # Keep the result's rank consistent with the non-empty case instead of
        # returning a bare 1-D empty array.
        return np.empty((0, sequence_length) + data.shape[1:], dtype=data.dtype)

    return np.array([data[start:start + sequence_length] for start in range(n_windows)])

In [None]:
# Read the semicolon-separated file and index it by the parsed 'Zeit' column
y_df = pd.read_csv('test.csv', sep=';', parse_dates=['Zeit']).set_index('Zeit')

# Keep only the two level columns inside the study period, as a plain array
target_columns = ['Margarethenklippe_Pegel_now', 'Sennhuette_Pegel_now']
y = y_df.loc['2016-08-01':'2017-08-31', target_columns].to_numpy()

print(f"Shape of y: {y.shape}")

In [None]:
# Display the loaded frame (indexed by 'Zeit') for a visual check.
y_df

In [None]:
# Fit the scaler on the leading (training) part of the series only, then apply
# it to every row. Fitting on the whole series would leak the min/max of the
# validation and test periods into training.
# The fraction mirrors the 70/20/10 split applied to the windowed samples later
# in the notebook; keep the two in sync. Values outside the training range may
# consequently fall outside [0, 1].
n_fit_rows = max(1, int(len(y) * 0.7))

scaler = MinMaxScaler()
scaler.fit(y[:n_fit_rows])
y = scaler.transform(y)
print(f"Shape of y_scaled: {y.shape}")

In [None]:
# Seed both NumPy and TensorFlow
np.random.seed(42)
tf.random.set_seed(42)

seq_length = 4

# All overlapping windows of `seq_length` consecutive rows
all_windows = create_sequences(y, sequence_length=seq_length)

# Target for a window = last row of the following window;
# the final window has no successor and is dropped from the inputs.
y_sensors_sequence = all_windows[1:, -1, :]
X_sensors_sequence = all_windows[:-1, :, :]

# Chronological 70/20/10 split (no shuffling)
n_samples = X_sensors_sequence.shape[0]
train_len = int(n_samples * 0.7)
val_len = int(n_samples * 0.2)
val_end = train_len + val_len

X_sen_train, y_train = X_sensors_sequence[:train_len], y_sensors_sequence[:train_len]
X_sen_val, y_val = X_sensors_sequence[train_len:val_end], y_sensors_sequence[train_len:val_end]
X_sen_test, y_test = X_sensors_sequence[val_end:], y_sensors_sequence[val_end:]

In [None]:
# Report the shape of every split, inputs first and targets second
split_arrays = {
    "X_sen_train": X_sen_train,
    "X_sen_val": X_sen_val,
    "X_sen_test": X_sen_test,
    "y_train": y_train,
    "y_val": y_val,
    "y_test": y_test,
}
for split_name, split_values in split_arrays.items():
    print(f"{split_name} shape:", split_values.shape)

In [None]:
# Single LSTM layer followed by a 2-unit dense output layer
n_timesteps, n_features = X_sen_train.shape[1], X_sen_train.shape[2]
model = Sequential([
    LSTM(units=128, return_sequences=False, input_shape=(n_timesteps, n_features)),
    Dense(units=2),
])

# MSE loss, Adam with a learning rate of 1e-5, MAE tracked as a metric
model.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.Adam(1e-5),
    metrics=['mae'],
)

model.summary()

# Train without callbacks. To use early stopping instead, set
# callbacks = [tf.keras.callbacks.EarlyStopping(patience=0, monitor='val_loss')]
callbacks = []
history = model.fit(
    X_sen_train,
    y_train,
    epochs=1000,
    validation_data=(X_sen_val, y_val),
    batch_size=256,
    callbacks=callbacks,
)

In [None]:
# Training vs. validation loss per epoch
fig, ax = plt.subplots()
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
# For a logarithmic y-axis: ax.set_yscale('log')
ax.legend()
plt.show()

In [None]:
# Predict on the training inputs
y_hat_train = model.predict(X_sen_train)

# One panel per target column: prediction overlaid on the reference
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
panel_titles = ('Margarethenklippe_Pegel_now', 'Sennhuette_Pegel_now')

for col, (panel, panel_title) in enumerate(zip(axes, panel_titles)):
    panel.plot(y_hat_train[:, col], label=f'Predicted {col}')
    panel.plot(y_train[:, col], label=f'Reference {col}', alpha=.5)
    panel.legend()
    panel.set_title(panel_title)

plt.show()


In [None]:
# Predict on the validation inputs
y_hat_val = model.predict(X_sen_val)

# One panel per target column: prediction overlaid on the reference
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
panel_titles = ('Margarethenklippe_LSTM_Validation', 'Sennhuette_LSTM_Validation')

for col, (panel, panel_title) in enumerate(zip(axes, panel_titles)):
    panel.plot(y_hat_val[:, col], label=f'Predicted {col}')
    panel.plot(y_val[:, col], label=f'Reference {col}', alpha=.5)
    panel.legend()
    panel.set_title(panel_title)

plt.show()


In [None]:
# Predict on the test inputs
y_hat_test = model.predict(X_sen_test)

# One panel per target column: prediction overlaid on the reference
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
panel_titles = ('Margarethenklippe_LSTM_Test', 'Sennhuette_LSTM_Test')

for col, (panel, panel_title) in enumerate(zip(axes, panel_titles)):
    panel.plot(y_hat_test[:, col], label=f'Predicted {col}')
    panel.plot(y_test[:, col], label=f'Reference {col}', alpha=.5)
    panel.legend()
    panel.set_title(panel_title)

plt.show()


In [None]:
# Define the metrics
def mse(y_true, y_pred):
    """Return the mean of the squared element-wise differences."""
    squared_error = (y_true - y_pred) ** 2
    return squared_error.mean()

def rmse(y_true, y_pred):
    """Return the square root of ``mse(y_true, y_pred)``."""
    mean_squared_error = mse(y_true, y_pred)
    return np.sqrt(mean_squared_error)

def mae(y_true, y_pred):
    """Return the mean of the absolute element-wise differences."""
    absolute_error = np.abs(y_true - y_pred)
    return absolute_error.mean()

def r_squared(y_true, y_pred):
    """Coefficient of determination, averaged over output columns.

    For 1-D input this is the usual ``1 - SS_res / SS_tot``. For 2-D input
    (samples x outputs) R² is computed separately for each output column around
    that column's own mean and the column scores are averaged uniformly.
    Pooling all columns around a single grand mean would add the between-column
    offset to ``SS_tot`` and overstate the score.

    A column with zero variance in ``y_true`` would divide by zero; it scores
    1.0 when the prediction is exact and 0.0 otherwise.

    Parameters
    ----------
    y_true, y_pred : array-like
        Reference and predicted values with matching shapes.

    Returns
    -------
    float
        Mean R² across output columns.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Reduce over the sample axis only, so each output column keeps its own sums.
    ss_res = np.atleast_1d(np.sum((y_true - y_pred) ** 2, axis=0))
    ss_tot = np.atleast_1d(np.sum((y_true - np.mean(y_true, axis=0)) ** 2, axis=0))

    scores = np.ones_like(ss_res)
    has_variance = ss_tot != 0
    scores[has_variance] = 1 - ss_res[has_variance] / ss_tot[has_variance]
    # Constant target that is not predicted exactly: define the score as 0.
    scores[~has_variance & (ss_res != 0)] = 0.0
    return float(np.mean(scores))

# (reference, prediction) pairs per split, evaluated in this order
datasets = dict(
    train=(y_train, y_hat_train),
    test=(y_test, y_hat_test),
    val=(y_val, y_hat_val),
)
def round_metric(metric_value, decimals=6):
    """Round ``metric_value`` to ``decimals`` decimal places (6 by default)."""
    rounded_value = round(metric_value, decimals)
    return rounded_value

# Label/function pairs, printed in this order for every split
metric_functions = (
    ("MSE", mse),
    ("RMSE", rmse),
    ("MAE", mae),
    ("R-squared", r_squared),
)

for name, (y_true, y_pred) in datasets.items():
    print(f"Metrics for {name} dataset:")
    for metric_label, metric_function in metric_functions:
        print(f"{metric_label}: {round_metric(metric_function(y_true, y_pred))}")
    print("-" * 30)


# Display names for the residual plots, listed in the same order as the
# columns selected when y was extracted from y_df.
column_names = ['Margarethenklippe_Pegel_now','Sennhuette_Pegel_now']

# Function to plot residuals
def plot_residuals(y_true, y_pred, column_name):
    """Scatter the residuals (``y_true - y_pred``) against the predictions.

    A dashed red line marks zero residual; ``column_name`` is used in the
    title and the x-axis label. The figure is shown immediately.
    """
    fig, ax = plt.subplots()
    ax.scatter(y_pred, y_true - y_pred, alpha=0.5)
    ax.axhline(0, color='r', linestyle='--')
    ax.set_title(f"Residual Plot for {column_name}")
    ax.set_xlabel(f"Predicted Values for {column_name}")
    ax.set_ylabel("Residuals")
    plt.show()

# One residual plot per target column of the test split
for i in range(y_test.shape[1]):
    plot_residuals(y_test[:, i], y_hat_test[:, i], column_names[i])

In [None]:
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)
custom_index = 300
sequence_length = 8

# Position of the first test sample within the windowed sample arrays
test_start_index = train_len + val_len

# y was extracted from this date range of y_df, so timestamps must be looked up
# in the same slice. Positions in the full y_df.index only line up if the file
# happens to start on the first day of the range.
# NOTE: the range must match the one used when y was extracted.
y_time_index = y_df.loc['2016-08-01':'2017-08-31'].index

# Sample k is built from rows k .. k+seq_length-1 and its target is row
# k+seq_length, so the targets are shifted by seq_length rows relative to the
# sample positions.
first_test_row = test_start_index + seq_length
test_time_stamps = y_time_index[first_test_row:first_test_row + len(y_test)]
assert len(test_time_stamps) == len(y_test), "timestamp/target length mismatch"

# Timestamps of the selected stretch, formatted for the x-axis
sequence_time_stamps = test_time_stamps[custom_index:custom_index + sequence_length].strftime('%Y-%m-%d %H:%M:%S')

# Actual and predicted values for the same stretch
actual_sequence = y_test[custom_index:custom_index + sequence_length]
predicted_sequence = y_hat_test[custom_index:custom_index + sequence_length]

# Plotting the selected sequence for both columns
column_names = ['Margarethenklippe','Sennhuette']

for i in range(2):
    plt.figure(figsize=(10, 4))  # wide figure so the rotated labels stay readable
    plt.plot(sequence_time_stamps, actual_sequence[:, i], label='Actual', marker='o')
    plt.plot(sequence_time_stamps, predicted_sequence[:, i], label='Predicted', marker='x')
    plt.title(f"Actual vs Predicted for {column_names[i]}")
    plt.xlabel("Time stamp")
    plt.ylabel(column_names[i])
    plt.xticks(rotation=90)  # rotate timestamps for readability
    plt.legend()
    plt.tight_layout()  # keep the rotated labels inside the figure
    plt.show()


In [None]:
# Display the timestamps selected for the test split.
test_time_stamps

In [None]:
np.random.seed(42)
custom_index = 200
sequence_length = 8

# Same stretch of rows from the test targets and the test predictions
row_window = slice(custom_index, custom_index + sequence_length)
actual_sequence = y_test[row_window]
predicted_sequence = y_hat_test[row_window]

# One figure per target column
column_names = ['Margarethenklippe_Pegel_now','Sennhuette_Pegel_now']

for col, column_label in enumerate(column_names):
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(actual_sequence[:, col], label='Actual', marker='o')
    ax.plot(predicted_sequence[:, col], label='Predicted', marker='x')
    ax.set_title(f"Actual vs Predicted for {column_label}")
    ax.set_xlabel("Time step")
    ax.set_ylabel(column_label)
    ax.legend()
    plt.show()


In [None]:
np.random.seed(42)  # For reproducibility
custom_index = 200
sequence_length = 24  # 6H

# Same stretch of rows from the test targets and the test predictions
row_window = slice(custom_index, custom_index + sequence_length)
actual_sequence = y_test[row_window]
predicted_sequence = y_hat_test[row_window]

# One figure per target column
column_names = ['Margarethenklippe_Pegel_now','Sennhuette_Pegel_now']

for col, column_label in enumerate(column_names):
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(actual_sequence[:, col], label='Actual', marker='o')
    ax.plot(predicted_sequence[:, col], label='Predicted', marker='x')
    ax.set_title(f"Actual vs Predicted for {column_label}")
    ax.set_xlabel("Time step")
    ax.set_ylabel(column_label)
    ax.legend()
    plt.show()
