In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt

dates = pd.date_range(start='2023-01-01', periods=1000, freq='H')
data = np.random.rand(1000, 2) * 100
df_dummy = pd.DataFrame(data, index=dates, columns=['Feature1', 'Feature2'])
df_dummy.index.name = 'Timestamp'
df_dummy.iloc[50:150, 0] = np.nan
df_dummy.iloc[200:300, 1] = np.nan
df_dummy.to_csv('time_series_data.csv')

df = pd.read_csv('time_series_data.csv', index_col=0, parse_dates=True)
df.fillna(method='ffill', inplace=True)
print("Data head:\n", df.head())

target_col = 'Feature1'
window = 10
values = df[[target_col]].values.astype('float32')

scaler = MinMaxScaler()
values_scaled = scaler.fit_transform(values)

X, y = [], []
for i in range(window, len(values_scaled)):
    X.append(values_scaled[i-window:i, 0])
    y.append(values_scaled[i, 0])
X = np.array(X)
y = np.array(y)

X = X.reshape((X.shape[0], X.shape[1], 1))

train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

model = models.Sequential([
    layers.LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True),
    layers.LSTM(30),
    layers.Dense(1)
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()

history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=1)

y_pred_scaled = model.predict(X_test)
y_test_actual = scaler.inverse_transform(y_test.reshape(-1,1))
y_pred_actual = scaler.inverse_transform(y_pred_scaled)

rmse = np.sqrt(mean_squared_error(y_test_actual, y_pred_actual))
mae = mean_absolute_error(y_test_actual, y_pred_actual)
r2 = r2_score(y_test_actual, y_pred_actual)

print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R-squared (R2) Score: {r2:.4f}")

  dates = pd.date_range(start='2023-01-01', periods=1000, freq='H')
  df.fillna(method='ffill', inplace=True)
  super().__init__(**kwargs)


Data head:
                       Feature1   Feature2
Timestamp                                
2023-01-01 00:00:00  78.009455  33.891859
2023-01-01 01:00:00  38.746181  47.257566
2023-01-01 02:00:00  57.422555  89.251781
2023-01-01 03:00:00  24.199878  27.257694
2023-01-01 04:00:00  92.324953  81.964427
X_train shape: (792, 10, 1), y_train shape: (792,)
X_test shape: (198, 10, 1), y_test shape: (198,)


Epoch 1/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 0.1794 - mae: 0.3507 - val_loss: 0.0833 - val_mae: 0.2428
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0804 - mae: 0.2423 - val_loss: 0.0827 - val_mae: 0.2439
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0754 - mae: 0.2327 - val_loss: 0.0820 - val_mae: 0.2429
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0773 - mae: 0.2333 - val_loss: 0.0815 - val_mae: 0.2423
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0810 - mae: 0.2437 - val_loss: 0.0857 - val_mae: 0.2481
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0799 - mae: 0.2353 - val_loss: 0.0817 - val_mae: 0.2429
Epoch 7/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0749



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Root Mean Squared Error (RMSE): 28.6198
Mean Absolute Error (MAE): 24.3697
R-squared (R2) Score: -0.0358
