<a href="https://colab.research.google.com/github/bishair/Pirna/blob/main/Print.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from google.colab import files
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from math import sqrt
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


In [6]:
uploaded_file = files.upload()

Saving riverL.xlsx to riverL.xlsx


In [3]:
# Read the groundwater level data
gw_data = pd.read_excel('groundwater.xlsx')
gw_data['Date'] = pd.to_datetime(gw_data['Date'], format='%d/%m/%Y %H:%M')

In [4]:
print(gw_data)

                     Date      G10
0     2015-01-30 12:18:00  110.722
1     2015-01-30 13:18:00  110.720
2     2015-01-30 14:18:00  110.718
3     2015-01-30 15:18:00  110.715
4     2015-01-30 16:18:00  110.714
...                   ...      ...
17533 2017-01-30 10:19:00  109.456
17534 2017-01-30 11:19:00  109.455
17535 2017-01-30 12:19:00  109.456
17536 2017-01-30 13:19:00  109.457
17537 2017-01-30 14:19:00  109.459

[17538 rows x 2 columns]


In [7]:
 # Read the river water level data
river_data = pd.read_excel('riverL.xlsx')
river_data['Date'] = pd.to_datetime(river_data['Date'], format='%d/%m/%Y %H:%M')


In [8]:
print(river_data)

                     Date  River
0     2014-12-31 23:00:00    192
1     2015-01-02 08:00:00    206
2     2015-01-02 20:00:00    207
3     2015-01-04 08:00:00    211
4     2015-01-04 14:00:00    207
...                   ...    ...
23772 2017-12-31 19:00:00    260
23773 2017-12-31 20:00:00    260
23774 2017-12-31 21:00:00    260
23775 2017-12-31 22:00:00    260
23776 2017-12-31 23:00:00    260

[23777 rows x 2 columns]


In [9]:
 # Merge the two datasets on the 'Date' column
merged_data = pd.merge_asof(gw_data.sort_values('Date'), river_data.sort_values('Date'), on='Date', direction='nearest')

In [10]:
print(merged_data)

                     Date      G10  River
0     2015-01-30 12:18:00  110.722    261
1     2015-01-30 13:18:00  110.720    260
2     2015-01-30 14:18:00  110.718    260
3     2015-01-30 15:18:00  110.715    260
4     2015-01-30 16:18:00  110.714    260
...                   ...      ...    ...
17533 2017-01-30 10:19:00  109.456    136
17534 2017-01-30 11:19:00  109.455    138
17535 2017-01-30 12:19:00  109.456    141
17536 2017-01-30 13:19:00  109.457    144
17537 2017-01-30 14:19:00  109.459    146

[17538 rows x 3 columns]


In [11]:
 # Set 'Date' as the index
merged_data.set_index('Date', inplace=True)

In [12]:
print(merged_data)

                         G10  River
Date                               
2015-01-30 12:18:00  110.722    261
2015-01-30 13:18:00  110.720    260
2015-01-30 14:18:00  110.718    260
2015-01-30 15:18:00  110.715    260
2015-01-30 16:18:00  110.714    260
...                      ...    ...
2017-01-30 10:19:00  109.456    136
2017-01-30 11:19:00  109.455    138
2017-01-30 12:19:00  109.456    141
2017-01-30 13:19:00  109.457    144
2017-01-30 14:19:00  109.459    146

[17538 rows x 2 columns]


In [13]:
# Resample the data to hourly frequency and forward fill missing values
resampled_data = merged_data.resample('H').ffill()


In [14]:
print(resampled_data)

                         G10  River
Date                               
2015-01-30 12:00:00      NaN    NaN
2015-01-30 13:00:00  110.722  261.0
2015-01-30 14:00:00  110.720  260.0
2015-01-30 15:00:00  110.718  260.0
2015-01-30 16:00:00  110.715  260.0
...                      ...    ...
2017-01-30 10:00:00  109.457  135.0
2017-01-30 11:00:00  109.456  136.0
2017-01-30 12:00:00  109.455  138.0
2017-01-30 13:00:00  109.456  141.0
2017-01-30 14:00:00  109.457  144.0

[17547 rows x 2 columns]


# Preprocess DATA for LSTM model input

In [15]:
# Handling missing values
data = resampled_data.fillna(method='ffill')  # Forward fill
print(data)

                         G10  River
Date                               
2015-01-30 12:00:00      NaN    NaN
2015-01-30 13:00:00  110.722  261.0
2015-01-30 14:00:00  110.720  260.0
2015-01-30 15:00:00  110.718  260.0
2015-01-30 16:00:00  110.715  260.0
...                      ...    ...
2017-01-30 10:00:00  109.457  135.0
2017-01-30 11:00:00  109.456  136.0
2017-01-30 12:00:00  109.455  138.0
2017-01-30 13:00:00  109.456  141.0
2017-01-30 14:00:00  109.457  144.0

[17547 rows x 2 columns]


In [16]:
 # Normalizing the data
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data)

In [22]:
print(data_scaled)
print(data_scaled.shape)

[[       nan        nan]
 [0.8510427  0.66666667]
 [0.85004965 0.66287879]
 ...
 [0.22194638 0.20075758]
 [0.2224429  0.21212121]
 [0.22293942 0.22348485]]
(17547, 2)


In [23]:
# Define window size
n_steps = 5

X, y = [], []
for i in range(n_steps, len(data_scaled)):
  X.append(data_scaled[i-n_steps:i, :])
  y.append(data_scaled[i, 0])
X, y = np.array(X), np.array(y)



In [24]:
print (X)
print(X.shape)

[[[       nan        nan]
  [0.8510427  0.66666667]
  [0.85004965 0.66287879]
  [0.8490566  0.66287879]
  [0.84756703 0.66287879]]

 [[0.8510427  0.66666667]
  [0.85004965 0.66287879]
  [0.8490566  0.66287879]
  [0.84756703 0.66287879]
  [0.84707051 0.66287879]]

 [[0.85004965 0.66287879]
  [0.8490566  0.66287879]
  [0.84756703 0.66287879]
  [0.84707051 0.66287879]
  [0.84607746 0.66287879]]

 ...

 [[0.22542205 0.18181818]
  [0.224429   0.1780303 ]
  [0.224429   0.18181818]
  [0.22293942 0.18939394]
  [0.2224429  0.19318182]]

 [[0.224429   0.1780303 ]
  [0.224429   0.18181818]
  [0.22293942 0.18939394]
  [0.2224429  0.19318182]
  [0.22194638 0.20075758]]

 [[0.224429   0.18181818]
  [0.22293942 0.18939394]
  [0.2224429  0.19318182]
  [0.22194638 0.20075758]
  [0.2224429  0.21212121]]]
(17542, 5, 2)


In [25]:
print(y)
print(y.shape)

[0.84707051 0.84607746 0.84508441 ... 0.22194638 0.2224429  0.22293942]
(17542,)


In [37]:
train_size = int(len(X) * 0.7)
test_size = len(X) - train_size
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
print(train_size)
print(test_size)
print(train_size+test_size)
print(X_train.shape, 'X-train shape')
print(X_test.shape, 'X-test shape')
print(y_train.shape, 'y-train shape')
print(X_test.shape, 'y-test shape')

12279
5263
17542
(12279, 5, 2) X-train shape
(5263, 5, 2) X-test shape
(12279,) y-train shape
(5263, 5, 2) y-test shape
