<a href="https://colab.research.google.com/github/bishair/Pirna/blob/main/PreprocessPrint.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from google.colab import files
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from math import sqrt
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [17]:
uploaded_file = files.upload()

Saving riverL.xlsx to riverL.xlsx


In [7]:
# Read the groundwater level data
gw_data = pd.read_excel('groundwater.xlsx')
gw_data['Date'] = pd.to_datetime(gw_data['Date'], format='%d/%m/%Y %H:%M')

In [8]:
print(gw_data)

                     Date      G10
0     2015-01-30 12:18:00  110.722
1     2015-01-30 13:18:00  110.720
2     2015-01-30 14:18:00  110.718
3     2015-01-30 15:18:00  110.715
4     2015-01-30 16:18:00  110.714
...                   ...      ...
17533 2017-01-30 10:19:00  109.456
17534 2017-01-30 11:19:00  109.455
17535 2017-01-30 12:19:00  109.456
17536 2017-01-30 13:19:00  109.457
17537 2017-01-30 14:19:00  109.459

[17538 rows x 2 columns]


In [18]:
 # Read the river water level data
river_data = pd.read_excel('riverL.xlsx')
river_data['Date'] = pd.to_datetime(river_data['Date'], format='%d/%m/%Y %H:%M')


In [19]:
print(river_data)

                     Date  River
0     2014-12-31 23:00:00    192
1     2015-01-02 08:00:00    206
2     2015-01-02 20:00:00    207
3     2015-01-04 08:00:00    211
4     2015-01-04 14:00:00    207
...                   ...    ...
23772 2017-12-31 19:00:00    260
23773 2017-12-31 20:00:00    260
23774 2017-12-31 21:00:00    260
23775 2017-12-31 22:00:00    260
23776 2017-12-31 23:00:00    260

[23777 rows x 2 columns]


In [20]:
 # Merge the two datasets on the 'Date' column
merged_data = pd.merge_asof(gw_data.sort_values('Date'), river_data.sort_values('Date'), on='Date', direction='nearest')

In [21]:
print(merged_data)

                     Date      G10  River
0     2015-01-30 12:18:00  110.722    261
1     2015-01-30 13:18:00  110.720    260
2     2015-01-30 14:18:00  110.718    260
3     2015-01-30 15:18:00  110.715    260
4     2015-01-30 16:18:00  110.714    260
...                   ...      ...    ...
17533 2017-01-30 10:19:00  109.456    136
17534 2017-01-30 11:19:00  109.455    138
17535 2017-01-30 12:19:00  109.456    141
17536 2017-01-30 13:19:00  109.457    144
17537 2017-01-30 14:19:00  109.459    146

[17538 rows x 3 columns]


In [22]:
 # Set 'Date' as the index
merged_data.set_index('Date', inplace=True)

In [23]:
print(merged_data)

                         G10  River
Date                               
2015-01-30 12:18:00  110.722    261
2015-01-30 13:18:00  110.720    260
2015-01-30 14:18:00  110.718    260
2015-01-30 15:18:00  110.715    260
2015-01-30 16:18:00  110.714    260
...                      ...    ...
2017-01-30 10:19:00  109.456    136
2017-01-30 11:19:00  109.455    138
2017-01-30 12:19:00  109.456    141
2017-01-30 13:19:00  109.457    144
2017-01-30 14:19:00  109.459    146

[17538 rows x 2 columns]


In [24]:
# Resample the data to hourly frequency and forward fill missing values
resampled_data = merged_data.resample('H').ffill()


In [25]:
print(resampled_data)

                         G10  River
Date                               
2015-01-30 12:00:00      NaN    NaN
2015-01-30 13:00:00  110.722  261.0
2015-01-30 14:00:00  110.720  260.0
2015-01-30 15:00:00  110.718  260.0
2015-01-30 16:00:00  110.715  260.0
...                      ...    ...
2017-01-30 10:00:00  109.457  135.0
2017-01-30 11:00:00  109.456  136.0
2017-01-30 12:00:00  109.455  138.0
2017-01-30 13:00:00  109.456  141.0
2017-01-30 14:00:00  109.457  144.0

[17547 rows x 2 columns]


In [26]:
 # Reset index so that 'Date' is a column again
resampled_data.reset_index(inplace=True)

In [27]:
print(resampled_data)

                     Date      G10  River
0     2015-01-30 12:00:00      NaN    NaN
1     2015-01-30 13:00:00  110.722  261.0
2     2015-01-30 14:00:00  110.720  260.0
3     2015-01-30 15:00:00  110.718  260.0
4     2015-01-30 16:00:00  110.715  260.0
...                   ...      ...    ...
17542 2017-01-30 10:00:00  109.457  135.0
17543 2017-01-30 11:00:00  109.456  136.0
17544 2017-01-30 12:00:00  109.455  138.0
17545 2017-01-30 13:00:00  109.456  141.0
17546 2017-01-30 14:00:00  109.457  144.0

[17547 rows x 3 columns]
