<a href="https://colab.research.google.com/github/bishair/Pirna/blob/main/ModelPirna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries and upload excel files for groundwater level and river water level separately

In [2]:
import numpy as np
import pandas as pd
from google.colab import files
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from math import sqrt
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


In [4]:
uploaded_file = files.upload()

Saving riverL.xlsx to riverL.xlsx


# Main Functions
**merge_and_resample:**

Merge and preprocess data from two Excel files containing groundwater and river water level data along with their respective dates, function  reads the two files, merges the data on the date column, resamples it to an hourly frequency, and then applies forward fill to handle missing hours.

**preprocess_data_for_lstm:**

Handle missing values for groundwater level and river water level by using the previous value. Normalize the data in the range 0 to 1. Return numpy arrays for X and y and create sequences using windows of given length.

**Train model:**
Specify optimizer, loss function, batch size, epochs and number of neurons and layers for model training

In [5]:
#Use consistent time data, in case of missing observations use forward fill to have consistent time data
#date_format= '%Y-%m-%d %H:%M:%S'

def merge_and_resample(file_groundwater, file_river, date_format_groundwater, date_format_river):
    # Read the groundwater level data
    gw_data = pd.read_excel(file_groundwater)
    gw_data['Date'] = pd.to_datetime(gw_data['Date'], format=date_format_groundwater)

    # Read the river water level data
    river_data = pd.read_excel(file_river)
    river_data['Date'] = pd.to_datetime(river_data['Date'], format=date_format_river)

    # Merge the two datasets on the 'Date' column
    merged_data = pd.merge_asof(gw_data.sort_values('Date'), river_data.sort_values('Date'), on='Date', direction='nearest')

    # Set 'Date' as the index
    merged_data.set_index('Date', inplace=True)

    # Create a new date range that starts from the first timestamp in merged_data
    start_date = merged_data.index.min()
    end_date = merged_data.index.max()
    new_date_range = pd.date_range(start=start_date, end=end_date, freq='H')

    # Reindex and resample merged_data with the new date range
    resampled_data = merged_data.reindex(new_date_range).ffill().bfill()

    return resampled_data



In [6]:

def preprocess_data_for_lstm(data, n_steps):
    """
    Preprocess the data for LSTM model to predict 'G10' using past values of 'G10' and 'River'.
    - data: pandas DataFrame containing 'G10' and 'River' columns.
    - n_steps: number of time steps to use for predicting the next time step.

    Returns:
    - X, y: numpy arrays of features and labels suitable for LSTM model.
    """
    # Handling missing values using forward fill
    data = data.fillna(method='ffill')

    # Normalizing the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    data_scaled = scaler.fit_transform(data)

    # Creating sequences
    X, y = [], []
    for i in range(n_steps, len(data_scaled)):
        X.append(data_scaled[i-n_steps:i, :])
        y.append(data_scaled[i, 0])

    X, y = np.array(X), np.array(y)
    # Split the data into training and testing sets
    train_size = int(len(X) * 0.7)
    test_size = len(X) - train_size
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    return X_train, y_train, X_test, y_test



In [7]:
def train_model(x_train, y_train, input_shape, epochs=100, batch_size=32):
    model = Sequential([
        LSTM(units=50, return_sequences=True, input_shape=input_shape),
        LSTM(units=50),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)
    return model

In [8]:
def evaluate_model(model, x_test, y_test):
    # Evaluate the model's performance on the test data
    # It returns a list: [loss, mse]
    loss, mse = model.evaluate(x_test, y_test)
    return loss, mse


# Forward fill the missing hours by calling merge_and_resample function

In [9]:
merged_df = merge_and_resample('groundwater.xlsx','riverL.xlsx', '%d/%m/%Y %H:%M' ,'%d/%m/%Y %H:%M')

# Preprocess and define window size for LSTM model by using function(preprocess_data_for_lstm)

In [10]:
n_steps = 24
X_train, y_train, X_test, y_test = preprocess_data_for_lstm(merged_df, n_steps)

# Train model

In [11]:
"""
input_shape  includes two dimensions: (time_steps, features)
Time Steps: Number of time intervals or sequences each input sample consists of.
Features: Number of variables observed at each time step.
"""

input_shape=(X_train.shape[1], X_train.shape[2])
epochs=40
batch_size=64
trained_model = train_model(X_train, y_train, input_shape, epochs, batch_size)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


# Evaluate Model by calculating MSE

In [12]:
evaluate_model(trained_model, X_test, y_test)




TypeError: ignored