In [1]:
import numpy as np
import pandas as pd
import joblib

def preprocess_input(new_data, scaler, window_size=195, features=['Soil_Moisture', 'sin_day', 'cos_day', 'month'], date_format='%Y-%m-%d'):
    """
    Build a single LSTM input window from the most recent records.

    The caller's DataFrame is never modified: all parsing, indexing, sorting
    and feature engineering happens on an internal copy, so the function can
    be called repeatedly on the same frame (e.g. when a notebook cell is
    re-run).

    Parameters:
    - new_data (pd.DataFrame): The complete dataset including the latest records.
      Must contain a 'Date' column plus every column named in `features` that
      is not derived here (with the defaults: 'Soil_Moisture'). Any other
      columns are ignored.
    - scaler: Object exposing `transform`, fitted on the training data with
      the same feature columns in the same order (e.g. a MinMaxScaler).
    - window_size (int): Number of past records to use for prediction.
    - features (list): Feature column names to use, in the order the scaler
      and model expect. 'sin_day', 'cos_day' and 'month' are derived from
      the date inside this function.
    - date_format (str): The format of the 'Date' column in new_data (e.g., '%Y-%m-%d').

    Returns:
    - np.ndarray: The preprocessed data reshaped to (1, window_size, num_features).

    Raises:
    - KeyError: If 'Date' or a requested feature column is missing.
    - ValueError: If the selected features contain missing values, or fewer
      than `window_size` records are available.
    """
    feature_names = list(features)

    # Copy first: the original implementation used inplace operations, which
    # silently turned the caller's 'Date' column into the index and made a
    # second call on the same frame fail with KeyError.
    frame = new_data.copy()
    frame['Date'] = pd.to_datetime(frame['Date'], format=date_format)
    frame = frame.set_index('Date').sort_index()

    # Feature engineering: encode the position in the year cyclically so that
    # late December and early January end up close together.
    frame['day_of_year'] = frame.index.dayofyear
    year_angle = 2 * np.pi * frame['day_of_year'] / 365.25
    frame['sin_day'] = np.sin(year_angle)
    frame['cos_day'] = np.cos(year_angle)
    frame['month'] = frame.index.month

    # Select relevant features in the order the scaler was fitted on
    df_features = frame[feature_names]

    # Check for missing values
    if df_features.isnull().values.any():
        raise ValueError("Input data contains missing values. Please handle them before prediction.")

    # Scale the data using the fitted scaler
    scaled_data = scaler.transform(df_features)

    # Select the latest 'window_size' records
    if len(scaled_data) < window_size:
        raise ValueError(f"Insufficient data: requires at least {window_size} records, but got {len(scaled_data)}.")

    latest_window = scaled_data[-window_size:]

    # Reshape to (1, window_size, num_features) for LSTM input
    return latest_window.reshape((1, window_size, len(feature_names)))


In [2]:
def postprocess_output(prediction, scaler, features=['Soil_Moisture', 'sin_day', 'cos_day', 'month']):
    """
    Converts the model's scaled prediction back to the original soil moisture scale.

    The scaler works on full feature rows, so the prediction is placed into
    the 'Soil_Moisture' column of a zero-filled row, inverse-transformed, and
    read back from that same column.

    Parameters:
    - prediction (np.ndarray or array-like): The raw prediction output from the
      model (scaled). Either 1-D or of shape (n, 1); typically (1, 1).
    - scaler: Object exposing `inverse_transform`, fitted on the training data
      (e.g. a MinMaxScaler).
    - features (list): Feature column names used during scaling, in the order
      the scaler was fitted on.

    Returns:
    - float: The soil moisture value in the original scale (first prediction only).

    Raises:
    - ValueError: If the prediction is empty or has an unsupported shape.
    """
    # Ensure the prediction is a NumPy array
    if not isinstance(prediction, np.ndarray):
        prediction = np.array(prediction)

    # Flatten prediction if necessary
    if prediction.ndim == 2 and prediction.shape[1] == 1:
        prediction = prediction.flatten()
    elif prediction.ndim != 1:
        raise ValueError(f"Unexpected prediction shape: {prediction.shape}")

    # Fail with a clear message instead of an IndexError further down
    if prediction.size == 0:
        raise ValueError("Prediction is empty: nothing to convert.")

    # Locate the target column from `features` rather than assuming it is
    # first, so a different feature order still inverts with the right
    # scaling parameters. Column 0 stays the fallback when the name is absent.
    feature_names = list(features)
    if 'Soil_Moisture' in feature_names:
        target_col = feature_names.index('Soil_Moisture')
    else:
        target_col = 0

    # Create a placeholder array with zeros for other features
    placeholder = np.zeros((len(prediction), len(feature_names)))
    placeholder[:, target_col] = prediction

    # Apply the inverse transformation to revert scaling
    inversed = scaler.inverse_transform(placeholder)

    # Return the first prediction as a single float value
    return float(inversed[0, target_col])


In [5]:
import joblib
from tensorflow.keras.models import load_model

# Load the scaler that was fitted on the training data.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code;
# only load scaler files from a source you trust.
# The path is relative, so it is resolved against the notebook's working directory.
scaler = joblib.load('scaler.joblib')

# Load the trained model (relative path, resolved like the scaler above)
# If your model uses custom layers like 'Attention', ensure they are defined before loading
model = load_model('Soil_m_1.h5')


In [6]:
import pandas as pd

# Define target coordinates (Ignored in preprocessing)
# Since all input data is from the same location, latitude and longitude are not used
# target_latitude = 42.154439
# target_longitude = -119.161597

# Load your data
# Ensure 'Date' is in 'YYYY-MM-DD' format or adjust DATE_FORMAT accordingly
df = pd.read_csv('specific_region_data.csv', parse_dates=['Date'])

# Settings shared by pre- and post-processing. Defining them once keeps the
# two calls from drifting apart (the feature order must be identical in both).
# NOTE(review): presumably these must match what the model and scaler were
# trained with -- confirm against the training notebook.
WINDOW_SIZE = 195
FEATURES = ['Soil_Moisture', 'sin_day', 'cos_day', 'month']
DATE_FORMAT = '%Y-%m-%d'  # Adjust if your date format is different

# Predict the next day's soil moisture
try:
    # Preprocess a copy of the data so `df` stays as loaded and this cell can
    # be re-run without reloading the CSV.
    processed_input = preprocess_input(
        new_data=df.copy(),
        scaler=scaler,
        window_size=WINDOW_SIZE,
        features=FEATURES,
        date_format=DATE_FORMAT
    )

    # Make a prediction
    raw_prediction = model.predict(processed_input)

    # Postprocess the prediction to get the actual soil moisture value
    actual_soil_moisture = postprocess_output(
        prediction=raw_prediction,
        scaler=scaler,
        features=FEATURES
    )

    print(f"Predicted Soil Moisture for the next day: {actual_soil_moisture:.4f}")

except (ValueError, KeyError) as e:
    # Expected data problems (missing columns/values, too few records, bad
    # prediction shape) get a short message. Anything else is a real bug and
    # is left to propagate so the full traceback is visible.
    print(f"An error occurred during prediction: {e}")


Predicted Soil Moisture for the next day: 0.2707
