In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import matplotlib.pyplot as plt
from tabulate import tabulate

In [2]:
import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence only scikit-learn's ConvergenceWarning (emitted when an MLP fit stops
# before the optimizer has converged, e.g. on reaching max_iter).
# A blanket filterwarnings('ignore') would also hide unrelated warnings such
# as deprecations or dtype conversions, which are worth seeing.
warnings.filterwarnings('ignore', category=ConvergenceWarning)

In [3]:
# Seed NumPy's global random number generator.
# Not strictly required, but a fixed seed makes results repeatable in most situations.
SEED = 1
np.random.seed(SEED)

In [4]:
# Load the data for water content prediction
data = pd.read_csv("royal_city_daily_avg.csv")  # Daily averages

# Separate the predictor columns from the target column
X = data[['wind_speed', 'solar_radiation', 'relative_humidity', 'air_temp', 'canopy_temp_mean', 'soil_temp']]
y_water_content = data['water_content_mean']

# Split BEFORE scaling: if the scaler is fitted on every row, the mean/std it
# learns include the test rows, which leaks test-set information into training.
X_train_raw_water_content, X_test_raw_water_content, y_train_water_content, y_test_water_content = train_test_split(
    X, y_water_content, test_size=0.2, random_state=42
)

# Standardize: learn the statistics from the training rows only, then apply
# the same transform to the held-out rows.
scaler = StandardScaler()
X_train_water_content = scaler.fit_transform(X_train_raw_water_content)
X_test_water_content = scaler.transform(X_test_raw_water_content)

# Whole feature matrix under the training-fitted scaler. Kept only so the name
# X_scaled still exists; it is not used for modelling in this cell.
X_scaled = scaler.transform(X)

In [5]:
# Load the data for water potential prediction
# Late morning averages (presumably 9:00AM - 11:00AM; the earlier comment read
# "11:00PM", which does not match "late morning" - confirm against the data source)
data = pd.read_csv("royal_city_late_morning_avg.csv")

# Separate the predictor columns from the target column
X = data[['wind_speed', 'solar_radiation', 'relative_humidity', 'air_temp', 'canopy_temp_mean', 'soil_temp']]
y_water_potential = data['water_potential_mean']

# Split BEFORE scaling: if the scaler is fitted on every row, the mean/std it
# learns include the test rows, which leaks test-set information into training.
X_train_raw_water_potential, X_test_raw_water_potential, y_train_water_potential, y_test_water_potential = train_test_split(
    X, y_water_potential, test_size=0.2, random_state=42
)

# Standardize: learn the statistics from the training rows only, then apply
# the same transform to the held-out rows.
scaler = StandardScaler()
X_train_water_potential = scaler.fit_transform(X_train_raw_water_potential)
X_test_water_potential = scaler.transform(X_test_raw_water_potential)

# Whole feature matrix under the training-fitted scaler. Kept only so the name
# X_scaled still exists; it is not used for modelling in this cell.
X_scaled = scaler.transform(X)

In [6]:
def train_and_evaluate(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Estimator object exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.

    Returns:
        dict with three keys: 'MSE' (mean squared error on the test split),
        'R2' (R^2 score on the test split) and 'predicted_values' (the
        predictions for ``X_test``).
    """
    # Note: this (re)fits the estimator in place, even if it was fitted before.
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return {
        'MSE': mean_squared_error(y_test, predictions),
        'R2': r2_score(y_test, predictions),
        'predicted_values': predictions,
    }

In [7]:
# Hyper-parameter grid for the MLPRegressor (Water Content Model).
# Only 'hidden_layer_sizes' has more than one candidate; every other option is
# pinned to a single value, so the search compares three layer layouts.
#
# Alternatives worth experimenting with:
#   activation         : 'identity', 'logistic', 'tanh', 'relu'
#                        (with the 'sgd' solver try 'tanh' first,
#                         with the 'adam' solver try 'relu' first)
#   solver             : 'adam', 'sgd', 'lbfgs'
#   alpha              : 0.0001, 0.001, 0.01   (regularization strength)
#   learning_rate      : 'constant', 'adaptive', 'invscaling'
#   learning_rate_init : 0.001, 0.01, 0.05, 0.10
#
# NOTE(review): per the scikit-learn MLPRegressor docs, learning_rate,
# learning_rate_init, shuffle, n_iter_no_change and nesterovs_momentum are only
# used by the 'sgd'/'adam' solvers, so with 'lbfgs' they should have no
# effect - confirm before tuning them here.
param_grid_water_content = dict(
    hidden_layer_sizes=[(100,), (50, 50), (100, 50)],
    activation=['relu'],
    solver=['lbfgs'],
    alpha=[0.01],
    learning_rate=['invscaling'],
    learning_rate_init=[0.001],
    max_iter=[1000],  # upper bound on training iterations
    shuffle=[True],
    n_iter_no_change=[50],
    nesterovs_momentum=[False],
    verbose=[False],
)

# Hyper-parameter grid for the MLPRegressor (Water Potential Model).
# Only 'hidden_layer_sizes' has more than one candidate; every other option is
# pinned to a single value, so the search compares three layer layouts.
#
# Alternatives worth experimenting with:
#   activation         : 'identity', 'logistic', 'tanh', 'relu'
#                        (with the 'sgd' solver try 'tanh' first,
#                         with the 'adam' solver try 'relu' first)
#   solver             : 'adam', 'sgd', 'lbfgs'
#   alpha              : 0.0001, 0.001, 0.01   (regularization strength)
#   learning_rate      : 'constant', 'adaptive', 'invscaling'
#   learning_rate_init : 0.001, 0.01, 0.05, 0.10
param_grid_water_potential = dict(
    hidden_layer_sizes=[(100,), (50, 50), (100, 50)],
    activation=['tanh'],
    solver=['sgd'],
    alpha=[0.01],
    learning_rate=['constant'],
    learning_rate_init=[0.01],
    max_iter=[1000],  # upper bound on training iterations
    shuffle=[True],
    n_iter_no_change=[50],
    nesterovs_momentum=[False],
    verbose=[False],
)

# Base estimator shared by both searches (GridSearchCV works on clones of it,
# so sharing one instance is safe).
# random_state is pinned so that weight initialization does not depend on how
# far the global NumPy RNG has advanced; without it, re-running a fit cell
# gives different results. The value matches the np.random.seed() call above.
mlp_regressor = MLPRegressor(random_state=1)

# 5-fold cross-validated grid searches, one per target. Scoring is negative
# MSE, so a higher (closer to zero) score is better.
mlp_cv_water_content = GridSearchCV(mlp_regressor, param_grid_water_content, cv=5, scoring='neg_mean_squared_error')
mlp_cv_water_potential = GridSearchCV(mlp_regressor, param_grid_water_potential, cv=5, scoring='neg_mean_squared_error')

In [8]:
def mlp_regressor_summary(params):
    """
    Print the selected hyper-parameters as a two-column Markdown table.

    Args:
        params: dictionary mapping parameter names to the best values found.

    Returns:
        None. The heading and the table are written to standard output.
    """
    print("Model Summary (Best Params):")

    # One row per (name, value) pair
    rows = list(params.items())
    table = pd.DataFrame(rows, columns=["Parameter", "Value"])

    # Render without the positional index column
    rendered = table.to_markdown(index=False)
    print(rendered)

In [None]:
# Run the cross-validated grid search on the water content training split
mlp_cv_water_content.fit(X_train_water_content, y_train_water_content)

# Collect the winning configuration from the finished search
best_params = mlp_cv_water_content.best_params_
best_score = mlp_cv_water_content.best_score_  # scored as 'neg_mean_squared_error'
best_mlp = mlp_cv_water_content.best_estimator_

# Report the chosen hyper-parameters and their cross-validation score
mlp_regressor_summary(best_params)
print("\nBest Score", best_score)

# Evaluate on the held-out test split.
# NOTE(review): train_and_evaluate() calls fit() on best_mlp again; GridSearchCV
# refits the best estimator by default, so this second fit may be redundant.
results_water_content = {}  # placeholder, replaced by the call below
results_water_content = train_and_evaluate(
    model=best_mlp,
    X_train=X_train_water_content,
    y_train=y_train_water_content,
    X_test=X_test_water_content,
    y_test=y_test_water_content,
)

In [None]:
# Run the cross-validated grid search on the water potential training split
mlp_cv_water_potential.fit(X_train_water_potential, y_train_water_potential)

# Collect the winning configuration from the finished search
best_params = mlp_cv_water_potential.best_params_
best_score = mlp_cv_water_potential.best_score_  # scored as 'neg_mean_squared_error'
best_mlp = mlp_cv_water_potential.best_estimator_

# Report the chosen hyper-parameters and their cross-validation score
mlp_regressor_summary(best_params)
print("\nBest Score", best_score)

# Evaluate on the held-out test split.
# NOTE(review): train_and_evaluate() calls fit() on best_mlp again; GridSearchCV
# refits the best estimator by default, so this second fit may be redundant.
results_water_potential = {}  # placeholder, replaced by the call below
results_water_potential = train_and_evaluate(
    model=best_mlp,
    X_train=X_train_water_potential,
    y_train=y_train_water_potential,
    X_test=X_test_water_potential,
    y_test=y_test_water_potential,
)

In [None]:
# Report the test-set metrics of both tuned models
def _print_metrics(heading, metrics):
    """Print ``heading`` followed by the 'MSE' and 'R2' entries of ``metrics``."""
    print(heading)
    print(f"MSE={metrics['MSE']:.4f}, R2={metrics['R2']:.4f}")


print('Best MLPRegressor Performance')

_print_metrics("Water Content Prediction Results:", results_water_content)
_print_metrics("\nWater Potential Prediction Results:", results_water_potential)

In [None]:
# Visualize results for both targets: scatter plots of predicted vs. actual
# values, water content on the left and water potential on the right.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))  # Create two subplots
model = 'MLPRegressor'

# (axes, quantity name used in labels, actual test values, evaluation results)
panels = [
    (axs[0], "Water Content", y_test_water_content, results_water_content),
    (axs[1], "Water Potential", y_test_water_potential, results_water_potential),
]

for ax, quantity, actual, results in panels:
    # Scatter of predictions against ground truth
    ax.scatter(actual, results['predicted_values'], label=model)
    ax.set_xlabel(f"Actual {quantity}")
    ax.set_ylabel(f"Predicted {quantity}")
    ax.set_title(f"Predicted vs. Actual {quantity}")
    ax.legend()

    # Text annotations with R² and MSE values (positions are in axes coordinates)
    ax.text(0.45, 0.95, f"R²: {results['R2']:.4f}", ha='left', va='top', transform=ax.transAxes, fontsize=10)
    ax.text(0.45, 0.9, f"MSE: {results['MSE']:.4f}", ha='left', va='top', transform=ax.transAxes, fontsize=10)

    # Identity line (predicted == actual).
    # Joining (xmin, ymin) to (xmax, ymax) of the auto-scaled axes only gives
    # the diagonal of the plot box, which is not y = x when the two axes cover
    # different ranges. Use one common range for both axes so the dashed line
    # really marks perfect predictions.
    bounds = (*ax.get_xlim(), *ax.get_ylim())
    lo, hi = min(bounds), max(bounds)
    ax.set_xlim(lo, hi)
    ax.set_ylim(lo, hi)
    ax.plot([lo, hi], [lo, hi], 'k--', alpha=0.75, zorder=0)  # drawn behind the points

plt.tight_layout()
plt.show()