In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from tabulate import tabulate
import joblib
import os

In [25]:
# Load the data
data = pd.read_csv("royal_city_daily_avg.csv")  # Daily averages
# data = pd.read_csv("royal_city_late_morning_avg.csv")  # Late morning averages (9:00AM - 11:00PM)
# NOTE(review): "11:00PM" above is probably a typo for 11:00AM -- confirm against how that file was built.

# Separate the predictor columns from the two regression targets.
FEATURE_COLUMNS = ['wind_speed', 'solar_radiation', 'relative_humidity', 'air_temp', 'canopy_temp_mean', 'soil_temp']
X = data[FEATURE_COLUMNS]
y_water_content = data['water_content_mean']
y_water_potential = data['water_potential_mean']

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance (fitting the scaler on all rows leaks test-set statistics).
# Both targets are split in a single call, so they are guaranteed to share
# the same train/test rows (same test_size and random_state as before).
(
    X_train_raw,
    X_test_raw,
    y_train_water_content,
    y_test_water_content,
    y_train_water_potential,
    y_test_water_potential,
) = train_test_split(X, y_water_content, y_water_potential, test_size=0.2, random_state=42)

# Normalize the data: learn the scaling from the training rows only, then
# apply that same transformation to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaling; kept so that any code
# still referring to X_scaled continues to work.
X_scaled = scaler.transform(X)

# The feature matrices are identical for both targets, so the per-target
# names simply refer to the same scaled arrays.
X_train_water_content = X_train_water_potential = X_train_scaled
X_test_water_content = X_test_water_potential = X_test_scaled

In [26]:
# Helper shared by every model/target combination below
def train_and_evaluate(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``
        The estimator to train. It is fitted in place.
    X_train, y_train
        Features and target used for fitting.
    X_test, y_test
        Features and target used for scoring.

    Returns
    -------
    tuple
        ``(mse, r2, y_pred)``: the mean squared error and R2 score of the
        test-set predictions, followed by the predictions themselves.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return (
        mean_squared_error(y_test, predictions),
        r2_score(y_test, predictions),
        predictions,
    )

# List of models to try.
# Every estimator with internal randomness is given a fixed seed so that a
# Restart & Run All reproduces the same metrics and plots.
# Note: the seed appears in str(model), which later cells use as result keys
# and plot labels (e.g. "RandomForestRegressor(random_state=42)").
SEED = 42
models = [
    LinearRegression(),
    DecisionTreeRegressor(random_state=SEED),
    RandomForestRegressor(random_state=SEED),
    GradientBoostingRegressor(random_state=SEED),
    SVR(),
    MLPRegressor(random_state=SEED),
]

In [27]:
# Fit each candidate model on the water content target and collect its
# test-set metrics and predictions, keyed by the model's string form.
results_water_content = {}
for regressor in models:
    # joblib.dump(regressor, f"models/{regressor.__class__.__name__}_model_water_content.pkl") # Save water content model
    # (if re-enabled, place the dump after train_and_evaluate so it runs after this loop's fit)
    scores = train_and_evaluate(
        regressor,
        X_train_water_content,
        y_train_water_content,
        X_test_water_content,
        y_test_water_content,
    )
    results_water_content[str(regressor)] = dict(zip(('MSE', 'R2', 'predicted_values'), scores))

In [None]:
# Fit each candidate model on the water potential target and collect its
# test-set metrics and predictions, keyed by the model's string form.
results_water_potential = {}
for regressor in models:
    # joblib.dump(regressor, f"models/{regressor.__class__.__name__}_model_water_potential.pkl") # Save water potential model
    # (if re-enabled, place the dump after train_and_evaluate so it runs after this loop's fit)
    scores = train_and_evaluate(
        regressor,
        X_train_water_potential,
        y_train_water_potential,
        X_test_water_potential,
        y_test_water_potential,
    )
    results_water_potential[str(regressor)] = dict(zip(('MSE', 'R2', 'predicted_values'), scores))

In [None]:
# Print one "model: MSE, R2" line per model, first for water content and
# then for water potential (the second heading starts with a blank line).
for heading, result_set in (
    ("Water Content Prediction Results:", results_water_content),
    ("\nWater Potential Prediction Results:", results_water_potential),
):
    print(heading)
    for model, results in result_set.items():
        print(f"{model}: MSE={results['MSE']:.4f}, R2={results['R2']:.4f}")

In [None]:
# Column-wise views of the water content results (model name, MSE, R2),
# in the insertion order of the results dictionary.
model_names = list(results_water_content)
mse_values = [entry['MSE'] for entry in results_water_content.values()]
r2_values = [entry['R2'] for entry in results_water_content.values()]

# One row (dict) per model; the dict keys become the table headers.
results_table_water_content = [
    {'Model': name, 'MSE': entry['MSE'], 'R2': entry['R2']}
    for name, entry in results_water_content.items()
]

# Render the rows as a grid-style text table.
print("Water Content Prediction Results:")
print(tabulate(results_table_water_content, headers='keys', tablefmt='grid'))

In [None]:
# Column-wise views of the water potential results (model name, MSE, R2),
# in the insertion order of the results dictionary.
model_names_water_potential = list(results_water_potential)
mse_values_water_potential = [entry['MSE'] for entry in results_water_potential.values()]
r2_values_water_potential = [entry['R2'] for entry in results_water_potential.values()]

# One row (dict) per model; the dict keys become the table headers.
results_table_water_potential = [
    {'Model': name, 'MSE': entry['MSE'], 'R2': entry['R2']}
    for name, entry in results_water_potential.items()
]

# Render the rows as a grid-style text table, preceded by a blank line.
print("\nWater Potential Prediction Results:")
print(tabulate(results_table_water_potential, headers='keys', tablefmt='grid'))

In [None]:
# Visualize results for water content prediction (scatter plots of predicted vs. actual values)
# The grid is sized from the number of results (2 columns, as many rows as
# needed) instead of assuming exactly six models; six models still yields
# the 3x2, 12x15 figure.
n_cols = 2
n_models = len(results_water_content)
n_rows = max(1, -(-n_models // n_cols))  # ceiling division, at least one row
fig, axs = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(12, 5 * n_rows), squeeze=False)
panels = axs.ravel()  # squeeze=False keeps axs 2-D, so this is always a flat list of axes

for ax, (model, results) in zip(panels, results_water_content.items()):
    ax.scatter(y_test_water_content, results['predicted_values'], label=model)
    ax.set_xlabel("Actual Water Content")
    ax.set_ylabel("Predicted Water Content")
    ax.set_title(f"Predicted vs. Actual Water Content - {model}")
    ax.legend()
    # Add text annotations with R² and MSE values
    ax.text(0.45, 0.85, f"R²: {results['R2']:.4f}", ha='left', va='top', transform=ax.transAxes, fontsize=10)
    ax.text(0.45, 0.80, f"MSE: {results['MSE']:.4f}", ha='left', va='top', transform=ax.transAxes, fontsize=10)

    # Add the y = x reference line. Both axes are given the same limits so
    # the line is a true identity line (perfect predictions fall on it);
    # joining the corners of differing x/y limits would not be y = x.
    bounds = (*ax.get_xlim(), *ax.get_ylim())
    lo, hi = min(bounds), max(bounds)
    ax.plot([lo, hi], [lo, hi], 'k--', alpha=0.75, zorder=0)
    ax.set_xlim(lo, hi)
    ax.set_ylim(lo, hi)

# Hide any panels left over when the model count does not fill the grid.
for ax in panels[n_models:]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Visualize results for water potential prediction (scatter plots of predicted vs. actual values)
# The predictions and metrics come from results_water_potential so they
# match the water potential test targets plotted on the x-axis.
# The grid is sized from the number of results (2 columns, as many rows as
# needed) instead of assuming exactly six models; six models still yields
# the 3x2, 12x15 figure.
n_cols = 2
n_models = len(results_water_potential)
n_rows = max(1, -(-n_models // n_cols))  # ceiling division, at least one row
fig, axs = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(12, 5 * n_rows), squeeze=False)
panels = axs.ravel()  # squeeze=False keeps axs 2-D, so this is always a flat list of axes

for ax, (model, results) in zip(panels, results_water_potential.items()):
    ax.scatter(y_test_water_potential, results['predicted_values'], label=model)
    ax.set_xlabel("Actual Water Potential")
    ax.set_ylabel("Predicted Water Potential")
    ax.set_title(f"Predicted vs. Actual Water Potential - {model}")
    ax.legend()
    # Add text annotations with R² and MSE values
    ax.text(0.45, 0.85, f"R²: {results['R2']:.4f}", ha='left', va='top', transform=ax.transAxes, fontsize=10)
    ax.text(0.45, 0.80, f"MSE: {results['MSE']:.4f}", ha='left', va='top', transform=ax.transAxes, fontsize=10)

    # Add the y = x reference line. Both axes are given the same limits so
    # the line is a true identity line (perfect predictions fall on it);
    # joining the corners of differing x/y limits would not be y = x.
    bounds = (*ax.get_xlim(), *ax.get_ylim())
    lo, hi = min(bounds), max(bounds)
    ax.plot([lo, hi], [lo, hi], 'k--', alpha=0.75, zorder=0)
    ax.set_xlim(lo, hi)
    ax.set_ylim(lo, hi)

# Hide any panels left over when the model count does not fill the grid.
for ax in panels[n_models:]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

In [34]:
# # Create the models folder if it doesn't exist
# model_folder = "models"
# if not os.path.exists(model_folder):
#     os.makedirs(model_folder)

# # Load saved water content models dynamically
# for model_name in [os.path.join(model_folder, f"{model.__class__.__name__}_model_water_content.pkl") for model in models]:
#     model = joblib.load(model_name)
#     mse, r2, y_pred = train_and_evaluate(model, X_train_water_content, y_train_water_content, X_test_water_content, y_test_water_content)
#     results_water_content[str(model)] = {'MSE': mse, 'R2': r2}

# # Load saved water potential models dynamically
# for model_name in [os.path.join(model_folder, f"{model.__class__.__name__}_model_water_potential.pkl") for model in models]:
#     model = joblib.load(model_name)
#     mse, r2, y_pred = train_and_evaluate(model, X_train_water_potential, y_train_water_potential, X_test_water_potential, y_test_water_potential)
#     results_water_potential[str(model)] = {'MSE': mse, 'R2': r2}