In [1]:
# Import required libraries for data manipulation, visualization, and numerical operations
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

## Plot Configuration
The following cell configures Matplotlib's plotting parameters to ensure consistent and professional-looking visualizations.

In [2]:
# Global Matplotlib styling, applied group by group so every figure in the
# notebook shares the same fonts, line widths and save settings.
plt.rc('font', size=16, family='Times New Roman')    # default text size and typeface
plt.rc('axes', labelsize=16, linewidth=1.2)          # axis label size, axes frame width
plt.rc(('xtick', 'ytick'), labelsize=16)             # tick label size on both axes
plt.rc(('xtick.major', 'ytick.major'), width=1.2)    # major tick mark width on both axes
plt.rc('savefig', format='png', bbox='tight')        # PNG output with a tight bounding box
plt.rc('text', usetex=False)                         # keep LaTeX rendering switched off

## Data Loading
Load the dataset from a CSV file and display the first few rows to inspect its structure.

In [3]:
# Read the CSV and, in the same chain, parse the 'DATE' column
# (month/day/year strings) into pandas datetimes.
df = pd.read_csv('../dataset/data.csv').assign(
    DATE=lambda frame: pd.to_datetime(frame['DATE'], format='%m/%d/%Y')
)

# Preview the first 5 rows to check the structure
df.head()

Unnamed: 0,DATE,T,SP,SR,RH,WU,WV,TSM,CHL,CDOM,CDEC,EV
0,2016-06-24,26.248143,96821.02653,31562901.28,40.56,0.194397,0.860564,-0.710855,-1.107892,-1.451557,6.452353,8.42054
1,2016-07-14,27.984237,96651.16619,31276649.52,22.75,-0.011315,0.31981,-0.506057,-0.952389,-1.046996,8.165377,8.070396
2,2016-08-03,27.073733,96595.32039,29337726.72,15.12,-0.050064,0.459498,0.168353,-0.269065,0.236308,8.37357,7.883678
3,2016-08-23,24.41964,96680.79519,25496985.6,23.69,-0.560787,1.642996,0.755064,0.640315,1.284765,5.978892,6.562248
4,2016-09-12,20.526815,96353.97802,22353904.56,42.12,-0.93196,2.114761,0.93173,0.836024,1.706997,6.797793,5.121693


## Metrics Definition
Define evaluation metrics for model performance.

In [4]:
# Import metrics for evaluation
from sklearn.metrics import r2_score, root_mean_squared_error

# Define Mean Bias Error (MBE) function
def mbe(y_true, y_pred):
    """Return the mean bias error: the mean of ``y_pred - y_true``.

    A positive result means the predictions are, on average, larger than
    the reference values; a negative result means they are smaller.

    Both inputs are converted to float NumPy arrays before subtracting, so
    plain lists and tuples are accepted, and pandas Series are paired
    element by element in positional order rather than by index label.
    NaN values are not skipped; they propagate to the result.

    Parameters
    ----------
    y_true : array-like
        Reference values.
    y_pred : array-like
        Values being evaluated against the reference.

    Returns
    -------
    numpy.float64
        Mean of the element-wise differences ``y_pred - y_true``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(y_pred - y_true)

## Data Extraction
Extract CDEC and ERA5 evapotranspiration (EV) data for analysis.

In [5]:
# Pull out the two evapotranspiration series compared below:
# the CDEC column and the ERA5 column (stored as "EV").
cdec, era = df["CDEC"], df["EV"]

## Scatter Plot with KDE
Create a scatter plot with kernel density estimation (KDE) to compare CDEC and ERA5 evapotranspiration values, including performance metrics.

In [6]:
# Scatter plot with KDE background: CDEC on x, ERA5 on y
fig, ax = plt.subplots(figsize=(7, 6), dpi=600)
sns.kdeplot(x=cdec, y=era, levels=10, cmap='CMRmap_r', alpha=0.6, fill=True, ax=ax)  # density contours
ax.scatter(cdec, era, color='deepskyblue', s=60, alpha=0.8, edgecolor='black')  # individual samples

# One shared range for both axes so the 1:1 line spans exactly the visible area
axis_min, axis_max = -1, 12
ax.plot([axis_min, axis_max], [axis_min, axis_max], 'r--')  # 1:1 reference line
ax.set_xlabel("EV$_{CDEC}$ (mm/day)")
ax.set_ylabel("EV$_{ERA5}$ (mm/day)")

unit_ticks = np.arange(0, axis_max, 1)  # ticks at 0, 1, ..., 11
ax.set_xlim(axis_min, axis_max)
ax.set_xticks(unit_ticks)
ax.set_ylim(axis_min, axis_max)
ax.set_yticks(unit_ticks)

# Performance metrics, with CDEC passed as the reference (y_true) and ERA5 as
# the estimate (y_pred). R^2 is written with mathtext, like the axis labels,
# so the superscript does not depend on the file's text encoding.
rmse = root_mean_squared_error(cdec, era)
r2 = r2_score(cdec, era)
bias = mbe(cdec, era)
ax.text(
    7, 0.5,
    f"RMSE: {rmse:.2f} (mm/day)\nR$^2$: {r2:.2f}\nBias: {bias:.2f} (mm/day)",
    fontsize=16, color='black')

# Save as PNG, then close the figure to free memory (it is not shown inline)
fig.savefig("../plots/validation-scatter.png", dpi=600, bbox_inches='tight')
plt.close(fig)

## Time Series Plot
Generate a time series plot to visualize CDEC and ERA5 evapotranspiration over time, with yearly x-axis ticks.

In [7]:
# Time series of both evapotranspiration signals on one set of axes
fig, ax = plt.subplots(figsize=(7, 6), dpi=600)
for series, colour, tag in ((cdec, "crimson", "CDEC"), (era, "k", "ERA5")):
    ax.plot(df["DATE"], series, color=colour, label=tag, lw=2.5)

ax.set_xlabel("Date")
ax.set_ylabel("EV (mm/day)")
ax.legend(loc="upper left")
ax.set_ylim(-0.00, 14)

# One major tick per year, labelled with the four-digit year only
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

# Write the figure to disk as PNG, then close it to free memory
fig.savefig("../plots/validation-signal.png", dpi=600, bbox_inches='tight')
plt.close(fig)