In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from esda.moran import Moran
from libpysal.weights import Queen

# Exploratory pass over the ET estimates and their residuals: density plots,
# a correlation heatmap, and global Moran's I on the ensemble residual.

# Load dataset. The layer is named explicitly, matching the later cell that
# reads the same GeoPackage together with its geometry.
gdf = gpd.read_file(
    "/mnt/data/POU_w_data_all_irr_yr_w_err.gpkg",
    layer="POU_w_data_all_irr_yr_w_err",
)

# The residual fields are stored as "*_residu" (see the column inspection
# further down this notebook); "*_residual" is not a column of this layer.
et_cols = ["eem_eet", "sse_eet", "ens_eet"]
res_cols = ["eem_residu", "sse_residu", "ens_residu"]
value_cols = et_cols + res_cols

# Coerce to numeric so unparseable entries become NaN instead of breaking the
# plots and the correlation matrix (same treatment as the later cells).
for col in value_cols:
    gdf[col] = pd.to_numeric(gdf[col], errors="coerce")

# --- ET density plots (KDE) ---
fig, ax = plt.subplots(figsize=(10, 6))
for col in et_cols:
    sns.kdeplot(gdf[col], label=col, fill=True, alpha=0.4, ax=ax)
ax.set_xlabel("Evapotranspiration (inches or mm, depending on dataset units)")
ax.set_ylabel("Density")
ax.set_title("Distribution of ET Estimates by Model")
ax.legend()
fig.tight_layout()
plt.show()

# --- Residual density plots (KDE) ---
fig, ax = plt.subplots(figsize=(10, 6))
for col in res_cols:
    sns.kdeplot(gdf[col], label=col, fill=True, alpha=0.4, ax=ax)
ax.set_xlabel("Residual (Diversion - ET)")
ax.set_ylabel("Density")
ax.set_title("Distribution of Residuals Across ET Models")
ax.legend()
fig.tight_layout()
plt.show()

# --- Correlation heatmap ---
corr = gdf[value_cols].corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Between ET Estimates and Residuals")
fig.tight_layout()
plt.show()

# --- Spatial autocorrelation (Moran's I) ---
# Keep only features that have both a residual and a geometry. Filling missing
# residuals with 0 would present "no data" as "no error" and bias the statistic.
has_data = gdf["ens_residu"].notna() & gdf.geometry.notna()
moran_gdf = gdf.loc[has_data].reset_index(drop=True)

# Weights are built from the filtered frame so they line up row-for-row with
# the values handed to Moran.
w = Queen.from_dataframe(moran_gdf)
moran = Moran(moran_gdf["ens_residu"], w)

# p_value and z_score are the permutation-based attributes (p_sim / z_sim).
moran_stats = {
    "Moran_I": moran.I,
    "Expected_I": moran.EI,
    "p_value": moran.p_sim,
    "z_score": moran.z_sim,
}
moran_stats


In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from esda.moran import Moran
from libpysal.weights import Queen

# NOTE(review): this cell runs the same analysis as the first cell of the
# notebook; keep a single copy once the notebook is cleaned up.

# Load dataset. The layer is named explicitly, matching the later cell that
# reads the same GeoPackage together with its geometry.
gdf = gpd.read_file(
    "/mnt/data/POU_w_data_all_irr_yr_w_err.gpkg",
    layer="POU_w_data_all_irr_yr_w_err",
)

# The residual fields are stored as "*_residu" (see the column inspection
# further down this notebook); "*_residual" is not a column of this layer.
et_cols = ["eem_eet", "sse_eet", "ens_eet"]
res_cols = ["eem_residu", "sse_residu", "ens_residu"]
value_cols = et_cols + res_cols

# Coerce to numeric so unparseable entries become NaN instead of breaking the
# plots and the correlation matrix (same treatment as the later cells).
for col in value_cols:
    gdf[col] = pd.to_numeric(gdf[col], errors="coerce")

# --- ET density plots (KDE) ---
fig, ax = plt.subplots(figsize=(10, 6))
for col in et_cols:
    sns.kdeplot(gdf[col], label=col, fill=True, alpha=0.4, ax=ax)
ax.set_xlabel("Evapotranspiration (inches or mm, depending on dataset units)")
ax.set_ylabel("Density")
ax.set_title("Distribution of ET Estimates by Model")
ax.legend()
fig.tight_layout()
plt.show()

# --- Residual density plots (KDE) ---
fig, ax = plt.subplots(figsize=(10, 6))
for col in res_cols:
    sns.kdeplot(gdf[col], label=col, fill=True, alpha=0.4, ax=ax)
ax.set_xlabel("Residual (Diversion - ET)")
ax.set_ylabel("Density")
ax.set_title("Distribution of Residuals Across ET Models")
ax.legend()
fig.tight_layout()
plt.show()

# --- Correlation heatmap ---
corr = gdf[value_cols].corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Between ET Estimates and Residuals")
fig.tight_layout()
plt.show()

# --- Spatial autocorrelation (Moran's I) ---
# Keep only features that have both a residual and a geometry. Filling missing
# residuals with 0 would present "no data" as "no error" and bias the statistic.
has_data = gdf["ens_residu"].notna() & gdf.geometry.notna()
moran_gdf = gdf.loc[has_data].reset_index(drop=True)

# Weights are built from the filtered frame so they line up row-for-row with
# the values handed to Moran.
w = Queen.from_dataframe(moran_gdf)
moran = Moran(moran_gdf["ens_residu"], w)

# p_value and z_score are the permutation-based attributes (p_sim / z_sim).
moran_stats = {
    "Moran_I": moran.I,
    "Expected_I": moran.EI,
    "p_value": moran.p_sim,
    "z_score": moran.z_sim,
}
moran_stats


In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from esda.moran import Moran
from libpysal.weights import Queen

# NOTE(review): this cell runs the same analysis as the first cell of the
# notebook; keep a single copy once the notebook is cleaned up.

# Load dataset. The layer is named explicitly, matching the later cell that
# reads the same GeoPackage together with its geometry.
gdf = gpd.read_file(
    "/mnt/data/POU_w_data_all_irr_yr_w_err.gpkg",
    layer="POU_w_data_all_irr_yr_w_err",
)

# The residual fields are stored as "*_residu" (see the column inspection
# further down this notebook); "*_residual" is not a column of this layer.
et_cols = ["eem_eet", "sse_eet", "ens_eet"]
res_cols = ["eem_residu", "sse_residu", "ens_residu"]
value_cols = et_cols + res_cols

# Coerce to numeric so unparseable entries become NaN instead of breaking the
# plots and the correlation matrix (same treatment as the later cells).
for col in value_cols:
    gdf[col] = pd.to_numeric(gdf[col], errors="coerce")

# --- ET density plots (KDE) ---
fig, ax = plt.subplots(figsize=(10, 6))
for col in et_cols:
    sns.kdeplot(gdf[col], label=col, fill=True, alpha=0.4, ax=ax)
ax.set_xlabel("Evapotranspiration (inches or mm, depending on dataset units)")
ax.set_ylabel("Density")
ax.set_title("Distribution of ET Estimates by Model")
ax.legend()
fig.tight_layout()
plt.show()

# --- Residual density plots (KDE) ---
fig, ax = plt.subplots(figsize=(10, 6))
for col in res_cols:
    sns.kdeplot(gdf[col], label=col, fill=True, alpha=0.4, ax=ax)
ax.set_xlabel("Residual (Diversion - ET)")
ax.set_ylabel("Density")
ax.set_title("Distribution of Residuals Across ET Models")
ax.legend()
fig.tight_layout()
plt.show()

# --- Correlation heatmap ---
corr = gdf[value_cols].corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Between ET Estimates and Residuals")
fig.tight_layout()
plt.show()

# --- Spatial autocorrelation (Moran's I) ---
# Keep only features that have both a residual and a geometry. Filling missing
# residuals with 0 would present "no data" as "no error" and bias the statistic.
has_data = gdf["ens_residu"].notna() & gdf.geometry.notna()
moran_gdf = gdf.loc[has_data].reset_index(drop=True)

# Weights are built from the filtered frame so they line up row-for-row with
# the values handed to Moran.
w = Queen.from_dataframe(moran_gdf)
moran = Moran(moran_gdf["ens_residu"], w)

# p_value and z_score are the permutation-based attributes (p_sim / z_sim).
moran_stats = {
    "Moran_I": moran.I,
    "Expected_I": moran.EI,
    "p_value": moran.p_sim,
    "z_score": moran.z_sim,
}
moran_stats


In [None]:
# Clean the numeric columns before plotting — remove non-numeric or invalid entries

import sqlite3
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Read the attribute table straight out of the GeoPackage's SQLite container
# (geometry is not interpreted here).
conn = sqlite3.connect("/mnt/data/POU_w_data_all_irr_yr_w_err.gpkg")
try:
    df = pd.read_sql_query("SELECT * FROM POU_w_data_all_irr_yr_w_err;", conn)
finally:
    # Close explicitly: `with conn:` would only manage the transaction and
    # leave the file handle open.
    conn.close()

# The residual fields are stored as "*_residu" (see the column inspection in
# the next cell); "*_residual" is not a column of this table.
et_cols = ["eem_eet", "sse_eet", "ens_eet"]
res_cols = ["eem_residu", "sse_residu", "ens_residu"]
cols_to_use = et_cols + res_cols

# Clean numeric columns: convert to float, turning unparseable entries into NaN
for col in cols_to_use:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Keep only rows where every analysed column has a value
df_clean = df[cols_to_use].dropna()

# --- ET density plots (KDE) ---
fig, ax = plt.subplots(figsize=(10, 6))
for col in et_cols:
    sns.kdeplot(df_clean[col], label=col, fill=True, alpha=0.4, ax=ax)
ax.set_xlabel("Evapotranspiration (ET units)")
ax.set_ylabel("Density")
ax.set_title("Distribution of ET Estimates by Model")
ax.legend()
fig.tight_layout()
plt.show()

# --- Residual density plots (KDE) ---
fig, ax = plt.subplots(figsize=(10, 6))
for col in res_cols:
    sns.kdeplot(df_clean[col], label=col, fill=True, alpha=0.4, ax=ax)
ax.set_xlabel("Residual (Diversion - ET)")
ax.set_ylabel("Density")
ax.set_title("Distribution of Residuals Across ET Models")
ax.legend()
fig.tight_layout()
plt.show()

# --- Correlation heatmap ---
corr = df_clean.corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Between ET Estimates and Residuals")
fig.tight_layout()
plt.show()

# Summary statistics (one row per analysed column)
summary_stats = df_clean.describe().T
summary_stats


In [None]:
# Let's check available columns to find the actual names of ET and residual columns
import sqlite3
import pandas as pd

# List the table's columns via SQLite's table_info pragma; the connection is
# closed as soon as the result has been read into a DataFrame.
conn = sqlite3.connect("/mnt/data/POU_w_data_all_irr_yr_w_err.gpkg")
try:
    cols = pd.read_sql_query("PRAGMA table_info(POU_w_data_all_irr_yr_w_err);", conn)
finally:
    conn.close()
cols[["name", "type"]]


In [None]:
import sqlite3
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Load the attribute table as a plain DataFrame (geometry is not interpreted).
conn = sqlite3.connect("/mnt/data/POU_w_data_all_irr_yr_w_err.gpkg")
try:
    df = pd.read_sql_query("SELECT * FROM POU_w_data_all_irr_yr_w_err;", conn)
finally:
    # Close explicitly: `with conn:` would only manage the transaction and
    # leave the file handle open.
    conn.close()

# Correct column names based on inspection
et_cols = ["eem_eet", "sse_eet", "ens_eet"]
res_cols = ["eem_residu", "sse_residu", "ens_residu"]
cols_to_use = et_cols + res_cols

# Convert to numeric safely: unparseable entries become NaN
for col in cols_to_use:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Keep only rows where every analysed column has a value
df_clean = df[cols_to_use].dropna()

# --- ET density plots (KDE) ---
fig, ax = plt.subplots(figsize=(10, 6))
for col in et_cols:
    sns.kdeplot(df_clean[col], label=col, fill=True, alpha=0.4, ax=ax)
ax.set_xlabel("Evapotranspiration (ET units)")
ax.set_ylabel("Density")
ax.set_title("Distribution of ET Estimates by Model")
ax.legend()
fig.tight_layout()
plt.show()

# --- Residual density plots (KDE) ---
fig, ax = plt.subplots(figsize=(10, 6))
for col in res_cols:
    sns.kdeplot(df_clean[col], label=col, fill=True, alpha=0.4, ax=ax)
ax.set_xlabel("Residual (Diversion - ET)")
ax.set_ylabel("Density")
ax.set_title("Distribution of Residuals Across ET Models")
ax.legend()
fig.tight_layout()
plt.show()

# --- Correlation heatmap ---
corr = df_clean.corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Between ET Estimates and Residuals")
fig.tight_layout()
plt.show()

# --- Summary statistics (one row per analysed column, plus skewness) ---
summary_stats = df_clean.describe().T
summary_stats["skewness"] = df_clean.skew()
summary_stats


In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd  # needed for pd.to_numeric below; previously relied on an earlier cell
import seaborn as sns

# Load the full GeoPackage (geometry included)
gdf = gpd.read_file("/mnt/data/POU_w_data_all_irr_yr_w_err.gpkg", layer="POU_w_data_all_irr_yr_w_err")

# Coerce residuals to numeric; unparseable entries become NaN
gdf["ens_residu"] = pd.to_numeric(gdf["ens_residu"], errors="coerce")

# Drop incomplete rows BEFORE casting SoilClass to str. astype(str) turns a
# missing value into ordinary text, which dropna would no longer recognise,
# leaving a bogus soil class in the boxplot and the summary table.
gdf = gdf.dropna(subset=["ens_residu", "SoilClass", "geometry"]).copy()
gdf["SoilClass"] = gdf["SoilClass"].astype(str)

# --- Residual Choropleth Map (linear scale) ---
fig, ax = plt.subplots(figsize=(10, 8))
gdf.plot(column="ens_residu", cmap="RdYlBu_r", legend=True, ax=ax, linewidth=0.1, edgecolor="gray")
ax.set_title("Spatial Distribution of Ensemble Residuals (Diversion - ET)\nLinear Scale", fontsize=14)
ax.set_axis_off()
fig.tight_layout()
plt.show()

# --- Boxplot of Residuals by SoilClass (outlier markers hidden) ---
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=gdf, x="SoilClass", y="ens_residu", palette="coolwarm", showfliers=False, ax=ax)
ax.set_xlabel("Soil Class")
ax.set_ylabel("Ensemble Residual (Diversion - ET)")
ax.set_title("Residuals by Soil Class")
fig.tight_layout()
plt.show()

# Summary: descriptive statistics of the ensemble residual per soil class
# (the "50%" column of describe() is the median)
soil_summary = gdf.groupby("SoilClass")["ens_residu"].describe().round(2)
soil_summary
