In [None]:
from aquacrop.utils import get_filepath, prepare_weather
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
import yaml

from evaluator import AquaCropEvaluator
from prescriptor import RNNPrescriptorFactory

In [None]:
# Resolve the climate file by name through aquacrop's helper, then hand the
# resulting path to prepare_weather to obtain the weather table `wdf`.
CLIMATE_FILE = "champion_climate.txt"
weather_path = get_filepath(CLIMATE_FILE)
wdf = prepare_weather(weather_path)

In [None]:
# Earliest and latest dates present in the weather table.
weather_dates = wdf["Date"]
weather_dates.min(), weather_dates.max()

In [None]:
# Small feed-forward net (4 inputs -> 64 hidden -> 5 outputs), built only to
# count how many trainable parameters such a model has.
layers = [
    torch.nn.Linear(4, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 5),
]
nn = torch.nn.Sequential(*layers)
sum(param.numel() for param in nn.parameters())

In [None]:
# Shape check for a batch-first RNN: feed a constant (batch=3, seq=10, feat=4)
# input and compare the last time step of the output with the returned hidden
# state for the first sequence in the batch.
rnn = torch.nn.RNN(input_size=4, hidden_size=64, batch_first=True)
inp = torch.ones((3, 10, 4))

out, h = rnn(inp)
for label_free_value in ((out.shape, h.shape),):
    print(*label_free_value)
print(out[0, -1, :])
print(h[0, 0, :])

## Examine Results of Experiment

In [None]:
# Load the evolution results and the baseline table from disk.
results_df = pd.read_csv("results/rnn-seeded/results.csv")
baseline_df = pd.read_csv("baselines/one-season.csv")

# Display only the rows that belong to the highest generation number.
last_gen = results_df["gen"].max()
results_df.loc[results_df["gen"] == last_gen]

In [None]:
# Rows of the final generation; later cells reuse `gen_df`.
gen_df = results_df[results_df["gen"] == results_df["gen"].max()]
# "yield" is negated before plotting everywhere in this notebook — presumably
# it is stored negated because the optimiser minimises it; TODO confirm.
gen_yield = -1 * gen_df["yield"]

# Figure 1: irrigation vs. yield.
fig, ax = plt.subplots()
ax.scatter(gen_df["irrigation"], gen_yield, label="evolution")
# ax.scatter(baseline_df["irrigation"], baseline_df["yield"], color='red', label="baseline")
ax.legend()
ax.set_xlabel("Avg Irrigation (mm)")
ax.set_ylabel("Avg Yield (tonnes/ha)")
plt.show()

# Figure 2: mulch vs. yield. The axes were previously unlabelled, so the
# figure could not be read on its own; labels match the other plots here.
fig, ax = plt.subplots()
ax.scatter(gen_df["mulch_pct"], gen_yield, label="evolution")
ax.set_xlabel("Avg Mulch (%)")
ax.set_ylabel("Avg Yield (tonnes/ha)")
plt.show()

In [None]:
# Irrigation against mulch for the final generation, with each point coloured
# by its (sign-flipped) yield value.
fig, ax = plt.subplots()
points = ax.scatter(gen_df["irrigation"], gen_df["mulch_pct"], c=-1 * gen_df["yield"])
ax.set(
    title="Avg. Irrigation vs. Avg. Mulch colored by Avg. Yield",
    xlabel="Avg Irrigation (mm)",
    ylabel="Avg Mulch (%)",
)
fig.colorbar(points, ax=ax, label="Avg Yield (tonne/ha)")
plt.show()

In [None]:
# A candidate is "dominant" when at least one baseline row uses strictly more
# irrigation AND has a strictly lower yield than the candidate (candidate
# yield is sign-flipped first, as in the plots above).
#
# The comparison is done as one broadcast (n_candidates x n_baselines) boolean
# matrix instead of nested iterrows() loops; this also keeps `cand_id` in its
# original column dtype rather than the per-row upcast iterrows() applies.
cand_irrigation = gen_df["irrigation"].to_numpy()[:, None]
cand_yield = (-1 * gen_df["yield"]).to_numpy()[:, None]
base_irrigation = baseline_df["irrigation"].to_numpy()[None, :]
base_yield = baseline_df["yield"].to_numpy()[None, :]

beats_some_baseline = (
    (cand_irrigation < base_irrigation) & (cand_yield > base_yield)
).any(axis=1)

# Candidate order follows gen_df's row order, as the loop version did.
dominant_cands = gen_df.loc[beats_some_baseline, "cand_id"].tolist()
print(dominant_cands)

## Examining the Results of Runs

In [None]:
# Read the experiment configuration.
with open("config.yml", "r", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)
# NOTE(review): only config["eval_params"] is consumed in this cell, so this
# top-level override has no visible effect here — confirm where n_jobs is read.
config["n_jobs"] = 1

evaluator = AquaCropEvaluator(**config["eval_params"])
factory = RNNPrescriptorFactory()
population = factory.load_population("results/rnn-seeded/population")

# Re-run every final-generation candidate and keep both values returned by
# run_candidate, keyed by candidate id.
cand_ids = gen_df["cand_id"].tolist()
policies = {}
results_dfs = {}
for cand_id in tqdm(cand_ids):
    cand_policy, cand_results = evaluator.run_candidate(population[cand_id])
    policies[cand_id] = cand_policy
    results_dfs[cand_id] = cand_results

In [None]:
# One panel per weather context; each panel shows, across all candidates, the
# distribution of the five policy outputs (four SMT values plus mulch).
labels = ["lowest precip", "highest precip", "last year", "this year"]
box_names = [f"SMT-{k+1}" for k in range(4)] + ["mulch"]

fig, axes = plt.subplots(1, 4, figsize=(16, 4), sharey=True)
for context_idx, (ax, panel_title) in enumerate(zip(axes, labels)):
    ax.set_title(panel_title)
    # Rows are candidates, columns are the five outputs for this context.
    smts = np.zeros((len(policies), 5))
    for row, policy in enumerate(policies.values()):
        for col in range(5):
            smts[row, col] = policy[context_idx, col]
    ax.boxplot(smts, tick_labels=box_names)
plt.show()

In [None]:
# For each candidate: standard deviation of its policy along axis 0, averaged
# over the remaining entries. The printed figure is the mean of that value
# over all candidates.
per_cand_spread = [
    np.std(policies[cand_id], axis=0).mean() for cand_id in cand_ids
]
stdev_total = sum(per_cand_spread)
n = len(per_cand_spread)
print(stdev_total / n)


In [None]:
# Collect yield / irrigation / mulch for each candidate (rows) in each of the
# four weather contexts (columns).
n_cands = len(cand_ids)
yields = np.zeros((n_cands, 4))
irrs = np.zeros((n_cands, 4))
mulches = np.zeros((n_cands, 4))
column_targets = {"yield": yields, "irrigation": irrs, "mulch_pct": mulches}

# NOTE: the loop variable rebinds `results_df` (previously the table read from
# results.csv). After the loop it holds the last candidate's frame, and later
# cells read it under that name, so the name is kept as-is.
for i, (cand_id, results_df) in enumerate(results_dfs.items()):
    for j in range(4):
        context_row = results_df.iloc[j]
        for column, target in column_targets.items():
            target[i, j] = context_row[column]

# One scatter panel per weather context: mulch vs. irrigation, coloured by yield.
fig, axes = plt.subplots(1, 4, figsize=(16, 4), sharey=True)
for j, ax in enumerate(axes):
    ax.scatter(mulches[:, j], irrs[:, j], c=yields[:, j])
    ax.set_xlabel("Mulch pct")
    ax.set_title(labels[j])
fig.supylabel("Irrigation (mm)")
plt.show()

In [None]:
# Overlay irrigation vs. yield for all candidates, one colour per weather context.
fig, ax = plt.subplots()
for context_label, context_irr, context_yield in zip(labels, irrs.T, yields.T):
    ax.scatter(context_irr, context_yield, label=context_label, alpha=0.5)
ax.set(
    title="Irrigation vs. Yield by Weather",
    xlabel="Total Irrigation (mm)",
    ylabel="Yield (tonnes/ha)",
)
ax.legend()
plt.show()

In [None]:
# Compare the number of weather rows inside the simulation window (both ends
# inclusive) with the number of rows in `results_df`.
wdf = evaluator.weather_data
print(wdf.columns)

sim_start = pd.to_datetime(evaluator.sim_start_date)
sim_end = pd.to_datetime(evaluator.sim_end_date)
filtered = wdf[wdf["Date"].between(sim_start, sim_end)]

for frame in (filtered, results_df):
    print(len(frame))

In [None]:
# Consistency checks on `results_df`: the "depths" column must equal "IrrDay",
# and the maximum "DryYield" must match the maximum dry yield in `final_stats`.
# NOTE(review): at this point `results_df` is whatever the earlier
# `for ..., results_df in results_dfs.items()` loop left behind — confirm that
# frame actually has these columns.
# NOTE(review): `final_stats` is not assigned in any cell visible above, so this
# cell raises NameError on a fresh kernel — TODO define it before this cell.
assert results_df["depths"].equals(results_df["IrrDay"])
assert results_df["DryYield"].max() == final_stats["Dry yield (tonne/ha)"].max()

In [None]:
# List the columns available in the current `results_df`.
print(results_df.columns)

In [None]:
# Plot only the entries with a positive irrigation depth; the x position of
# each point is its index in `results_df`.
applied_depths = results_df.loc[results_df["depths"] > 0, "depths"]

fig, ax = plt.subplots()
ax.plot(applied_depths)
ax.set(title="Irrigation Depth", xlabel="Days", ylabel="Depth (mm)")
plt.show()

## Examine Generated Data

In [None]:
data_df = pd.read_csv("data/one-season-data.csv")
# Drop every row whose columns, ignoring "baseline", are all exactly zero.
non_baseline = data_df.drop(columns=["baseline"])
all_zero_row = non_baseline.eq(0).all(axis=1)
data_df = data_df[~all_zero_row]