# aquacrop-gym: PPO example

This notebook shows the process used to train a PPO agent to make irrigation decisions within AquaCrop-OSPy.



import libraries and functions

In [None]:
from aquacrop.classes import *
from aquacrop.core import *
 

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from aquacropgym.utils import calc_eto_faopm
from aquacropgym.envs import CropEnv, nebraska_maize_config
from aquacropgym.utils import evaluate_agent
from aquacropgym.utils import calc_eto_faopm

import copy

import ray

import LARS-WG generated weather data (100 years split into 70 train and 30 test)

calculate ET0 using FAO-PM

In [None]:
# CP site header: Lat.(deg)= 40.40  Long.(deg)=101.73  Elev.(m)=1072.
# The 40.4 and 1072 arguments below match the latitude and elevation noted above.
# Presumably the column names of the weather file -- confirm against calc_eto_faopm.
weather_columns = ["simyear", "jday", "minTemp", "maxTemp", "precip", "rad"]
gendf = calc_eto_faopm('data/CPWG.dat', 1995, 40.4, 1072, True, weather_columns)
gendf.head()  # last expression of the cell, so the notebook displays it

## Define crop simulation config options

All other env params can be seen in `aquacropgym/envs.py`

In [None]:
# Experiment-level settings; they feed the env config and the results file name.
IRR_CAP = 10_000  # max amount of irrigation (mm/ha) that can be applied in a single season
ACTION_SET = 'binary'  # action set; alternatives are: 'depth', 'binary', 'smt4'
DAYS_TO_IRR = 7  # number of days between irrigation decisions (e.g., 1, 3, 5, 7)


In [None]:
# Start from a copy of the default config dictionary and override the
# entries this experiment cares about in a single update.
envconfig = nebraska_maize_config.copy()
envconfig.update({
    'gendf': gendf,  # set weather data
    'year2': 70,  # end of the train/test split
    'normalize_obs': True,  # normalize input observation (with a pre calculated mean and standard deviation)
    'include_rain': True,  # include rainfall within weather data
    'observation_set': 'default',  # set of variables to pass to agent
    'max_irr': 25,  # max irrigation that can be applied in a single irrigation event
    'action_set': ACTION_SET,  # action sets, alternatives are: 'depth', 'binary', 'smt4'
    'days_to_irr': DAYS_TO_IRR,  # number of days between irrigation decisions (e.g., 1, 3, 5, 7)
    'max_irr_season': IRR_CAP,  # max amount of irrigation (mm/ha) that can be applied in a single season
})


In [None]:
# Build an environment instance directly from the config dictionary.
# NOTE(review): `env` is not referenced again in the visible cells; the PPO
# trainer is handed the CropEnv class and builds its own environments.
env=CropEnv(envconfig)

## initialize ray library

In [None]:
# Stop any Ray runtime that may still be active (e.g. from an earlier run of
# this notebook) -- presumably so the ray.init call can start cleanly.
ray.shutdown()

In [None]:
# Start Ray with an explicit resource budget.
ray.init(
    num_cpus=1,  # number of cpus available
    num_gpus=0,  # number of gpus available
)

## set ppo params

In [None]:
from ray.rllib.agents.ppo import ppo

# Shallow copy of RLlib's PPO defaults: top-level keys can be overridden
# freely, but nested dicts (such as 'model') are still the very same objects
# held by ppo.DEFAULT_CONFIG.
config = ppo.DEFAULT_CONFIG.copy()

# Resources.
config['num_workers'] = 1
config['num_gpus'] = 0
# Presumably 0 so the worker fits next to the driver within the single CPU
# given to ray.init -- confirm before changing the resource budget.
config['num_cpus_per_worker'] = 0

# Sampling and optimisation.
config['observation_filter'] = 'MeanStdFilter'  # normalize observations
config['rollout_fragment_length'] = 160
config['train_batch_size'] = 512
config['framework'] = 'torch'
config['gamma'] = 1.  # no discounting of future rewards

# Settings forwarded to the environment.
config['env_config'] = envconfig

# Network settings. Build a fresh 'model' dict instead of assigning into the
# nested one: writing config['model'][...] in place would also modify
# ppo.DEFAULT_CONFIG['model'], leaking these overrides into any other
# trainer created from the defaults in the same session.
config['model'] = dict(
    config['model'],
    fcnet_hiddens=[64] * 3,
    vf_share_layers=False,
)

## create ppo agent

In [None]:
# Instantiate the PPO trainer for the CropEnv environment class using the
# config assembled for this experiment.
agent = ppo.PPOTrainer(config=config, env=CropEnv)

## train and evaluate agent

In [None]:
import os

# Evaluation history; the four lists grow in lockstep, one entry per
# evaluation round.
proftrain = []  # training-years result returned by evaluate_agent
proftest = []   # test-years result returned by evaluate_agent
timesteps = []  # result['timesteps_total'] at evaluation time
caps = []       # irrigation cap recorded for the round

# Build the results path once and make sure its directory exists before any
# training happens: to_csv does not create missing directories, so without
# this the run would only fail at the first save.
results_path = f'outputs/neb_corn_ppo_day_{DAYS_TO_IRR}_act_{ACTION_SET}_cap_{IRR_CAP}.csv'
os.makedirs(os.path.dirname(results_path), exist_ok=True)

for i in range(1, 500001):
    result = agent.train()

    ts = result['timesteps_total']

    if i % 5 == 0:  # evaluate agent on train and test years

        print('eval')
        # NOTE(review): irr_cap is only recorded and printed; the evaluation
        # config keeps whatever 'max_irr_season' envconfig already holds.
        for irr_cap in [IRR_CAP]:
            test_env_config = copy.deepcopy(envconfig)  # make a copy of the training env
            test_env_config['evaluation_run'] = True  # sets env to evaluation mode

            train_rew, test_rew = evaluate_agent(agent, CropEnv, test_env_config)  # evaluate agent

            proftrain.append(train_rew)
            proftest.append(test_rew)
            timesteps.append(ts)
            caps.append(irr_cap)

            print(irr_cap, f'Train:{round(train_rew,3)}')
            print(irr_cap, f'Test:{round(test_rew,3)}')

    if i % 5 == 0:  # save results
        checkpoint_path = agent.save()
        print(checkpoint_path)

        # One row per evaluation round: timesteps, train, test, cap.
        # The CSV is rewritten in full each time.
        result_df = pd.DataFrame([timesteps, proftrain, proftest, caps]).T
        result_df.to_csv(results_path)
        plt.plot(timesteps, proftrain)
        plt.plot(timesteps, proftest)
        plt.show()