In [1]:
import os
import sys
# Add the parent directory to the import path so the `simulation` package
# imported in the next cell can be resolved (presumably it lives one level up
# from this notebook -- verify against the repository layout).
# Guarded so that re-running this cell does not pile up duplicate entries.
if "../" not in sys.path:
    sys.path.append("../")

In [2]:
from simulation.farm_env import FarmEnv
from simulation.env_wrapper import MultiAgentActionWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback

In [3]:
##### Configuration: SET THE CROP AND MODEL NAME HERE ---

# -- CROP TO TRAIN --
# Change this to "Rice" or "Sugarcane" to train those models.
CROP_TYPE = "Wheat"

# Weather-data location for each supported crop, as (latitude, longitude).
# Keeping the coordinates in one table means that switching CROP_TYPE also
# switches the location, instead of relying on two more constants being
# edited by hand (and silently left pointing at the previous crop's city).
CROP_LOCATIONS = {
    "Wheat": (30.9010, 75.8573),      # Ludhiana
    "Rice": (22.5726, 88.3639),       # Kolkata
    "Sugarcane": (26.8467, 80.9462),  # Lucknow
}

# Fail early with a clear message rather than training on a mismatched location.
if CROP_TYPE not in CROP_LOCATIONS:
    raise ValueError(
        f"No weather location configured for CROP_TYPE {CROP_TYPE!r}; "
        f"add it to CROP_LOCATIONS (known crops: {sorted(CROP_LOCATIONS)})"
    )

# Corresponding location for weather data
LATITUDE, LONGITUDE = CROP_LOCATIONS[CROP_TYPE]

In [4]:
# The environment trained here is our most advanced one, so it needs a very
# long training run.
TRAINING_STEPS = 3_000_000  # 3 million steps is a solid target
# File name of the final saved model, e.g. "ppo_WHEAT_expert.zip".
MODEL_NAME = "ppo_{}_expert.zip".format(CROP_TYPE.upper())

In [5]:
# -- DIRECTORIES --
# Final models share one folder; logs go to a folder named after the crop.
models_dir = "../models"
logs_dir = "../logs/{}_champion_logs/".format(CROP_TYPE.lower())

# exist_ok=True keeps this cell safe to re-run when the folders already exist.
os.makedirs(name=models_dir, exist_ok=True)
os.makedirs(name=logs_dir, exist_ok=True)

In [6]:
# Announce which crop the environment is about to be built for.
print("--- Creating environment for CROP: {} ---".format(CROP_TYPE))

--- Creating environment for CROP: Wheat ---


In [7]:
# Environment construction lives in a factory function so it can be reused.
def make_env():
    """Build the training environment for the configured crop.

    Constructs a ``FarmEnv`` for ``CROP_TYPE`` and returns it wrapped in
    ``MultiAgentActionWrapper``.

    NOTE(review): only ``crop_type`` is passed to ``FarmEnv``; the
    ``LATITUDE`` / ``LONGITUDE`` constants from the configuration cell are
    not forwarded here. Confirm how ``FarmEnv`` picks its weather location.
    """
    # The wrapper is essential to handle the multi-action space.
    return MultiAgentActionWrapper(FarmEnv(crop_type=CROP_TYPE))


In [8]:
# Build the single wrapped environment instance that the model trains on.
wrapped_env = make_env()
print("{}".format("Environment ready for training."))

Fetching real weather data for Lat: 30.901, Long: 75.8573 from Open-Meteo...
Sucessfully fetched and processed real weather data from Open-Meteo.
Environment ready for training.


In [9]:
#### Model Definition with a More Powerful Brain ---

# The complex observations call for a larger neural network. The same layer
# sizes are used for the "pi" and "vf" entries of the architecture.
policy_kwargs = {
    "net_arch": {
        "pi": [256, 128],
        "vf": [256, 128],
    },
}


In [10]:
# We will use advanced PPO parameters for more stable training on this complex task.

# Fixed seed for the PPO-side randomness so that a fresh "Restart & Run All"
# starts from the same initial state instead of a different one on every run.
# NOTE(review): the environment logs that it fetches weather data from
# Open-Meteo, so runs may still differ if that data changes -- confirm.
SEED = 42

model = PPO(
    "MlpPolicy",
    wrapped_env,
    policy_kwargs=policy_kwargs,
    verbose=1,
    tensorboard_log=logs_dir,
    seed=SEED,             # Reproducible initialisation and sampling
    learning_rate=0.0001,  # A smaller learning rate is often better for complex tasks
    n_steps=4096,          # Collect more experience before each update
    batch_size=128,        # Process updates in smaller batches
    gamma=0.99,            # Emphasize long-term rewards
    gae_lambda=0.95
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [11]:
####  Training

# Periodic checkpoints, written to a "checkpoints" folder under the crop's log
# directory with file names prefixed by the lower-cased crop name.
checkpoint_callback = CheckpointCallback(
    save_freq=100_000,  # Save a checkpoint every 100,000 steps
    save_path=os.path.join(logs_dir, "checkpoints"),
    name_prefix="{}_model".format(CROP_TYPE.lower()),
)

In [12]:
# Print the pre-training banner in one call: the run description followed by
# the TensorBoard command pointing at the absolute log directory.
print(
    "\n".join(
        (
            "\n--- Starting CHAMPION training for {} ---".format(CROP_TYPE),
            "Total timesteps: {}".format(TRAINING_STEPS),
            "To view live training graphs, open a new terminal and run:",
            "tensorboard --logdir {}".format(os.path.abspath(logs_dir)),
        )
    )
)


--- Starting CHAMPION training for Wheat ---
Total timesteps: 3000000
To view live training graphs, open a new terminal and run:
tensorboard --logdir /Users/aadarshraj/Documents/ai_project/digital_twin_farm/logs/wheat_champion_logs


In [None]:
# Run training for the configured number of timesteps. The checkpoint callback
# is passed in, and the TensorBoard run is labelled with the crop name.
model.learn(
    callback=checkpoint_callback,
    tb_log_name="PPO_Champion_{}".format(CROP_TYPE),
    total_timesteps=TRAINING_STEPS,
)


In [None]:
#### Save the Final Model ---

# Write the trained model into the models directory under the crop-specific
# file name, then report where it was saved.
final_model_path = os.path.join(models_dir, MODEL_NAME)
model.save(final_model_path)

print("\n--- CHAMPION Training Complete! ---")
print("The definitive model for {} is saved at: {}".format(CROP_TYPE, final_model_path))