In [9]:
import sys
import os
sys.path.append('../')

In [10]:
from simulation.farm_env import FarmEnv
from simulation.env_wrapper import MultiAgentActionWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback

In [11]:
#### Configuration: SET THE CROP AND MODEL NAME HERE ---

# -- CROP TO TRAIN --
# Change this to "Rice" or "Sugarcane" to train those models.
CROP_TYPE = "Wheat"

# Weather-data location (latitude, longitude) for each supported crop.
# The coordinates are looked up from CROP_TYPE so that switching crops can
# never leave the previous crop's latitude/longitude behind.
CROP_LOCATIONS = {
    "Wheat": (30.9010, 75.8573),      # Ludhiana
    "Rice": (22.5726, 88.3639),       # Kolkata
    "Sugarcane": (26.8467, 80.9462),  # Lucknow
}

if CROP_TYPE not in CROP_LOCATIONS:
    raise ValueError(
        f"Unknown CROP_TYPE {CROP_TYPE!r}; expected one of {sorted(CROP_LOCATIONS)}"
    )

LATITUDE, LONGITUDE = CROP_LOCATIONS[CROP_TYPE]

In [12]:
# -- TRAINING PARAMETERS --
# This is our most complex agent. It needs a very long training run to learn
# how to use the new forecast information effectively.
# The run below uses 1 million steps; raise the value (e.g. to 3_000_000) for
# a longer training run.
TRAINING_STEPS = 1_000_000
# File name of the final saved model; it embeds the upper-cased crop name.
MODEL_NAME = f"ppo_PRESCIENT_{CROP_TYPE.upper()}_champion.zip"

In [13]:
# -- DIRECTORIES --
# models_dir does not depend on the crop; logs_dir is named after the crop.
models_dir = "../models"
logs_dir = f"../logs/prescient_{CROP_TYPE.lower()}_logs/"

# Make sure both output folders exist (no error if they already do).
for _output_dir in (models_dir, logs_dir):
    os.makedirs(_output_dir, exist_ok=True)

In [14]:
#### Environment Setup ---
print(f"--- Creating PRESCIENT environment for CROP: {CROP_TYPE} at LOC: ({LATITUDE}, {LONGITUDE}) ---")


def make_env():
    """Build one training environment for the configured crop and location.

    Returns:
        A ``FarmEnv`` constructed from ``CROP_TYPE``, ``LATITUDE`` and
        ``LONGITUDE``, wrapped in ``MultiAgentActionWrapper``.
    """
    farm_env = FarmEnv(crop_type=CROP_TYPE, latitude=LATITUDE, longitude=LONGITUDE)
    # The wrapper is essential to handle the multi-action space.
    return MultiAgentActionWrapper(farm_env)

--- Creating PRESCIENT environment for CROP: Wheat at LOC: (30.901, 75.8573) ---


In [15]:
# Build the wrapped environment once via make_env() and keep it in
# `wrapped_env`; the message below confirms construction finished.
wrapped_env = make_env()
print("Environment ready for training.")

--- Loading Weather Forecaster Ensemble ---
Loaded model 1/5 successfully.
Loaded model 2/5 successfully.
Loaded model 3/5 successfully.
Loaded model 4/5 successfully.
Loaded model 5/5 successfully.
Stochastic weather forecaster loaded and ready.
Fetching real weather data for Lat: 30.901, Long: 75.8573 from Open-Meteo...
Sucessfully fetched and processed real weather data from Open-Meteo.
Environment ready for training.


In [16]:
#### Model Definition with a More Powerful Brain ---

# Fixed random seed so that weight initialisation and action sampling are
# repeatable from one run of this notebook to the next.
SEED = 42

# A larger neural network is needed for the new, larger observation space.
# We will use three hidden layers to process the complex forecast data.
# The policy (pi) and value (vf) networks get the same layer sizes.
policy_kwargs = dict(net_arch=dict(pi=[256, 256, 128], vf=[256, 256, 128]))

# We will use advanced PPO parameters for more stable training.
model = PPO(
    "MlpPolicy",
    wrapped_env,
    policy_kwargs=policy_kwargs,
    verbose=1,
    tensorboard_log=logs_dir,  # TensorBoard event files are written here
    learning_rate=0.0001,
    n_steps=4096,
    batch_size=128,
    gamma=0.99,
    gae_lambda=0.95,
    seed=SEED,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [17]:
## Training ---

# Periodic checkpointing: snapshots go into the 'checkpoints' sub-folder of
# the log directory, one every 100,000 steps, with a crop-specific prefix.
checkpoint_callback = CheckpointCallback(
    name_prefix=f"prescient_{CROP_TYPE.lower()}_model",
    save_path=os.path.join(logs_dir, "checkpoints"),
    save_freq=100_000,
)


In [18]:
# Announce the run and show the TensorBoard command for the log directory.
print(
    f"\n--- Starting PRESCIENT CHAMPION training for {CROP_TYPE} ---",
    f"Total timesteps: {TRAINING_STEPS}",
    "To view live training graphs, open a new terminal, activate your environment,",
    "navigate to the project folder, and run:",
    f"tensorboard --logdir {os.path.abspath(logs_dir)}",
    sep="\n",
)


--- Starting PRESCIENT CHAMPION training for Wheat ---
Total timesteps: 1000000
To view live training graphs, open a new terminal, activate your environment,
navigate to the project folder, and run:
tensorboard --logdir /Users/aadarshraj/Documents/ai_project/digital_twin_farm/logs/prescient_wheat_logs


In [19]:
# Start the training process
model.learn(
    total_timesteps=TRAINING_STEPS,
    tb_log_name=f"PPO_Prescient_Champion_{CROP_TYPE}",
    callback=checkpoint_callback
)

Logging to ../logs/prescient_wheat_logs/PPO_Prescient_Champion_Wheat_2
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 90       |
|    ep_rew_mean     | 9.71     |
| time/              |          |
|    fps             | 162      |
|    iterations      | 1        |
|    time_elapsed    | 25       |
|    total_timesteps | 4096     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 90          |
|    ep_rew_mean          | 9.22        |
| time/                   |             |
|    fps                  | 163         |
|    iterations           | 2           |
|    time_elapsed         | 50          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.012576029 |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.9        |
|

<stable_baselines3.ppo.ppo.PPO at 0x317ad1db0>

In [20]:
### Save the Final Model ---

# Write the trained model into the models folder under MODEL_NAME.
final_model_path = os.path.join(models_dir, MODEL_NAME)
model.save(final_model_path)

# Report completion and where the file was written.
print(
    "\n--- PRESCIENT CHAMPION Training Complete! ---",
    f"The definitive model for {CROP_TYPE} is saved at: {final_model_path}",
    sep="\n",
)


--- PRESCIENT CHAMPION Training Complete! ---
The definitive model for Wheat is saved at: ../models/ppo_PRESCIENT_WHEAT_champion.zip


In [None]:
# notebooks/train_ultimate_champion.ipynb

import sys
import os
sys.path.append('../')

from simulation.farm_env import FarmEnv
from simulation.env_wrapper import MultiAgentActionWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import CheckpointCallback

# --- 1. Configuration: SET `PRESET` TO THE CROP/LOCATION YOU WANT TO TRAIN ---

# Each preset bundles the crop, the coordinates used for its weather data and
# the training budget, so the four settings always change together instead of
# relying on commenting/uncommenting matching groups of lines.
TRAINING_PRESETS = {
    # WHEAT in Punjab (Ludhiana); more steps recommended for this advanced setup
    "wheat_punjab": {
        "crop_type": "Wheat",
        "latitude": 30.9010,
        "longitude": 75.8573,
        "training_steps": 5_000_000,
    },
    # RICE in Bihar (Aurangabad)
    "rice_bihar": {
        "crop_type": "Rice",
        "latitude": 24.7500,
        "longitude": 84.3700,
        "training_steps": 5_000_000,
    },
    # RICE in West Bengal (Kolkata)
    "rice_west_bengal": {
        "crop_type": "Rice",
        "latitude": 22.5726,
        "longitude": 88.3639,
        "training_steps": 5_000_000,
    },
    # SUGARCANE in Uttar Pradesh (Lucknow); longer season, more training
    "sugarcane_up": {
        "crop_type": "Sugarcane",
        "latitude": 26.8467,
        "longitude": 80.9462,
        "training_steps": 7_000_000,
    },
}

# Name of the preset to train; must be a key of TRAINING_PRESETS.
PRESET = "rice_bihar"

if PRESET not in TRAINING_PRESETS:
    raise ValueError(
        f"Unknown PRESET {PRESET!r}; expected one of {sorted(TRAINING_PRESETS)}"
    )

# Unpack the selected preset into the module-level names used below.
_selected_preset = TRAINING_PRESETS[PRESET]
CROP_TYPE = _selected_preset["crop_type"]
LATITUDE = _selected_preset["latitude"]
LONGITUDE = _selected_preset["longitude"]
TRAINING_STEPS = _selected_preset["training_steps"]

# ----------------------------------------------------------------

# --- Dynamic Naming based on selected crop ---
# The model file name uses the upper-cased crop; the log folder the lower-cased one.
MODEL_NAME = f"ppo_ULTIMATE_CHAMPION_{CROP_TYPE.upper()}.zip"
models_dir = "../models"
logs_dir = f"../logs/ultimate_champion_{CROP_TYPE.lower()}_logs/"

# Make sure both output folders exist (no error if they already do).
for _output_dir in (models_dir, logs_dir):
    os.makedirs(_output_dir, exist_ok=True)


# --- 2. Environment Setup with POWER-UP: Vectorization ---
print(f"--- Creating ULTIMATE environment for CROP: {CROP_TYPE} at LOC: ({LATITUDE}, {LONGITUDE}) ---")

# Constructor arguments forwarded to every FarmEnv instance.
env_kwargs = {"crop_type": CROP_TYPE, "latitude": LATITUDE, "longitude": LONGITUDE}

# Number of environment copies collected from at once.
num_cpu = 8

# Each FarmEnv copy is wrapped in MultiAgentActionWrapper.
# NOTE(review): no vec_env_cls is passed, so make_vec_env uses its default
# vectorized-env class — confirm that this gives the parallelism expected here.
vec_env = make_vec_env(
    FarmEnv,
    n_envs=num_cpu,
    wrapper_class=MultiAgentActionWrapper,
    env_kwargs=env_kwargs,
)
print(f"Created {num_cpu} parallel environments for training.")


# --- 3. Model Definition with a More Powerful Brain & POWER-UPs ---

# A large neural network for the complex observations: the policy (pi) and
# value (vf) networks each get hidden layers of 256, 256 and 128 units.
policy_kwargs = {
    "net_arch": {
        "pi": [256, 256, 128],
        "vf": [256, 256, 128],
    }
}


# POWER-UP: Adaptive Learning Rate
def linear_schedule(initial_value: float):
    """Build a learning-rate schedule that scales linearly with remaining progress.

    Args:
        initial_value: Learning rate returned when ``progress_remaining`` is 1.0.

    Returns:
        A function mapping ``progress_remaining`` to
        ``progress_remaining * initial_value``.
    """
    def schedule(progress_remaining: float) -> float:
        """Return the learning rate for the given fraction of training left."""
        return initial_value * progress_remaining

    return schedule

# Fixed random seed so that weight initialisation and action sampling are
# repeatable from one run to the next.
SEED = 42

# POWER-UP: Tuned Hyperparameters for a complex task
model = PPO(
    "MlpPolicy",
    vec_env,  # Use the vectorized environment
    policy_kwargs=policy_kwargs,
    verbose=1,
    tensorboard_log=logs_dir,  # TensorBoard event files are written here
    learning_rate=linear_schedule(0.0003),  # Start at 0.0003 and decrease to 0
    n_steps=2048,  # More experience collection before update
    batch_size=64,
    n_epochs=10,  # More optimization epochs per update
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.01,  # Encourage more exploration at the beginning
    seed=SEED,
)

# --- 4. Training ---

# Periodic checkpointing. The 100,000-step target is divided by the number of
# environments (never dropping below 1) so that it refers to total steps
# rather than per-environment steps.
checkpoint_callback = CheckpointCallback(
    name_prefix=f"{CROP_TYPE.lower()}_champion",
    save_path=os.path.join(logs_dir, "checkpoints"),
    save_freq=max(100_000 // num_cpu, 1),
)

# Announce the run and show the TensorBoard command for the log directory.
print(
    f"\n--- Starting ULTIMATE CHAMPION training for {CROP_TYPE} ---",
    f"Total timesteps: {TRAINING_STEPS}",
    "To view live training graphs, open a new terminal and run:",
    f"tensorboard --logdir {os.path.abspath(logs_dir)}",
    sep="\n",
)

# Where the final model will be written.
final_model_path = os.path.join(models_dir, MODEL_NAME)

try:
    # Start the training process
    # Note: total_timesteps is the overall total, it will be divided among the parallel envs
    model.learn(
        total_timesteps=TRAINING_STEPS,
        tb_log_name=f"PPO_Ultimate_Champion_{CROP_TYPE}",
        callback=checkpoint_callback
    )

    # --- 5. Save the Final Model ---
    model.save(final_model_path)
finally:
    # Close the vectorized environment even when training fails or is
    # interrupted, so the environments are not left open.
    vec_env.close()

print("\n--- ULTIMATE CHAMPION Training Complete! ---")
print(f"The definitive model for {CROP_TYPE} is saved at: {final_model_path}")