In [82]:
# notebooks/train_ultimate_champion.ipynb

import sys
import os
sys.path.append('../')

In [83]:
from simulation.farm_env import FarmEnv
from simulation.env_wrapper import MultiAgentActionWrapper
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback

In [84]:
#### Configuration: SET `SELECTED_PRESET` TO THE CROP YOU WANT TO TRAIN ---

# Each preset bundles a crop with its farm coordinates and training budget.
# Selecting a preset by key (instead of commenting/un-commenting groups of
# assignments) guarantees the four values below always belong together.
CROP_PRESETS = {
    # === Option 1: WHEAT in Punjab (Ludhiana) ===
    "wheat_punjab": dict(
        crop_type="Wheat",
        latitude=30.9010,
        longitude=75.8573,
        training_steps=1_000_000,
    ),
    # === Option 2: RICE in Bihar (Aurangabad) ===
    "rice_bihar": dict(
        crop_type="Rice",
        latitude=24.7500,
        longitude=84.3700,
        training_steps=3_000_000,
    ),
    # === Option 3: RICE in West Bengal (Kolkata) ===
    "rice_west_bengal": dict(
        crop_type="Rice",
        latitude=22.5726,
        longitude=88.3639,
        training_steps=3_000_000,
    ),
    # === Option 4: SUGARCANE in Uttar Pradesh (Lucknow) ===
    # Sugarcane has a much longer season, needs more training
    "sugarcane_up": dict(
        crop_type="Sugarcane",
        latitude=26.8467,
        longitude=80.9462,
        training_steps=5_000_000,
    ),
}

# Change this single line to train a different crop/location.
SELECTED_PRESET = "wheat_punjab"

# Fail early with the list of valid keys rather than a bare KeyError later on.
if SELECTED_PRESET not in CROP_PRESETS:
    raise KeyError(
        f"Unknown preset {SELECTED_PRESET!r}; choose one of {sorted(CROP_PRESETS)}"
    )

# Expose the selected values under the names the rest of the notebook reads.
_preset = CROP_PRESETS[SELECTED_PRESET]
CROP_TYPE = _preset["crop_type"]
LATITUDE = _preset["latitude"]
LONGITUDE = _preset["longitude"]
TRAINING_STEPS = _preset["training_steps"]

In [85]:
#### Crop-dependent artifact names and output folders ---
# NOTE(review): the "one_million" prefix is fixed text; it does not follow
# TRAINING_STEPS, so it also labels runs with a different step budget.
models_dir = "../models"
logs_dir = f"../logs/one_million_ultimate_champion_{CROP_TYPE.lower()}_logs/"
MODEL_NAME = f"one_million_ppo_ULTIMATE_CHAMPION_{CROP_TYPE.upper()}.zip"

# Create both output folders up front; existing folders are left untouched.
for output_dir in (models_dir, logs_dir):
    os.makedirs(output_dir, exist_ok=True)


# --- 2. Environment Setup ---
print(f"--- Creating ULTIMATE environment for CROP: {CROP_TYPE} at LOC: ({LATITUDE}, {LONGITUDE}) ---")

--- Creating ULTIMATE environment for CROP: Wheat at LOC: (30.901, 75.8573) ---


In [86]:
# Factory for the fully wrapped training environment
def make_env():
    """Build a FarmEnv for the configured crop and location, wrapped in MultiAgentActionWrapper.

    Reads the module-level CROP_TYPE, LATITUDE and LONGITUDE settings.

    Returns:
        The MultiAgentActionWrapper instance wrapping the new FarmEnv.
    """
    base_env = FarmEnv(crop_type=CROP_TYPE, latitude=LATITUDE, longitude=LONGITUDE)
    return MultiAgentActionWrapper(base_env)


# Instantiate the environment used for training
wrapped_env = make_env()
print("Environment ready.")

--- Loading ADVANCED Weather Forecaster Ensemble for LUDHIANA ---
Loaded advanced model 1/5 for ludhiana successfully.
Loaded advanced model 2/5 for ludhiana successfully.
Loaded advanced model 3/5 for ludhiana successfully.
Loaded advanced model 4/5 for ludhiana successfully.
Loaded advanced model 5/5 for ludhiana successfully.
Advanced stochastic weather forecaster for LUDHIANA loaded and ready.
Fetching ADVANCED weather data for Lat: 30.901, Long: 75.8573 from Open-Meteo...
Successfully fetched and processed advanced weather data.
Environment ready.


In [87]:
#### 3. Model Definition with a More Powerful Brain ---

# Fixed seed passed to PPO so its pseudo-random generators are seeded and a
# re-run of the notebook starts from the same initial weights and sampling.
# NOTE(review): the environment itself fetches weather data and uses a
# stochastic forecaster, so runs may still differ — confirm if exact
# reproducibility is required.
SEED = 42

# A large neural network to handle the 16 complex observations.
# Separate actor (pi) and critic (vf) towers, each 256 -> 256 -> 128 units.
policy_kwargs = dict(net_arch=dict(pi=[256, 256, 128], vf=[256, 256, 128]))

model = PPO(
    "MlpPolicy",
    wrapped_env,
    policy_kwargs=policy_kwargs,
    verbose=1,
    tensorboard_log=logs_dir,
    learning_rate=0.0001,
    n_steps=4096,  # rollout length collected before each policy update
    batch_size=128,
    gamma=0.995, # Higher gamma for very long-term rewards
    gae_lambda=0.95,
    seed=SEED,
)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [88]:
#### 4. Training ---

# Periodic snapshots during the long run, written beneath the log directory.
# save_freq is counted in callback calls; NOTE(review): with the single
# environment used here that should equal 100,000 timesteps — confirm.
checkpoint_dir = os.path.join(logs_dir, 'checkpoints')
checkpoint_prefix = f"{CROP_TYPE.lower()}_champion"

checkpoint_callback = CheckpointCallback(
    save_freq=100_000,
    save_path=checkpoint_dir,
    name_prefix=checkpoint_prefix,
)

In [89]:
# Pre-training banner: run summary plus the TensorBoard command for live graphs.
banner_lines = [
    f"\n--- Starting ULTIMATE CHAMPION training for {CROP_TYPE} ---",
    f"Total timesteps: {TRAINING_STEPS}",
    "To view live training graphs, open a new terminal, activate your environment,",
    "navigate to the project folder, and run:",
    f"tensorboard --logdir {os.path.abspath(logs_dir)}",
]
print("\n".join(banner_lines))


--- Starting ULTIMATE CHAMPION training for Wheat ---
Total timesteps: 1000000
To view live training graphs, open a new terminal, activate your environment,
navigate to the project folder, and run:
tensorboard --logdir /Users/aadarshraj/Documents/ai_project/digital_twin_farm/logs/one_million_ultimate_champion_wheat_logs


In [90]:
# --- 5. Train, then save the final model ---

# Destination of the finished model, resolved before the long run starts.
final_model_path = os.path.join(models_dir, MODEL_NAME)

# Run the full training budget; checkpoint_callback is passed so it is invoked
# during learning, and TensorBoard logs go under the crop-specific run name.
model.learn(
    callback=checkpoint_callback,
    tb_log_name=f"PPO_Ultimate_Champion_{CROP_TYPE}",
    total_timesteps=TRAINING_STEPS,
)

# Persist the trained model.
model.save(final_model_path)

print("\n--- ULTIMATE CHAMPION Training Complete! ---")
print(f"The definitive model for {CROP_TYPE} is saved at: {final_model_path}")

Logging to ../logs/one_million_ultimate_champion_wheat_logs/PPO_Ultimate_Champion_Wheat_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 90       |
|    ep_rew_mean     | 7.75     |
| time/              |          |
|    fps             | 89       |
|    iterations      | 1        |
|    time_elapsed    | 45       |
|    total_timesteps | 4096     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 90          |
|    ep_rew_mean          | 8.46        |
| time/                   |             |
|    fps                  | 91          |
|    iterations           | 2           |
|    time_elapsed         | 89          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.011870669 |
|    clip_fraction        | 0.0863      |
|    clip_range           | 0.2         |
|    entropy_loss       