# Phase 4 Validation: RL Reward Modeling

Tests all Phase 4 reward signals:
- Colonist rewards: gather, deposit, build, ship completion, traitor elimination
- Traitor rewards: sabotage, poison kills, ship incomplete, colony collapse
- Voting rewards: correct/incorrect votes
- Ship milestone bonuses
- Energy management rewards

In [1]:
import sys
import importlib

# Clear cached modules
modules_to_clear = [m for m in list(sys.modules.keys()) if 'marooned' in m or m in ['environment', 'config', 'models', 'game_state', 'view_map']]
for module in modules_to_clear:
    if module in sys.modules:
        del sys.modules[module]

sys.path.insert(0, '../marooned_env')

from environment import MaroonedEnv
from config import (
    ActionType, ResourceType, MapLevel, ShipComponent,
    REWARD_COLONIST_GATHER_RESOURCE,
    REWARD_COLONIST_DEPOSIT_RESOURCE,
    REWARD_COLONIST_BUILD_CONTRIBUTE,
    REWARD_SHIP_MILESTONE_25,
    REWARD_TRAITOR_SABOTAGE_SUCCESS,
    REWARD_TRAITOR_POISON_DEATH,
)
from models import Action, Position

print("✅ Modules loaded")

✅ Modules loaded


In [2]:
# Initialize environment
env = MaroonedEnv(seed=42)
observations = env.reset()

# Find roles
traitor = None
colonists = []
for sailor_id, sailor in env.state.sailors.items():
    if env.state.is_traitor(sailor_id):
        traitor = sailor_id
    else:
        colonists.append(sailor_id)

print(f"Traitor: {traitor}")
print(f"Colonists: {colonists}")
print(f"\nReward tracking initialized: {hasattr(env, 'action_rewards')}")
print(f"Ship milestones tracker: {env.ship_milestones_reached}")

Traitor: Alice
Colonists: ['Bob', 'Charlie', 'Diana', 'Eve']

Reward tracking initialized: True
Ship milestones tracker: set()


## TEST 1: Colonist Gather Resource Reward

In [3]:
colonist = colonists[0]
sailor = env.state.sailors[colonist]

# Find nearest resource (any level, but prefer same level)
resources = []
for rid, r in env.state.world_map.resources.items():
    if not r.gathered and r.resource_type in [ResourceType.WOOD, ResourceType.METAL]:
        # Calculate distance (prefer same level)
        distance = r.position.distance_to(sailor.position)
        if r.position.level != sailor.position.level:
            distance += 100  # Penalty for different levels
        resources.append((rid, r, distance))

if not resources:
    print("❌ FAIL: No resources found on map")
    print("This should not happen - check map generation")
else:
    resources.sort(key=lambda x: x[2])
    res_id, resource, _ = resources[0]
    
    print(f"Target: {resource.resource_type.value} at {resource.position.to_tuple()}")
    
    # Navigate to resource (with obstacle avoidance)
    for attempt in range(50):
        if sailor.position == resource.position:
            break
        
        dx = resource.position.x - sailor.position.x
        dy = resource.position.y - sailor.position.y
        
        if abs(dx) > abs(dy) and dx != 0:
            action_type = ActionType.MOVE_EAST if dx > 0 else ActionType.MOVE_WEST
        elif dy != 0:
            action_type = ActionType.MOVE_SOUTH if dy > 0 else ActionType.MOVE_NORTH
        else:
            break
        
        action = Action(sailor_id=colonist, action_type=action_type)
        obs, _, _, _, info = env.step({colonist: action})
        
        if not info[colonist].get('success'):
            # Try perpendicular direction
            if abs(dy) > abs(dx) and dy != 0:
                action_type = ActionType.MOVE_SOUTH if dy > 0 else ActionType.MOVE_NORTH
            elif dx != 0:
                action_type = ActionType.MOVE_EAST if dx > 0 else ActionType.MOVE_WEST
            else:
                break
            action = Action(sailor_id=colonist, action_type=action_type)
            env.step({colonist: action})
    
    if sailor.position != resource.position:
        print(f"⚠️ Could not reach resource (stopped at {sailor.position.to_tuple()})")
    else:
        # Gather resource
        action = Action(sailor_id=colonist, action_type=ActionType.GATHER_RESOURCE, target_resource_id=res_id)
        obs, rewards, done, truncated, info = env.step({colonist: action})
        
        print(f"Gathered: {info[colonist].get('resource_type')} x{info[colonist].get('quantity')}")
        print(f"Reward signal set: {env.action_rewards[colonist].get('gathered_resource')}")
        print(f"Reward received: {rewards[colonist]:.3f}")
        print(f"Expected gather bonus: {REWARD_COLONIST_GATHER_RESOURCE}")
        print(f"✅ PASS" if env.action_rewards[colonist].get('gathered_resource') else "❌ FAIL")

Target: wood at (16, 16, <MapLevel.GROUND: 0>)
Gathered: wood x1
Reward signal set: True
Reward received: 0.140
Expected gather bonus: 0.1
✅ PASS


## TEST 2: Colonist Deposit Resource Reward

In [4]:
# Check if we have items to deposit
has_items = any(item.quantity > 0 for item in sailor.backpack)

if not has_items:
    print("❌ FAIL: No items in backpack to deposit (gather test may have failed)")
else:
    # Navigate to base camp
    base_pos = Position(15, 15, MapLevel.GROUND)
    for attempt in range(50):
        if sailor.position == base_pos:
            break
        
        dx = base_pos.x - sailor.position.x
        dy = base_pos.y - sailor.position.y
        
        if abs(dx) > abs(dy) and dx != 0:
            action_type = ActionType.MOVE_EAST if dx > 0 else ActionType.MOVE_WEST
        elif dy != 0:
            action_type = ActionType.MOVE_SOUTH if dy > 0 else ActionType.MOVE_NORTH
        else:
            break
        
        action = Action(sailor_id=colonist, action_type=action_type)
        obs, _, _, _, info = env.step({colonist: action})
        
        if not info[colonist].get('success'):
            # Try perpendicular
            if abs(dy) > abs(dx) and dy != 0:
                action_type = ActionType.MOVE_SOUTH if dy > 0 else ActionType.MOVE_NORTH
            elif dx != 0:
                action_type = ActionType.MOVE_EAST if dx > 0 else ActionType.MOVE_WEST
            else:
                break
            action = Action(sailor_id=colonist, action_type=action_type)
            env.step({colonist: action})
    
    # Deposit
    item = next((i for i in sailor.backpack if i.quantity > 0), None)
    if item:
        action = Action(sailor_id=colonist, action_type=ActionType.DEPOSIT_ITEM, 
                       resource_type=item.resource_type, quantity=item.quantity)
        obs, rewards, done, truncated, info = env.step({colonist: action})
        
        print(f"Deposited: {item.resource_type.value} x{item.quantity}")
        print(f"Reward signal set: {env.action_rewards[colonist].get('deposited_resource')}")
        print(f"Reward received: {rewards[colonist]:.3f}")
        print(f"Expected deposit bonus: {REWARD_COLONIST_DEPOSIT_RESOURCE}")
        print(f"✅ PASS" if env.action_rewards[colonist].get('deposited_resource') else "❌ FAIL")

Deposited: wood x0
Reward signal set: True
Reward received: 0.240
Expected deposit bonus: 0.2
✅ PASS


## TEST 3: Colonist Build Ship Reward

In [5]:
# Add sufficient resources for building
env.state.add_to_common_inventory(ResourceType.WOOD, 100)
env.state.add_to_common_inventory(ResourceType.METAL, 50)

# Build ship
action = Action(sailor_id=colonist, action_type=ActionType.BUILD_SHIP)
obs, rewards, done, truncated, info = env.step({colonist: action})

print(f"Build success: {info[colonist].get('success')}")
print(f"Component built: {info[colonist].get('component')}")
print(f"Ship progress: {env.state.ship_progress.total_percentage}%")
print(f"Reward signal set: {env.action_rewards[colonist].get('built_ship')}")
print(f"Reward received: {rewards[colonist]:.3f}")
print(f"Expected build bonus: {REWARD_COLONIST_BUILD_CONTRIBUTE}")
print(f"✅ PASS" if env.action_rewards[colonist].get('built_ship') else "❌ FAIL")

Build success: True
Component built: hull
Ship progress: 30%
Reward signal set: True
Reward received: 5.540
Expected build bonus: 0.5
✅ PASS


## TEST 4: Ship Milestone Rewards (25%, 50%, 75%)

In [6]:
# Build to 25% milestone
env.state.add_to_common_inventory(ResourceType.WOOD, 200)
env.state.add_to_common_inventory(ResourceType.METAL, 100)
env.state.add_to_common_inventory(ResourceType.PLANT_FIBER, 100)

milestones_hit = []
for i in range(10):
    prev_progress = env.state.ship_progress.total_percentage
    action = Action(sailor_id=colonist, action_type=ActionType.BUILD_SHIP)
    obs, rewards, done, truncated, info = env.step({colonist: action})
    
    if info[colonist].get('success'):
        curr_progress = env.state.ship_progress.total_percentage
        
        # Check for milestone crossings
        for milestone in [25, 50, 75]:
            if prev_progress < milestone <= curr_progress and milestone not in milestones_hit:
                milestones_hit.append(milestone)
                print(f"Milestone {milestone}% reached! Reward: {rewards[colonist]:.3f}")

print(f"\nMilestones reached: {sorted(milestones_hit)}")
print(f"Tracked milestones: {sorted(env.ship_milestones_reached)}")
print(f"Final ship progress: {env.state.ship_progress.total_percentage}%")
print(f"✅ PASS" if len(milestones_hit) > 0 else "❌ FAIL")

Milestone 50% reached! Reward: 10.540
Milestone 75% reached! Reward: 15.540

Milestones reached: [50, 75]
Tracked milestones: [25, 50, 75]
Final ship progress: 90%
✅ PASS


## TEST 5: Traitor Sabotage Reward

In [7]:
# Setup: Build a component to sabotage
from models import ShipComponentProgress
env.state.ship_progress.components[ShipComponent.HULL] = ShipComponentProgress(
    component=ShipComponent.HULL, progress_percentage=30, completed=True
)
env.state.ship_progress.recalculate_total()
prev_progress = env.state.ship_progress.total_percentage

# Sabotage
action = Action(sailor_id=traitor, action_type=ActionType.SABOTAGE_SHIP, 
               ship_component=ShipComponent.HULL)
obs, rewards, done, truncated, info = env.step({traitor: action})

print(f"Sabotage success: {info[traitor].get('success')}")
print(f"Damage dealt: {info[traitor].get('damage')}%")
print(f"Reward signal set: {env.action_rewards[traitor].get('sabotaged')}")
print(f"Reward received: {rewards[traitor]:.3f}")
print(f"Expected sabotage bonus: {REWARD_TRAITOR_SABOTAGE_SUCCESS}")
print(f"✅ PASS" if env.action_rewards[traitor].get('sabotaged') else "❌ FAIL")

Sabotage success: True
Damage dealt: 34%
Reward signal set: True
Reward received: 1.990
Expected sabotage bonus: 2.0
✅ PASS


## TEST 6: Traitor Poison Kill Reward

In [8]:
# Setup: Poison a colonist and advance to death
victim = colonists[0]
victim_sailor = env.state.sailors[victim]

# Mark as poisoned (simulate poison offering)
victim_sailor.poisoned_on_day = env.state.current_day
victim_sailor.poisoned_by = traitor

print(f"Victim poisoned on day {env.state.current_day}")

# Advance to death day (3 days later)
from config import POISON_DEATH_DAY
for day in range(POISON_DEATH_DAY + 1):
    for turn in range(100):
        env.step({traitor: Action(sailor_id=traitor, action_type=ActionType.WAIT)})

print(f"Current day: {env.state.current_day}")
print(f"Victim alive: {victim_sailor.alive}")
print(f"Reward signal set: {env.action_rewards.get(traitor, {}).get('poison_kill')}")
print(f"Expected poison kill bonus: {REWARD_TRAITOR_POISON_DEATH}")
print(f"✅ PASS" if not victim_sailor.alive else "❌ FAIL")

Victim poisoned on day 1
Current day: 5
Victim alive: False
Reward signal set: None
Expected poison kill bonus: 10.0
✅ PASS


## TEST 7: Voting Rewards (Correct vs Incorrect)

In [9]:
# Reset for voting test
env = MaroonedEnv(seed=100)
observations = env.reset()

traitor = [sid for sid in env.state.sailors.keys() if env.state.is_traitor(sid)][0]
colonists = [sid for sid in env.state.sailors.keys() if not env.state.is_traitor(sid)]

# Advance to discussion phase
while env.state.current_phase != 'discussion':
    env.step({colonists[0]: Action(sailor_id=colonists[0], action_type=ActionType.WAIT)})

# Call vote
env.step({colonists[0]: Action(sailor_id=colonists[0], action_type=ActionType.CALL_VOTE)})

# Everyone votes for traitor
for sid in env.state.sailors.keys():
    if env.state.sailors[sid].alive:
        action = Action(sailor_id=sid, action_type=ActionType.VOTE, vote_target=traitor)
        obs, rewards, done, truncated, info = env.step({sid: action})

print(f"Vote completed: {info[colonists[0]].get('eliminated')}")
print(f"Was traitor: {info[colonists[0]].get('was_traitor')}")

# Check voting rewards
for sid in colonists[:2]:
    voted_correctly = env.action_rewards.get(sid, {}).get('voted_correctly', False)
    print(f"{sid} voted correctly: {voted_correctly}")

print(f"✅ PASS" if any(env.action_rewards.get(sid, {}).get('voted_correctly') for sid in colonists) else "❌ FAIL")

Vote completed: None
Was traitor: None
Alice voted correctly: True
Bob voted correctly: True
✅ PASS


## TEST 8: Energy Management Rewards

In [10]:
env = MaroonedEnv(seed=200)
observations = env.reset()
colonist = [sid for sid in env.state.sailors.keys() if not env.state.is_traitor(sid)][0]
sailor = env.state.sailors[colonist]

# Test high energy (>50) - should get small bonus
sailor.energy = 80
action = Action(sailor_id=colonist, action_type=ActionType.WAIT)
obs, rewards, done, truncated, info = env.step({colonist: action})
high_energy_reward = rewards[colonist]

# Test low energy (<20) - should get penalty
sailor.energy = 15
action = Action(sailor_id=colonist, action_type=ActionType.WAIT)
obs, rewards, done, truncated, info = env.step({colonist: action})
low_energy_reward = rewards[colonist]

print(f"High energy (80) reward: {high_energy_reward:.3f}")
print(f"Low energy (15) reward: {low_energy_reward:.3f}")
print(f"Penalty difference: {low_energy_reward - high_energy_reward:.3f}")
print(f"✅ PASS" if low_energy_reward < high_energy_reward else "❌ FAIL")

High energy (80) reward: 0.040
Low energy (15) reward: -0.510
Penalty difference: -0.550
✅ PASS


## TEST 9: Ship Completion Win Reward

In [11]:
env = MaroonedEnv(seed=300)
observations = env.reset()
colonist = [sid for sid in env.state.sailors.keys() if not env.state.is_traitor(sid)][0]

# Set ship to 100%
env.state.ship_progress.total_percentage = 100
env.state.game_over = True

# Take step to trigger reward
action = Action(sailor_id=colonist, action_type=ActionType.WAIT)
obs, rewards, done, truncated, info = env.step({colonist: action})

print(f"Ship at 100%: {env.state.ship_progress.total_percentage}%")
print(f"Game over: {done}")
print(f"Colonist reward: {rewards[colonist]:.3f}")
from config import REWARD_COLONIST_SHIP_COMPLETE
print(f"Expected bonus: {REWARD_COLONIST_SHIP_COMPLETE}")
print(f"✅ PASS" if rewards[colonist] > 50 else "❌ FAIL")

Ship at 100%: 100%
Game over: {'Alice': True, 'Bob': True, 'Charlie': True, 'Diana': True, 'Eve': True}
Colonist reward: 330.040
Expected bonus: 100.0
✅ PASS


## TEST 10: Traitor Victory (Time Expiry) Reward

In [12]:
env = MaroonedEnv(seed=400)
observations = env.reset()
traitor = [sid for sid in env.state.sailors.keys() if env.state.is_traitor(sid)][0]

# Set to day 100 with incomplete ship
env.state.current_day = 100
env.state.ship_progress.total_percentage = 50
env.state.game_over = True

# Take step to trigger reward
action = Action(sailor_id=traitor, action_type=ActionType.WAIT)
obs, rewards, done, truncated, info = env.step({traitor: action})

print(f"Day 100 reached: {env.state.current_day == 100}")
print(f"Ship incomplete: {env.state.ship_progress.total_percentage}%")
print(f"Game over: {done}")
print(f"Traitor reward: {rewards[traitor]:.3f}")
from config import REWARD_TRAITOR_SHIP_INCOMPLETE
print(f"Expected bonus: {REWARD_TRAITOR_SHIP_INCOMPLETE}")
print(f"✅ PASS" if rewards[traitor] > 50 else "❌ FAIL")

Day 100 reached: True
Ship incomplete: 50%
Game over: {'Alice': True, 'Bob': True, 'Charlie': True, 'Diana': True, 'Eve': True}
Traitor reward: 74.990
Expected bonus: 100.0
✅ PASS


## TEST 11: Comprehensive Reward Summary

In [13]:
from config import (
    REWARD_BASE_TURN_PENALTY,
    REWARD_COLONIST_GATHER_RESOURCE,
    REWARD_COLONIST_DEPOSIT_RESOURCE,
    REWARD_COLONIST_BUILD_CONTRIBUTE,
    REWARD_COLONIST_SHIP_COMPLETE,
    REWARD_COLONIST_TRAITOR_ELIMINATED,
    REWARD_COLONIST_DEATH,
    REWARD_TRAITOR_SABOTAGE_SUCCESS,
    REWARD_TRAITOR_POISON_DEATH,
    REWARD_TRAITOR_SHIP_INCOMPLETE,
    REWARD_SHIP_MILESTONE_25,
    REWARD_SHIP_MILESTONE_50,
    REWARD_SHIP_MILESTONE_75,
)

print("\n" + "="*60)
print("PHASE 4 REWARD CONFIGURATION")
print("="*60)

print("\n📊 COLONIST REWARDS:")
print(f"  Gather resource:        +{REWARD_COLONIST_GATHER_RESOURCE}")
print(f"  Deposit resource:       +{REWARD_COLONIST_DEPOSIT_RESOURCE}")
print(f"  Build ship:             +{REWARD_COLONIST_BUILD_CONTRIBUTE}")
print(f"  Ship 100%:              +{REWARD_COLONIST_SHIP_COMPLETE}")
print(f"  Traitor eliminated:     +{REWARD_COLONIST_TRAITOR_ELIMINATED}")
print(f"  Death:                  {REWARD_COLONIST_DEATH}")

print("\n🏴‍☠️ TRAITOR REWARDS:")
print(f"  Sabotage:               +{REWARD_TRAITOR_SABOTAGE_SUCCESS}")
print(f"  Poison kill:            +{REWARD_TRAITOR_POISON_DEATH}")
print(f"  Ship incomplete:        +{REWARD_TRAITOR_SHIP_INCOMPLETE}")

print("\n🎯 MILESTONE REWARDS:")
print(f"  25% ship progress:      +{REWARD_SHIP_MILESTONE_25}")
print(f"  50% ship progress:      +{REWARD_SHIP_MILESTONE_50}")
print(f"  75% ship progress:      +{REWARD_SHIP_MILESTONE_75}")

print("\n⏱️  BASE PENALTY:")
print(f"  Per turn:               {REWARD_BASE_TURN_PENALTY}")

print("\n" + "="*60)
print("✅ ALL PHASE 4 REWARD SIGNALS CONFIGURED")
print("="*60)


PHASE 4 REWARD CONFIGURATION

📊 COLONIST REWARDS:
  Gather resource:        +0.1
  Deposit resource:       +0.2
  Build ship:             +0.5
  Ship 100%:              +100.0
  Traitor eliminated:     +100.0
  Death:                  -50.0

🏴‍☠️ TRAITOR REWARDS:
  Sabotage:               +2.0
  Poison kill:            +10.0
  Ship incomplete:        +100.0

🎯 MILESTONE REWARDS:
  25% ship progress:      +5.0
  50% ship progress:      +10.0
  75% ship progress:      +15.0

⏱️  BASE PENALTY:
  Per turn:               -0.01

✅ ALL PHASE 4 REWARD SIGNALS CONFIGURED
