In [None]:
# TRAIN GAME V1 (OLD)

import random
import time
import numpy as np

class TrainGameEnv:
    def __init__(self, initial_capacity=100, seed=None, verbose=False):
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        # train state
        self.initial_capacity = initial_capacity
        self.capacity = initial_capacity
        self.passengers_onboard = 0

        # scoring trackers
        self.raw_score = 0.0
        self.total_boarded = 0
        self.total_unused = 0.0
        self.total_config_cost = 0.0

        # stations (LRT-2)
        self.stations = [
            "Recto", "Legarda", "Pureza", "V. Mapa", "J. Ruiz", "Gilmore",
            "Betty Go", "Cubao", "Anonas", "Katipunan",
            "Santolan", "Marikina", "Antipolo"
        ]
        self.num_stations = len(self.stations)
        self.station_idx = 0
        self.direction = +1

        # collapse mechanic
        self.weight_window = []
        self.window_size = 10
        self.base_collapse_threshold = 10.0

        # time + progression
        self.sim_hour = random.randint(0, 23)
        self.steps = 0
        self.max_steps = 2000
        self.station_visits = 0

        self.done = False
        self.done_reason = None
        self.verbose = verbose

    # ----------------- helpers -----------------
    def _time_multiplier(self, hour):
        if 6 <= hour <= 8:   # morning rush
            return 1.9
        if 11 <= hour <= 13: # lunch rush
            return 1.6
        if 17 <= hour <= 19: # evening rush
            return 1.9
        return 0.8 if random.random() < 0.45 else 1.0

    def _arrival_bounds(self, idx):
        if idx in (0, self.num_stations-1, 7):  # terminals + Cubao
            return (40, 150)
        return (10, 70)

    def _simulate_arrivals(self):
        amin, amax = self._arrival_bounds(self.station_idx)
        base = random.randint(amin, amax)
        mult = self._time_multiplier(self.sim_hour)

        # ✅ Passenger Surge Scaling
        surge_factor = 1.0 + (self.steps / 2000) * 2.0
        return max(0, int(round(base * mult * surge_factor)))

    def reset(self):
        self.capacity = self.initial_capacity
        self.passengers_onboard = 0
        self.raw_score = 0.0
        self.total_boarded = 0
        self.total_unused = 0.0
        self.total_config_cost = 0.0
        self.station_idx = 0
        self.direction = +1
        self.weight_window = []
        self.sim_hour = random.randint(0,23)
        self.steps = 0
        self.station_visits = 0
        self.done = False
        self.done_reason = None
        return self._get_state()

    def _get_state(self):
        return np.array([
            float(self.capacity),
            float(self.passengers_onboard),
            float(self.station_idx),
            float(self.direction),
            float(self.sim_hour)
        ], dtype=np.float32)

    # ----------------- main step -----------------
    def step(self, action):
        if self.done:
            raise RuntimeError("Environment is done. Call reset().")

        # action effects
        if action == 0:  # Dagdag
            self.capacity += 100
            cost = 10.0
            weight = 1.0
        elif action == 1:  # Lapad
            self.capacity += 50
            cost = 5.0
            weight = 0.5
        else:
            cost = 0.0
            weight = 0.0

        # config penalty (small immediate penalty)
        config_penalty = 0.5 * cost
        self.total_config_cost += cost
        self.raw_score -= config_penalty

        # ✅ Soft Collapse Pressure
        collapse_threshold = max(3.0, self.base_collapse_threshold - (self.steps / 200))
        self.weight_window.append(weight)
        if len(self.weight_window) > self.window_size:
            self.weight_window.pop(0)
        if sum(self.weight_window) >= collapse_threshold:
            self.done = True
            self.done_reason = f"Collapse at station {self.stations[self.station_idx]}"
            self.raw_score -= 200.0
            return self._get_state(), -500.0, True, {"reason": self.done_reason}

        # random alighting
        if self.passengers_onboard > 0:
            alight = random.randint(0, self.passengers_onboard)
            self.passengers_onboard -= alight

        # terminal reset
        if self.station_idx in (0, self.num_stations-1):
            self.passengers_onboard = 0

        # arrivals + boarding
        arrivals = self._simulate_arrivals()
        space = max(0, self.capacity - self.passengers_onboard)
        boarded = min(arrivals, space)
        self.passengers_onboard += boarded

        unused = max(0, self.capacity - self.passengers_onboard)

        # Reward for boarding
        reward_board = 1.5 * boarded  # ✅ Lowered multiplier to reduce runaway positive scores

        # Penalty for unused space
        penalty_unused = 0.2 * unused   # ✅ Stronger penalty to discourage overbuilding

        # Penalty for overcapacity / collapse pressure
        penalty_collapse = 0.3 * max(0, (self.passengers_onboard - self.capacity))

        # Config penalty (cost of expanding capacity)
        config_penalty = 0.8 * cost     # ✅ Make choices matter more

        # Station reward formula
        station_reward = reward_board - penalty_unused - penalty_collapse - config_penalty


        # update stats
        self.raw_score += station_reward
        self.total_boarded += boarded
        self.total_unused += unused
        self.station_visits += 1
        self.steps += 1

        # advance time
        self.sim_hour = (self.sim_hour + random.randint(0,2)) % 24

        # station movement
        next_idx = self.station_idx + self.direction
        if next_idx < 0 or next_idx >= self.num_stations:
            self.direction *= -1
            next_idx = self.station_idx + self.direction
            self.passengers_onboard = 0
        self.station_idx = next_idx

        # stop if too long
        if self.steps >= self.max_steps:
            self.done = True
            self.done_reason = "Max steps reached."

        difficulty_scale = 1.0 + (self.steps / 1000)  # grows over time
        station_reward *= difficulty_scale


        return self._get_state(), station_reward - (0.1 * cost), self.done, {}

    def final_score(self):
        distance_bonus = self.station_visits * 50
        effective_score = self.raw_score + distance_bonus
        S_min = -50 * self.station_visits
        S_max = 200 * self.station_visits
        normalized = round(1 + ((effective_score - S_min) / (S_max - S_min)) * 99)
        return max(1, min(100, normalized)), effective_score


# ===============================
# ASCII DISPLAY
# ===============================
def draw_train(env):
    track = ["-"] * env.num_stations
    idx = env.station_idx
    track[idx] = "🚉"
    train = "🚂" if env.direction == 1 else "🚋"
    print("\n" + "".join(track))
    print(" " * idx + train)
    print(f"📍 {env.stations[idx]} | Cap: {env.capacity} | Onboard: {env.passengers_onboard}")



In [None]:
# PLAY FUNCTION V1 (OLD)
def play_game(auto=True, max_rounds=40, delay=0.3):
    env = TrainGameEnv(initial_capacity=100)
    print("🚆 Welcome to Dagdag o Lapad 🚆")
    print("Actions: 0 = Dagdag, 1 = Lapad, 2 = None\n")

    for _ in range(max_rounds):
        draw_train(env)
        if auto:
            action = random.choice([0, 1, 2])
            print(f"🎲 Random Agent chose: {action}")
        else:
            try:
                action = int(input("Choose [0=Dagdag, 1=Lapad, 2=None]: "))
                if action not in [0,1,2]:
                    action = 2
            except:
                action = 2

        _, reward, done, info = env.step(action)
        print(f"✅ Reward: {reward:.2f} | Onboard: {env.passengers_onboard}\n")
        time.sleep(delay)

        if done:
            print(f"❌ Game ended: {env.done_reason}")
            break

    final_norm, final_raw = env.final_score()
    print("\n============================")
    print("🏁 Game Over!")
    print(f"📊 Raw Score: {final_raw:.2f}")
    print(f"⭐ Normalized Score: {final_norm}/100")
    print("============================")


# ===============================
# RUN SHOWCASE
# ===============================
play_game(auto=True, max_rounds=50, delay=0.2)


🚆 Welcome to Dagdag o Lapad 🚆
Actions: 0 = Dagdag, 1 = Lapad, 2 = None


🚉------------
🚂
📍 Recto | Cap: 100 | Onboard: 0
🎲 Random Agent chose: 2
✅ Reward: 150.15 | Onboard: 100


-🚉-----------
 🚂
📍 Legarda | Cap: 100 | Onboard: 100
🎲 Random Agent chose: 1
✅ Reward: 30.46 | Onboard: 62


--🚉----------
  🚂
📍 Pureza | Cap: 150 | Onboard: 62
🎲 Random Agent chose: 0
✅ Reward: -4.61 | Onboard: 47


---🚉---------
   🚂
📍 V. Mapa | Cap: 250 | Onboard: 47
🎲 Random Agent chose: 2
✅ Reward: 69.48 | Onboard: 71


----🚉--------
    🚂
📍 J. Ruiz | Cap: 250 | Onboard: 71
🎲 Random Agent chose: 0
✅ Reward: 46.94 | Onboard: 96


-----🚉-------
     🚂
📍 Gilmore | Cap: 350 | Onboard: 96
🎲 Random Agent chose: 2
✅ Reward: 101.20 | Onboard: 148


------🚉------
      🚂
📍 Betty Go | Cap: 350 | Onboard: 148
🎲 Random Agent chose: 2
✅ Reward: 136.25 | Onboard: 224


-------🚉-----
       🚂
📍 Cubao | Cap: 350 | Onboard: 224
🎲 Random Agent chose: 2
✅ Reward: 131.75 | Onboard: 246


--------🚉----
        🚂
📍 Anonas | Ca

In [7]:
# TRAIN GAME V2 - OPTIMIZED SCORING
# Efficient transit management simulation with realistic scoring

import random
import time
import numpy as np

class TrainGameEnv:
    def __init__(self, initial_capacity=100, seed=None, verbose=False):
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        # Core game state
        self.initial_capacity = initial_capacity
        self.capacity = initial_capacity
        self.passengers_onboard = 0

        # Performance tracking
        self.raw_score = 0.0
        self.total_boarded = 0
        self.total_unused = 0.0
        self.total_config_cost = 0.0
        self.peak_inefficiency = 0  # Track worst excess capacity

        # Station configuration (LRT-2 line)
        self.stations = [
            "Recto", "Legarda", "Pureza", "V. Mapa", "J. Ruiz", "Gilmore",
            "Betty Go", "Cubao", "Anonas", "Katipunan",
            "Santolan", "Marikina", "Antipolo"
        ]
        self.num_stations = len(self.stations)
        self.station_idx = 0
        self.direction = +1

        # Infrastructure stress management
        self.weight_window = []
        self.window_size = 10
        self.base_collapse_threshold = 10.0

        # Time management (04:00-22:00 operating hours)
        self.sim_minutes = random.randint(4 * 60, 22 * 60 - 1)
        self.steps = 0
        self.max_steps = 2000
        self.station_visits = 0

        # Efficiency tracking
        self.previous_onboard = 0

        self.done = False
        self.done_reason = None
        self.verbose = verbose

    # ----------------- core simulation methods -----------------
    def _time_multiplier(self, hour):
        """Get passenger multiplier based on time of day"""
        if 6 <= hour <= 8:   return 1.9  # Morning rush
        if 11 <= hour <= 13: return 1.6  # Lunch rush  
        if 17 <= hour <= 19: return 1.9  # Evening rush
        return 0.8 if random.random() < 0.45 else 1.0

    def _arrival_bounds(self, idx):
        """Get min/max arrivals for station type"""
        if idx in (0, self.num_stations-1, 7):  # Terminals + major hub
            return (40, 150)
        return (10, 70)

    def _simulate_arrivals(self):
        """Calculate arriving passengers at current station"""
        amin, amax = self._arrival_bounds(self.station_idx)
        base = random.randint(amin, amax)
        
        current_hour = self.sim_minutes // 60
        mult = self._time_multiplier(current_hour)

        # Passenger volume increases over time
        surge_factor = 1.0 + (self.steps / 2000) * 2.0
        return max(0, int(round(base * mult * surge_factor)))

    def _calculate_efficiency_penalty(self, unused_space, alighted_passengers, previous_onboard, current_hour):
        """Calculate penalty based on operational efficiency"""
        base_penalty = 0.3 * unused_space
        penalty_growth = 1.0 + (self.steps / 1000)
        
        # Reduced penalty for empty trains starting service
        if previous_onboard == 0:
            return base_penalty * penalty_growth * 0.1
        
        alighting_ratio = alighted_passengers / previous_onboard
        
        # Time-based efficiency expectations
        if 6 <= current_hour <= 8 or 17 <= current_hour <= 19:
            expected_efficiency = 0.7  # Rush hour
        elif 11 <= current_hour <= 13:
            expected_efficiency = 0.6  # Lunch hour
        else:
            expected_efficiency = 0.4  # Off-peak

        # Station-type efficiency expectations
        if self.station_idx in (0, self.num_stations-1):
            station_factor = 0.9  # Terminals
        elif self.station_idx == 7:
            station_factor = 0.8  # Major hub
        else:
            station_factor = 0.6  # Regular stations

        target_efficiency = max(0.3, min(0.95, expected_efficiency * station_factor))
        
        # Larger capacity = higher waste penalty
        capacity_factor = 0.5 + (self.capacity / 2000)
        
        # Efficiency-based penalty scaling
        if alighting_ratio > target_efficiency + 0.15:
            efficiency_multiplier = 0.1  # High efficiency
        elif alighting_ratio > target_efficiency:
            efficiency_multiplier = 0.3  # Good efficiency
        elif alighting_ratio > target_efficiency * 0.7:
            efficiency_multiplier = 0.6  # Moderate efficiency
        else:
            efficiency_multiplier = 1.2  # Poor efficiency
        
        efficiency_multiplier *= capacity_factor
        return base_penalty * penalty_growth * efficiency_multiplier

    def reset(self):
        """Reset environment to initial state"""
        self.capacity = self.initial_capacity
        self.passengers_onboard = 0
        self.raw_score = 0.0
        self.total_boarded = 0
        self.total_unused = 0.0
        self.total_config_cost = 0.0
        self.peak_inefficiency = 0
        self.station_idx = 0
        self.direction = +1
        self.weight_window = []
        
        self.sim_minutes = random.randint(4 * 60, 22 * 60 - 1)
        self.steps = 0
        self.station_visits = 0
        self.previous_onboard = 0
        self.done = False
        self.done_reason = None
        return self._get_state()

    def _get_state(self):
        """Get current environment state vector"""
        hour = self.sim_minutes // 60
        minute = self.sim_minutes % 60
        return np.array([
            float(self.capacity),
            float(self.passengers_onboard),
            float(self.station_idx),
            float(self.direction),
            float(hour),
            float(minute)
        ], dtype=np.float32)

    def step(self, action):
        """Execute one game step with given action"""
        if self.done:
            raise RuntimeError("Environment is done. Call reset().")

        alighted_passengers = 0
        self.previous_onboard = self.passengers_onboard

        # Process capacity actions
        if action == 0:  # Add carriage (+100 capacity)
            self.capacity += 100
            cost, weight = 10.0, 1.0
        elif action == 1:  # Widen carriage (+50 capacity)  
            self.capacity += 50
            cost, weight = 5.0, 0.5
        else:  # No action
            cost, weight = 0.0, 0.0

        # Apply configuration cost penalty
        config_penalty = 2.0 * cost
        self.total_config_cost += cost
        self.raw_score -= config_penalty

        # Check for infrastructure collapse
        collapse_threshold = max(3.0, self.base_collapse_threshold - (self.steps / 200))
        self.weight_window.append(weight)
        if len(self.weight_window) > self.window_size:
            self.weight_window.pop(0)
        if sum(self.weight_window) >= collapse_threshold:
            self.done = True
            self.done_reason = f"Collapse at station {self.stations[self.station_idx]}"
            self.raw_score -= 500.0
            return self._get_state(), -1000.0, True, {"reason": self.done_reason, "alighted": 0}

        # Passenger alighting simulation
        if self.passengers_onboard > 0:
            alighted_passengers = random.randint(0, self.passengers_onboard)
            self.passengers_onboard -= alighted_passengers

        # Clear train at terminal stations
        if self.station_idx in (0, self.num_stations-1):
            self.passengers_onboard = 0

        # Passenger boarding simulation
        arrivals = self._simulate_arrivals()
        space = max(0, self.capacity - self.passengers_onboard)
        boarded = min(arrivals, space)
        self.passengers_onboard += boarded

        unused = max(0, self.capacity - self.passengers_onboard)
        self.peak_inefficiency = max(self.peak_inefficiency, unused)

        # Calculate rewards and penalties
        current_hour = self.sim_minutes // 60
        penalty_unused = self._calculate_efficiency_penalty(
            unused, alighted_passengers, self.previous_onboard, current_hour
        )
        
        reward_board = 1.5 * boarded
        station_reward = reward_board - penalty_unused

        # Update game state
        self.raw_score += station_reward
        self.total_boarded += boarded
        self.total_unused += unused
        self.station_visits += 1
        self.steps += 1

        # Advance simulation time
        self.sim_minutes += 5
        if self.sim_minutes >= 22 * 60:
            self.done = True
            self.done_reason = "End of operating hours (22:00)"

        # Move to next station
        next_idx = self.station_idx + self.direction
        if next_idx < 0 or next_idx >= self.num_stations:
            self.direction *= -1
            next_idx = self.station_idx + self.direction
            self.passengers_onboard = 0
        self.station_idx = next_idx

        # Check step limit
        if self.steps >= self.max_steps:
            self.done = True
            self.done_reason = "Max steps reached."

        return self._get_state(), station_reward - (0.2 * cost), self.done, {
            "alighted": alighted_passengers,
            "penalty_unused": penalty_unused,
            "efficiency_ratio": alighted_passengers / max(1, self.previous_onboard)
        }

    def final_score(self):
        """Calculate final normalized score (1-100)"""
        distance_bonus = self.station_visits * 10
        efficiency_penalty = self.peak_inefficiency * 0.1
        config_efficiency = max(0, 1 - (self.total_config_cost / (self.total_boarded + 1)))
        
        effective_score = (self.raw_score + distance_bonus - efficiency_penalty) * config_efficiency
        
        # Normalize to 1-100 scale
        S_min = -80 * self.station_visits
        S_max = 100 * self.station_visits
        normalized = round(1 + ((effective_score - S_min) / (S_max - S_min)) * 99)
        
        return max(1, min(100, normalized)), effective_score
    
    
# ===============================
# VISUALIZATION
# ===============================
def draw_train(env):
    """Display current train position and status"""
    track = ["-"] * env.num_stations
    idx = env.station_idx
    track[idx] = "🚉"
    train = "🚂" if env.direction == 1 else "🚋"
    print("\n" + "".join(track))
    print(" " * idx + train)
    print(f"📍 {env.stations[idx]} | Cap: {env.capacity} | Onboard: {env.passengers_onboard}")

In [8]:
# SMART RANDOM PLAY FUNCTION

def play_game_smart_random(auto=True, max_rounds=40, delay=0.3):
    env = TrainGameEnv(initial_capacity=100)
    
    # Track metrics
    total_alighted = 0
    peak_excess_space = 0
    total_excess_space = 0
    station_count = 0
    
    print("🚆 Welcome to Dagdag o Lapad 🚆")
    print("Actions: 0 = Dagdag, 1 = Lapad, 2 = None")
    
    # Display starting time
    start_hour = env.sim_minutes // 60
    start_minute = env.sim_minutes % 60
    print(f"🕐 Starting at: {start_hour:02d}:{start_minute:02d}\n")

    for _ in range(max_rounds):
        draw_train(env)
        
        # Display current time and direction
        current_hour = env.sim_minutes // 60
        current_minute = env.sim_minutes % 60
        direction = "NB" if env.direction == 1 else "SB"
        print(f"🕐 Time: {current_hour:02d}:{current_minute:02d} | Direction: {direction}")
        
        if auto:
            # SMART RANDOM: Weight actions based on current situation
            utilization = env.passengers_onboard / max(1, env.capacity)
            current_hour = env.sim_minutes // 60
            
            # Base weights (favor doing nothing)
            weights = [0.2, 0.2, 0.6]  # [Dagdag, Lapad, None]
            
            # Adjust weights based on utilization
            if utilization > 0.8:  # High utilization - favor expansion
                weights = [0.5, 0.3, 0.2]
            elif utilization < 0.3:  # Low utilization - favor doing nothing
                weights = [0.1, 0.1, 0.8]
            
            # Adjust based on time of day (rush hour vs off-peak)
            if 6 <= current_hour <= 8 or 17 <= current_hour <= 19:  # Rush hours
                weights = [weights[0] + 0.1, weights[1] + 0.1, weights[2] - 0.2]
            
            # Choose action with weighted probabilities
            action = random.choices([0, 1, 2], weights=weights)[0]
            action_names = {0: "Dagdag", 1: "Lapad", 2: "None"}
            print(f"🎯 Smart Random chose: {action} ({action_names[action]})")
            print(f"📊 Utilization: {utilization:.1%} | Weights: Dagdag:{weights[0]:.1%}, Lapad:{weights[1]:.1%}, None:{weights[2]:.1%}")
        else:
            try:
                action = int(input("Choose [0=Dagdag, 1=Lapad, 2=None]: "))
                if action not in [0,1,2]:
                    action = 2
            except:
                action = 2

        # Store current state before step
        previous_onboard = env.passengers_onboard
        
        # Take step and get the actual alighted count from the environment
        state, reward, done, info = env.step(action)
        
        alighted = info.get('alighted', 0)
        total_alighted += alighted
        
        # Calculate excess space metrics
        excess_space = max(0, env.capacity - env.passengers_onboard)
        total_excess_space += excess_space
        peak_excess_space = max(peak_excess_space, excess_space)
        station_count += 1
        
        print(f"✅ Reward: {reward:.2f} | Onboard: {env.passengers_onboard} | Alighted: {alighted}")
        print(f"📊 Excess Space: {excess_space} | Capacity: {env.capacity}\n")
        time.sleep(delay)

        if done:
            print(f"❌ Game ended: {env.done_reason}")
            break

    # Calculate final metrics
    final_norm, final_raw = env.final_score()
    average_excess_space = total_excess_space / station_count if station_count > 0 else 0
    
    print("\n" + "="*50)
    print("🏁 GAME OVER - FINAL STATISTICS")
    print("="*50)
    print(f"📊 Raw Score: {final_raw:.2f}")
    print(f"⭐ Normalized Score: {final_norm}/100")
    print(f"👥 Total Passengers Carried: {env.total_boarded}")
    print(f"🚪 Total Passengers Alighted: {total_alighted}")
    print(f"📦 Average Excess Space: {average_excess_space:.1f}")
    print(f"📈 Peak Capacity Inefficiency: {peak_excess_space}")
    print(f"🛑 Stations Visited: {env.station_visits}")
    print(f"⏱️  Total Steps: {env.steps}")
    print(f"💰 Total Configuration Cost: {env.total_config_cost:.1f}")
    print("="*50)

# Run smart random
play_game_smart_random(auto=True, max_rounds=50, delay=0.2)

🚆 Welcome to Dagdag o Lapad 🚆
Actions: 0 = Dagdag, 1 = Lapad, 2 = None
🕐 Starting at: 12:58


🚉------------
🚂
📍 Recto | Cap: 100 | Onboard: 0
🕐 Time: 12:58 | Direction: NB
🎯 Smart Random chose: 2 (None)
📊 Utilization: 0.0% | Weights: Dagdag:10.0%, Lapad:10.0%, None:80.0%
✅ Reward: 150.00 | Onboard: 100 | Alighted: 0
📊 Excess Space: 0 | Capacity: 100


-🚉-----------
 🚂
📍 Legarda | Cap: 100 | Onboard: 100
🕐 Time: 13:03 | Direction: NB
🎯 Smart Random chose: 2 (None)
📊 Utilization: 100.0% | Weights: Dagdag:50.0%, Lapad:30.0%, None:20.0%
✅ Reward: 76.50 | Onboard: 100 | Alighted: 51
📊 Excess Space: 0 | Capacity: 100


--🚉----------
  🚂
📍 Pureza | Cap: 100 | Onboard: 100
🕐 Time: 13:08 | Direction: NB
🎯 Smart Random chose: 0 (Dagdag)
📊 Utilization: 100.0% | Weights: Dagdag:50.0%, Lapad:30.0%, None:20.0%
✅ Reward: 87.68 | Onboard: 150 | Alighted: 17
📊 Excess Space: 50 | Capacity: 200


---🚉---------
   🚂
📍 V. Mapa | Cap: 200 | Onboard: 150
🕐 Time: 13:13 | Direction: NB
🎯 Smart Random chose: 2 

In [9]:
# REGULAR RANDOM PLAY FUNCTION

def play_game(auto=True, max_rounds=40, delay=0.3):
    env = TrainGameEnv(initial_capacity=100)
    
    # Track metrics
    total_alighted = 0
    peak_excess_space = 0
    total_excess_space = 0
    station_count = 0
    
    print("🚆 Welcome to Dagdag o Lapad 🚆")
    print("Actions: 0 = Dagdag, 1 = Lapad, 2 = None")
    
    # Display starting time
    start_hour = env.sim_minutes // 60
    start_minute = env.sim_minutes % 60
    print(f"🕐 Starting at: {start_hour:02d}:{start_minute:02d}\n")

    for _ in range(max_rounds):
        draw_train(env)
        
        # Display current time and direction
        current_hour = env.sim_minutes // 60
        current_minute = env.sim_minutes % 60
        direction = "NB" if env.direction == 1 else "SB"
        print(f"🕐 Time: {current_hour:02d}:{current_minute:02d} | Direction: {direction}")
        
        if auto:
            action = random.choice([0, 1, 2])
            print(f"🎲 Random Agent chose: {action}")
        else:
            try:
                action = int(input("Choose [0=Dagdag, 1=Lapad, 2=None]: "))
                if action not in [0,1,2]:
                    action = 2
            except:
                action = 2

        # Store current state before step
        previous_onboard = env.passengers_onboard
        
        # Take step and get the actual alighted count from the environment
        state, reward, done, info = env.step(action)
        
        # NEW: Get alighted passengers from the environment's internal tracking
        # We'll need to modify the step method to return this info
        alighted = info.get('alighted', 0)
        total_alighted += alighted
        
        # Calculate excess space metrics
        excess_space = max(0, env.capacity - env.passengers_onboard)
        total_excess_space += excess_space
        peak_excess_space = max(peak_excess_space, excess_space)
        station_count += 1
        
        print(f"✅ Reward: {reward:.2f} | Onboard: {env.passengers_onboard} | Alighted: {alighted}")
        print(f"📊 Excess Space: {excess_space} | Capacity: {env.capacity}\n")
        time.sleep(delay)

        if done:
            print(f"❌ Game ended: {env.done_reason}")
            break

    # Calculate final metrics
    final_norm, final_raw = env.final_score()
    average_excess_space = total_excess_space / station_count if station_count > 0 else 0
    
    print("\n" + "="*50)
    print("🏁 GAME OVER - FINAL STATISTICS")
    print("="*50)
    print(f"📊 Raw Score: {final_raw:.2f}")
    print(f"⭐ Normalized Score: {final_norm}/100")
    print(f"👥 Total Passengers Carried: {env.total_boarded}")
    print(f"🚪 Total Passengers Alighted: {total_alighted}")
    print(f"📦 Average Excess Space: {average_excess_space:.1f}")
    print(f"📈 Peak Capacity Inefficiency: {peak_excess_space}")
    print(f"🛑 Stations Visited: {env.station_visits}")
    print(f"⏱️  Total Steps: {env.steps}")
    print(f"💰 Total Configuration Cost: {env.total_config_cost:.1f}")
    print("="*50)

# ===============================
# RUN SHOWCASE
# ===============================
play_game(auto=True, max_rounds=50, delay=0.2)

🚆 Welcome to Dagdag o Lapad 🚆
Actions: 0 = Dagdag, 1 = Lapad, 2 = None
🕐 Starting at: 09:22


🚉------------
🚂
📍 Recto | Cap: 100 | Onboard: 0
🕐 Time: 09:22 | Direction: NB
🎲 Random Agent chose: 0
✅ Reward: 120.52 | Onboard: 84 | Alighted: 0
📊 Excess Space: 116 | Capacity: 200


-🚉-----------
 🚂
📍 Legarda | Cap: 200 | Onboard: 84
🕐 Time: 09:27 | Direction: NB
🎲 Random Agent chose: 0
✅ Reward: 11.43 | Onboard: 114 | Alighted: 8
📊 Excess Space: 186 | Capacity: 300


--🚉----------
  🚂
📍 Pureza | Cap: 300 | Onboard: 114
🕐 Time: 09:32 | Direction: NB
🎲 Random Agent chose: 0
✅ Reward: 3.30 | Onboard: 80 | Alighted: 51
📊 Excess Space: 320 | Capacity: 400


---🚉---------
   🚂
📍 V. Mapa | Cap: 400 | Onboard: 80
🕐 Time: 09:37 | Direction: NB
🎲 Random Agent chose: 2
✅ Reward: 51.29 | Onboard: 96 | Alighted: 31
📊 Excess Space: 304 | Capacity: 400


----🚉--------
    🚂
📍 J. Ruiz | Cap: 400 | Onboard: 96
🕐 Time: 09:42 | Direction: NB
🎲 Random Agent chose: 2
✅ Reward: -16.34 | Onboard: 122 | Alighted:

In [10]:
# WORST CASE TEST - INEFFICIENT OPERATIONS

def test_worst_case_fixed():
    print("🚆 WORST CASE TEST - Inefficient Operations 🚆")
    print("=" * 60)
    
    # Create environment with fixed seed for reproducibility
    env = TrainGameEnv(initial_capacity=100, seed=42)
    
    # Store original final_score method
    original_final_score = env.final_score
    
    # Define fixed final_score method
    def fixed_final_score():
        distance_bonus = env.station_visits * 20  # Reduced from 50
        effective_score = env.raw_score + distance_bonus
        # More realistic normalization
        S_min = -100 * env.station_visits  # More negative minimum
        S_max = 150 * env.station_visits   # Lower maximum
        normalized = round(1 + ((effective_score - S_min) / (S_max - S_min)) * 99)
        return max(1, min(100, normalized)), effective_score
    
    # Replace the method
    env.final_score = fixed_final_score
    
    # Force worst-case conditions
    env.sim_minutes = 10 * 60  # 10:00 AM - moderate traffic
    env.capacity = 500  # Start with oversized capacity
    
    # Track metrics
    total_alighted = 0
    peak_excess_space = 0
    total_excess_space = 0
    station_count = 0
    
    # WORST-CASE ACTIONS: Always expand capacity even when empty
    worst_case_actions = [0, 1] * 25  # Alternate between Dagdag and Lapad
    
    for i, action in enumerate(worst_case_actions):
        if i >= 50:
            break
            
        draw_train(env)
        current_hour = env.sim_minutes // 60
        current_minute = env.sim_minutes % 60
        direction = "NB" if env.direction == 1 else "SB"
        print(f"🕐 Time: {current_hour:02d}:{current_minute:02d} | Direction: {direction}")
        print(f"🎲 WORST-CASE Action: {action} (Always expanding)")
        
        previous_onboard = env.passengers_onboard
        state, reward, done, info = env.step(action)
        
        alighted = info.get('alighted', 0)
        total_alighted += alighted
        
        excess_space = max(0, env.capacity - env.passengers_onboard)
        total_excess_space += excess_space
        peak_excess_space = max(peak_excess_space, excess_space)
        station_count += 1
        
        print(f"✅ Reward: {reward:.2f} | Onboard: {env.passengers_onboard} | Alighted: {alighted}")
        print(f"📊 Excess Space: {excess_space} | Capacity: {env.capacity}")
        print(f"💸 Config Cost: {env.total_config_cost:.1f}")
        print()
        
        time.sleep(0.05)

        if done:
            break

    final_norm, final_raw = env.final_score()
    average_excess_space = total_excess_space / station_count if station_count > 0 else 0
    
    print("\n" + "="*60)
    print("🏁 WORST CASE RESULTS - INEFFICIENT OPERATIONS")
    print("="*60)
    print(f"📊 Raw Score: {final_raw:.2f}")
    print(f"⭐ Normalized Score: {final_norm}/100")
    print(f"👥 Total Passengers Carried: {env.total_boarded}")
    print(f"🚪 Total Passengers Alighted: {total_alighted}")
    print(f"📦 Average Excess Space: {average_excess_space:.1f}")
    print(f"📈 Peak Capacity Inefficiency: {peak_excess_space}")
    print(f"🛑 Stations Visited: {env.station_visits}")
    print(f"⏱️  Total Steps: {env.steps}")
    print(f"💰 Total Configuration Cost: {env.total_config_cost:.1f}")
    print(f"🔧 Capacity Actions: {len(worst_case_actions)}")
    print("="*60)

# Run worst case test
test_worst_case_fixed()

🚆 WORST CASE TEST - Inefficient Operations 🚆

🚉------------
🚂
📍 Recto | Cap: 500 | Onboard: 0
🕐 Time: 10:00 | Direction: NB
🎲 WORST-CASE Action: 0 (Always expanding)
✅ Reward: 45.79 | Onboard: 43 | Alighted: 0
📊 Excess Space: 557 | Capacity: 600
💸 Config Cost: 10.0


-🚉-----------
 🚂
📍 Legarda | Cap: 600 | Onboard: 43
🕐 Time: 10:05 | Direction: NB
🎲 WORST-CASE Action: 1 (Always expanding)
✅ Reward: -17.32 | Onboard: 47 | Alighted: 15
📊 Excess Space: 603 | Capacity: 650
💸 Config Cost: 15.0


--🚉----------
  🚂
📍 Pureza | Cap: 650 | Onboard: 47
🕐 Time: 10:10 | Direction: NB
🎲 WORST-CASE Action: 0 (Always expanding)
✅ Reward: -129.55 | Onboard: 94 | Alighted: 6
📊 Excess Space: 656 | Capacity: 750
💸 Config Cost: 25.0


---🚉---------
   🚂
📍 V. Mapa | Cap: 750 | Onboard: 94
🕐 Time: 10:15 | Direction: NB
🎲 WORST-CASE Action: 1 (Always expanding)
✅ Reward: 0.92 | Onboard: 40 | Alighted: 69
📊 Excess Space: 760 | Capacity: 800
💸 Config Cost: 30.0


----🚉--------
    🚂
📍 J. Ruiz | Cap: 800 | Onboa

In [11]:
# BEST CASE TEST - EFFICIENT OPERATIONS

def test_best_case_fixed():
    print("🚆 BEST CASE TEST - Efficient Operations 🚆")
    print("=" * 60)
    
    # Create environment with fixed seed for reproducibility
    env = TrainGameEnv(initial_capacity=100, seed=42)
    
    # Store original final_score method
    original_final_score = env.final_score
    
    # Define fixed final_score method
    def fixed_final_score():
        distance_bonus = env.station_visits * 20  # Reduced from 50
        effective_score = env.raw_score + distance_bonus
        # More realistic normalization
        S_min = -100 * env.station_visits  # More negative minimum
        S_max = 150 * env.station_visits   # Lower maximum
        normalized = round(1 + ((effective_score - S_min) / (S_max - S_min)) * 99)
        return max(1, min(100, normalized)), effective_score
    
    # Replace the method
    env.final_score = fixed_final_score
    
    # Force best-case conditions
    env.sim_minutes = 8 * 60  # 8:00 AM - rush hour
    env.capacity = 150  # Start with reasonable capacity
    
    # Track metrics
    total_alighted = 0
    peak_excess_space = 0
    total_excess_space = 0
    station_count = 0
    
    # BEST-CASE ACTIONS: Strategic capacity expansion only when needed
    best_case_actions = [
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  # No expansion initially
        1, 2, 2, 2, 2,  # Small expansion when actually needed
        1, 2, 2, 2, 2,
        0, 2, 2, 2, 2,  # Medium expansion during actual peak
        1, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  # Minimal expansion
        1, 2, 2, 2, 2
    ]
    
    for i, action in enumerate(best_case_actions):
        if i >= 50:
            break
            
        draw_train(env)
        current_hour = env.sim_minutes // 60
        current_minute = env.sim_minutes % 60
        direction = "NB" if env.direction == 1 else "SB"
        print(f"🕐 Time: {current_hour:02d}:{current_minute:02d} | Direction: {direction}")
        
        action_names = {0: "Dagdag", 1: "Lapad", 2: "None"}
        print(f"🎯 STRATEGIC Action: {action} ({action_names[action]})")
        
        previous_onboard = env.passengers_onboard
        state, reward, done, info = env.step(action)
        
        alighted = info.get('alighted', 0)
        total_alighted += alighted
        
        excess_space = max(0, env.capacity - env.passengers_onboard)
        total_excess_space += excess_space
        peak_excess_space = max(peak_excess_space, excess_space)
        station_count += 1
        
        efficiency_ratio = info.get('efficiency_ratio', 0)
        penalty_unused = info.get('penalty_unused', 0)
        
        print(f"✅ Reward: {reward:.2f} | Onboard: {env.passengers_onboard} | Alighted: {alighted}")
        print(f"📊 Excess Space: {excess_space} | Capacity: {env.capacity}")
        print(f"📈 Efficiency Ratio: {efficiency_ratio:.2f} | Penalty: {penalty_unused:.2f}")
        print()
        
        time.sleep(0.05)

        if done:
            break

    final_norm, final_raw = env.final_score()
    average_excess_space = total_excess_space / station_count if station_count > 0 else 0
    
    print("\n" + "="*60)
    print("🏁 BEST CASE RESULTS - EFFICIENT OPERATIONS")
    print("="*60)
    print(f"📊 Raw Score: {final_raw:.2f}")
    print(f"⭐ Normalized Score: {final_norm}/100")
    print(f"👥 Total Passengers Carried: {env.total_boarded}")
    print(f"🚪 Total Passengers Alighted: {total_alighted}")
    print(f"📦 Average Excess Space: {average_excess_space:.1f}")
    print(f"📈 Peak Capacity Inefficiency: {peak_excess_space}")
    print(f"🛑 Stations Visited: {env.station_visits}")
    print(f"⏱️  Total Steps: {env.steps}")
    print(f"💰 Total Configuration Cost: {env.total_config_cost:.1f}")
    print(f"🔧 Capacity Actions: {sum(1 for a in best_case_actions if a != 2)}")
    print("="*60)

# Run best case test
test_best_case_fixed()

🚆 BEST CASE TEST - Efficient Operations 🚆

🚉------------
🚂
📍 Recto | Cap: 150 | Onboard: 0
🕐 Time: 08:00 | Direction: NB
🎯 STRATEGIC Action: 2 (None)
✅ Reward: 120.96 | Onboard: 82 | Alighted: 0
📊 Excess Space: 68 | Capacity: 150
📈 Efficiency Ratio: 0.00 | Penalty: 2.04


-🚉-----------
 🚂
📍 Legarda | Cap: 150 | Onboard: 82
🕐 Time: 08:05 | Direction: NB
🎯 STRATEGIC Action: 2 (None)
✅ Reward: 69.15 | Onboard: 95 | Alighted: 35
📊 Excess Space: 55 | Capacity: 150
📈 Efficiency Ratio: 0.43 | Penalty: 2.85


--🚉----------
  🚂
📍 Pureza | Cap: 150 | Onboard: 95
🕐 Time: 08:10 | Direction: NB
🎯 STRATEGIC Action: 2 (None)
✅ Reward: 45.92 | Onboard: 101 | Alighted: 28
📊 Excess Space: 49 | Capacity: 150
📈 Efficiency Ratio: 0.29 | Penalty: 5.08


---🚉---------
   🚂
📍 V. Mapa | Cap: 150 | Onboard: 101
🕐 Time: 08:15 | Direction: NB
🎯 STRATEGIC Action: 2 (None)
✅ Reward: 43.04 | Onboard: 37 | Alighted: 94
📊 Excess Space: 113 | Capacity: 150
📈 Efficiency Ratio: 0.93 | Penalty: 1.96


----🚉--------
    🚂


SCORES FOR V1

============================================================
🏁 WORST CASE RESULTS - INEFFICIENT OPERATIONS
============================================================
📊 Raw Score: 6254.32
⭐ Normalized Score: 90/100
👥 Total Passengers Carried: 3465
🚪 Total Passengers Alighted: 2760
📦 Average Excess Space: 2324.1
📈 Peak Capacity Inefficiency: 4237
🛑 Stations Visited: 50
⏱️  Total Steps: 50
💰 Total Configuration Cost: 375.0
🔧 Capacity Actions: 50
============================================================

============================================================
🏁 BEST CASE RESULTS - EFFICIENT OPERATIONS
============================================================
📊 Raw Score: 5794.10
⭐ Normalized Score: 92/100
👥 Total Passengers Carried: 2575
🚪 Total Passengers Alighted: 2064
📦 Average Excess Space: 208.9
📈 Peak Capacity Inefficiency: 400
🛑 Stations Visited: 45
⏱️  Total Steps: 45
💰 Total Configuration Cost: 30.0
🔧 Capacity Actions: 5
============================================================

==================================================
🏁 RANDOMIZED - FINAL STATISTICS
==================================================
📊 Raw Score: 8480.41
⭐ Normalized Score: 88/100
👥 Total Passengers Carried: 3382
🚪 Total Passengers Alighted: 2767
📦 Average Excess Space: 1310.8
📈 Peak Capacity Inefficiency: 2650
🛑 Stations Visited: 50
⏱️  Total Steps: 50
💰 Total Configuration Cost: 255.0
==================================================

========================================================================================================================
========================================================================================================================
========================================================================================================================

SCORES FOR V2

==================================================
🏁 RANDOM 1
==================================================
📊 Raw Score: -5645.36
⭐ Normalized Score: 1/100
👥 Total Passengers Carried: 3655
🚪 Total Passengers Alighted: 2724
📦 Average Excess Space: 1302.0
📈 Peak Capacity Inefficiency: 2677
🛑 Stations Visited: 50
⏱️  Total Steps: 50
💰 Total Configuration Cost: 260.0
==================================================

==================================================
🏁 RANDOM 2
==================================================
📊 Raw Score: -244.64
⭐ Normalized Score: 41/100
👥 Total Passengers Carried: 1623
🚪 Total Passengers Alighted: 1226
📦 Average Excess Space: 653.8
📈 Peak Capacity Inefficiency: 1288
🛑 Stations Visited: 30
⏱️  Total Steps: 30
💰 Total Configuration Cost: 130.0
==================================================

==================================================
🏁 RANDOM 3
==================================================
📊 Raw Score: -2036.69
⭐ Normalized Score: 23/100
👥 Total Passengers Carried: 3382
🚪 Total Passengers Alighted: 2767
📦 Average Excess Space: 1310.8
📈 Peak Capacity Inefficiency: 2650
🛑 Stations Visited: 50
⏱️  Total Steps: 50
💰 Total Configuration Cost: 255.0
==================================================

==================================================
🏁 RANDOM 4
==================================================
📊 Raw Score: -3942.13
⭐ Normalized Score: 2/100
👥 Total Passengers Carried: 4260
🚪 Total Passengers Alighted: 3271
📦 Average Excess Space: 1460.8
📈 Peak Capacity Inefficiency: 2990
🛑 Stations Visited: 50
⏱️  Total Steps: 50
💰 Total Configuration Cost: 290.0
==================================================

==================================================
🏁 RANDOM 5
==================================================
📊 Raw Score: -5180.73
⭐ Normalized Score: 1/100
👥 Total Passengers Carried: 4317
🚪 Total Passengers Alighted: 3408
📦 Average Excess Space: 1262.8
📈 Peak Capacity Inefficiency: 2687
🛑 Stations Visited: 50
⏱️  Total Steps: 50
💰 Total Configuration Cost: 260.0
==================================================

==================================================
🏁 RANDOM 5
==================================================
📊 Raw Score: -6805.76
⭐ Normalized Score: 1/100
👥 Total Passengers Carried: 3242
🚪 Total Passengers Alighted: 2411
📦 Average Excess Space: 1166.1
📈 Peak Capacity Inefficiency: 2300
🛑 Stations Visited: 50
⏱️  Total Steps: 50
💰 Total Configuration Cost: 225.0
==================================================

============================================================
🏁 WORST CASE RESULTS - INEFFICIENT OPERATIONS
============================================================
📊 Raw Score: -18804.73
⭐ Normalized Score: 1/100
👥 Total Passengers Carried: 3465
🚪 Total Passengers Alighted: 2760
📦 Average Excess Space: 2324.1
📈 Peak Capacity Inefficiency: 4237
🛑 Stations Visited: 50
⏱️  Total Steps: 50
💰 Total Configuration Cost: 375.0
🔧 Capacity Actions: 50
============================================================

============================================================
🏁 BEST CASE RESULTS - EFFICIENT OPERATIONS
============================================================
📊 Raw Score: 3942.11
⭐ Normalized Score: 75/100
👥 Total Passengers Carried: 2575
🚪 Total Passengers Alighted: 2064
📦 Average Excess Space: 208.9
📈 Peak Capacity Inefficiency: 400
🛑 Stations Visited: 45
⏱️  Total Steps: 45
💰 Total Configuration Cost: 30.0
🔧 Capacity Actions: 5
============================================================
