In [10]:
# @title
# ===============================
# TRAIN GAME WITH SCALING + ASCII
# ===============================
import random
import time
import numpy as np

class TrainGameEnv:
    def __init__(self, initial_capacity=100, seed=None, verbose=False):
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        # train state
        self.initial_capacity = initial_capacity
        self.capacity = initial_capacity
        self.passengers_onboard = 0

        # scoring trackers
        self.raw_score = 0.0
        self.total_boarded = 0
        self.total_unused = 0.0
        self.total_config_cost = 0.0

        # stations (LRT-2)
        self.stations = [
            "Recto", "Legarda", "Pureza", "V. Mapa", "J. Ruiz", "Gilmore",
            "Betty Go", "Cubao", "Anonas", "Katipunan",
            "Santolan", "Marikina", "Antipolo"
        ]
        self.num_stations = len(self.stations)
        self.station_idx = 0
        self.direction = +1

        # collapse mechanic
        self.weight_window = []
        self.window_size = 10
        self.base_collapse_threshold = 10.0

        # time + progression
        self.sim_hour = random.randint(0, 23)
        self.steps = 0
        self.max_steps = 2000
        self.station_visits = 0

        self.done = False
        self.done_reason = None
        self.verbose = verbose

    # ----------------- helpers -----------------
    def _time_multiplier(self, hour):
        if 6 <= hour <= 8:   # morning rush
            return 1.9
        if 11 <= hour <= 13: # lunch rush
            return 1.6
        if 17 <= hour <= 19: # evening rush
            return 1.9
        return 0.8 if random.random() < 0.45 else 1.0

    def _arrival_bounds(self, idx):
        if idx in (0, self.num_stations-1, 7):  # terminals + Cubao
            return (40, 150)
        return (10, 70)

    def _simulate_arrivals(self):
        amin, amax = self._arrival_bounds(self.station_idx)
        base = random.randint(amin, amax)
        mult = self._time_multiplier(self.sim_hour)

        # ✅ Passenger Surge Scaling
        surge_factor = 1.0 + (self.steps / 2000) * 2.0
        return max(0, int(round(base * mult * surge_factor)))

    def reset(self):
        self.capacity = self.initial_capacity
        self.passengers_onboard = 0
        self.raw_score = 0.0
        self.total_boarded = 0
        self.total_unused = 0.0
        self.total_config_cost = 0.0
        self.station_idx = 0
        self.direction = +1
        self.weight_window = []
        self.sim_hour = random.randint(0,23)
        self.steps = 0
        self.station_visits = 0
        self.done = False
        self.done_reason = None
        return self._get_state()

    def _get_state(self):
        return np.array([
            float(self.capacity),
            float(self.passengers_onboard),
            float(self.station_idx),
            float(self.direction),
            float(self.sim_hour)
        ], dtype=np.float32)

    # ----------------- main step -----------------
    def step(self, action):
        if self.done:
            raise RuntimeError("Environment is done. Call reset().")

        # action effects
        if action == 0:  # Dagdag
            self.capacity += 100
            cost = 10.0
            weight = 1.0
        elif action == 1:  # Lapad
            self.capacity += 50
            cost = 5.0
            weight = 0.5
        else:
            cost = 0.0
            weight = 0.0

        # config penalty (small immediate penalty)
        config_penalty = 0.5 * cost
        self.total_config_cost += cost
        self.raw_score -= config_penalty

        # ✅ Soft Collapse Pressure
        collapse_threshold = max(3.0, self.base_collapse_threshold - (self.steps / 200))
        self.weight_window.append(weight)
        if len(self.weight_window) > self.window_size:
            self.weight_window.pop(0)
        if sum(self.weight_window) >= collapse_threshold:
            self.done = True
            self.done_reason = f"Collapse at station {self.stations[self.station_idx]}"
            self.raw_score -= 200.0
            return self._get_state(), -500.0, True, {"reason": self.done_reason}

        # random alighting
        if self.passengers_onboard > 0:
            alight = random.randint(0, self.passengers_onboard)
            self.passengers_onboard -= alight

        # terminal reset
        if self.station_idx in (0, self.num_stations-1):
            self.passengers_onboard = 0

        # arrivals + boarding
        arrivals = self._simulate_arrivals()
        space = max(0, self.capacity - self.passengers_onboard)
        boarded = min(arrivals, space)
        self.passengers_onboard += boarded

        unused = max(0, self.capacity - self.passengers_onboard)

        # ✅ Penalty Growth
        penalty_growth = 1.0 + (self.steps / 1000)
        reward_board = 2.0 * boarded
        penalty_unused = 0.1 * unused * penalty_growth
        station_reward = reward_board - penalty_unused

        # update stats
        self.raw_score += station_reward
        self.total_boarded += boarded
        self.total_unused += unused
        self.station_visits += 1
        self.steps += 1

        # advance time
        self.sim_hour = (self.sim_hour + random.randint(0,2)) % 24

        # station movement
        next_idx = self.station_idx + self.direction
        if next_idx < 0 or next_idx >= self.num_stations:
            self.direction *= -1
            next_idx = self.station_idx + self.direction
            self.passengers_onboard = 0
        self.station_idx = next_idx

        # stop if too long
        if self.steps >= self.max_steps:
            self.done = True
            self.done_reason = "Max steps reached."

        return self._get_state(), station_reward - (0.1 * cost), self.done, {}

    def final_score(self):
        distance_bonus = self.station_visits * 50
        effective_score = self.raw_score + distance_bonus
        S_min = -50 * self.station_visits
        S_max = 200 * self.station_visits
        normalized = round(1 + ((effective_score - S_min) / (S_max - S_min)) * 99)
        return max(1, min(100, normalized)), effective_score


# ===============================
# ASCII DISPLAY
# ===============================
def draw_train(env):
    track = ["-"] * env.num_stations
    idx = env.station_idx
    track[idx] = "🚉"
    train = "🚂" if env.direction == 1 else "🚋"
    print("\n" + "".join(track))
    print(" " * idx + train)
    print(f"📍 {env.stations[idx]} | Cap: {env.capacity} | Onboard: {env.passengers_onboard}")




In [31]:

# ===============================
# PLAY TEST 
# ===============================
def play_games(auto=True, max_rounds=40, delay=0.3):
    env = TrainGameEnv(initial_capacity=100)
    print("🚆 Welcome to Dagdag o Lapad 🚆")
    print("Actions: 0 = Dagdag, 1 = Lapad, 2 = None\n")

    for _ in range(max_rounds):
        draw_train(env)
        if auto:
            action = random.choice([0, 1, 2])
            print(f"🎲 Random Agent chose: {action}")
        else:
            try:
                action = int(input("Choose [0=Dagdag, 1=Lapad, 2=None]: "))
                if action not in [0,1,2]:
                    action = 2
            except:
                action = 2

        _, reward, done, info = env.step(action)
        print(f"✅ Reward: {reward:.2f} | Onboard: {env.passengers_onboard}\n")
        time.sleep(delay)

        if done:
            print(f"❌ Game ended: {env.done_reason}")
            break

    final_norm, final_raw = env.final_score()
    print("\n============================")
    print("🏁 Game Over!")
    print(f"📊 Raw Score: {final_raw:.2f}")
    print(f"⭐ Normalized Score: {final_norm}/100")
    print("============================")


# ===============================
# RUN SHOWCASE
# ===============================
play_game(auto=True, max_rounds=50, delay=0.2)


TypeError: TrainGameEnv.__init__() got an unexpected keyword argument 'initial_capacity'