### libs and imports

In [None]:
!pip install pyRDDLGym
import pyRDDLGym
!pip install rddlrepository

from rddlrepository.core.manager import RDDLRepoManager
manager = RDDLRepoManager(rebuild=True)

from IPython.display import Image # for displaying gifs in colab

base_path = '/content/'

Collecting pyRDDLGym
  Downloading pyrddlgym-2.6-py3-none-any.whl.metadata (1.3 kB)
Collecting pygame-ce (from pyRDDLGym)
  Downloading pygame_ce-2.5.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (12 kB)
Downloading pyrddlgym-2.6-py3-none-any.whl (111 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m111.7/111.7 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pygame_ce-2.5.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (12.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.6/12.6 MB[0m [31m54.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pygame-ce, pyRDDLGym
Successfully installed pyRDDLGym-2.6 pygame-ce-2.5.6
Collecting rddlrepository
  Downloading rddlrepository-2.1-py3-none-any.whl.metadata (959 bytes)
Downloading rddlrepository-2.1-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m21.3 MB/s[0m eta [36m0:00:00

### Our domain file

In [None]:
# Directory the RDDL files are written to (the setup cell assigns the same value).
base_path = '/content/'

# RDDL source of the `train_system` domain, kept as a Python string so the
# notebook can write it to disk. What the text below declares:
#   * types: `train` and `station` objects.
#   * train state codes (non-fluent ints): TRAIN_IN_ROUTE=0, TRAIN_IN_QUEUE=1,
#     TRAIN_WAITING=2, TRAIN_ACTIVE=3, TRAIN_FINISHED=4.
#   * action: wait(station), an int constrained to [0, 10] by the
#     action-preconditions; delay(train) is wait() at the train's current station.
#   * time: global_timer is the minimum train_timer over all trains, and
#     current_time' advances by global_timer each step. A train whose timer
#     equals global_timer is the one whose state changes on that step.
#   * train_timer' is set to INF once a train is finished or reaches the depot.
#   * reward: -abs(sum over trains that are ACTIVE with timer == global_timer of
#     PLANNED_DEPARTURE_TIME(train, current station) - (current_time + global_timer)).
# NOTE(review): SCHEDULED_DEPARTURE_TIME and PASSENGERS_BOARDING_PER_MINUTE are
# declared but not referenced by any cpf below; active_time_calc divides the
# combined boarding + disembarking count by PASSENGERS_DISEMBARKING_PER_MINUTE
# only — confirm this is intended.
# The non-fluent name DISEMBARKING_PRECENTAGE is spelled this way on purpose:
# the instance generator emits the same identifier.
TRAIN_DOMAIN = """
domain train_system {

    requirements = {
        concurrent,
        reward-deterministic,
        intermediate-nodes,
        constrained-state
    };

    types {
        train: object;
        station: object;
    };

    pvariables{

        //train constants
        CAPACITY(train): {non-fluent, int, default=1000};
        PASSENGERS_BOARDING_PER_MINUTE(train): {non-fluent, int, default=75};
        PASSENGERS_DISEMBARKING_PER_MINUTE(train): {non-fluent, int, default=75};
        NEXT_TRAIN(train, train): {non-fluent, bool, default=false};
        MIN_STOP_TIME(train): {non-fluent, real, default=2.0};

        //station constants
        INF : {non-fluent, real, default=1000000.0};
        FIRST_TRAIN : {non-fluent, train};
        DEPOT_STATION(station): {non-fluent, bool, default=false};
        NEXT_STATION(station, station): {non-fluent, bool, default=false};
        DRIVE_TIME(station, station): {non-fluent, real, default=0.0};
        FIND_NEXT_STATION(station): {non-fluent, station};
        DISEMBARKING_PRECENTAGE(station): {non-fluent, real, default=0.2};
        PASSENGER_ARRIVAL_RATE(station): {non-fluent, int, default=6};
        PLANNED_DEPARTURE_TIME(train, station): {non-fluent, real, default=0.0};

        //timing constants
        SCHEDULED_DEPARTURE_TIME(train, station): {non-fluent, real, default=0.0};

        //timer variables
        train_timer(train): {state-fluent, real, default=0.0};

        // --- NEW: Absolute Clock ---
        current_time: {state-fluent, real, default=0.0};

        //interm fluent
        active_time_calc(train): {interm-fluent, real};
        people_boarding_calc(train, station): {interm-fluent, real};
        people_disembarking_calc(train, station): {interm-fluent, real};
        entering_empty_queue(train): {interm-fluent, bool};
        leaving_queue(train): {interm-fluent, bool};
        global_timer: {interm-fluent, real};

        // train state enum
        TRAIN_IN_ROUTE   : {non-fluent, int, default=0};
        TRAIN_IN_QUEUE   : {non-fluent, int, default=1};
        TRAIN_WAITING    : {non-fluent, int, default=2};
        TRAIN_ACTIVE     : {non-fluent, int, default=3};
        TRAIN_FINISHED   : {non-fluent, int, default=4};

        //state variables - passengers
        passengers_at_station(station): {state-fluent, real, default=0};
        passengers_on_train(train): {state-fluent, real, default=0};

        //state variables - train states
        train_num_at_queue(train): {state-fluent, real, default=-1};
        current_station(train): {state-fluent, station};
        current_state(train): {state-fluent, int, default=0};

        //actions (RENAMED from wait_action -> wait)
        wait(station): {action-fluent, int, default=0};

        //action-interm (RENAMED from wait -> delay)
        delay(train) : {interm-fluent, int};
    };

    cpfs {

        // (RENAMED: delay depends on wait)
        delay(?t) = wait(current_station(?t));

        global_timer() = min_{?t: train} [train_timer(?t)];

        current_time'() = current_time() + global_timer();

        leaving_queue(?t) =  current_state(?t) == TRAIN_ACTIVE ^ train_timer(?t) == global_timer();

        entering_empty_queue(?t) = (FIRST_TRAIN == ?t) | exists_{?t2: train} [NEXT_TRAIN(?t2,?t) ^ ((leaving_queue(?t2) ^ (current_station(?t) == current_station(?t2))) | ~(current_station(?t) == current_station(?t2)))];

        people_disembarking_calc(?t,?s) = floor[DISEMBARKING_PRECENTAGE(?s) * passengers_on_train(?t)];

        people_boarding_calc(?t,?s) = abs[min[CAPACITY(?t) - passengers_on_train(?t) + people_disembarking_calc(?t,?s), passengers_at_station(?s)]];

        active_time_calc(?t) = max[MIN_STOP_TIME(?t),
                                   ceil[sum_{?s: station}[[current_station(?t) == ?s ^ (current_state(?t) == TRAIN_WAITING | current_state(?t) == TRAIN_IN_ROUTE)] *
                                           [people_disembarking_calc(?t,?s) + people_boarding_calc(?t,?s)] /
                                            PASSENGERS_DISEMBARKING_PER_MINUTE(?t)]]];

        passengers_at_station'(?s) =
                                if(DEPOT_STATION(?s))
                                    then 0
                                else if(exists_{?t: train} [current_station(?t) == ?s ^ current_state(?t) == TRAIN_IN_ROUTE ^ train_timer(?t) == global_timer()])
                                    then passengers_at_station(?s) + PASSENGER_ARRIVAL_RATE(?s) -
                                      sum_{?t: train} [(current_station(?t) == ?s ^ current_state(?t) == TRAIN_ACTIVE) * people_boarding_calc(?t,?s)]
                                else passengers_at_station(?s) + PASSENGER_ARRIVAL_RATE(?s);

        passengers_on_train'(?t) =
                                if(exists_{?s: station} [(current_station(?t) == ?s ^ current_state(?t) == TRAIN_ACTIVE) ^ train_timer(?t) == global_timer()])
                                    then passengers_on_train(?t) +
                                         sum_{?s: station}[(current_station(?t) == ?s ^ current_state(?t) == TRAIN_ACTIVE) *
                                                         [people_boarding_calc(?t,?s) - people_disembarking_calc(?t,?s)]]
                                else passengers_on_train(?t);

        current_state'(?t) =
                              if(current_state(?t) == TRAIN_FINISHED)
                                    then TRAIN_FINISHED
                              else if(exists_{?s: station} [(current_station(?t) == ?s ^ current_state(?t) == TRAIN_IN_ROUTE) ^ train_timer(?t) == global_timer() ^ DEPOT_STATION(?s)] | train_timer(?t) == INF)
                                    then TRAIN_FINISHED
                              else if(current_state(?t) == TRAIN_ACTIVE ^ (train_timer(?t) == global_timer()))
                                    then TRAIN_IN_ROUTE

                              // (RENAMED wait -> delay)
                              else if(current_state(?t) == TRAIN_IN_ROUTE ^ (train_timer(?t) == global_timer()) ^ (delay(?t) > 0) ^ entering_empty_queue(?t))
                                    then TRAIN_WAITING
                              else if((current_state(?t) == TRAIN_IN_QUEUE) ^ delay(?t) > 0 ^
                                                    (exists_{?t2: train} [NEXT_TRAIN(?t2,?t) ^ (current_station(?t2) == current_station(?t)) ^ leaving_queue(?t2)]))
                                    then TRAIN_WAITING
                              else if(current_state(?t) == TRAIN_IN_ROUTE ^ (train_timer(?t) == global_timer()) ^ (delay(?t) == 0) ^ entering_empty_queue(?t))
                                    then TRAIN_ACTIVE
                              else if((current_state(?t) == TRAIN_IN_QUEUE) ^ (delay(?t) == 0) ^
                                                    exists_{?t2: train} [NEXT_TRAIN(?t2,?t) ^ (current_station(?t2) == current_station(?t)) ^ leaving_queue(?t2)])
                                    then TRAIN_ACTIVE
                              else if(current_state(?t) == TRAIN_WAITING ^ (train_timer(?t) == global_timer()))
                                    then TRAIN_ACTIVE
                              else if(current_state(?t) == TRAIN_IN_ROUTE ^ (train_timer(?t) == global_timer()) ^
                                     exists_{?t2: train} [NEXT_TRAIN(?t2,?t) ^ (current_station(?t2) == current_station(?t)) ^ (train_num_at_queue(?t2) >= 0.0) ^ ~(train_timer(?t2) == global_timer())])
                                    then TRAIN_IN_QUEUE
                              else current_state(?t);

      current_station'(?t) = if((current_state(?t) == TRAIN_ACTIVE) ^ (train_timer(?t) == global_timer()))
                                 then FIND_NEXT_STATION(current_station(?t))
                             else current_station(?t);

        train_num_at_queue'(?t) = if((current_state(?t) == TRAIN_IN_ROUTE) ^ (train_timer(?t) == global_timer()))
                                    then[
                                         if(exists_{?s: station} [DEPOT_STATION(?s) ^ current_station(?t) == ?s])
                                            then -1.0
                                         else if(exists_{?t2: train} [NEXT_TRAIN(?t2,?t) ^ ~(current_station(?t) == current_station(?t2))])
                                            then 0.0
                                         else
                                            sum_{?t2: train}[NEXT_TRAIN(?t2,?t) * (train_num_at_queue(?t2) + 1.0 -
                                            sum_{?t3: train}[(current_station(?t) == current_station(?t3)) ^ current_state(?t3) == TRAIN_ACTIVE ^ train_timer(?t3) == global_timer()])]]
                                    else if(train_num_at_queue(?t) > 0.0 ^ exists_{?t2: train} [current_station(?t) == current_station(?t2) ^
                                                                        train_num_at_queue(?t2) == 0.0 ^
                                                                        train_timer(?t2) == global_timer() ^
                                                                        current_state(?t2) == TRAIN_ACTIVE])
                                        then train_num_at_queue(?t) - 1.0
                                    else if(train_num_at_queue(?t) == 0.0 ^ current_state(?t) == TRAIN_ACTIVE ^ train_timer(?t) == global_timer())
                                        then -1.0
                                    else train_num_at_queue(?t);

        train_timer'(?t) =
                    if(current_state(?t) == TRAIN_FINISHED | exists_{?s: station} [(current_station(?t) == ?s) ^ (current_state(?t) == TRAIN_IN_ROUTE) ^ (train_timer(?t) == global_timer()) ^ DEPOT_STATION(?s)])
                      then INF
                    // (RENAMED wait -> delay)
                    else if(exists_{?s: station} [(current_station(?t) == ?s) ^ (current_state(?t) == TRAIN_IN_ROUTE) ^ (train_timer(?t) == global_timer()) ^ (delay(?t) == 0) ^ entering_empty_queue(?t)])
                      then active_time_calc(?t)
                    else if(exists_{?s: station} [(current_station(?t) == ?s) ^ (train_num_at_queue(?t) == 1) ^ (delay(?t) == 0) ^
                          exists_{?t2: train} [NEXT_TRAIN(?t2,?t) ^ (current_station(?t2) == ?s) ^ leaving_queue(?t2)]])
                      then active_time_calc(?t)
                    else if(exists_{?s: station} [(current_station(?t) == ?s) ^ (current_state(?t) == TRAIN_WAITING)] ^ (train_timer(?t) == global_timer()))
                      then active_time_calc(?t)
                    else if(exists_{?s: station} [(current_station(?t) == ?s) ^ (current_state(?t) == TRAIN_IN_ROUTE) ^ (train_timer(?t) == global_timer()) ^ (delay(?t) > 0) ^ entering_empty_queue(?t)])
                      then delay(?t)
                    else if(exists_{?s: station} [(current_station(?t) == ?s) ^ (train_num_at_queue(?t) == 1) ^ (delay(?t) > 0) ^
                            exists_{?t2: train} [NEXT_TRAIN(?t2,?t) ^ (current_station(?t2) == ?s) ^ leaving_queue(?t2)]])
                      then round[delay(?t)]
                    else if(exists_{?s: station} [(current_station(?t) == ?s) ^ (current_state(?t) == TRAIN_ACTIVE)] ^ (train_timer(?t) == global_timer()))
                      then sum_{?s: station, ?s2: station} [(current_station(?t) == ?s) * (current_state(?t) == TRAIN_ACTIVE) * NEXT_STATION(?s,?s2) * DRIVE_TIME(?s,?s2)]
                    else max[train_timer(?t) - global_timer() , 0.0];

    };

reward = -1 * abs[sum_{?t: train} [
        (current_state(?t) == TRAIN_ACTIVE ^ train_timer(?t) == global_timer()) *
        (PLANNED_DEPARTURE_TIME(?t, current_station(?t)) - (current_time + global_timer()))
    ]];

action-preconditions {
          // (RENAMED wait_action -> wait)
          forall_{?s : station} wait(?s) >= 0;
          forall_{?s : station} wait(?s) <= 10;
      };

    state-invariants {

    };
}
"""

# Persist the domain text next to the instance file. The context manager
# guarantees the handle is flushed and closed even if the write raises.
with open(base_path + 'domain.rddl', 'w') as domain_file:
    domain_file.write(TRAIN_DOMAIN)
print("Updated RDDL file written successfully!")

Updated RDDL file written successfully!


### Our instance files

#### instance generator - implementation

In [None]:
import random
import math

def _simulate_planned_departures(trains, stations, train_timers, drive_times,
                                 disembarking_percentages, passenger_arrival_rates,
                                 train_capacity, boarding_speed):
    """Simulate an undelayed run of every train and return its departure times.

    Trains are processed in order; each one visits every station in sequence.
    A station serves one train at a time, so a train cannot start boarding
    before the previous train has departed from that station.

    Returns:
        dict mapping (train, station) -> planned departure time.
    """
    planned_departures = {}

    station_free_time = {s: 0.0 for s in stations}           # when the platform is free again
    station_last_snapshot_time = {s: 0.0 for s in stations}  # last time arrivals were accounted for
    station_leftover_passengers = {s: 0.0 for s in stations} # passengers the previous train could not take

    for t_idx, train in enumerate(trains):
        current_train_load = 0.0
        # Arrival at the first station is the train's initial timer (includes start_delay).
        natural_arrival_time = train_timers[t_idx]

        for s_idx, station in enumerate(stations):
            if s_idx > 0:
                natural_arrival_time += drive_times[(stations[s_idx - 1], station)]

            # Passengers that accumulated since the previous train's snapshot.
            accumulation_interval = max(
                natural_arrival_time - station_last_snapshot_time[station], 0
            )
            total_waiting = (
                accumulation_interval * passenger_arrival_rates[station]
                + station_leftover_passengers[station]
            )
            station_last_snapshot_time[station] = natural_arrival_time

            # The train may have to wait for the platform to clear.
            actual_boarding_start = max(natural_arrival_time, station_free_time[station])

            # Disembark first, then board up to the remaining capacity.
            want_to_get_off = current_train_load * disembarking_percentages[station]
            current_train_load = max(current_train_load - want_to_get_off, 0)

            space_available = train_capacity - current_train_load
            if total_waiting <= space_available:
                actually_boarding = total_waiting
                leftover = 0
            else:
                actually_boarding = space_available
                leftover = total_waiting - space_available

            current_train_load += actually_boarding
            station_leftover_passengers[station] = leftover

            # NOTE(review): the domain's active_time_calc uses
            # max[MIN_STOP_TIME, ceil[(disembarking + boarding) / rate]] with
            # floor-ed disembarking counts, whereas this simulation takes the
            # larger of the two durations and has no minimum stop time —
            # confirm which dwell model the planned schedule should follow.
            time_to_disembark = want_to_get_off / boarding_speed
            time_to_board = actually_boarding / boarding_speed
            dwell_time = math.ceil(max(time_to_disembark, time_to_board))

            departure_time = actual_boarding_start + dwell_time
            planned_departures[(train, station)] = departure_time

            station_free_time[station] = departure_time
            natural_arrival_time = departure_time

    return planned_departures


def generate_pyrddlgym_instance(num_trains, num_stations, horizon, interval=30, start_delay=10, train_capacity=1000, boarding_speed=75, seed=None):
    """Build the text of an RDDL non-fluents + instance file for `train_system`.

    Stations s1..sN form a line whose last station is the depot; trains
    t1..tM all start at s1, with t1 arriving after `start_delay` and each
    following train `interval` later. Drive times, disembarking shares and
    passenger arrival rates are drawn at random, and a delay-free simulation
    provides PLANNED_DEPARTURE_TIME for every (train, station) pair.

    Args:
        num_trains: number of trains (at least 2).
        num_stations: number of stations (at least 2).
        horizon: value written to the instance's `horizon`.
        interval: gap between the initial timers of consecutive trains.
        start_delay: initial timer of the first train.
        train_capacity: passenger capacity of each train. Written to the
            instance as CAPACITY (declared `int` in the domain, so it is
            emitted via int()).
        boarding_speed: passengers per minute for both boarding and
            disembarking. Written to the instance as
            PASSENGERS_BOARDING_PER_MINUTE / PASSENGERS_DISEMBARKING_PER_MINUTE.
        seed: optional seed for a private random generator. When None (the
            default) the module-level `random` state is used, as before.

    Returns:
        The complete RDDL text as a single string.

    Raises:
        ValueError: if fewer than 2 trains or stations are requested, or if
            `boarding_speed` is not positive.
    """
    if num_trains < 2 or num_stations < 2:
        raise ValueError("There must be at least 2 trains and 2 stations.")
    if boarding_speed <= 0:
        raise ValueError("boarding_speed must be positive.")

    # A dedicated generator makes seeded runs reproducible without touching
    # the global random state; the draw order below is unchanged.
    rng = random if seed is None else random.Random(seed)

    trains = [f"t{i}" for i in range(1, num_trains + 1)]
    stations = [f"s{i}" for i in range(1, num_stations + 1)]

    # Random drive time between each pair of consecutive stations.
    drive_times = {
        (stations[i], stations[i + 1]): rng.randint(10, 60)
        for i in range(len(stations) - 1)
    }

    # t1 starts at 'start_delay', t2 at 'start_delay + interval', etc.
    # This allows passengers to accumulate at s1 before t1 arrives.
    train_timers = [start_delay + (i * interval) for i in range(num_trains)]

    # Disembarking shares: random weights (each >= 1) normalised to sum to 1.
    station_weights = [rng.randint(1, 10) for _ in range(num_stations)]
    total_weight = sum(station_weights)
    # NOTE(review): the simulation uses these unrounded shares while the
    # emitted non-fluents are rounded to two decimals.
    disembarking_percentages = {
        station: weight / total_weight
        for station, weight in zip(stations, station_weights)
    }

    # Passenger arrival rates: random for every station except the depot
    # (last station), which gets 0.
    passenger_arrival_rates = {station: rng.randint(2, 8) for station in stations[:-1]}
    passenger_arrival_rates[stations[-1]] = 0

    planned_departures = _simulate_planned_departures(
        trains, stations, train_timers, drive_times,
        disembarking_percentages, passenger_arrival_rates,
        train_capacity, boarding_speed,
    )

    # --- OUTPUT GENERATION ---
    output = [
        "non-fluents nf_simple_train_model{\n",
        "    domain = train_system;\n\n",
        "    objects{\n",
        f"        train : {{{', '.join(trains)}}};\n",
        f"        station : {{{', '.join(stations)}}};\n",
        "    };\n\n",
        "    non-fluents{\n",
    ]

    # Each train is followed by the next one; the last wraps around to the first.
    for i, train in enumerate(trains):
        next_train = trains[(i + 1) % num_trains]
        output.append(f"        NEXT_TRAIN({train}, {next_train}) = true;\n")

    output.append(f"\n        DEPOT_STATION({stations[-1]}) = true;\n\n")

    for here, there in zip(stations, stations[1:]):
        output.append(f"        NEXT_STATION({here}, {there}) = true;\n")
        output.append(f"        FIND_NEXT_STATION({here}) = {there};\n")

    # The depot maps to itself so FIND_NEXT_STATION is defined for every station.
    output.append(f"        FIND_NEXT_STATION({stations[-1]}) = {stations[-1]};\n")
    output.append(f"        FIRST_TRAIN = {trains[0]};\n")

    # Emit the per-train constants the planned schedule was computed with, so
    # the environment dynamics use the same capacity and boarding rate
    # instead of silently falling back to the domain defaults.
    output.append("\n")
    for train in trains:
        output.append(f"        CAPACITY({train}) = {int(train_capacity)};\n")
        output.append(f"        PASSENGERS_BOARDING_PER_MINUTE({train}) = {int(boarding_speed)};\n")
        output.append(f"        PASSENGERS_DISEMBARKING_PER_MINUTE({train}) = {int(boarding_speed)};\n")

    output.append("\n")
    for (s1, s2), drive in drive_times.items():
        output.append(f"        DRIVE_TIME({s1}, {s2}) = {drive};\n")

    output.append("\n")
    for station, percentage in disembarking_percentages.items():
        output.append(f"        DISEMBARKING_PRECENTAGE({station}) = {percentage:.2f};\n")

    output.append("\n")
    for station, rate in passenger_arrival_rates.items():
        output.append(f"        PASSENGER_ARRIVAL_RATE({station}) = {rate};\n")

    output.append("\n")
    for (train, station), departure in planned_departures.items():
        output.append(f"        PLANNED_DEPARTURE_TIME({train}, {station}) = {int(departure)};\n")

    output.append("\n    };\n")
    output.append("}\n\n")

    output.append("instance simple_train_model{\n")
    output.append("    domain = train_system;\n")
    output.append("    non-fluents = nf_simple_train_model;\n\n")

    output.append("    init-state{\n")
    for train, timer in zip(trains, train_timers):
        output.append(f"        train_timer({train}) = {timer};\n")

    output.append("\n")
    for train in trains:
        # Every train starts in state 0 (TRAIN_IN_ROUTE) heading to the first station.
        output.append(f"        current_state({train}) = 0;\n")
        output.append(f"        current_station({train}) = {stations[0]};\n")

    output.append("    };\n\n")
    output.append("    max-nondef-actions = pos-inf;\n")
    output.append(f"    horizon = {horizon};\n")
    output.append("    discount = 1.0;\n\n")
    output.append("}\n")

    return "".join(output)

#### instance generator - usage

In [None]:
# Build one 3-train / 4-station scenario and store it next to the domain file.
horizon = 50
instance_file_path = base_path + 'instance.rddl'

scenario_kwargs = dict(
    num_trains=3,
    num_stations=4,
    horizon=horizon,
    interval=30,
    start_delay=15,
)
rddl_instance = generate_pyrddlgym_instance(**scenario_kwargs)

with open(instance_file_path, 'w') as instance_file:
    instance_file.write(rddl_instance)

# Report where the file went, then echo the generated text for inspection.
print(f"Successfully generated RDDL instance at: {instance_file_path}")
print("\n\n" + rddl_instance)

Successfully generated RDDL instance at: /content/instance.rddl


non-fluents nf_simple_train_model{
    domain = train_system;

    objects{
        train : {t1, t2, t3};
        station : {s1, s2, s3, s4};
    };

    non-fluents{
        NEXT_TRAIN(t1, t2) = true;
        NEXT_TRAIN(t2, t3) = true;
        NEXT_TRAIN(t3, t1) = true;

        DEPOT_STATION(s4) = true;

        NEXT_STATION(s1, s2) = true;
        FIND_NEXT_STATION(s1) = s2;
        NEXT_STATION(s2, s3) = true;
        FIND_NEXT_STATION(s2) = s3;
        NEXT_STATION(s3, s4) = true;
        FIND_NEXT_STATION(s3) = s4;
        FIND_NEXT_STATION(s4) = s4;
        FIRST_TRAIN = t1;

        DRIVE_TIME(s1, s2) = 45;
        DRIVE_TIME(s2, s3) = 53;
        DRIVE_TIME(s3, s4) = 36;

        DISEMBARKING_PRECENTAGE(s1) = 0.12;
        DISEMBARKING_PRECENTAGE(s2) = 0.35;
        DISEMBARKING_PRECENTAGE(s3) = 0.24;
        DISEMBARKING_PRECENTAGE(s4) = 0.29;

        PASSENGER_ARRIVAL_RATE(s1) = 8;
        PASSENGER_ARRIVAL_R

## RDDL usage NEW - Our custom Visualizer

### Visualizer implementation

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, Circle
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from PIL import Image

from pyRDDLGym.core.compiler.model import RDDLPlanningModel

class TrainRouteVisualizer:
    """Matplotlib renderer that draws the train line as a dashed circle.

    Stations are squares placed at equal angles on the circle (coloured by how
    crowded they are, the depot in black) and trains are small circles that
    are re-positioned on every call to `render`.
    """

    def __init__(self, model):
        """Create the figure and cache the non-fluents the drawing needs.

        Args:
            model: planning model exposing `ground_vars_with_values`,
                `non_fluents` and `type_to_objects`.
        """
        self._model = model

        # Reduced figsize and set DPI to avoid exceeding maximum image size
        self.fig, self.ax = plt.subplots(figsize=(5, 5), dpi=80)
        self.route_radius = 0.4  # Radius of the circular route
        self.train_radius = self.route_radius - 0.1  # Slightly inside the route
        self.station_size = 0.05  # Side length of a station square
        self.train_size = 0.01  # Radius of a train marker
        self.x_center = 0.5  # Center of the route
        self.y_center = 0.5  # Center of the route
        self._nonfluents = model.ground_vars_with_values(model.non_fluents)

        self.train_patches = {}  # train id -> (circle patch, label text)
        # Station squares/labels drawn by the last render; they are removed
        # before redrawing so artists do not pile up frame after frame.
        self._station_artists = []

        self.create_route_circle()

        # Drive time keyed by destination station, and the depot station.
        # depot_station stays None when the instance declares no depot.
        self.drive_time_to_dest = {}
        self.depot_station = None
        for name, value in self._nonfluents.items():
            var, objects = RDDLPlanningModel.parse_grounded(name)
            if var == 'DRIVE_TIME':
                _, dest = objects  # Only the destination matters here
                if value > 0:
                    self.drive_time_to_dest[dest] = value
            elif var == 'DEPOT_STATION' and value:
                self.depot_station = objects[0]

    def get_drive_time_to_dest(self, dest):
        """Return the stored drive time into `dest`, or -1 if none is known."""
        return self.drive_time_to_dest.get(dest, -1)

    def create_route_circle(self):
        """Draw the circular route."""
        route_circle = plt.Circle((self.x_center, self.y_center), self.route_radius,
                                  color='blue', fill=False, linestyle='dashed')
        self.ax.add_patch(route_circle)

    def set_station_color(self, station, state):
        """Return a colormap colour encoding how crowded `station` is.

        The crowding level is the number of waiting passengers divided by a
        fixed reference of 1000, mapped through the reversed 'RdYlGn' colormap.
        """
        capacity = 1000
        passengers_at_station = state.get(f"passengers_at_station___{station}", 0)
        crowding_level = passengers_at_station / capacity

        cmap = plt.get_cmap('RdYlGn_r')
        return cmap(crowding_level)

    def set_text_color(self, station_color):
        """Pick 'black' or 'white' so a label stays readable on `station_color`.

        Args:
            station_color: sequence whose first three items are R, G, B in [0, 1].
        """
        r, g, b, *_ = map(float, station_color)
        luminance = 0.299 * r + 0.587 * g + 0.114 * b

        # Black text on bright backgrounds, white text on dark ones.
        return 'black' if luminance > 0.5 else 'white'

    def draw_stations(self, state):
        """Draw stations equally spaced along the route.

        Artists created by the previous call are removed first, so repeated
        renders replace the station drawings instead of stacking new ones on
        top of the old.
        """
        for artist in self._station_artists:
            artist.remove()
        self._station_artists = []

        stations = self._model.type_to_objects['station']
        num_stations = len(stations)
        delta_theta = 2 * np.pi / num_stations  # Angle between stations

        for i, station in enumerate(stations):
            theta = i * delta_theta
            x = self.x_center + self.route_radius * np.cos(theta)
            y = self.y_center + self.route_radius * np.sin(theta)

            if station == self.depot_station:
                station_color = 'black'
                text_color = 'white'
                depot_label = self.ax.text(x, y - 0.1, "Depot", fontsize=10,
                                           ha='center', va='bottom', color='red')
                self._station_artists.append(depot_label)
            else:
                station_color = self.set_station_color(station, state)
                text_color = self.set_text_color(station_color)

            # Station square, centred on the route point.
            square = Rectangle((x - self.station_size / 2, y - self.station_size / 2),
                               self.station_size, self.station_size, color=station_color)
            self.ax.add_patch(square)

            # Station name inside the square.
            label = self.ax.text(x, y - self.station_size / 2, f"{station}", fontsize=10,
                                 ha='center', va='bottom', color=text_color)
            self._station_artists.extend([square, label])

    def _place_train(self, train, x, y, x_text, y_text):
        """Add a train marker at (x, y) with its label at (x_text, y_text)."""
        train_circle = Circle((x, y), self.train_size)
        train_text = self.ax.text(x_text, y_text, f"{train}", fontsize=10,
                                  ha='left', va='center', color='black')
        self.ax.add_patch(train_circle)
        self.train_patches[train] = (train_circle, train_text)

    def draw_train(self, last_train_state, train_state, train, station, station_theta, delta_theta, train_timer, num_in_queue):
        """Draw the train on the route based on its state.

        Args:
            last_train_state: unused; kept for interface compatibility.
            train_state: one of 'train_in_route', 'train_num_at_queue',
                'train_waiting_at_station', 'train_active_at_station',
                'train_finished'.
            train: train identifier (also used as its label).
            station: the train's current station.
            station_theta: angle of that station on the circle.
            delta_theta: angle between two neighbouring stations.
            train_timer: the train's timer value from the state.
            num_in_queue: the train's position in the station queue.
        """
        # Remove the marker drawn for this train on the previous frame.
        previous = self.train_patches.pop(train, None)
        if previous is not None:
            old_patch, old_text = previous
            old_patch.remove()
            old_text.remove()

        # Finished trains simply disappear.
        if train_state == 'train_finished':
            return

        if train_state in ('train_waiting_at_station', 'train_active_at_station'):
            # At the platform: marker on the station, label pushed outward.
            x = self.x_center + self.route_radius * np.cos(station_theta)
            y = self.y_center + self.route_radius * np.sin(station_theta)
            self._place_train(train, x, y,
                              x + 0.07 * np.cos(station_theta),
                              y + 0.07 * np.sin(station_theta))

        elif train_state == 'train_num_at_queue':
            # Queued trains line up towards the centre, one slot per position.
            radius = self.route_radius - 0.05 * num_in_queue
            x = self.x_center + radius * np.cos(station_theta)
            y = self.y_center + radius * np.sin(station_theta)
            self._place_train(train, x, y,
                              x + 0.065 * np.cos(station_theta + np.pi / 2),
                              y + 0.065 * np.sin(station_theta + np.pi / 2))

        elif train_state == 'train_in_route':
            total_drive_time = self.get_drive_time_to_dest(station)
            # No known drive time into this station (e.g. the first station):
            # nothing to interpolate, so the train is not drawn.
            if total_drive_time <= 0:
                return

            # The timer counts down the remaining drive, so the train sits
            # `train_timer` time units "before" the station it is heading to.
            # Trains whose timer exceeds the drive time are not drawn.
            if train_timer <= total_drive_time:
                progress_theta = delta_theta / total_drive_time
                angle = station_theta - progress_theta * train_timer
                x = self.x_center + self.route_radius * np.cos(angle)
                y = self.y_center + self.route_radius * np.sin(angle)
                self._place_train(train, x, y, x + 0.05, y)

    def render(self, state):
        """Render the train route visualization and return it as a PIL image.

        Args:
            state: mapping of grounded fluent names (e.g.
                'current_state___t1') to their current values.
        """
        trains = self._model.type_to_objects['train']
        stations = self._model.type_to_objects['station']
        num_stations = len(stations)
        delta_theta = 2 * np.pi / num_stations  # Angle between stations

        self.draw_stations(state)

        last_train_state = {train: None for train in trains}
        for train in trains:
            state_code = state.get(f"current_state___{train}")
            # Default to 0 so a missing key does not break the comparison below.
            num_in_queue = state.get(f"train_num_at_queue___{train}", 0)

            # Map the numeric domain state to the drawing mode.
            train_state = None
            if state_code == 0:
                train_state = 'train_in_route'
            elif num_in_queue > 0:
                train_state = 'train_num_at_queue'
            elif state_code == 2:
                train_state = 'train_waiting_at_station'
            elif state_code == 3:
                train_state = 'train_active_at_station'
            elif state_code == 4:
                train_state = 'train_finished'

            if not train_state:
                continue

            station = state.get(f"current_station___{train}")
            station_theta = stations.index(station) * delta_theta
            train_timer = state.get(f"train_timer___{train}", 0)

            self.draw_train(last_train_state, train_state, train, station,
                            station_theta, delta_theta, train_timer, num_in_queue)
            last_train_state[train] = train_state

        # convert2img draws the canvas itself, so no separate draw is needed here.
        return self.convert2img(self.fig.canvas)

    def convert2img(self, canvas):
        """Convert Matplotlib figure to a PIL Image."""
        canvas.draw()
        buf, (width, height) = canvas.print_to_buffer()
        img = np.frombuffer(buf, dtype=np.uint8).reshape((height, width, 4))  # RGBA format
        return Image.fromarray(img)


### Our environment wrapper

In [None]:
import gym

class RDDLDecisionWrapper(gym.Wrapper):
    """Expose only the decision points of the wrapped train environment.

    Whenever ``get_active_stations`` reports no station for the current
    observation, the wrapper steps the underlying env by itself with an
    all-zero ``wait`` action and adds the rewards of those steps to the
    reward handed back to the agent. Agent actions are filtered so that only
    the stations reported as active receive the agent's value; all others are
    forced to 0.

    Every underlying env step (reset, agent step, internal skip) is appended
    to ``full_logs``.
    """

    def __init__(self, env):
        super().__init__(env)
        objects_by_type = env.model.type_to_objects
        self.trains = objects_by_type['train']
        self.stations = objects_by_type['station']

        # Integer codes compared against the `current_state___<train>` fluent.
        self.TRAIN_IN_ROUTE = 0
        self.TRAIN_IN_QUEUE = 1
        self.TRAIN_WAITING = 2
        self.TRAIN_ACTIVE = 3

        self.full_logs = []
        self.last_obs = None

    def reset(self, **kwargs):
        """Reset the env and fast-forward to the first decision point.

        Returns:
            ``(obs, info)`` of the first observation for which at least one
            station is active (or of the last skipped step if the episode
            ended while skipping). Rewards collected while skipping are
            discarded.
        """
        self.full_logs = []
        obs, info = self.env.reset(**kwargs)
        self.last_obs = obs.copy()
        self.full_logs.append({'step_type': 'RESET', 'state': obs.copy()})

        if self.get_active_stations(obs):
            return obs, info

        obs, _, terminated, truncated, info = self._skip_forward(0, False, False, info)
        return obs, info

    def step(self, action):
        """Apply the agent's action, then skip ahead to the next decision point.

        Args:
            action: dict mapping ``wait___<station>`` to the chosen value;
                missing keys default to 0.

        Returns:
            The usual ``(obs, reward, terminated, truncated, info)`` tuple.
            ``reward`` includes the rewards of all internally skipped steps
            and ``info['filtered_action']`` holds the action as actually
            applied (``None`` for stations that were not active).
        """
        pre_step_obs = self.last_obs.copy()
        active_stations = self.get_active_stations(pre_step_obs)

        # Build the action sent to the env and its logged counterpart.
        env_action, log_action = {}, {}
        for station in self.stations:
            key = f'wait___{station}'
            if station in active_stations:
                chosen = action.get(key, 0)
                env_action[key] = chosen
                log_action[key] = chosen
            else:
                # Inactive station: the env gets a no-op, the log records "no decision".
                env_action[key] = 0
                log_action[key] = None

        obs, reward, terminated, truncated, info = self.env.step(env_action)
        self.last_obs = obs.copy()
        info['filtered_action'] = log_action

        self.full_logs.append({
            'step_type': 'AGENT_ACTION',
            'state': pre_step_obs,
            'action': log_action,
            'original_action': action,
            'next_state': obs.copy(),
            'reward': reward
        })

        if not (terminated or truncated):
            obs, reward, terminated, truncated, info = self._skip_forward(
                reward, terminated, truncated, info)

        # The skip loop may have replaced `info`, so attach the action again.
        info['filtered_action'] = log_action
        return obs, reward, terminated, truncated, info

    def _skip_forward(self, current_reward, terminated, truncated, info):
        """Step with no-op actions until a station is active or the episode ends.

        Starts from ``self.last_obs`` and returns
        ``(obs, total_reward, terminated, truncated, info)`` where
        ``total_reward`` is ``current_reward`` plus the reward of every
        skipped step.
        """
        obs = self.last_obs
        total_reward = current_reward

        while not (terminated or truncated) and not self.get_active_stations(obs):
            state_before = self.last_obs.copy()
            # A fresh dict per iteration: each log entry keeps its own copy.
            noop_action = {f'wait___{s}': 0 for s in self.stations}

            obs, reward, terminated, truncated, info = self.env.step(noop_action)
            self.last_obs = obs.copy()
            total_reward += reward

            self.full_logs.append({
                'step_type': 'INTERNAL_SKIP',
                'state': state_before,
                'action': noop_action,
                'next_state': obs.copy(),
                'reward': reward
            })

        return obs, total_reward, terminated, truncated, info

    def get_active_stations(self, obs):
        """Return the set of stations that need a decision for ``obs``.

        Only trains whose ``train_timer`` equals the smallest timer below 1e9
        are considered (larger values are ignored - presumably a sentinel for
        trains without a pending event; TODO confirm against the domain).
        A station is reported when such a train
          * is in route to it and the station has no waiting/active occupant,
          * is in route to it and the occupant is active, shares the same
            timer and no train is queued there, or
          * is active at it while another train is queued there.
        """
        timers = {train: obs[f'train_timer___{train}'] for train in self.trains}
        pending = [value for value in timers.values() if value < 1e9]
        if not pending:
            return set()
        global_timer = min(pending)

        # First pass: who occupies each station and which stations have a queue.
        occupant_of = {station: None for station in self.stations}
        queued_at = {station: False for station in self.stations}
        for train in self.trains:
            where = obs[f'current_station___{train}']
            phase = obs[f'current_state___{train}']
            if phase == self.TRAIN_IN_QUEUE:
                queued_at[where] = True
            elif phase in (self.TRAIN_WAITING, self.TRAIN_ACTIVE):
                occupant_of[where] = train

        # Second pass: trains whose event is due now decide which stations are active.
        active = set()
        for train in self.trains:
            if timers[train] != global_timer:
                continue
            phase = obs[f'current_state___{train}']
            where = obs[f'current_station___{train}']

            if phase == self.TRAIN_IN_ROUTE:
                occupant = occupant_of[where]
                if occupant is None:
                    active.add(where)
                elif (obs[f'current_state___{occupant}'] == self.TRAIN_ACTIVE
                        and timers[occupant] == global_timer
                        and not queued_at[where]):
                    active.add(where)

            if phase == self.TRAIN_ACTIVE and queued_at[where]:
                active.add(where)

        return active

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  return datetime.utcnow().replace(tzinfo=utc)


## PPO implementation and usage

### PPO adapter wrapper

In [None]:
import gym
from gym import spaces
import numpy as np

class PPOAdapter(gym.Wrapper):
    """Adapt the dict-based train environment to a flat-vector / discrete-action API.

    Observations (dicts) are flattened to a 1-D ``float32`` vector and a single
    integer action is broadcast to the ``wait___<station>`` action of every
    station of the wrapped env.

    Args:
        env: wrapped environment; must expose ``stations`` and return dict
            observations from ``reset``/``step``.
        num_actions: size of the discrete action space (default 11, the value
            that used to be hard-coded).
    """

    def __init__(self, env, num_actions=11):
        super().__init__(env)
        # One reset is needed up front to learn the flattened observation size.
        sample_obs, _ = self.env.reset()
        self.flat_size = self._dict_to_vec(sample_obs).shape[0]
        print(f"PPO Adapter Initialized. Flattened State Size: {self.flat_size}")

        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.flat_size,), dtype=np.float32)
        self.action_space = spaces.Discrete(num_actions)

    def reset(self, **kwargs):
        """Reset the wrapped env and return ``(flat_obs, info)``."""
        obs, info = self.env.reset(**kwargs)
        return self._dict_to_vec(obs), info

    def step(self, action_int):
        """Apply ``action_int`` as the wait value of every station.

        Returns:
            ``(flat_obs, reward, terminated, truncated, info)``.
        """
        wait_value = int(action_int)
        action_dict = {f'wait___{s}': wait_value for s in self.env.stations}
        obs, reward, terminated, truncated, info = self.env.step(action_dict)
        return self._dict_to_vec(obs), reward, terminated, truncated, info

    def _dict_to_vec(self, obs_dict):
        """Flatten an observation dict into a ``float32`` vector.

        Keys are visited in sorted order so the layout is stable. Per value:
          * strings -> the float formed by their digit characters
            (e.g. ``'s3'`` -> 3.0), or 0.0 if that is not a valid number;
          * scalars / 0-d arrays -> ``float(val)``, or 0.0 if not convertible;
          * arrays -> all elements, flattened;
          * anything else -> 0.0.
        """
        values = []
        for key in sorted(obs_dict.keys()):
            val = obs_dict[key]
            if isinstance(val, str) or np.issubdtype(type(val), np.str_):
                try:
                    num = float(''.join(filter(str.isdigit, str(val))))
                    values.append(num)
                except ValueError:
                    values.append(0.0)
            elif np.isscalar(val) or (isinstance(val, np.ndarray) and val.ndim == 0):
                try:
                    values.append(float(val))
                # Only conversion failures fall back to 0.0; a bare `except`
                # would also swallow KeyboardInterrupt / SystemExit.
                except (TypeError, ValueError, OverflowError):
                    values.append(0.0)
            elif isinstance(val, np.ndarray):
                values.extend(val.flatten().astype(float))
            else:
                values.append(0.0)
        return np.array(values, dtype=np.float32)

### PPO algorithm

In [None]:
import torch
import torch.nn as nn
from torch.distributions import Categorical
import gym
import numpy as np

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class Memory:
    """Rollout buffer: parallel lists holding one entry per collected step."""

    def __init__(self):
        self.actions = []
        self.states = []
        self.logprobs = []
        self.rewards = []
        self.is_terminals = []

    def clear(self):
        """Empty every list in place (the list objects themselves are kept)."""
        for buffer in (self.actions, self.states, self.logprobs,
                       self.rewards, self.is_terminals):
            buffer.clear()

class ActorCritic(nn.Module):
    """Actor and critic as two independent MLPs for a discrete action space.

    Args:
        state_dim: length of the flat observation vector.
        action_dim: number of discrete actions.
        n_latent_var: width of the two hidden layers of each network.
    """

    def __init__(self, state_dim, action_dim, n_latent_var):
        super().__init__()
        # Actor: state -> probability of each action.
        self.action_layer = nn.Sequential(
            nn.Linear(state_dim, n_latent_var), nn.Tanh(),
            nn.Linear(n_latent_var, n_latent_var), nn.Tanh(),
            nn.Linear(n_latent_var, action_dim), nn.Softmax(dim=-1)
        )
        # Critic: state -> scalar value estimate.
        self.value_layer = nn.Sequential(
            nn.Linear(state_dim, n_latent_var), nn.Tanh(),
            nn.Linear(n_latent_var, n_latent_var), nn.Tanh(),
            nn.Linear(n_latent_var, 1)
        )

    def act(self, state, memory):
        """Sample an action for one state and record the step in ``memory``.

        Args:
            state: NumPy observation vector.
            memory: buffer with ``states``, ``actions`` and ``logprobs`` lists;
                the state tensor, sampled action and its log-probability are
                appended to them.

        Returns:
            The sampled action as a Python int.
        """
        state = torch.from_numpy(state).float().to(device)
        # Rollout data is only ever used as constants in the PPO update, so no
        # autograd graph is needed (and none is kept alive in the buffer).
        with torch.no_grad():
            dist = Categorical(self.action_layer(state))
            action = dist.sample()
            logprob = dist.log_prob(action)
        memory.states.append(state)
        memory.actions.append(action)
        memory.logprobs.append(logprob)
        return action.item()

    def evaluate(self, state, action):
        """Score ``action`` under the current policy and estimate state values.

        Args:
            state: tensor whose last dimension is ``state_dim``.
            action: tensor of action indices matching the leading dimensions
                of ``state``.

        Returns:
            ``(action_logprobs, state_values, dist_entropy)``, each shaped like
            ``action``.
        """
        dist = Categorical(self.action_layer(state))
        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_value = self.value_layer(state)
        # Drop only the trailing size-1 value dimension. A plain squeeze()
        # would also remove the batch dimension for a batch of one.
        return action_logprobs, state_value.squeeze(-1), dist_entropy

class PPO:
    """Clipped-surrogate PPO trainer built on two ``ActorCritic`` networks.

    ``policy`` is the network being optimised; ``policy_old`` is the frozen
    copy used to collect rollouts and is re-synchronised after every update.

    Args:
        state_dim: length of the flat observation vector.
        action_dim: number of discrete actions.
        lr: Adam learning rate.
        betas: Adam beta coefficients.
        gamma: discount factor.
        K_epochs: optimisation passes over each collected batch.
        eps_clip: clipping range of the probability ratio.
    """

    def __init__(self, state_dim, action_dim, lr, betas, gamma, K_epochs, eps_clip):
        self.lr, self.betas, self.gamma, self.eps_clip, self.K_epochs = lr, betas, gamma, eps_clip, K_epochs
        self.policy = ActorCritic(state_dim, action_dim, 64).to(device)
        self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=lr, betas=betas)
        self.policy_old = ActorCritic(state_dim, action_dim, 64).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())
        self.MseLoss = nn.MSELoss()

    def select_action(self, state, memory):
        """Sample an action from the rollout policy and record it in ``memory``."""
        return self.policy_old.act(state, memory)

    def update(self, memory):
        """Run ``K_epochs`` PPO optimisation passes over the buffered rollout.

        Args:
            memory: buffer with equally long ``states``, ``actions``,
                ``logprobs``, ``rewards`` and ``is_terminals`` lists. An empty
                buffer is a no-op.
        """
        if not memory.rewards:
            # Nothing collected: torch.stack() on empty lists would raise.
            return

        # Monte-Carlo returns, accumulated backwards and reset at episode ends.
        returns = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(memory.rewards), reversed(memory.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            returns.append(discounted_reward)
        returns.reverse()  # one O(n) reversal instead of insert(0, ...) per step

        rewards = torch.tensor(returns, dtype=torch.float32).to(device)
        if rewards.numel() > 1:
            # The sample std of a single element is NaN and would turn every
            # weight into NaN, so normalise only when there are >= 2 returns.
            rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-5)

        old_states = torch.stack(memory.states).to(device).detach()
        old_actions = torch.stack(memory.actions).to(device).detach()
        old_logprobs = torch.stack(memory.logprobs).to(device).detach()

        for _ in range(self.K_epochs):
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)
            # Probability ratio pi_new / pi_old, computed in log space.
            ratios = torch.exp(logprobs - old_logprobs)
            advantages = rewards - state_values.detach()
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages
            # Clipped policy loss + value loss - entropy bonus.
            loss = -torch.min(surr1, surr2) + 0.5*self.MseLoss(state_values, rewards) - 0.01*dist_entropy
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()
        self.policy_old.load_state_dict(self.policy.state_dict())

    def save_checkpoint(self, path, episode_num):
        """Write rollout-policy weights, optimizer state and episode number to ``path``."""
        torch.save({
            'model_state': self.policy_old.state_dict(),
            'optimizer_state': self.optimizer.state_dict(),
            'episode': episode_num
        }, path)

    def load_checkpoint(self, path):
        """Restore both policies and the optimizer from ``path``.

        Returns:
            The episode number stored in the checkpoint (1 if absent).
        """
        print(f"Loading checkpoint from {path}...")
        # NOTE(review): torch.load unpickles the file - only load checkpoints
        # from a trusted location.
        checkpoint = torch.load(path, map_location=device)

        # Both networks start from the same saved weights.
        self.policy.load_state_dict(checkpoint['model_state'])
        self.policy_old.load_state_dict(checkpoint['model_state'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state'])

        return checkpoint.get('episode', 1)

  return datetime.utcnow().replace(tzinfo=utc)


### PPO interaction loop

In [None]:
import os
from google.colab import drive

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

# Folder on Drive that receives the PPO checkpoints (created if missing).
checkpoint_dir = '/content/drive/My Drive/Train_Scheduler_PPO'
os.makedirs(checkpoint_dir, exist_ok=True)

print(f"Checkpoints will be saved to: {checkpoint_dir}")

Mounted at /content/drive
Checkpoints will be saved to: /content/drive/My Drive/Train_Scheduler_PPO


In [None]:
from pyRDDLGym.core.visualizer.movie import MovieGenerator
import os

# --- CONTROL PANEL ---
FORCE_NEW_RUN = False  # Set to True to ignore saved files and start fresh
# ---------------------

# 1. Setup Environment
# Wrapping order matters: the decision wrapper skips non-decision steps, the
# PPO adapter then flattens observations and exposes a discrete action space.
myEnv = pyRDDLGym.make(domain=base_path+'domain.rddl', instance=base_path+'instance.rddl')
ENV = 'Train_model'
MovieGen = MovieGenerator(base_path, ENV, horizon)
myEnv.set_visualizer(TrainRouteVisualizer, movie_gen=MovieGen, movie_per_episode=True)
myEnv = RDDLDecisionWrapper(myEnv)
myEnv = PPOAdapter(myEnv)

# 2. Hyperparameters
state_dim = myEnv.observation_space.shape[0]
action_dim = int(myEnv.action_space.n)  # keep in sync with the adapter's action space
lr = 0.002
betas = (0.9, 0.999)
gamma = 0.99
K_epochs = 4
eps_clip = 0.2
update_timestep = 2000  # transitions collected between two PPO updates

memory = Memory()
ppo = PPO(state_dim, action_dim, lr, betas, gamma, K_epochs, eps_clip)

# 3. CHECK FOR SAVED FILES & RESUME
latest_model_path = os.path.join(checkpoint_dir, 'latest_model.pth')
best_model_path = os.path.join(checkpoint_dir, 'best_model.pth')

start_episode = 1

if FORCE_NEW_RUN:
    print("!!! FORCE_NEW_RUN is True. Starting fresh (ignoring any saved checkpoints).")
elif os.path.exists(latest_model_path):
    # Load weights AND get the episode number
    loaded_ep = ppo.load_checkpoint(latest_model_path)
    start_episode = loaded_ep + 1
    print(f"RESUMING TRAINING FROM EPISODE {start_episode}")
else:
    print("No checkpoint found. Starting fresh from Episode 1.")

# 4. Training Loop
running_reward = 0
episodes_since_log = 0   # episodes accumulated in running_reward
time_step = 0            # transitions currently buffered in `memory`
total_time_steps = 0     # transitions collected during this run
log_interval = 20
save_interval = 50
max_episodes = 5000
# NOTE(review): best_reward restarts at -inf on every run, so after a resume
# the first episodes overwrite best_model.pth even if the stored model was
# better. Persisting the best reward would require a checkpoint format change.
best_reward = -float('inf')

print(f"STARTING PPO TRAINING ON {device}...")

max_steps_per_episode = myEnv.env.horizon

for i_episode in range(start_episode, max_episodes+1):
    state, _ = myEnv.reset()
    current_ep_reward = 0

    for t in range(max_steps_per_episode):
        time_step += 1
        total_time_steps += 1

        action = ppo.select_action(state, memory)
        state, reward, done, truncated, info = myEnv.step(action)

        # The episode also ends on truncation or when the step budget runs out.
        # Marking those steps keeps discounted returns from leaking into the
        # next episode stored in the same buffer.
        episode_over = bool(done or truncated) or t == max_steps_per_episode - 1

        memory.rewards.append(reward)
        memory.is_terminals.append(episode_over)
        current_ep_reward += reward

        if time_step % update_timestep == 0:
            print(f"   [UPDATE] Learning Step (Total Timesteps: {total_time_steps})...")
            ppo.update(memory)
            memory.clear()
            time_step = 0

        if episode_over: break

    running_reward += current_ep_reward
    episodes_since_log += 1

    # 5. LOGGING AND SAVING
    if current_ep_reward > best_reward:
        best_reward = current_ep_reward
        print(f"   [NEW RECORD] Best Reward: {best_reward:.2f}. Saving best model...")
        # Save best model with current episode
        ppo.save_checkpoint(best_model_path, i_episode)

    # Periodic Save (Standard Checkpoint)
    if i_episode % save_interval == 0:
        print(f"   [CHECKPOINT] Saving latest model to Drive (Episode {i_episode})...")
        ppo.save_checkpoint(latest_model_path, i_episode)

    if i_episode % log_interval == 0:
        # Divide by the episodes actually accumulated: after a resume the
        # first window can be shorter than log_interval.
        avg_reward = running_reward / episodes_since_log
        print(f'Episode {i_episode} \t Avg Reward: {avg_reward:.2f} \t Best: {best_reward:.2f}')
        running_reward = 0
        episodes_since_log = 0

print("TRAINING COMPLETE!")
myEnv.close()

Generating LALR tables
  deprecation(


PPO Adapter Initialized. Flattened State Size: 20
Loading checkpoint from /content/drive/My Drive/Train_Scheduler_PPO/latest_model.pth...


  return datetime.utcnow().replace(tzinfo=utc)


RESUMING TRAINING FROM EPISODE 2151
STARTING PPO TRAINING ON cpu...




   [NEW RECORD] Best Reward: -23.00. Saving best model...
   [NEW RECORD] Best Reward: -19.00. Saving best model...
   [NEW RECORD] Best Reward: -14.00. Saving best model...
   [NEW RECORD] Best Reward: -11.00. Saving best model...
   [NEW RECORD] Best Reward: -8.00. Saving best model...
Episode 2160 	 Avg Reward: -10.20 	 Best: -8.00
Episode 2180 	 Avg Reward: -27.40 	 Best: -8.00
   [CHECKPOINT] Saving latest model to Drive (Episode 2200)...
Episode 2200 	 Avg Reward: -24.50 	 Best: -8.00
Episode 2220 	 Avg Reward: -26.30 	 Best: -8.00
Episode 2240 	 Avg Reward: -25.40 	 Best: -8.00
   [CHECKPOINT] Saving latest model to Drive (Episode 2250)...
Episode 2260 	 Avg Reward: -24.90 	 Best: -8.00
Episode 2280 	 Avg Reward: -23.90 	 Best: -8.00
   [CHECKPOINT] Saving latest model to Drive (Episode 2300)...
Episode 2300 	 Avg Reward: -27.90 	 Best: -8.00
   [UPDATE] Learning Step (Total Timesteps: 2000)...
Episode 2320 	 Avg Reward: -23.25 	 Best: -8.00
Episode 2340 	 Avg Reward: -20.70 	 B

## Debug environment (our old interaction loop)

### Interaction loop - Our viz

In [None]:
# import sys
# import time
# import numpy as np
# from pyRDDLGym.core.visualizer.movie import MovieGenerator
# from pyRDDLGym.core.policy import RandomAgent

# # 1. Setup Environment
# # IMPORTANT: This must be called AFTER Block 1 has written the new domain.rddl file
# myEnv = pyRDDLGym.make(domain=base_path+'domain.rddl', instance=base_path+'instance.rddl')
# ENV = 'Train_model'
# MovieGen = MovieGenerator(base_path, ENV, horizon)
# myEnv.set_visualizer(TrainRouteVisualizer, movie_gen=MovieGen, movie_per_episode=True)
# myEnv = RDDLDecisionWrapper(myEnv)

# # 2. Agent
# agent = RandomAgent(action_space=myEnv.action_space,
#                         num_actions=myEnv.max_allowed_actions,
#                         seed=43)

# gif_name =  ENV+'_vizexample'
# total_reward = 0
# state, _ = myEnv.reset()

# agent_view_log = ""
# bar_length = 50
# total_steps = myEnv.horizon
# step = 0

# # 3. Run Loop
# while True:
#     myEnv.render(to_display=False)

#     action = agent.sample_action()

#     prev_state = state.copy()

#     next_state, reward, terminated, truncated, info = myEnv.step(action)

#     real_action = info.get('filtered_action', action)

#     total_reward += reward

#     agent_view_log += f'\nstep       = {step}\n'
#     agent_view_log += f'state      = {prev_state}\n'
#     agent_view_log += f'action     = {real_action}\n'
#     agent_view_log += f'next state = {next_state}\n'
#     agent_view_log += f'reward     = {reward}\n'

#     state = next_state
#     step += 1

#     percent = 100 * (step / float(total_steps))
#     filled_length = int(bar_length * step // total_steps)
#     bar = '█' * filled_length + '░' * (bar_length - filled_length)
#     sys.stdout.write(f'\r|{bar}| {percent:.1f}%')
#     sys.stdout.flush()

#     if terminated or truncated:
#         break

# agent_view_log += f'\nEpisode ended with Total Reward: {total_reward}\n'
# sys.stdout.write(f'\r|{"█" * bar_length}| 100.0%\n')

# myEnv.close()

# # --- SAVE LOGS ---
# with open('output.txt', 'w') as f:
#     f.write(agent_view_log)

# full_log_str = "FULL DEBUG LOG (Includes Skipped Steps)\n=======================================\n"
# for i, entry in enumerate(myEnv.full_logs):
#     full_log_str += f"\n[Index {i}] Type: {entry['step_type']}\n"
#     if 'state' in entry: full_log_str += f"state      = {entry['state']}\n"
#     if 'action' in entry: full_log_str += f"action     = {entry['action']}\n"
#     if 'next_state' in entry: full_log_str += f"next state = {entry['next_state']}\n"
#     if 'reward' in entry: full_log_str += f"reward     = {entry['reward']}\n"

# with open('full_debug_log.txt', 'w') as f:
#     f.write(full_log_str)

# print("\nLogs generated: 'output.txt' and 'full_debug_log.txt'")

# from moviepy.editor import VideoFileClip
# MovieGen.save_animation(gif_name)
# clip = VideoFileClip(base_path + gif_name + '.gif')
# clip.ipython_display()

### Agent view log

In [None]:
# with open('output.txt', 'r') as f: # Open the file in read mode
#   contents = f.read() # Read the entire contents of the file
#   print(contents)

### Full debug log

In [None]:
# with open('full_debug_log.txt', 'r') as f: # Open the file in read mode
#   contents = f.read() # Read the entire contents of the file
#   print(contents)