In [1]:
# Reinforcement Learning Setup for Multi-Modal Transport (Gym-like Environment)

import pandas as pd
import numpy as np
import random
from itertools import product
import gym
from gym import spaces

KeyboardInterrupt: 

In [None]:
# Load dataset
# Read the trip spreadsheet into a DataFrame. The path is relative, so the
# file is looked up from the kernel's current working directory.
# The formatting cell that follows reads the columns traffic_<segment> and
# <mode>_<segment> (segments AB/BC/CD, modes taxi/bus/bike) from this frame.
df = pd.read_excel("trainingtestingset_latest.xlsx")


In [None]:
# Format environment data per trip
segments = ['AB', 'BC', 'CD']
modes = ['taxi', 'bus', 'bike']

# Build one episode dict per spreadsheet row, shaped as
#   {'traffic': {segment: level},
#    'travel_time': {segment: {mode: time}}}
# Column names follow the patterns traffic_<segment> and <mode>_<segment>,
# so they are derived from `segments` and `modes` instead of being spelled
# out one by one.
#
# Rows come from to_dict(orient='records') rather than iterrows():
# iterrows() packs every row into a single Series, which does not preserve
# per-column dtypes, so an integer traffic column would be returned as float
# whenever the travel-time columns are float.
env_data = [
    {
        'traffic': {seg: record[f'traffic_{seg}'] for seg in segments},
        'travel_time': {
            seg: {mode: record[f'{mode}_{seg}'] for mode in modes}
            for seg in segments
        },
    }
    for record in df.to_dict(orient='records')
]

In [None]:
# Define custom environment class
class TransportEnv(gym.Env):
    """Gym-style environment for choosing a transport mode on each trip segment.

    One episode is one trip drawn at random from ``data``. The trip is
    travelled segment by segment in the order of the module-level ``segments``
    list. At every step the agent picks an index into the module-level
    ``modes`` list and receives the negative travel time of that mode on the
    current segment as reward.

    Observations are integer arrays ``[segment_index, traffic_level]``.
    """

    def __init__(self, data):
        """Store the trips, declare the spaces and start a first episode.

        Args:
            data: Sequence of trip dicts shaped like
                ``{'traffic': {segment: level},
                'travel_time': {segment: {mode: time}}}``.

        Raises:
            ValueError: If ``data`` contains no trips.
        """
        super().__init__()
        # Fail early with a clear message; otherwise reset() would surface an
        # opaque "empty range" ValueError from random.randint.
        if len(data) == 0:
            raise ValueError("TransportEnv needs at least one trip in `data`")
        self.data = data
        self.action_space = spaces.Discrete(len(modes))  # taxi, bus, bike
        # segment index (0-2), traffic level (noted as 1-3 by the author, so 4 slots)
        self.observation_space = spaces.MultiDiscrete([len(segments), 4])
        self.reset()

    def reset(self):
        """Pick a random trip, rewind to the first segment and return its state."""
        self.index = random.randint(0, len(self.data) - 1)
        self.segment = 0
        self.total_time = 0
        self.trip = self.data[self.index]
        return self._get_state()

    def _get_state(self):
        """Return ``[segment_index, traffic_level]`` for the current segment."""
        # Use the same segment list as step() so the two can never disagree.
        segment_key = segments[self.segment]
        traffic_level = self.trip['traffic'][segment_key]
        # Force an integer dtype: a float traffic value (e.g. from a float
        # spreadsheet column) would otherwise yield a float array that does
        # not match the discrete observation space.
        return np.array([self.segment, traffic_level], dtype=np.int64)

    def step(self, action):
        """Travel the current segment with the chosen mode.

        Args:
            action: Index into the module-level ``modes`` list.

        Returns:
            Tuple ``(next_state, reward, done, info)`` where ``reward`` is the
            negative travel time of the segment just travelled, ``done`` is
            True once the last segment has been travelled, ``next_state`` is
            ``[0, 0]`` when done, and ``info`` is an empty dict.

        Raises:
            IndexError: If called after the episode has finished without a
                ``reset()``, or if ``action`` is past the end of ``modes``.
        """
        mode = modes[action]
        segment_key = segments[self.segment]
        time_cost = self.trip['travel_time'][segment_key][mode]

        # Shorter travel time means higher (less negative) reward.
        reward = -time_cost
        self.total_time += time_cost

        self.segment += 1
        done = self.segment >= len(segments)
        # There is no segment left to observe at the end, so return a
        # placeholder with the same dtype as regular observations.
        next_state = np.zeros(2, dtype=np.int64) if done else self._get_state()

        return next_state, reward, done, {}

    def render(self):
        """Print the current segment index and the accumulated travel time."""
        print(f"Segment: {self.segment}, Total Time: {self.total_time}")

In [None]:
# Instantiate and test environment
env = TransportEnv(env_data)

# Manual agent loop (random actions): play a few episodes with uniformly
# sampled actions and report the total travel time of each one.
episodes = 5
for ep in range(episodes):
    state = env.reset()
    total_reward = 0
    while True:
        action = env.action_space.sample()  # random agent
        next_state, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
    # Rewards are negative travel times, so flip the sign for reporting.
    print(f"Episode {ep+1} finished. Total Travel Time: {-total_reward:.2f} mins")
