# Demonstrating `mobile-env:smart-city`

`mobile-env` is a simple and open environment for training, testing, and evaluating control approaches (e.g., reinforcement learning agents) in a decentralized metaverse environment.

* `mobile-env:smart-city` is written in pure Python
* It allows simulating various scenarios with moving users in a cellular network with a single base station and multiple stationary sensors
* `mobile-env:smart-city` implements the standard [Gymnasium](https://gymnasium.farama.org/) (previously [OpenAI Gym](https://gym.openai.com/)) interface such that it can be used with all common frameworks for reinforcement learning
* `mobile-env:smart-city` is not restricted to reinforcement learning approaches but can also be used with conventional control approaches or dummy benchmark algorithms
* It can be configured easily (e.g., adjusting number and movement of users, properties of cells, etc.)
* It is also easy to extend `mobile-env:smart-city`, e.g., by implementing different observations, actions, or rewards

As such, `mobile-env:smart-city` is a simple platform to test RL algorithms in a decentralized metaverse environment.


**Demonstration Steps:**

This demonstration consists of the following steps:

1. Installation and usage of `mobile-env` with dummy actions
2. Configuration of `mobile-env` and adjustment of the observation space (optional)
3. Training a single-agent reinforcement learning approach with [`stable-baselines3`](https://github.com/DLR-RM/stable-baselines3)

In [1]:
# First, install stable baselines; only SB3 v2.0.0+ supports Gymnasium
%pip install stable-baselines3==2.0.0 tensorboard

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Importing necessary libraries
import gymnasium
import mobile_env
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_checker import check_env

# predefined small scenarios
from mobile_env.scenarios.smart_city import MComSmartCity

# easy access to the default configuration; as the last expression of the cell,
# the returned dict is displayed as the cell output
MComSmartCity.default_config()

2024-11-16 23:21:47.209066: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731795707.228413    6931 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731795707.233650    6931 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-16 23:21:47.255860: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


{'width': 200,
 'height': 200,
 'EP_MAX_TIME': 100,
 'seed': 666,
 'reset_rng_episode': False,
 'arrival': mobile_env.core.arrival.NoDeparture,
 'channel': mobile_env.core.channels.OkumuraHata,
 'scheduler': mobile_env.core.schedules.RoundRobin,
 'movement': mobile_env.core.movement.RandomWaypointMovement,
 'utility': mobile_env.core.utilities.BoundedLogUtility,
 'handler': mobile_env.handlers.smart_city_handler.MComSmartCityHandler,
 'bs': {'bw': 100000000.0,
  'freq': 2500,
  'tx': 40,
  'height': 50,
  'computational_power': 100},
 'ue': {'velocity': 1.5, 'snr_tr': 2e-08, 'noise': 1e-09, 'height': 1.5},
 'sensor': {'height': 1.5,
  'snr_tr': 2e-08,
  'noise': 1e-09,
  'velocity': 0,
  'radius': 500,
  'logs': {}},
 'ue_job': {'job_generation_probability': 0.7,
  'communication_job_lambda_value': 2.875,
  'computation_job_lambda_value': 10.0},
 'sensor_job': {'communication_job_lambda_value': 1.125,
  'computation_job_lambda_value': 5.0},
 'e2e_delay_threshold': 5,
 'reward_calculati

In [3]:
from gymnasium.envs.registration import register

# Make the smart-city scenario available to gymnasium.make() under its RL-specific ID
register(
    id="mobile-smart_city-smart_city_handler-rl-v0",
    # Adjust this if the entry point is different
    entry_point="mobile_env.scenarios.smart_city:MComSmartCity",
    kwargs=dict(config={}, render_mode=None),
)

In [4]:
import gymnasium as gym

# Collect and show the IDs of every environment currently in the registry
env_specs = gym.envs.registry.keys()
print(env_specs)

# Abort the cell if the smart-city RL environment did not make it into the registry
assert (
    "mobile-smart_city-smart_city_handler-rl-v0" in env_specs
), "Environment not registered correctly"
print("Environment 'mobile-smart_city-smart_city_handler-rl-v0' registered successfully!")

dict_keys(['CartPole-v0', 'CartPole-v1', 'MountainCar-v0', 'MountainCarContinuous-v0', 'Pendulum-v1', 'Acrobot-v1', 'CartPoleJax-v0', 'CartPoleJax-v1', 'PendulumJax-v0', 'LunarLander-v2', 'LunarLanderContinuous-v2', 'BipedalWalker-v3', 'BipedalWalkerHardcore-v3', 'CarRacing-v2', 'Blackjack-v1', 'FrozenLake-v1', 'FrozenLake8x8-v1', 'CliffWalking-v0', 'Taxi-v3', 'Jax-Blackjack-v0', 'Reacher-v2', 'Reacher-v4', 'Pusher-v2', 'Pusher-v4', 'InvertedPendulum-v2', 'InvertedPendulum-v4', 'InvertedDoublePendulum-v2', 'InvertedDoublePendulum-v4', 'HalfCheetah-v2', 'HalfCheetah-v3', 'HalfCheetah-v4', 'Hopper-v2', 'Hopper-v3', 'Hopper-v4', 'Swimmer-v2', 'Swimmer-v3', 'Swimmer-v4', 'Walker2d-v2', 'Walker2d-v3', 'Walker2d-v4', 'Ant-v2', 'Ant-v3', 'Ant-v4', 'Humanoid-v2', 'Humanoid-v3', 'Humanoid-v4', 'HumanoidStandup-v2', 'HumanoidStandup-v4', 'GymV21Environment-v0', 'GymV26Environment-v0', 'mobile-smart_city-smart_city_handler-v0', 'mobile-smart_city-smart_city_handler-rl-v0'])
Environment 'mobile-sm

In [5]:
# create a small mobile environment for a single, centralized control agent
# pass rgb_array as render mode so the env can be rendered inside the notebook
env = gymnasium.make("mobile-smart_city-smart_city_handler-rl-v0", render_mode="rgb_array")

# gymnasium.make() returns the scenario wrapped in helper wrappers. Read the custom
# attributes from the unwrapped env: Gymnasium >= 1.0 no longer forwards them
# through wrappers, while `unwrapped` works on every version.
print(f"\nSmart city environment for RL with {env.unwrapped.NUM_USERS} users, {env.unwrapped.NUM_SENSORS} sensors and {env.unwrapped.NUM_STATIONS} cells.")


Smart city environment for RL with 10 users, 20 sensors and 1 cells.


In [8]:
# Step 4: Train a single-agent reinforcement learning model

import gymnasium
from gymnasium.wrappers import TimeLimit
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback, BaseCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure

# Custom TensorBoard Callback
class TensorboardCallback(BaseCallback):
    """Callback that records the latest step reward under ``custom/reward``."""

    def __init__(self, verbose=1):
        super().__init__(verbose)

    def _on_step(self) -> bool:
        """Record the last entry of ``self.locals['rewards']`` (0 if absent).

        Always returns ``True``.
        """
        latest_reward = 0
        if 'rewards' in self.locals:
            latest_reward = self.locals['rewards'][-1]
        self.logger.record('custom/reward', latest_reward)
        return True

# Wrap the environment in a TimeLimit wrapper to cap the number of timesteps per episode (default: 200)
def wrap_environment(env_name, max_episode_steps=200):
    """Create the registered environment ``env_name`` and cap its episode length.

    Returns the environment produced by ``gymnasium.make`` wrapped in
    ``TimeLimit`` with the given ``max_episode_steps``.
    """
    return TimeLimit(gymnasium.make(env_name), max_episode_steps=max_episode_steps)

# Train RL Model
def train_rl_model(env_name, eval_env_name, total_timesteps=10000,
                   max_episode_steps=100, log_dir="results_sb"):
    """Train a PPO RL model with callbacks and logging.

    Args:
        env_name: ID of the registered environment used for training.
        eval_env_name: ID of the registered environment used for evaluation.
        total_timesteps: Number of timesteps passed to ``model.learn``.
        max_episode_steps: Episode cap applied to both environments via
            ``wrap_environment``.
        log_dir: Directory the TensorBoard logger writes to.

    Returns:
        The trained PPO model. It is also saved as ``"ppo_smartcity_model"``.
    """
    # Build training and evaluation envs the same way: TimeLimit first, then
    # Monitor so episode statistics are recorded for logging
    env = Monitor(wrap_environment(env_name, max_episode_steps=max_episode_steps))
    eval_env = Monitor(wrap_environment(eval_env_name, max_episode_steps=max_episode_steps))

    # Logger setup (TensorBoard output only)
    new_logger = configure(log_dir, ["tensorboard"])

    # Define model
    model = PPO("MlpPolicy", env, tensorboard_log=log_dir, verbose=1)
    model.set_logger(new_logger)

    # Define callbacks: periodic evaluation (keeps the best model), periodic
    # checkpoints, and the custom per-step reward logging
    eval_callback = EvalCallback(
        eval_env,
        best_model_save_path='./logs/best_model',
        log_path='./logs/results',
        eval_freq=500,
    )
    checkpoint_callback = CheckpointCallback(
        save_freq=1000,
        save_path='./logs/checkpoints/',
        name_prefix='ppo_smartcity',
    )
    tensorboard_callback = TensorboardCallback()

    # Train model
    print("Starting training...")
    model.learn(
        total_timesteps=total_timesteps,
        callback=[eval_callback, checkpoint_callback, tensorboard_callback],
    )
    print("Training finished!")

    # Save the trained model
    model.save("ppo_smartcity_model")
    return model


# To visualize the logs, run `tensorboard --logdir results_sb` in your terminal

In [7]:
# Train model
trained_model = train_rl_model("mobile-smart_city-smart_city_handler-rl-v0", "mobile-smart_city-smart_city_handler-rl-v0")

Using cpu device
Wrapping the env in a DummyVecEnv.
Starting training...
Eval num_timesteps=500, episode_reward=0.00 +/- 0.00
Episode length: 100.00 +/- 0.00
New best mean reward!
Eval num_timesteps=1000, episode_reward=0.00 +/- 0.00
Episode length: 100.00 +/- 0.00
Eval num_timesteps=1500, episode_reward=0.00 +/- 0.00
Episode length: 100.00 +/- 0.00
Eval num_timesteps=2000, episode_reward=0.00 +/- 0.00
Episode length: 100.00 +/- 0.00
Eval num_timesteps=2500, episode_reward=7.40 +/- 3.72
Episode length: 100.00 +/- 0.00
New best mean reward!
Eval num_timesteps=3000, episode_reward=3.60 +/- 4.41
Episode length: 100.00 +/- 0.00
Eval num_timesteps=3500, episode_reward=11.02 +/- 8.43
Episode length: 100.00 +/- 0.00
New best mean reward!
Eval num_timesteps=4000, episode_reward=9.20 +/- 5.71
Episode length: 100.00 +/- 0.00
Eval num_timesteps=4500, episode_reward=0.00 +/- 0.00
Episode length: 100.00 +/- 0.00
Eval num_timesteps=5000, episode_reward=0.00 +/- 0.00
Episode length: 100.00 +/- 0.00
E

In [None]:
# Step 5: Test the Trained Model

# Load the saved model; the name must match the one train_rl_model() passes to
# model.save() ("ppo_smartcity_model")
model = PPO.load("ppo_smartcity_model")

In [None]:
# Call the scenario-specific helpers on the unwrapped env: gymnasium.make() returns
# wrappers, and Gymnasium >= 1.0 no longer forwards custom methods through them
queue_lengths = np.array(env.unwrapped.get_queue_lengths()).ravel()
print(f"Queue lengths shape: {queue_lengths.shape}, values: {queue_lengths}")

resource_utilization = np.array(env.unwrapped.get_resource_utilization()).ravel()
print(f"Resource utilization shape: {resource_utilization.shape}, values: {resource_utilization}")

# Print both observation spaces side by side for comparison
print(f"Environment observation space: {env.observation_space}")
print(f"Model observation space: {model.observation_space}")

In [None]:
# Step 6: Test the model in the environment

import matplotlib.pyplot as plt
from IPython import display

done = False
obs, info = env.reset()

total_episode_reward = 0
total_reward_over_time = []  # Cumulative reward after each time step

for step in range(100):
    # Use the trained model to predict the action
    action, _states = model.predict(obs)

    # Take the action in the environment
    obs, reward, terminated, truncated, info = env.step(action)

    total_episode_reward += reward
    total_reward_over_time.append(total_episode_reward)

    # Print observation and reward
    # NOTE(review): obs[0] shows only the first entry of the observation - confirm this is intended
    print(f"Step {step+1} | Action: {action} | Observation: {obs[0]} | Reward: {reward}")

    # render the environment
    plt.imshow(env.render())
    display.display(plt.gcf())
    display.clear_output(wait=True)

    # Stop once the episode has ended: an env must be reset before it is
    # stepped again after termination or truncation
    done = terminated or truncated
    if done:
        break
    


In [None]:
import matplotlib.pyplot as plt

# Plot the cumulative reward collected after each time step (x-axis is 1-based)
plt.figure(figsize=(10, 6))
plt.plot(
    range(1, len(total_reward_over_time) + 1),
    total_reward_over_time,
    marker='o',
)
plt.grid(True)
plt.xlabel('Time Step')
plt.ylabel('Total Reward')
plt.title('Total Reward Over Time')
plt.show()

In [None]:
# Step 7: Plot Results

# Example of plotting some metrics
import matplotlib.pyplot as plt

# Placeholder data only: 100 random values in [-1, 1) stand in for per-episode rewards.
# Replace `rewards` with rewards actually recorded during training or evaluation.
rewards = [np.random.uniform(-1, 1) for _ in range(100)]  # Replace with actual data

plt.plot(rewards)
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.title("Reward Over Episodes")
plt.show()