# Demonstrating `mobile-env:smart-city`

`mobile-env` is a simple and open environment for training, testing, and evaluating control approaches (such as reinforcement learning agents) in a decentralized metaverse environment.

* `mobile-env:smart-city` is written in pure Python
* It allows simulating various scenarios with moving users in a cellular network with a single base station and multiple stationary sensors
* `mobile-env:smart-city` implements the standard [Gymnasium](https://gymnasium.farama.org/) (previously [OpenAI Gym](https://gym.openai.com/)) interface such that it can be used with all common frameworks for reinforcement learning
* `mobile-env:smart-city` is not restricted to reinforcement learning approaches but can also be used with conventional control approaches or dummy benchmark algorithms
* It can be configured easily (e.g., adjusting number and movement of users, properties of cells, etc.)
* It is also easy to extend `mobile-env:smart-city`, e.g., by implementing different observations, actions, or rewards

As such, `mobile-env:smart-city` is a simple platform for testing RL algorithms in a decentralized metaverse environment.


**Demonstration Steps:**

This demonstration consists of the following steps:

1. Installation and usage of `mobile-env` with dummy actions
2. Configuration of `mobile-env` and adjustment of the observation space (optional)
3. Training a single-agent reinforcement learning approach with [`stable-baselines3`](https://github.com/DLR-RM/stable-baselines3)

In [None]:
# First, install Stable-Baselines3 (only SB3 v2.0.0+ supports Gymnasium) and TensorBoard for training logs
%pip install stable-baselines3==2.0.0 tensorboard

In [None]:
# Importing necessary libraries
import gymnasium
import mobile_env
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_checker import check_env

# predefined small scenarios
from mobile_env.scenarios.smart_city import MComSmartCity

# Easy access to the scenario's default configuration. As the last expression
# of the notebook cell, the returned value is displayed as the cell output.
MComSmartCity.default_config()

In [None]:
from gymnasium.envs.registration import register

# Make the smart-city scenario available to `gymnasium.make` under a custom ID
register(
    id="mobile-smart_city-smart_city_handler-rl-v0",
    # "module:Class" path of the scenario; adjust this if the entry point is different
    entry_point="mobile_env.scenarios.smart_city:MComSmartCity",
    # Default constructor arguments passed to the scenario class
    kwargs=dict(config={}, render_mode=None),
)

In [None]:
import gymnasium as gym

# Collect and show the IDs of every environment currently known to Gymnasium
env_specs = gym.envs.registry.keys()
print(env_specs)

# Fail loudly if the custom smart-city environment is missing from the registry
if 'mobile-smart_city-smart_city_handler-rl-v0' not in env_specs:
    raise AssertionError("Environment not registered correctly")
print("Environment 'mobile-smart_city-smart_city_handler-rl-v0' registered successfully!")

In [None]:
# Create a small mobile environment for a single, centralized control agent.
# Pass rgb_array as render mode so the env can be rendered inside the notebook.
env = gymnasium.make("mobile-smart_city-smart_city_handler-rl-v0", render_mode="rgb_array")

# `gymnasium.make` returns the scenario wrapped in helper wrappers. Read the
# scenario's own attributes from the unwrapped base environment: implicit
# attribute forwarding through wrappers is deprecated in Gymnasium 0.29 and
# removed in 1.0.
print(
    f"\nSmart city environment for RL with {env.unwrapped.NUM_USERS} users, "
    f"{env.unwrapped.NUM_SENSORS} sensors and {env.unwrapped.NUM_STATIONS} cells."
)

In [None]:
# Step 4: Train a Single-Agent Reinforcement Learning Approach

import os
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback, BaseCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_util import make_vec_env
import gymnasium
from stable_baselines3.common.logger import configure
from torch.utils.tensorboard import SummaryWriter

# Custom callback to log additional metrics to TensorBoard
class TensorboardCallback(BaseCallback):
    """Record extra training metrics through the model's logger on every step.

    Two values are recorded under the ``custom/`` prefix:

    * ``custom/number_of_steps``: the callback's ``num_timesteps`` counter.
    * ``custom/reward``: the mean of the rewards returned by the latest
      environment step.

    Args:
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, verbose=1):
        super().__init__(verbose)

    def _on_step(self) -> bool:
        """Record the custom metrics for the current step.

        Returns:
            Always ``True``, so training is never aborted by this callback.
        """
        self.logger.record('custom/number_of_steps', self.num_timesteps)

        # `rewards` is an array with one entry per (vectorized) environment.
        # The TensorBoard output format only writes scalar values, so an array
        # would be dropped silently; reduce it to a plain float first.
        step_rewards = self.locals['rewards']
        self.logger.record('custom/reward', float(np.mean(step_rewards)))

        return True

# Track episode statistics of the training environment via a Monitor wrapper
env = Monitor(env)

# PPO agent with the MLP policy, trained on the monitored environment
model = PPO(
    'MlpPolicy',
    env,
    tensorboard_log='results_sb',
    verbose=1,
)

# Separate, Monitor-wrapped environment used only for periodic evaluation
eval_env = Monitor(
    gymnasium.make("mobile-smart_city-smart_city_handler-rl-v0", render_mode="rgb_array")
)

# Explicit logger configuration (optional): write TensorBoard event files
# into the results directory and attach that logger to the model
log_dir = "results_sb"
new_logger = configure(log_dir, ["tensorboard"])
model.set_logger(new_logger)

# Callbacks used during training:
# - periodic evaluation on `eval_env`, keeping the best model so far
# - periodic checkpoints of the model
# - the custom TensorBoard metrics defined above
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path='./logs/best_model',
    log_path='./logs/results',
    eval_freq=500,
)
checkpoint_callback = CheckpointCallback(
    save_freq=1000,
    save_path='./logs/',
    name_prefix='ppo_model',
)
tensorboard_callback = TensorboardCallback()

# Run training with all three callbacks attached
training_callbacks = [eval_callback, checkpoint_callback, tensorboard_callback]
model.learn(total_timesteps=30000, callback=training_callbacks)

# Persist the trained agent so it can be reloaded later
model.save("ppo_mobile_env")

# To inspect the logs, run `tensorboard --logdir results_sb` in a terminal

In [None]:
# Step 5: Test the Trained Model

# Restore the PPO agent from the file written by `model.save("ppo_mobile_env")`
model = PPO.load(path="ppo_mobile_env")

In [None]:
# The scenario-specific helper methods live on the base environment, not on the
# wrappers around it, so call them on `env.unwrapped`. Implicit attribute
# forwarding through wrappers is deprecated in Gymnasium 0.29 and removed in 1.0.
queue_lengths = np.array(env.unwrapped.get_queue_lengths()).ravel()
print(f"Queue lengths shape: {queue_lengths.shape}, values: {queue_lengths}")

resource_utilization = np.array(env.unwrapped.get_resource_utilization()).ravel()
print(f"Resource utilization shape: {resource_utilization.shape}, values: {resource_utilization}")

# The loaded model can only be used if both observation spaces match
print(f"Environment observation space: {env.observation_space}")
print(f"Model observation space: {model.observation_space}")

In [None]:
# Step 6: Test the model in the environment

import matplotlib.pyplot as plt
from IPython import display

done = False
obs, info = env.reset()

total_episode_reward = 0
total_reward_over_time = []  # Cumulative reward after each time step

for step in range(100):
    # Use the trained model to predict the action for the current observation
    action, _states = model.predict(obs)

    # Take the action in the environment
    obs, reward, terminated, truncated, info = env.step(action)

    total_episode_reward += reward
    total_reward_over_time.append(total_episode_reward)

    # Print the full observation and the reward of this step
    print(f"Step {step+1} | Action: {action} | Observation: {obs} | Reward: {reward}")

    # Render the environment inside the notebook
    plt.imshow(env.render())
    display.display(plt.gcf())
    display.clear_output(wait=True)

    # Stop once the episode is over: a finished environment must be reset
    # before it may be stepped again (the Monitor wrapper raises otherwise).
    done = terminated or truncated
    if done:
        break
    


In [None]:
import matplotlib.pyplot as plt

# Cumulative reward collected during the test run, one point per time step
time_steps = range(1, len(total_reward_over_time) + 1)

plt.figure(figsize=(10, 6))
plt.plot(time_steps, total_reward_over_time, marker='o')
plt.title('Total Reward Over Time')
plt.xlabel('Time Step')
plt.ylabel('Total Reward')
plt.grid(True)
plt.show()

In [None]:
# Step 7: Plot Results

# Example of plotting some metrics
import matplotlib.pyplot as plt

# Plot the real per-episode returns instead of random placeholder values.
# `env` was wrapped in a Monitor before training, and the Monitor records the
# total reward of every finished episode.
rewards = env.get_episode_rewards()

if not rewards:
    print("No finished episodes recorded yet; run the training cell first.")

plt.plot(rewards)
plt.title("Reward Over Episodes")
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.show()