<a href="https://colab.research.google.com/github/dhivyadharsin/dhivyadharsin/blob/main/mec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Save utils.py inside Google Colab session
%%writefile utils.py

import numpy as np
import pickle  # for saving/loading data

# Utility functions
def dBm(dBm):
    """Convert a level in dBm to linear scale: 10 ** ((dBm - 30) / 10)."""
    exponent = (dBm - 30) / 10
    return 10 ** exponent


def dB(dB):
    """Convert a decibel value to a linear ratio: 10 ** (dB / 10)."""
    exponent = dB / 10
    return 10 ** exponent


def to_dB(x):
    """Convert a linear ratio to decibels: 10 * log10(x)."""
    log_ratio = np.log10(x)
    return 10 * log_ratio


def MHz(Mhz):
    """Scale a value given in MHz by 10**6."""
    scale = 10 ** 6
    return Mhz * scale


def GHz(GHz):
    """Scale a value given in GHz by 10**9."""
    scale = 10 ** 9
    return GHz * scale


def msec(msec):
    """Scale a value given in milliseconds by 10**-3."""
    scale = 10 ** (-3)
    return msec * scale


def Mbits(Mbits):
    """Scale a value given in Mbits by 10**6."""
    scale = 10 ** 6
    return Mbits * scale


def mW(mW):
    """Scale a value given in milliwatts by 10**-3."""
    scale = 10 ** (-3)
    return mW * scale

# Save and load functions
def save_data(obj, filepath):
    """Pickle ``obj`` into the file at ``filepath`` (opened for binary writing).

    The highest pickle protocol supported by the running interpreter is used.
    """
    with open(filepath, mode="wb") as sink:
        pickle.Pickler(sink, protocol=pickle.HIGHEST_PROTOCOL).dump(obj)

def load_data(filepath):
    """Open ``filepath`` for binary reading and return the object unpickled from it.

    Caution: unpickling can execute arbitrary code, so only load files you trust.
    """
    with open(filepath, mode="rb") as source:
        restored = pickle.load(source)
    return restored

# NOTE: this statement is part of the written utils.py module body, so it executes every
# time utils is imported or reloaded -- not at the moment the file is written to disk.
print("✅ utils.py saved successfully!")


Writing utils.py


In [2]:
%%writefile system_paras.py

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from utils import MHz, dBm, dB  # ✅ Import functions from utils.py

# --- Time horizon -----------------------------------------------------------
num_slots = 15000                               # number of simulated slots
slot_len = 10e-3                                # length of one slot (presumably seconds -- confirm)
total_time = int(slot_len * num_slots)          # slots x slot length, truncated to an integer
time_max = np.int64(total_time / slot_len) + 1  # slot count implied by total_time, plus one

# --- Users and uplink bandwidth ---------------------------------------------
num_users = 8
limit_channel_UAV = 2
limit_channel_BS = 2
bw_uplink = MHz(0.2)                            # MHz() comes from utils.py
bw_total_uav = limit_channel_UAV * bw_uplink
bw_total_mbs = limit_channel_BS * bw_uplink

# --- Noise and path loss ----------------------------------------------------
noise_density = dBm(-174)                       # converted to linear scale by utils.dBm
sigma_sq_uav = noise_density * bw_total_uav
sigma_sq_mbs = noise_density * bw_total_mbs
g0 = dB(-50)                                    # converted to linear scale by utils.dB
gamma = 2.7601

# --- Computing --------------------------------------------------------------
cycles_per_bit = 737.5
kappa = 0.1e-27
Vlyapunov = 1e9

# --- Neural network ---------------------------------------------------------
learning_rate = 1e-3
training_interval = 20
epochs = 1
Memory = 1024
batch_size = 256

# Unfitted scaler instance exposed at module level for importers of this file.
scaler = MinMaxScaler()
print("✅ system_paras.py saved successfully!")

Writing system_paras.py


In [3]:
import importlib
import utils
import system_paras

# Re-execute both generated modules so that changes written to utils.py / system_paras.py
# are picked up without restarting the kernel. utils is reloaded first because
# system_paras imports names from it. Each reload re-runs the module body, so the
# module-level print in each file fires again.
importlib.reload(utils)
importlib.reload(system_paras)

print("✅ Modules imported successfully!")



✅ utils.py saved successfully!
✅ system_paras.py saved successfully!
✅ utils.py saved successfully!
✅ system_paras.py saved successfully!
✅ Modules imported successfully!


In [4]:
# ✅ Install necessary libraries in Google Colab (if not installed)
!pip install tensorflow numpy matplotlib pandas scikit-learn -q

# ✅ Import Libraries
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import random
import time

print(f"✅ TensorFlow Version: {tf.__version__}")
print("✅ Required libraries imported.")


✅ TensorFlow Version: 2.18.0
✅ Required libraries imported.


In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import random
import matplotlib.pyplot as plt

In [6]:
class MECEnvironment:
    """Toy MEC simulator with random task arrivals, per-user queues and energy accounting.

    Every call to :meth:`step` adds random arrivals to each user's queue, drains the
    queue according to that user's action (1 = offload, anything else = local
    execution), accumulates a random energy cost, and returns the resulting queue
    lengths together with a scalar reward.

    The arrays returned by :meth:`reset` and :meth:`step` are copies of the internal
    state, so callers can keep them (e.g. in a replay buffer) without them being
    overwritten by later steps.
    """

    def __init__(self, num_users=8, max_queue=1000, bandwidth=10, energy_limit=500):
        """Create the environment.

        Args:
            num_users: Number of users, i.e. length of the state and action vectors.
            max_queue: Queue length below which a user counts towards the success rate.
            bandwidth: Total bandwidth; an offloading user drains
                ``bandwidth // num_users`` tasks per step.
            energy_limit: Stored on the instance; NOTE(review): not used by ``step``.
        """
        self.num_users = num_users
        self.max_queue = max_queue
        self.bandwidth = bandwidth
        self.energy_limit = energy_limit
        self.queue_lengths = np.zeros(num_users)  # Task queue per user
        self.energy_usage = np.zeros(num_users)  # Cumulative energy per user

    def step(self, actions):
        """Advance the simulation by one step.

        Args:
            actions: Per-user decisions, flattened to 1-D; entry ``i`` applies to user
                ``i`` (1 = offload, otherwise local execution). Must contain at least
                ``num_users`` entries.

        Returns:
            tuple: ``(queue_lengths, reward)`` where ``queue_lengths`` is a fresh copy
            of the per-user queue array and ``reward`` is a scalar.
        """
        actions = np.array(actions).flatten()

        new_tasks = np.random.randint(1, 10, size=self.num_users)  # New task arrivals
        self.queue_lengths += new_tasks

        # Tasks an offloading user can drain in one step (loop-invariant).
        offload_capacity = self.bandwidth // self.num_users

        # The per-user loop (and its draw order) is kept so seeded runs stay reproducible.
        for i in range(self.num_users):
            if actions[i] == 1:  # Offload
                self.queue_lengths[i] -= min(self.queue_lengths[i], offload_capacity)
                self.energy_usage[i] += np.random.uniform(5, 15)  # Energy cost of offloading
            else:  # Local execution
                self.queue_lengths[i] -= min(self.queue_lengths[i], 5)  # Local CPU processing
                self.energy_usage[i] += np.random.uniform(2, 10)  # Energy cost

        self.queue_lengths = np.maximum(self.queue_lengths, 0)  # Prevent negative queues

        # Reward terms.
        latency = np.mean(self.queue_lengths)
        energy_cost = np.mean(self.energy_usage)  # cumulative since the last reset
        task_success_rate = np.sum(self.queue_lengths < self.max_queue) / self.num_users
        utilization = np.mean(self.bandwidth / (self.queue_lengths + 1))

        # NOTE(review): utilization carries the same sign as latency and energy here,
        # i.e. it is penalised; confirm that this is intended.
        reward = -(0.3 * latency + 0.2 * energy_cost - 0.3 * task_success_rate + 0.2 * utilization)

        # Return a copy: the internal array is mutated in place by the next step(),
        # which would otherwise overwrite states the caller has stored.
        return self.queue_lengths.copy(), reward

    def reset(self):
        """Zero all queues and energy counters and return a copy of the initial state."""
        self.queue_lengths = np.zeros(self.num_users)
        self.energy_usage = np.zeros(self.num_users)
        return self.queue_lengths.copy()


In [7]:
class DRLAgent:
    """Deep Q-learning agent that picks one discrete action per user.

    The network maps a state vector to a ``(num_users, action_size)`` table of
    Q-values: one row per user, one column per choice available to that user
    (``action_size=2`` for a binary offload / local decision). Both the exploratory
    and the greedy branch of :meth:`act` therefore return ``num_users`` actions, and
    :meth:`train` updates, for every user, the Q-value of the action that user took.
    """

    MEMORY_LIMIT = 1000  # maximum number of transitions kept in replay memory

    def __init__(self, state_size, action_size, num_users, learning_rate=0.001, gamma=0.95, epsilon=1.0, epsilon_decay=0.995):
        """Store hyper-parameters and build the Q-network.

        Args:
            state_size: Length of the state vector.
            action_size: Number of discrete choices available to each user.
            num_users: Number of users that receive an action on every step.
            learning_rate: Adam learning rate.
            gamma: Discount factor.
            epsilon: Initial exploration rate.
            epsilon_decay: Multiplicative decay applied to ``epsilon`` per training call.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.num_users = num_users
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = 0.01
        self.learning_rate = learning_rate
        self.memory = []
        self.batch_size = 32

        self.model = self.build_model()

    def build_model(self):
        """Build the Q-network with output shape ``(num_users, action_size)`` per sample."""
        model = tf.keras.Sequential([
            layers.Input(shape=(self.state_size,)),
            layers.Dense(64, activation='relu'),
            layers.Dense(64, activation='relu'),
            # One block of action_size Q-values per user, reshaped into a table.
            layers.Dense(self.num_users * self.action_size, activation='linear'),
            layers.Reshape((self.num_users, self.action_size)),
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def remember(self, state, action, reward, next_state):
        """Append one transition to replay memory, dropping the oldest when full.

        States are copied so later in-place changes by the caller cannot alter stored
        experience.

        Raises:
            ValueError: If ``action`` does not contain exactly ``num_users`` entries.
        """
        action = np.array(action, dtype=np.int64).reshape(-1)
        if action.size != self.num_users:
            raise ValueError(
                f"expected {self.num_users} per-user actions, got {action.size}"
            )
        state = np.array(state, dtype=np.float32).reshape(-1)
        next_state = np.array(next_state, dtype=np.float32).reshape(-1)
        self.memory.append((state, action, float(reward), next_state))
        if len(self.memory) > self.MEMORY_LIMIT:
            self.memory.pop(0)

    def act(self, state):
        """Return an integer array of ``num_users`` actions chosen epsilon-greedily."""
        if np.random.rand() <= self.epsilon:
            # Explore: an independent uniform choice for every user.
            return np.random.randint(0, self.action_size, size=self.num_users)

        q_values = self.model.predict(np.array(state).reshape(1, -1), verbose=0)
        per_user_q = np.asarray(q_values).reshape(self.num_users, self.action_size)
        return np.argmax(per_user_q, axis=1)  # Exploit: best choice for each user

    def train(self):
        """Fit the Q-network on one random minibatch and decay ``epsilon``.

        Does nothing until at least ``batch_size`` transitions have been stored. For
        every sampled transition and every user ``u`` the target is
        ``reward + gamma * max_a Q_u(next_state, a)``, written into the entry of the
        action user ``u`` actually took; all other entries keep the current prediction
        so they contribute zero error.
        """
        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states = zip(*batch)

        states = np.array(states, dtype=np.float32)
        next_states = np.array(next_states, dtype=np.float32)
        rewards = np.array(rewards, dtype=np.float32).reshape(-1, 1)
        actions = np.array(actions, dtype=np.int64).reshape(self.batch_size, self.num_users)

        table_shape = (self.batch_size, self.num_users, self.action_size)
        # np.array(...) copies, so the prediction buffer is safe to write into.
        target_qs = np.array(self.model.predict(states, verbose=0), dtype=np.float32).reshape(table_shape)
        next_qs = np.asarray(self.model.predict(next_states, verbose=0)).reshape(table_shape)

        # (batch, num_users): shared reward plus each user's discounted best next value.
        bootstrapped = rewards + self.gamma * next_qs.max(axis=2)

        sample_idx = np.arange(self.batch_size)[:, None]
        user_idx = np.arange(self.num_users)[None, :]
        target_qs[sample_idx, user_idx, actions] = bootstrapped

        self.model.fit(states, target_qs, epochs=1, verbose=0)

        # Decay exploration, never dropping below the floor.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)


In [8]:
class PPOAgent:
    """Actor-critic agent trained with the clipped PPO surrogate objective.

    The actor outputs a softmax distribution over ``action_size`` discrete actions and
    the critic outputs a scalar state value. Transitions are collected with
    :meth:`remember` and consumed (then discarded) by :meth:`train`.

    NOTE(review): :meth:`act` returns a single scalar action, whereas
    ``MECEnvironment.step`` indexes one action per user; confirm how the two are
    meant to be connected.
    """

    def __init__(self, state_size, action_size, learning_rate=0.001, gamma=0.99, epsilon=0.2, epsilon_decay=0.995, lam=0.95):
        """Store hyper-parameters and build the actor, the critic and their optimizers.

        Args:
            state_size: Length of the state vector.
            action_size: Number of discrete actions the actor chooses from.
            learning_rate: Adam learning rate used for both networks.
            gamma: Discount factor.
            epsilon: PPO clipping range.
            epsilon_decay: Multiplicative decay applied to ``epsilon`` per training call.
            lam: GAE lambda.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = 0.01
        self.learning_rate = learning_rate
        self.lam = lam

        # Rollout buffer; emptied at the end of every train() call.
        self.memory = []

        self.actor = self.build_actor_model()
        self.critic = self.build_critic_model()

        # A Keras optimizer is tied to the variables it is first applied to, so the
        # actor and the critic each get their own. `optimizer` keeps its original name.
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        self.critic_optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)

    def build_actor_model(self):
        """Build the policy network (softmax over ``action_size`` actions)."""
        model = tf.keras.Sequential([
            layers.Input(shape=(self.state_size,)),
            layers.Dense(64, activation='relu'),
            layers.Dense(64, activation='relu'),
            layers.Dense(self.action_size, activation='softmax')
        ])
        return model

    def build_critic_model(self):
        """Build the value network (single linear output)."""
        model = tf.keras.Sequential([
            layers.Input(shape=(self.state_size,)),
            layers.Dense(64, activation='relu'),
            layers.Dense(64, activation='relu'),
            layers.Dense(1, activation='linear')
        ])
        return model

    def remember(self, state, action, reward, next_state, old_prob, done):
        """Append one transition to the rollout buffer."""
        self.memory.append((state, action, reward, next_state, old_prob, done))

    def act(self, state):
        """Sample an action from the actor.

        Returns:
            tuple: ``(action, prob)`` with the sampled action index and the probability
            the policy assigned to it.
        """
        state = np.array(state, dtype=np.float32).reshape(1, -1)
        probs = np.asarray(self.actor.predict(state, verbose=0)[0], dtype=np.float64)
        # float32 softmax outputs can miss summing to 1 by more than np.random.choice
        # tolerates, so renormalise in float64 before sampling.
        probs = probs / probs.sum()
        action = int(np.random.choice(self.action_size, p=probs))
        return action, float(probs[action])

    def compute_advantages(self, rewards, values, next_values, dones):
        """Compute Generalized Advantage Estimation over a rollout.

        All inputs are flattened to 1-D, so ``(N, 1)`` critic outputs are accepted.
        At a terminal step the next value is not bootstrapped and the running
        advantage is reset, so estimates do not leak across episode boundaries.

        Returns:
            np.ndarray: float64 array of shape ``(N,)``.
        """
        rewards = np.asarray(rewards, dtype=np.float64).reshape(-1)
        values = np.asarray(values, dtype=np.float64).reshape(-1)
        next_values = np.asarray(next_values, dtype=np.float64).reshape(-1)
        dones = np.asarray(dones).astype(bool).reshape(-1)

        advantages = np.zeros_like(rewards)
        running = 0.0
        for t in reversed(range(len(rewards))):
            not_done = 0.0 if dones[t] else 1.0
            delta = rewards[t] + self.gamma * next_values[t] * not_done - values[t]
            running = delta + self.gamma * self.lam * not_done * running
            advantages[t] = running
        return advantages

    def train(self):
        """Run one PPO update on the buffered rollout, then clear the buffer.

        Does nothing until at least 32 transitions have been collected.
        """
        if len(self.memory) < 32:
            return

        states, actions, rewards, next_states, old_probs, dones = zip(*self.memory)
        states = np.asarray(states, dtype=np.float32)
        next_states = np.asarray(next_states, dtype=np.float32)
        actions = np.asarray(actions, dtype=np.int32).reshape(-1)
        rewards = np.asarray(rewards, dtype=np.float32).reshape(-1)
        old_probs = np.asarray(old_probs, dtype=np.float32).reshape(-1)
        dones = np.asarray(dones).astype(bool).reshape(-1)

        # Flatten the (N, 1) critic outputs so everything below is shape (N,).
        values = self.critic.predict(states, verbose=0).reshape(-1)
        next_values = self.critic.predict(next_states, verbose=0).reshape(-1)
        advantages = self.compute_advantages(rewards, values, next_values, dones).astype(np.float32)
        # Value target: advantage plus baseline, i.e. the lambda-return estimate.
        returns = (advantages + values).astype(np.float32)

        advantages_t = tf.convert_to_tensor(advantages)
        returns_t = tf.convert_to_tensor(returns)
        old_probs_t = tf.maximum(tf.convert_to_tensor(old_probs), 1e-8)  # guard the division

        # Actor: clipped surrogate objective.
        with tf.GradientTape() as actor_tape:
            probs = self.actor(states, training=True)
            chosen_probs = tf.gather(probs, actions, axis=1, batch_dims=1)
            ratio = chosen_probs / old_probs_t
            unclipped = ratio * advantages_t
            clipped = tf.clip_by_value(ratio, 1.0 - self.epsilon, 1.0 + self.epsilon) * advantages_t
            actor_loss = -tf.reduce_mean(tf.minimum(unclipped, clipped))
        actor_grads = actor_tape.gradient(actor_loss, self.actor.trainable_variables)
        self.optimizer.apply_gradients(zip(actor_grads, self.actor.trainable_variables))

        # Critic: squared error against the returns; squeeze so shapes match (N,) vs (N,)
        # instead of broadcasting (N,) against (N, 1) into an (N, N) matrix.
        with tf.GradientTape() as critic_tape:
            predicted = tf.squeeze(self.critic(states, training=True), axis=-1)
            value_loss = tf.reduce_mean(tf.square(returns_t - predicted))
        critic_grads = critic_tape.gradient(value_loss, self.critic.trainable_variables)
        self.critic_optimizer.apply_gradients(zip(critic_grads, self.critic.trainable_variables))

        # NOTE(review): `epsilon` is the PPO clip range, yet it is annealed as if it
        # were an exploration rate. The decay is kept (now floored); confirm intent.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

        # On-policy data is only valid for the policy that generated it.
        self.memory.clear()


In [1]:
!pip install numpy torch  # Add this cell if using PyTorch
import numpy as np
import torch  # If using PyTorch


