Importing The Libraries

In [None]:
import random
from collections import deque

import gym
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from gym import spaces
from tensorflow.keras import layers, Input

Defining Simulation Parameters


In [None]:
# Simulation Parameters (Defined as per research paper)
# Each value is stored in the unit noted beside it; nothing in this cell
# converts between units.
NUM_VEHICLES = 80  # number of vehicles; NOTE(review): not referenced by the code shown in this section
NUM_RSUS = 30  # number of RSUs; NOTE(review): not referenced by the code shown in this section
ROAD_LENGTH_KM = 10  # km; NOTE(review): not referenced by the code shown in this section
VEHICLE_SPEED_RANGE = (30, 120)  # km/h; (low, high) bounds of the 1st observation feature
COMMUNICATION_RANGE = 200  # meters; upper bound of the 4th observation feature (lower bound is 0)
VEHICLE_CPU_RANGE = (2, 8)  # GHz; bounds of the 2nd observation feature; the upper value is used for the energy of action 0
RSU_CPU_RANGE = (8, 16)  # GHz; bounds of the 3rd observation feature; the upper value is used for the energy of every non-zero action
TASK_SIZE = 16  # Mbits; also used as the number of steps in each training episode
PROCESSING_OVERHEAD = 1000  # cycles/bit; multiplied with TASK_SIZE to obtain the CPU cycles of a task
BANDWIDTH = 10  # MHz; divisor in the delay calculation
POWER_CONSUMPTION_COEFFICIENT = 1e-27  # W·s³/cycles³; leading factor of the energy formula
GAMMA = 0.9  # Discount factor applied to the bootstrapped next-state Q-value
EPSILON = 0.9  # Initial epsilon-greedy exploration rate; training decays it by 0.995 per episode, floored at 0.1
BATCH_SIZE = 32  # transitions sampled from replay memory per policy update (also the fit batch size)
UPDATE_TARGET_EVERY = 50  # target-network sync interval, compared against the episode index
EPISODES = 256  # number of training episodes


Functions to Calculate Energy Consumption and Delay

In [None]:
# Function to calculate energy consumption
def calculate_energy(cpu_cycles, cpu_frequency):
    """Return the energy spent running ``cpu_cycles`` at ``cpu_frequency``.

    Evaluates ``POWER_CONSUMPTION_COEFFICIENT * cpu_cycles * cpu_frequency ** 2``.
    No unit conversion happens here: the result is only physically meaningful
    when the arguments are expressed in the units the coefficient expects.
    """
    frequency_squared = cpu_frequency ** 2
    return POWER_CONSUMPTION_COEFFICIENT * cpu_cycles * frequency_squared

# Function to calculate delay
def calculate_delay(task_size, bandwidth):
    """Return the delay obtained by dividing ``task_size`` by ``bandwidth``.

    The plain quotient is returned; no unit scaling is applied. A zero
    ``bandwidth`` raises ``ZeroDivisionError`` for ordinary Python numbers.
    """
    delay = task_size / bandwidth
    return delay

Function to Build the Q-Network Model

In [None]:
def build_model(state_size, action_size):
    """Build and compile the fully connected Q-network.

    Args:
        state_size: Length of the flat observation vector fed to the network.
        action_size: Number of discrete actions; the network emits one linear
            output per action.

    Returns:
        A compiled ``tf.keras.Model`` with two 64-unit ReLU hidden layers,
        optimised with Adam (learning rate 0.001) on a mean-squared-error loss.
    """
    state_input = Input(shape=(state_size,))

    # Two identical hidden layers stacked on top of the input.
    hidden = state_input
    for _ in range(2):
        hidden = layers.Dense(64, activation="relu")(hidden)

    q_output = layers.Dense(action_size, activation="linear")(hidden)

    network = tf.keras.Model(inputs=state_input, outputs=q_output)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    network.compile(optimizer=optimizer, loss="mse")
    return network

Defining the Vehicular Offloading Environment and Training the DDQN Agent

In [None]:

# Step 1: Define Vehicular Offloading Environment
class VehicularOffloadingEnv(gym.Env):
    """Simplified task-offloading environment with 4 state features and 3 actions.

    Observation (float32, shape ``(4,)``), bounded by the module constants:
        0. value in ``VEHICLE_SPEED_RANGE``
        1. value in ``VEHICLE_CPU_RANGE``
        2. value in ``RSU_CPU_RANGE``
        3. value in ``[0, COMMUNICATION_RANGE]``

    Actions (``Discrete(3)``): action ``0`` is costed with the maximum vehicle
    CPU frequency; every other action is costed with the maximum RSU CPU
    frequency.

    Reward: ``-(delay + energy + noise)`` with uniform noise in ``[-0.5, 0.5)``.

    NOTE(review): actions 1 and 2 produce the same cost, the delay does not
    depend on the action, and the sampled state does not influence the reward.
    These modelling gaps are left as-is and should be checked against the
    reference paper.

    NOTE(review): with unit conversion in place the energy term is on the
    order of 1e3 J per step, far larger than the delay term; the cost may need
    weighting or normalisation before it is a good learning signal.
    """

    # TASK_SIZE is given in Mbits while PROCESSING_OVERHEAD is in cycles/bit,
    # and the CPU ranges are in GHz while the power coefficient
    # (W·s³/cycles³) requires a frequency in cycles per second.
    _BITS_PER_MBIT = 1e6
    _HZ_PER_GHZ = 1e9

    def __init__(self):
        super().__init__()

        # Build the bounds directly as float32 so Box does not have to
        # down-cast integer arrays to the requested dtype.
        low = np.array(
            [VEHICLE_SPEED_RANGE[0], VEHICLE_CPU_RANGE[0], RSU_CPU_RANGE[0], 0],
            dtype=np.float32,
        )
        high = np.array(
            [VEHICLE_SPEED_RANGE[1], VEHICLE_CPU_RANGE[1], RSU_CPU_RANGE[1], COMMUNICATION_RANGE],
            dtype=np.float32,
        )
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
        self.action_space = spaces.Discrete(3)
        self.reset()

    def step(self, action):
        """Apply ``action`` and return ``(next_state, reward, done, info)``.

        The energy term is computed in SI units (bits and Hz). Without the
        conversion it evaluates to roughly 1e-21 and the reward no longer
        depends on the chosen action.
        """
        delay = calculate_delay(TASK_SIZE, BANDWIDTH)

        cpu_frequency_ghz = VEHICLE_CPU_RANGE[1] if action == 0 else RSU_CPU_RANGE[1]
        cpu_cycles = PROCESSING_OVERHEAD * TASK_SIZE * self._BITS_PER_MBIT
        energy = calculate_energy(cpu_cycles, cpu_frequency_ghz * self._HZ_PER_GHZ)

        cost = delay + energy + np.random.uniform(-0.5, 0.5)  # Adding slight randomness
        reward = -cost  # Minimize cost

        # The next state is an independent draw from the observation space.
        self.state = self.observation_space.sample()
        done = False  # Continuous scenario
        return self.state, reward, done, {}

    def reset(self):
        """Sample a fresh state, remember it on the instance and return it."""
        self.state = self.observation_space.sample()
        return self.state

# Seed every random source the experiment draws from *before* any network
# weights are created, so a fresh "Restart & Run All" starts from the same
# point. This is best-effort: accelerator kernels may still be nondeterministic.
SEED = 42
random.seed(SEED)         # epsilon-greedy draws and replay-memory sampling
np.random.seed(SEED)      # reward noise drawn inside env.step
tf.random.set_seed(SEED)  # TensorFlow-level randomness (e.g. weight initialisation)

env = VehicularOffloadingEnv()
env.action_space.seed(SEED)       # random actions taken while exploring
env.observation_space.seed(SEED)  # states sampled by reset() and step()

state_size = env.observation_space.shape[0]
action_size = env.action_space.n

# Online (policy) network and a target network that starts as an exact copy.
policy_model = build_model(state_size, action_size)
target_model = build_model(state_size, action_size)
target_model.set_weights(policy_model.get_weights())

def update_policy():
    """Run one Double-DQN training step on a random minibatch of replay memory.

    Returns immediately while ``replay_memory`` holds fewer than ``BATCH_SIZE``
    transitions. Otherwise the greedy next action is selected with
    ``policy_model`` and valued with ``target_model``; only the Q-value of the
    action actually taken is replaced by the bootstrapped target before
    ``policy_model`` is fitted on the batch.
    """
    if len(replay_memory) < BATCH_SIZE:
        return

    minibatch = random.sample(replay_memory, BATCH_SIZE)
    states, actions, rewards, next_states, dones = zip(*minibatch)
    state_batch = np.array(states)
    next_state_batch = np.array(next_states)

    online_next_q = policy_model.predict(next_state_batch, verbose=0)
    target_next_q = target_model.predict(next_state_batch, verbose=0)
    fit_targets = policy_model.predict(state_batch, verbose=0)

    for row, (action, reward, done) in enumerate(zip(actions, rewards, dones)):
        if done:
            # Terminal transition: nothing to bootstrap from.
            td_target = reward
        else:
            # Select with the online network, evaluate with the target network.
            greedy_action = np.argmax(online_next_q[row])
            td_target = reward + GAMMA * target_next_q[row][greedy_action]
        fit_targets[row][action] = td_target

    policy_model.fit(state_batch, fit_targets, verbose=0, batch_size=BATCH_SIZE)

# Training hyper-parameters that are local to this loop.
REPLAY_MEMORY_SIZE = 2000  # maximum number of stored transitions
EPSILON_MIN = 0.1          # exploration floor
EPSILON_DECAY = 0.995      # multiplicative decay applied after every episode

# A bounded deque discards the oldest transition automatically once full.
replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
rewards_history = []

# Decay a working copy so the EPSILON constant stays untouched and re-running
# this cell restarts exploration from the configured value.
epsilon = EPSILON

for episode in range(EPISODES):
    state = env.reset()
    total_reward = 0
    # TASK_SIZE doubles as the number of steps per episode.
    remaining_steps = TASK_SIZE

    while remaining_steps > 0:
        # Epsilon-greedy action selection.
        if random.random() < epsilon:
            action = env.action_space.sample()
        else:
            q_values = policy_model.predict(state.reshape(1, -1), verbose=0)
            action = int(np.argmax(q_values[0]))

        next_state, reward, done, _ = env.step(action)
        replay_memory.append((state, action, reward, next_state, done))

        update_policy()

        remaining_steps -= 1
        state = next_state
        total_reward += reward
        if done:
            break

    # Sync the target network once per qualifying episode. Doing this inside
    # the step loop would overwrite the target after every single update
    # during those episodes, defeating the purpose of a lagging target.
    if episode % UPDATE_TARGET_EVERY == 0:
        target_model.set_weights(policy_model.get_weights())

    rewards_history.append(total_reward)
    epsilon = max(EPSILON_MIN, epsilon * EPSILON_DECAY)
    print(f"Episode {episode+1}: Total Reward = {total_reward}")

# Learning curve: total reward collected in each training episode.
fig, ax = plt.subplots()
ax.plot(rewards_history)
ax.set_xlabel("Episodes")
ax.set_ylabel("Total Reward")
ax.set_title("DDQN Training Performance")
plt.show()


Episode 1: Total Reward = -25.180805657674007
Episode 2: Total Reward = -25.439667883956474
Episode 3: Total Reward = -24.39159026239767
Episode 4: Total Reward = -26.338143939577044
Episode 5: Total Reward = -24.394315282128915
Episode 6: Total Reward = -24.91191017280461
Episode 7: Total Reward = -26.32512313657124
Episode 8: Total Reward = -25.890713460587005
Episode 9: Total Reward = -26.75741494117814
Episode 10: Total Reward = -25.618136853954297
Episode 11: Total Reward = -24.694091232603867
Episode 12: Total Reward = -25.863547643490175
Episode 13: Total Reward = -25.810608061647425
Episode 14: Total Reward = -26.12530466791838
Episode 15: Total Reward = -25.149749437706014
Episode 16: Total Reward = -24.27082140722535
Episode 17: Total Reward = -26.486866109563547
Episode 18: Total Reward = -24.96145088836847
Episode 19: Total Reward = -23.472038713127954
Episode 20: Total Reward = -24.49247444575023
Episode 21: Total Reward = -25.31459429584846
Episode 22: Total Reward = -24.