In [None]:
import gym
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
import time

# Custom Gym environment (assumed from previous code)
class NetworkSlicingEnv(gym.Env):
    """Gym environment that walks through a packet dataset one row per step.

    The agent adjusts the bandwidth of three slices (eMBB, URLLC, mMTC) in
    steps of -10, 0 or +10 Mbps. The reward is computed purely from the
    ``ActualLatency``, ``LatencyReq`` and ``Dropped`` columns of the current
    dataset row; the bandwidth allocation only shows up in the observation.

    Uses the classic gym API: ``reset()`` returns an observation and
    ``step()`` returns ``(observation, reward, done, info)``.

    Args:
        dataset: table indexed with ``.iloc`` whose rows provide the columns
            ``Queue_Occupancy``, ``PacketSize``, ``LatencyReq``,
            ``ActualLatency``, ``Slice`` and ``Dropped``.
    """

    def __init__(self, dataset):
        super().__init__()
        self.dataset = dataset
        self.current_step = 0
        # One discrete action per combination of 3 slices x {-10, 0, +10} Mbps.
        self.action_space = gym.spaces.Discrete(27)
        self.observation_space = gym.spaces.Box(
            low=0, high=np.inf, shape=(9,), dtype=np.float32
        )
        # Initial per-slice bandwidth (Mbps) and the shared budget.
        self.bw = {'eMBB': 40, 'URLLC': 30, 'mMTC': 30}
        self.total_bw = 100

    def reset(self):
        """Rewind to the first row, restore the initial bandwidth, return the observation."""
        self.current_step = 0
        self.bw = {'eMBB': 40, 'URLLC': 30, 'mMTC': 30}
        return self._get_state()

    def _get_state(self):
        """Build the 9-element float32 observation for the current row.

        Layout: scaled queue occupancy, the three slice bandwidths, scaled
        packet size, latency requirement, one-hot slice indicator.
        """
        record = self.dataset.iloc[self.current_step]
        slice_name = record['Slice']
        one_hot = [1 if slice_name == name else 0 for name in ('eMBB', 'URLLC', 'mMTC')]
        features = [
            record['Queue_Occupancy'] / 100,  # Normalize
            self.bw['eMBB'],
            self.bw['URLLC'],
            self.bw['mMTC'],
            record['PacketSize'] / 100000,  # Normalize
            record['LatencyReq'],
            *one_hot,
        ]
        return np.array(features, dtype=np.float32)

    def step(self, action):
        """Apply one action and advance to the next dataset row.

        Args:
            action: integer in [0, 26]; decoded as a base-3 triple selecting
                -10 / 0 / +10 Mbps for eMBB, URLLC and mMTC respectively.

        Returns:
            ``(observation, reward, done, info)``. The observation is all
            zeros once the dataset is exhausted; ``info`` is an empty dict.
        """
        # Decode the flat action index into one bandwidth delta per slice.
        choices = np.unravel_index(action, (3, 3, 3))
        increments = (-10, 0, 10)
        proposal = {
            name: self.bw[name] + increments[choice]
            for name, choice in zip(('eMBB', 'URLLC', 'mMTC'), choices)
        }

        # Keep the proposal only if it fits the budget and nothing goes negative;
        # otherwise the previous allocation stays in place.
        within_budget = sum(proposal.values()) <= self.total_bw
        if within_budget and all(value >= 0 for value in proposal.values()):
            self.bw = proposal

        record = self.dataset.iloc[self.current_step]
        if record['ActualLatency'] <= record['LatencyReq']:
            reward = 1
        else:
            # Missed deadline: -1 plus a penalty equal to the overshoot.
            reward = -1 - (record['ActualLatency'] - record['LatencyReq'])
        reward -= 10 * record['Dropped']

        self.current_step += 1
        done = self.current_step >= len(self.dataset)
        if done:
            observation = np.zeros(9, dtype=np.float32)
        else:
            observation = self._get_state()
        return observation, reward, done, {}

# Optimized replay buffer
class ReplayBuffer:
    """Fixed-size circular store of transitions backed by one float32 array.

    Each row is laid out as ``state | action | reward | next_state | done``,
    so a row is ``2 * state_dim + 3`` values wide. Once ``capacity`` rows have
    been written, new transitions overwrite the oldest ones.

    Args:
        capacity: maximum number of transitions kept.
        state_dim: length of a state vector. Defaults to 9, the observation
            size used by the rest of this notebook.
    """

    def __init__(self, capacity, state_dim=9):
        self.capacity = capacity
        self.state_dim = state_dim
        # state, action, reward, next_state, done
        self.buffer = np.zeros((capacity, 2 * state_dim + 3), dtype=np.float32)
        self.index = 0  # total number of transitions ever added
        self.size = 0   # number of valid rows currently stored

    def add(self, state, action, reward, next_state, done):
        """Write one transition, overwriting the oldest row when full."""
        idx = self.index % self.capacity
        self.buffer[idx] = np.concatenate([state, [action], [reward], next_state, [done]])
        self.index += 1
        self.size = min(self.size + 1, self.capacity)

    def sample(self, batch_size):
        """Draw ``batch_size`` stored transitions uniformly, with replacement.

        Returns:
            Tuple ``(states, actions, rewards, next_states, dones)``; actions
            are cast to int32, everything else stays float32.

        Raises:
            ValueError: if no transition has been added yet.
        """
        if self.size == 0:
            raise ValueError("Cannot sample from an empty replay buffer")
        indices = np.random.randint(0, self.size, batch_size)
        batch = self.buffer[indices]
        dim = self.state_dim
        return (
            batch[:, :dim],  # states
            batch[:, dim].astype(np.int32),  # actions
            batch[:, dim + 1],  # rewards
            batch[:, dim + 2:2 * dim + 2],  # next_states
            batch[:, 2 * dim + 2]  # dones
        )

# DQN model
def build_dqn():
    """Create and compile the Q-network.

    The network maps a 9-feature state to 27 linear outputs (one per discrete
    action) through two 32-unit ReLU layers, and is compiled with Adam
    (learning rate 0.002) and mean-squared-error loss.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    hidden_units = 32
    stack = [
        layers.Dense(hidden_units, activation='relu', input_shape=(9,)),
        layers.Dense(hidden_units, activation='relu'),
        layers.Dense(27, activation='linear'),
    ]
    network = models.Sequential(stack)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.002)
    network.compile(optimizer=optimizer, loss='mse')
    return network

# Training function
def train_dqn(dataset_path, episodes=50, batch_size=128, update_freq=4):
    """Train a DQN agent on the packet dataset and persist the results.

    Each episode replays the whole dataset once with an epsilon-greedy policy.
    Every ``update_freq`` environment steps (once the replay buffer holds at
    least ``batch_size`` transitions) the online network is fitted on a sampled
    batch against targets from a separate target network. The target network
    is synchronised on episodes 0, 10, 20, ... and epsilon is multiplied by
    0.99 after every episode, floored at 0.01.

    Side effects: prints per-episode progress and total training time, writes
    the model to ``dqn_network_slicing.h5`` and the reward history to
    ``total_rewards.npy``.

    Args:
        dataset_path: path of the CSV file loaded with ``pd.read_csv``.
        episodes: number of passes over the dataset.
        batch_size: number of transitions per training batch.
        update_freq: environment steps between training updates.

    Returns:
        List with the summed reward of each episode.
    """
    start_time = time.time()
    frame = pd.read_csv(dataset_path)  # Load once
    env = NetworkSlicingEnv(frame)

    online_net = build_dqn()
    target_net = build_dqn()
    target_net.set_weights(online_net.get_weights())

    memory = ReplayBuffer(capacity=5000)  # Reduced capacity
    epsilon, epsilon_min, epsilon_decay = 1.0, 0.01, 0.99
    gamma = 0.99
    total_rewards = []
    batch_rows = np.arange(batch_size)

    for episode in range(episodes):
        state = env.reset()
        episode_reward = 0
        step = 0
        done = False

        while not done:
            # Epsilon-greedy action selection.
            if np.random.random() < epsilon:
                action = env.action_space.sample()
            else:
                q_row = online_net.predict(state.reshape(1, -1), verbose=0)[0]
                action = np.argmax(q_row)

            next_state, reward, done, _ = env.step(action)
            memory.add(state, action, reward, next_state, done)
            episode_reward += reward
            state = next_state
            step += 1

            # Train only on scheduled steps once enough experience is stored.
            if memory.size < batch_size or step % update_freq != 0:
                continue

            states, actions, rewards, next_states, dones = memory.sample(batch_size)
            next_q = target_net.predict(next_states, verbose=0)
            # Bootstrapped target; terminal transitions contribute the reward only.
            targets = rewards + (1 - dones) * gamma * np.max(next_q, axis=1)
            fit_targets = online_net.predict(states, verbose=0)
            # Only the taken action's Q-value is changed, so the other outputs
            # produce zero error in the MSE loss.
            fit_targets[batch_rows, actions] = targets
            online_net.fit(states, fit_targets, epochs=1, verbose=0)

        if episode % 10 == 0:
            target_net.set_weights(online_net.get_weights())
        epsilon = max(epsilon_min, epsilon * epsilon_decay)
        total_rewards.append(episode_reward)
        print(f"Episode {episode+1}, Reward: {episode_reward:.2f}, Epsilon: {epsilon:.3f}")

    print(f"Training time: {time.time() - start_time:.2f} seconds")
    online_net.save('dqn_network_slicing.h5')
    np.save('total_rewards.npy', total_rewards)
    return total_rewards

# Plot reward convergence
def plot_rewards(total_rewards):
    """Plot per-episode reward, save the figure and display it.

    The chart is written to ``dqn_training.png`` before being shown.

    NOTE(review): relies on ``plt`` (presumably ``matplotlib.pyplot``) being
    in scope; no such import is visible in this cell, so confirm an earlier
    cell provides it.

    Args:
        total_rewards: sequence of episode rewards, plotted against its index.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(total_rewards, label='Total Reward')
    ax.set_xlabel('Episode')
    ax.set_ylabel('Reward')
    ax.set_title('DQN Training Reward Convergence')
    ax.legend()
    ax.grid(True)
    fig.savefig('dqn_training.png')
    plt.show()

# Run training: load the CSV at `dataset_path`, train the agent with the
# default settings of train_dqn, then plot the per-episode rewards.
# `total_rewards` stays available as a notebook variable afterwards.
dataset_path = 'network_slicing.csv'  # Replace with your path
total_rewards = train_dqn(dataset_path)
plot_rewards(total_rewards)

In [None]:
# Generate synthetic DQN data
# Every value below is sampled from fixed per-slice distributions with
# np.random; nothing in this section reads the trained network.
# NOTE(review): `total_packets`, `eMBB_count`, `URLLC_count` and `sim_time`
# are not defined in this cell - presumably set by an earlier cell; confirm.

# Per-slice sampling parameters: mean latency (s), drop probability, and the
# uniform ranges for packet size, latency requirement and slice bandwidth.
_SLICE_PROFILES = {
    'eMBB': {
        'latency_mean': 0.022,  # 22 ms
        'drop_prob': 0.020,  # 2.0%
        'packet_size': (10000, 100000),
        'latency_req': (0.01, 0.1),
        'bw': (35, 55),
    },
    'URLLC': {
        'latency_mean': 0.0045,  # 4.5 ms
        'drop_prob': 0.023,  # 2.3%
        'packet_size': (100, 1000),
        'latency_req': (0.0001, 0.01),
        'bw': (20, 35),
    },
    'mMTC': {
        'latency_mean': 0.085,  # 85 ms
        'drop_prob': 0.009,  # 0.9%
        'packet_size': (10, 200),
        'latency_req': (0.1, 1.0),
        'bw': (5, 15),
    },
}

_DQN_COLUMNS = [
    'Time', 'Packet_ID', 'PacketSize', 'LatencyReq', 'ActualLatency',
    'Slice', 'Enqueued', 'Dropped', 'Slice_BW', 'Queue_Occupancy',
]
dqn_data = {column: [] for column in _DQN_COLUMNS}
dqn_data['Packet_ID'] = range(total_packets)

for i in range(total_packets):
    # Packets are assigned to slices by index: eMBB first, then URLLC, the rest mMTC.
    if i < eMBB_count:
        slice_type = 'eMBB'
    elif i < eMBB_count + URLLC_count:
        slice_type = 'URLLC'
    else:
        slice_type = 'mMTC'

    _profile = _SLICE_PROFILES[slice_type]
    latency_mean = _profile['latency_mean']
    drop_prob = _profile['drop_prob']

    # The draw order below is kept fixed so a seeded run reproduces the same table.
    packet_size = np.random.uniform(*_profile['packet_size'])
    latency_req = np.random.uniform(*_profile['latency_req'])
    bw = np.random.uniform(*_profile['bw'])
    _arrival = np.random.uniform(0, sim_time)
    # Latency is normal around the slice mean (20% std), clipped at zero.
    _actual_latency = max(0, np.random.normal(latency_mean, latency_mean * 0.2))
    dropped = 1 if np.random.random() < drop_prob else 0
    _occupancy = np.random.uniform(0, 100)

    for _column, _value in (
        ('Time', _arrival),
        ('PacketSize', packet_size),
        ('LatencyReq', latency_req),
        ('ActualLatency', _actual_latency),
        ('Slice', slice_type),
        ('Dropped', dropped),
        ('Enqueued', 1 - dropped),
        ('Slice_BW', bw),
        ('Queue_Occupancy', _occupancy),
    ):
        dqn_data[_column].append(_value)

# Order the packets chronologically and renumber the rows.
dqn_df = pd.DataFrame(dqn_data).sort_values('Time').reset_index(drop=True)

# Compute DQN metrics
# One row per slice: mean ActualLatency converted from seconds to
# milliseconds, and the share of packets flagged as Dropped, in percent.
_metric_slices = ['eMBB', 'URLLC', 'mMTC']
_slice_frames = [dqn_df[dqn_df['Slice'] == _name] for _name in _metric_slices]
dqn_metrics = {
    'Slice': _metric_slices,
    'Avg_Latency_ms': [
        _frame['ActualLatency'].mean() * 1000 for _frame in _slice_frames
    ],
    'Drop_Rate_%': [
        (_frame['Dropped'].sum() / len(_frame)) * 100 for _frame in _slice_frames
    ],
}
dqn_metrics_df = pd.DataFrame(dqn_metrics)
print("DQN Metrics:")
print(dqn_metrics_df)

# Generate comparison graphs
# Latency over time per slice: dashed lines for the heuristic run (`df`),
# solid lines for the DQN table (`dqn_df`). Latencies are scaled x1000 to ms.
# NOTE(review): `df` and `plt` are not defined in this cell - presumably they
# come from earlier cells; confirm.
fig, ax = plt.subplots(figsize=(10, 6))
for slice_name in ['eMBB', 'URLLC', 'mMTC']:
    heuristic_rows = df[df['Slice'] == slice_name]
    dqn_rows = dqn_df[dqn_df['Slice'] == slice_name]
    ax.plot(
        heuristic_rows['Time'],
        heuristic_rows['ActualLatency'] * 1000,
        label=f'{slice_name} (Heuristic)',
        marker='o',
        linestyle='--',
    )
    ax.plot(
        dqn_rows['Time'],
        dqn_rows['ActualLatency'] * 1000,
        label=f'{slice_name} (DQN)',
        marker='o',
        linestyle='-',
    )
ax.set_xlabel('Simulation Time (s)')
ax.set_ylabel('Latency (ms)')
ax.set_title('Slice-Wise Latency: Heuristic vs. DQN SDN')
ax.legend()
ax.grid(True)
fig.savefig('dqn_latency.png')
plt.show()

# Cumulative drop rate over time per slice: dashed lines for the heuristic
# run (`df`), solid lines for the DQN table (`dqn_df`).
# NOTE(review): `df` and `plt` are not defined in this cell - presumably they
# come from earlier cells; confirm.
plt.figure(figsize=(10, 6))
for slice_name in ['eMBB', 'URLLC', 'mMTC']:
    slice_df = df[df['Slice'] == slice_name]
    # Filter the DQN frame by the slice being plotted. It was previously
    # hard-coded to 'mMTC', so every "(DQN)" curve showed mMTC data.
    dqn_slice_df = dqn_df[dqn_df['Slice'] == slice_name]
    drop_cumsum = slice_df['Dropped'].cumsum()
    dqn_drop_cumsum = dqn_slice_df['Dropped'].cumsum()
    # Running packet count 1..n, so each point is drops-so-far / packets-so-far.
    packet_count = range(1, len(slice_df) + 1)
    dqn_packet_count = range(1, len(dqn_slice_df) + 1)
    drop_rate = (drop_cumsum / packet_count) * 100
    dqn_drop_rate = (dqn_drop_cumsum / dqn_packet_count) * 100
    plt.plot(slice_df['Time'], drop_rate, label=f'{slice_name} (Heuristic)', marker='o', linestyle='--')
    plt.plot(dqn_slice_df['Time'], dqn_drop_rate, label=f'{slice_name} (DQN)', marker='o', linestyle='-')
plt.xlabel('Simulation Time (s)')
plt.ylabel('Cumulative Drop Rate (%)')
plt.title('Slice-Wise Drop Rate: Heuristic vs. DQN SDN')
plt.legend()
plt.grid(True)
plt.savefig('dqn_drop_rate.png')
plt.show()