# In [None]:
import numpy as np
import random
import tensorflow as tf
from collections import deque
import matplotlib.pyplot as plt
from lightgbm import LGBMRegressor

# Hyperparameters and constants
# Scenario geometry, task model, compute/communication capacities, and the
# DDQN training settings shared by the rest of this script.
RSU_COVERAGE = 800          # meters
HANDOVER_AREA = 300         # meters (NOTE(review): not referenced in the visible code)
TASK_SIZE_RANGE = (1, 10)   # MB
CPU_CYCLES_PER_MB = 500     # cycles per MB
VEHICLE_CPU = 5000          # MHz
COOPERATIVE_VEHICLE_CPU = (10000, 50000)  # MHz range
RSU_CPU = (50000, 100000)   # MHz range
V2R_BANDWIDTH = 10          # MHz
V2V_BANDWIDTH = 5           # MHz
BANDWIDTH_COST_RSU = 1      # Cost per MHz for RSU
BANDWIDTH_COST_V2V = 1.5     # Cost per MHz for V2V
LEARNING_RATE = 0.0001      # learning rate passed to the Adam optimizer of the DDQN agent
GAMMA = 0.99                # discount factor applied to the bootstrapped next-state Q-value
EPSILON_DECAY = 0.999       # exploration rate is multiplied by this once per episode
EPSILON_MIN = 0.1         # lower bound for the exploration rate
TAU = 0.01                # soft-update mix: target <- TAU * online + (1 - TAU) * target
ALPHA = 1.5               # reward weight on the local term and on the communication term
BETA = 1.5                # reward weight on the computation term when offloading
POWER_LOCAL = 1.0         # energy = latency * power for local execution (units not stated - TODO confirm)
POWER_RSU = 0.8           # same, for RSU offloading
POWER_V2V = 1.2           # same, for V2V offloading

# Global tracking variables
# Running totals that run_ddqn_simulation() increments for every offloading
# decision; the plotting helpers divide totals by counts to obtain averages.
global_total_latency_local = 0
global_total_latency_rsu = 0
global_total_latency_v2v = 0
global_total_energy_local = 0
global_total_energy_rsu = 0
global_total_energy_v2v = 0
# Number of tasks handled by each option (denominators for the averages).
global_count_local = 0
global_count_rsu = 0
global_count_v2v = 0
# NOTE(review): not referenced in the visible code; run_ddqn_simulation()
# keeps and returns its own reward list - confirm whether this is still needed.
episode_rewards = []

# -----------------------------
# Utility Functions
# -----------------------------
def transmission_delay(task_size, bandwidth):
    """Return the time needed to push a task through a link.

    The task size is multiplied by 8 (presumably a byte-to-bit conversion,
    given the MB / MHz comments on the constants) and divided by the link
    bandwidth. A zero bandwidth raises ZeroDivisionError.
    """
    payload = task_size * 8
    return payload / bandwidth

def compute_handover_cost(vehicle, rsu1, rsu2):
    """Return the extra transfer delay charged when a vehicle leaves coverage.

    A vehicle whose x coordinate has reached RSU_COVERAGE pays the time to
    resend its task over the V2R link; any other vehicle pays nothing.
    The rsu1 and rsu2 arguments are accepted but not used.
    """
    has_left_coverage = vehicle.x >= RSU_COVERAGE
    if not has_left_coverage:
        return 0
    payload = vehicle.task_size * 8
    return payload / V2R_BANDWIDTH

# -----------------------------
# Trajectory Prediction Functions
# -----------------------------
def load_trajectory_data(path="batch_data/test_predictions_full_300.npz"):
    """Load the ground-truth and predicted trajectory arrays from an .npz file.

    Args:
        path: Location of the archive. It must contain the arrays ``'true'``
            and ``'predicted'``. Defaults to the file used by this script.

    Returns:
        A tuple ``(true_positions, predicted_positions,
        predicted_positions_dict)`` where the dict maps each row index of
        ``predicted_positions`` to that row.
    """
    # NOTE(security): allow_pickle=True lets NumPy unpickle object arrays,
    # which can execute arbitrary code. Only load archives from a trusted
    # source; drop the flag if the arrays are plain numeric.
    # The context manager closes the underlying file handle, which the
    # NpzFile returned by np.load otherwise keeps open.
    with np.load(path, allow_pickle=True) as pred_data:
        # Arrays are read lazily, so they must be pulled out before closing.
        true_positions = pred_data['true']
        predicted_positions = pred_data['predicted']
    predicted_positions_dict = dict(enumerate(predicted_positions))
    return true_positions, predicted_positions, predicted_positions_dict

def train_trajectory_model(true_positions, predicted_positions):
    """Fit two LightGBM regressors on the trajectory data.

    The first four columns of each ``true_positions`` row
    ([x, y, velocity, direction]) are the features; columns 0 and 1 of the
    matching ``predicted_positions`` row are the two regression targets.

    Returns:
        ``(model_lat, model_lon)``: the regressor fitted on column 0 and the
        regressor fitted on column 1 of ``predicted_positions``.
    """
    row_ids = range(len(true_positions))
    features = np.array([true_positions[k, :4] for k in row_ids])
    lat_targets = np.array([predicted_positions[k, 0] for k in row_ids])
    lon_targets = np.array([predicted_positions[k, 1] for k in row_ids])

    # fit() returns the estimator itself, so construction and fitting chain.
    model_lat = LGBMRegressor().fit(features, lat_targets)
    model_lon = LGBMRegressor().fit(features, lon_targets)
    return model_lat, model_lon

def predict_trajectory(vehicle, model_lat, model_lon):
    """Predict a vehicle's next position with the two fitted regressors.

    The vehicle's x, y, velocity and direction form a single feature row
    that is passed to both models.

    Returns:
        ``(lat, lon)``: the first element of each model's prediction.
    """
    feature_rows = [[vehicle.x, vehicle.y, vehicle.velocity, vehicle.direction]]
    lat_prediction = model_lat.predict(feature_rows)
    lon_prediction = model_lon.predict(feature_rows)
    return lat_prediction[0], lon_prediction[0]

def select_cooperative_vehicle(vehicle, cooperative_vehicles, model_lat, model_lon):
    """Pick the cooperative vehicle predicted to end up closest to ``vehicle``.

    Positions are obtained from ``predict_trajectory`` and compared by
    Euclidean distance. The first candidate with the smallest distance wins.

    Returns:
        The chosen cooperative vehicle, or ``None`` when the candidate list
        is empty.
    """
    own_position = np.array(predict_trajectory(vehicle, model_lat, model_lon))
    closest, closest_gap = None, float('inf')
    for candidate in cooperative_vehicles:
        candidate_position = np.array(predict_trajectory(candidate, model_lat, model_lon))
        gap = np.linalg.norm(own_position - candidate_position)
        # Strict comparison keeps the earliest candidate on ties.
        if not gap < closest_gap:
            continue
        closest, closest_gap = candidate, gap
    return closest

# -----------------------------
# DDQN Agent Implementation
# -----------------------------
class DDQNAgent:
    """Double DQN agent with an online network and a slowly tracking target network.

    The online network selects actions and is trained by gradient descent;
    the target network evaluates the selected next-state action and follows
    the online weights through a soft update after every training step.

    Args:
        input_dim: Length of the state vector.
        action_dim: Number of discrete actions.
        batch_size: Number of experiences sampled per training step.
        memory_size: Capacity of the replay buffer (oldest entries are dropped).
    """

    def __init__(self, input_dim=9, action_dim=3, batch_size=32, memory_size=5000):
        self.input_dim = input_dim
        self.action_dim = action_dim
        self.batch_size = batch_size
        self.online_network = self.build_network()
        self.target_network = self.build_network()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
        self.memory = deque(maxlen=memory_size)
        self.epsilon = 1.0  # start fully exploratory; the caller decays it
        # Both networks start from identical weights.
        self.update_target_network(hard=True)

    def build_network(self):
        """Build a two-hidden-layer MLP mapping a state to one Q-value per action."""
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(128, activation='relu', input_shape=(self.input_dim,)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(self.action_dim, activation=None)
        ])
        return model

    def update_target_network(self, hard=False):
        """Move the target network towards the online network.

        Args:
            hard: When true, copy the online weights verbatim; otherwise blend
                them into the target weights with factor TAU.
        """
        online_weights = self.online_network.get_weights()
        if hard:
            self.target_network.set_weights(online_weights)
            return
        target_weights = self.target_network.get_weights()
        blended = [
            TAU * online + (1 - TAU) * target
            for online, target in zip(online_weights, target_weights)
        ]
        self.target_network.set_weights(blended)

    def store_experience(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily and return it as a plain ``int``."""
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_dim)
        batch_of_one = np.asarray([state], dtype=np.float32)
        q_values = self.online_network(batch_of_one)
        return int(np.argmax(q_values.numpy()[0]))

    def train(self):
        """Run one Double-DQN gradient step on a random replay batch.

        Does nothing until the buffer holds at least ``batch_size``
        experiences. Ends with a soft update of the target network.
        """
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        # Explicit float32 keeps the targets in the same dtype as the
        # network outputs instead of relying on implicit float64 casting.
        states = np.asarray(states, dtype=np.float32)
        next_states = np.asarray(next_states, dtype=np.float32)
        actions = np.asarray(actions, dtype=np.int32)
        rewards = np.asarray(rewards, dtype=np.float32)
        not_done = 1.0 - np.asarray(dones, dtype=np.float32)

        # Double DQN: the online network picks the next action ...
        next_q_online = self.online_network(next_states).numpy()
        best_actions = np.argmax(next_q_online, axis=1)
        # ... and the target network supplies the value of that action.
        next_q_target = self.target_network(next_states).numpy()
        bootstrap = next_q_target[np.arange(len(batch)), best_actions]
        # Terminal transitions contribute the reward only.
        target_q = (rewards + GAMMA * not_done * bootstrap).astype(np.float32)
        target_q = tf.constant(target_q, dtype=tf.float32)

        with tf.GradientTape() as tape:
            q_values = self.online_network(states)
            # Select the Q-value of the action that was actually taken.
            q_action = tf.reduce_sum(q_values * tf.one_hot(actions, self.action_dim), axis=1)
            loss = tf.reduce_mean(tf.square(target_q - q_action))
        grads = tape.gradient(loss, self.online_network.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.online_network.trainable_variables))
        # Soft update target network
        self.update_target_network()
        
# -----------------------------
# Classes for Vehicles and RSU 
# -----------------------------
class MissionVehicle:
    """Vehicle that owns a computation task to be run locally or offloaded.

    The initial kinematic state comes from row ``vehicle_id`` of the
    module-level ``true_positions`` array; the task size is drawn uniformly
    from TASK_SIZE_RANGE.
    """

    def __init__(self, vehicle_id):
        self.vehicle_id = vehicle_id
        # Take exactly the first four columns (x, y, velocity, direction) so
        # that extra trailing columns in the data do not break the unpacking,
        # matching how the other consumers of true_positions index it.
        self.x, self.y, self.velocity, self.direction = true_positions[vehicle_id, :4]
        self.cpu = VEHICLE_CPU
        self.task_size = random.uniform(*TASK_SIZE_RANGE)
        self.task_cycles = self.task_size * CPU_CYCLES_PER_MB

    def update_position(self, predicted_positions_dict):
        """Overwrite x, y, velocity and direction with the predicted values.

        Args:
            predicted_positions_dict: Mapping from vehicle id to a sequence of
                four values. Vehicles without an entry are left unchanged.
        """
        if self.vehicle_id not in predicted_positions_dict:
            return
        predicted_values = predicted_positions_dict[self.vehicle_id]
        self.x, self.y, self.velocity, self.direction = predicted_values

class CooperativeVehicle:
    """Nearby vehicle that can execute tasks offloaded over V2V.

    Its kinematic state is read from row ``vehicle_id`` of the module-level
    ``true_positions`` array and its CPU capacity is a random integer drawn
    from COOPERATIVE_VEHICLE_CPU.
    """

    def __init__(self, vehicle_id):
        kinematics = true_positions[vehicle_id]
        cpu_low, cpu_high = COOPERATIVE_VEHICLE_CPU
        self.vehicle_id = vehicle_id
        self.x, self.y = kinematics[:2]
        self.cpu = random.randint(cpu_low, cpu_high)
        self.velocity = kinematics[2]
        self.direction = kinematics[3]

    def update_position(self, predicted_positions_dict):
        """Replace the kinematic state with this vehicle's predicted values, if any."""
        if self.vehicle_id not in predicted_positions_dict:
            return
        forecast = predicted_positions_dict[self.vehicle_id]
        self.x, self.y, self.velocity, self.direction = forecast

class RSU:
    """Roadside unit with a fixed location and randomly drawn resources."""

    def __init__(self, location):
        self.location = location
        # Initial resources are drawn exactly like a later refresh.
        self.update_resources()

    def update_resources(self):
        """Redraw the CPU capacity (from RSU_CPU) and the bandwidth (1000-5000)."""
        cpu_low, cpu_high = RSU_CPU
        self.cpu = random.randint(cpu_low, cpu_high)
        self.bandwidth = random.randint(1000, 5000)

# -----------------------------
# Load Data and Initialize Global Objects
# -----------------------------
# Read the trajectory arrays from disk; this runs as soon as the script executes.
true_positions, predicted_positions, predicted_positions_dict = load_trajectory_data()
# Fit the two regressors used when choosing a cooperative vehicle.
model_lat, model_lon = train_trajectory_model(true_positions, predicted_positions)
# Vehicle ids are used as row indices into true_positions, so the data needs
# at least 50 rows, and ids 0-9 are shared by the first ten mission vehicles
# and the cooperative vehicles.
mission_vehicles = [MissionVehicle(i) for i in range(50)]
cooperative_vehicles = [CooperativeVehicle(j) for j in range(10)]
# Two RSUs; each value is stored as RSU.location and later compared with a
# vehicle's x coordinate to find the nearest unit.
rsus = [RSU(location) for location in [1000, 2000]]
# Agent with the default 9-feature state and 3 actions.
ddqn = DDQNAgent()
# Offloading counts dictionary for DDQN simulation
ddqn_offloading_counts = {"Local": 0, "RSU": 0, "V2V": 0}

def run_ddqn_simulation(num_episodes=1000):
    """Train the module-level DDQN agent on the offloading task.

    In every episode each mission vehicle builds a 9-feature state, the agent
    picks one of three actions (0 = local, 1 = RSU, anything else = V2V), the
    resulting reward is stored in the replay buffer, and latency/energy
    totals are added to the module-level accumulators. After each episode the
    exploration rate is decayed and one training step is run.

    If V2V is chosen but no cooperative vehicle is available, the task is
    executed locally and rewarded and accounted as local execution.

    Args:
        num_episodes: Number of training episodes to run.

    Returns:
        ``(ddqn, ddqn_episode_rewards)``: the trained agent and the list of
        per-episode total rewards.
    """
    global global_total_latency_local, global_total_latency_rsu, global_total_latency_v2v
    global global_total_energy_local, global_total_energy_rsu, global_total_energy_v2v
    global global_count_local, global_count_rsu, global_count_v2v

    ddqn_episode_rewards = []
    for episode in range(num_episodes):
        total_reward = 0
        for mv in mission_vehicles:
            mv.update_position(predicted_positions_dict)
            # State vector
            state = [mv.task_size, mv.task_cycles, mv.x, mv.y, mv.velocity, mv.direction, mv.cpu, V2R_BANDWIDTH, V2V_BANDWIDTH]
            action = ddqn.act(state)
            comm_revenue = mv.task_size * 10
            comp_revenue = mv.task_cycles / 1000

            # Resolve the V2V helper up front so that a missing helper can
            # fall through to the local branch below.
            selected_cv = None
            if action not in (0, 1):
                selected_cv = select_cooperative_vehicle(mv, cooperative_vehicles, model_lat, model_lon)

            if action == 1:  # RSU Offloading
                selected_rsu = min(rsus, key=lambda r: abs(mv.x - r.location))
                delay_comm = transmission_delay(mv.task_size, V2R_BANDWIDTH)
                delay_comp = mv.task_cycles / selected_rsu.cpu
                cost = BANDWIDTH_COST_RSU * V2R_BANDWIDTH
                reward = ALPHA * (comm_revenue - delay_comm) + BETA * (comp_revenue - delay_comp) - cost
                ddqn_offloading_counts["RSU"] += 1
                latency_rsu = delay_comm + delay_comp
                global_total_latency_rsu += latency_rsu
                global_total_energy_rsu += latency_rsu * POWER_RSU
                global_count_rsu += 1
            elif selected_cv is not None:  # V2V Offloading
                delay_comm = transmission_delay(mv.task_size, V2V_BANDWIDTH)
                delay_comp = mv.task_cycles / selected_cv.cpu
                cost = BANDWIDTH_COST_V2V * V2V_BANDWIDTH
                reward = ALPHA * (comm_revenue - delay_comm) + BETA * (comp_revenue - delay_comp) - cost
                ddqn_offloading_counts["V2V"] += 1
                latency_v2v = delay_comm + delay_comp
                global_total_latency_v2v += latency_v2v
                global_total_energy_v2v += latency_v2v * POWER_V2V
                global_count_v2v += 1
            else:
                # Local processing: chosen directly (action 0) or used as the
                # fallback when V2V was chosen but no helper exists. The
                # fallback previously left `reward` unassigned.
                delay = mv.task_cycles / mv.cpu
                reward = ALPHA * (comp_revenue - delay)
                ddqn_offloading_counts["Local"] += 1
                global_total_latency_local += delay
                global_total_energy_local += delay * POWER_LOCAL
                global_count_local += 1

            total_reward += reward
            # NOTE(review): the transition is stored with next_state equal to
            # the current state and done=False, so the target bootstraps from
            # the same state - confirm this is the intended formulation.
            next_state = state
            ddqn.store_experience(state, action, reward, next_state, done=False)
        if ddqn.epsilon > EPSILON_MIN:
            ddqn.epsilon = max(ddqn.epsilon * EPSILON_DECAY, EPSILON_MIN)
        ddqn_episode_rewards.append(total_reward)
        ddqn.train()
        if episode % 50 == 0:
            print(f"DDQN Episode {episode} - Total Reward: {total_reward:.2f}")
    return ddqn, ddqn_episode_rewards

# Visualization functions (you can reuse the same functions as before)
def plot_training_rewards(episode_rewards):
    """Show a line chart of the total reward obtained in each episode.

    Args:
        episode_rewards: Sequence of per-episode total rewards, in order.
    """
    episode_axis = range(len(episode_rewards))
    plt.figure(figsize=(10, 6))
    plt.plot(
        episode_axis,
        episode_rewards,
        marker='o',
        linestyle='-',
        label="Training Reward",
    )
    plt.title("DDQN Training: Reward Over Episodes")
    plt.xlabel("Episodes")
    plt.ylabel("Total Reward")
    plt.legend()
    plt.grid(True)
    plt.show()

def plot_offloading_distribution(offloading_counts):
    """Show a pie chart of how often each offloading option was chosen.

    Args:
        offloading_counts: Mapping from option label to its count; the three
            wedge colours are applied in the mapping's iteration order.
    """
    option_names = [name for name in offloading_counts]
    option_totals = [offloading_counts[name] for name in option_names]
    wedge_colors = ['gold', 'lightblue', 'lightgreen']
    plt.figure(figsize=(8, 8))
    plt.pie(
        option_totals,
        labels=option_names,
        autopct='%1.1f%%',
        startangle=140,
        colors=wedge_colors,
    )
    plt.title("DDQN Offloading Decision Distribution")
    plt.show()

def plot_latency_distribution():
    """Show a bar chart of the average latency per offloading option.

    Averages are the module-level latency totals divided by the matching
    task counts; an option with no tasks is plotted as 0.
    """
    totals_and_counts = (
        (global_total_latency_local, global_count_local),
        (global_total_latency_rsu, global_count_rsu),
        (global_total_latency_v2v, global_count_v2v),
    )
    averages = [
        total / count if count > 0 else 0
        for total, count in totals_and_counts
    ]
    plt.figure(figsize=(8, 6))
    plt.bar(['Local', 'RSU', 'V2V'], averages,
            color=['gold', 'lightblue', 'lightgreen'])
    plt.title("DDQN Average Latency by Offloading Option")
    plt.ylabel("Latency (s)")
    plt.show()

def plot_energy_consumption():
    """Show a bar chart of the average energy per offloading option.

    Averages are the module-level energy totals divided by the matching
    task counts; an option with no tasks is plotted as 0.
    """
    totals_and_counts = (
        (global_total_energy_local, global_count_local),
        (global_total_energy_rsu, global_count_rsu),
        (global_total_energy_v2v, global_count_v2v),
    )
    averages = [
        total / count if count > 0 else 0
        for total, count in totals_and_counts
    ]
    plt.figure(figsize=(8, 6))
    plt.bar(['Local', 'RSU', 'V2V'], averages,
            color=['gold', 'lightblue', 'lightgreen'])
    plt.title("DDQN Average Energy Consumption by Offloading Option")
    plt.ylabel("Energy (Joules)")
    plt.show()

# Run DDQN simulation and plot results
# Training runs first because the plots read the accumulators and the
# offloading-count dictionary that the simulation fills in.
ddqn_agent, ddqn_episode_rewards = run_ddqn_simulation()
plot_training_rewards(ddqn_episode_rewards)
plot_offloading_distribution(ddqn_offloading_counts)
# The next two plots take no arguments; they read the module-level totals.
plot_latency_distribution()
plot_energy_consumption()
