In [2]:
import numpy as np
import random
from chargenv import Env


def fitness_function(environment, action_sequence):
    total_reward = 0
    environment.reset()
    for i in range(len(action_sequence)):
        state, reward, done = environment.step(action_sequence[i])
        total_reward += reward.item()  # 确保 reward 是标量
        if done:
            break
    return total_reward


class Individual:
    def __init__(self, x, fitness_func):
        self.x = x
        self.f = fitness_func(x)
        self.rank = None
        self.crowding_distance = 0

def tournament_selection(pop, k=2):
    participants = random.sample(pop, k)
    return max(participants, key=lambda ind: ind.f)

def sbx_crossover(parent1, parent2, eta=30):
    child1 = np.copy(parent1.x)
    child2 = np.copy(parent2.x)
    for i in range(child1.shape[0]):
        for j in range(child1.shape[1]):
            if random.random() <= 0.5:
                x1, x2 = parent1.x[i, j], parent2.x[i, j]
                if abs(x1 - x2) > 1e-14:
                    x_min, x_max = min(x1, x2), max(x1, x2)
                    rand = random.random()
                    beta = 1.0 + 2.0 * (x_min - 0.0) / (x_max - x_min)
                    alpha = 2.0 - beta**-(eta + 1)
                    if rand <= 1.0 / alpha:
                        betaq = (rand * alpha)**(1.0 / (eta + 1))
                    else:
                        betaq = (1.0 / (2.0 - rand * alpha))**(1.0 / (eta + 1))
                    c1 = 0.5 * ((x_min + x_max) - betaq * (x_max - x_min))
                    c2 = 0.5 * ((x_min + x_max) + betaq * (x_max - x_min))
                    child1[i, j] = np.clip(c1, 0.0, 0.5)
                    child2[i, j] = np.clip(c2, 0.0, 0.5)
    return child1, child2

def polynomial_mutation(x, mutation_rate=0.1, eta=20):
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            if random.random() < mutation_rate:
                r = random.random()
                delta = (
                    (2 * r)**(1.0 / (eta + 1)) - 1 if r < 0.5
                    else 1 - (2 * (1 - r))**(1.0 / (eta + 1))
                )
                x[i, j] = np.clip(x[i, j] + delta, 0.0, 0.5)
    return x

def run_nsga2_single_objective(fitness_function, env, action_dim, seq_len=10, pop_size=50, generations=50):
    def create_individual():
        x = np.random.uniform(0, 0.5, (seq_len, action_dim))
        return Individual(x, lambda a_seq: fitness_function(env, a_seq))

    population = [create_individual() for _ in range(pop_size)]

    for gen in range(generations):
        offspring = []
        while len(offspring) < pop_size:
            p1 = tournament_selection(population)
            p2 = tournament_selection(population)
            c1_x, c2_x = sbx_crossover(p1, p2)
            c1_x = polynomial_mutation(c1_x)
            c2_x = polynomial_mutation(c2_x)
            offspring.append(Individual(c1_x, lambda a_seq: fitness_function(env, a_seq)))
            offspring.append(Individual(c2_x, lambda a_seq: fitness_function(env, a_seq)))

        population.extend(offspring)
        population.sort(key=lambda ind: ind.f, reverse=True)
        population = population[:pop_size]

        best = population[0]
        print(f"Generation {gen+1}: Best Reward = {best.f:.4f}")

    return population[0]


if __name__ == "__main__":
    arrival_rate = 5
    data_path = './datasets/EVCD{}.csv'.format(arrival_rate)
    env = Env(1, 0, 0, arrival_rate, data_path)
    best_ind = run_nsga2_single_objective(
        fitness_function=fitness_function,
        env=env,
        action_dim=env.n_cs,
        seq_len=24,
        pop_size=50,
        generations=30
    )

    print("\nFinal Best Action Sequence:")
    print(best_ind.x)


Generation 1: Best Reward = 608.6084
Generation 2: Best Reward = 646.5010
Generation 3: Best Reward = 763.0102
Generation 4: Best Reward = 763.0102
Generation 5: Best Reward = 763.0102
Generation 6: Best Reward = 784.7495
Generation 7: Best Reward = 784.7495
Generation 8: Best Reward = 784.7495
Generation 9: Best Reward = 808.4484
Generation 10: Best Reward = 809.1660
Generation 11: Best Reward = 878.4088
Generation 12: Best Reward = 888.1580
Generation 13: Best Reward = 905.7545
Generation 14: Best Reward = 905.7545
Generation 15: Best Reward = 981.3541
Generation 16: Best Reward = 981.3541
Generation 17: Best Reward = 981.3541
Generation 18: Best Reward = 990.0145
Generation 19: Best Reward = 1004.4786
Generation 20: Best Reward = 1047.8822
Generation 21: Best Reward = 1063.7782
Generation 22: Best Reward = 1063.7782
Generation 23: Best Reward = 1063.7782
Generation 24: Best Reward = 1063.7782
Generation 25: Best Reward = 1073.0249
Generation 26: Best Reward = 1073.0249
Generation 27

In [8]:
import numpy as np
import random
from chargenv import Env
import time

def fitness_function(environment, action_sequence):
    total_reward = 0
    environment.reset()
    for i in range(len(action_sequence)):
        state, reward, done = environment.step(action_sequence[i])
        total_reward += reward.item()  # 确保 reward 是标量
        if done:
            break
    return total_reward


class Individual:
    def __init__(self, x, fitness_func):
        self.x = x
        self.f = fitness_func(x)
        self.rank = None
        self.crowding_distance = 0

def tournament_selection(pop, k=2):
    participants = random.sample(pop, k)
    return max(participants, key=lambda ind: ind.f)

def sbx_crossover(parent1, parent2, eta=30):
    child1 = np.copy(parent1.x)
    child2 = np.copy(parent2.x)
    for i in range(child1.shape[0]):
        for j in range(child1.shape[1]):
            if random.random() <= 0.5:
                x1, x2 = parent1.x[i, j], parent2.x[i, j]
                if abs(x1 - x2) > 1e-14:
                    x_min, x_max = min(x1, x2), max(x1, x2)
                    rand = random.random()
                    beta = 1.0 + 2.0 * (x_min - 0.0) / (x_max - x_min)
                    alpha = 2.0 - beta**-(eta + 1)
                    if rand <= 1.0 / alpha:
                        betaq = (rand * alpha)**(1.0 / (eta + 1))
                    else:
                        betaq = (1.0 / (2.0 - rand * alpha))**(1.0 / (eta + 1))
                    c1 = 0.5 * ((x_min + x_max) - betaq * (x_max - x_min))
                    c2 = 0.5 * ((x_min + x_max) + betaq * (x_max - x_min))
                    child1[i, j] = np.clip(c1, 0.0, 0.5)
                    child2[i, j] = np.clip(c2, 0.0, 0.5)
    return child1, child2

def polynomial_mutation(x, mutation_rate=0.1, eta=20):
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            if random.random() < mutation_rate:
                r = random.random()
                delta = (
                    (2 * r)**(1.0 / (eta + 1)) - 1 if r < 0.5
                    else 1 - (2 * (1 - r))**(1.0 / (eta + 1))
                )
                x[i, j] = np.clip(x[i, j] + delta, 0.0, 0.5)
    return x
def run_nsga2_single_objective(fitness_function, env, action_dim, seq_len=10, pop_size=50, generations=50):
    start_time = time.time()  # ← 加在函数最前面

    def create_individual():
        x = np.random.uniform(0, 0.5, (seq_len, action_dim))
        return Individual(x, lambda a_seq: fitness_function(env, a_seq))

    population = [create_individual() for _ in range(pop_size)]

    for gen in range(generations):
        offspring = []
        while len(offspring) < pop_size:
            p1 = tournament_selection(population)
            p2 = tournament_selection(population)
            c1_x, c2_x = sbx_crossover(p1, p2)
            c1_x = polynomial_mutation(c1_x)
            c2_x = polynomial_mutation(c2_x)
            offspring.append(Individual(c1_x, lambda a_seq: fitness_function(env, a_seq)))
            offspring.append(Individual(c2_x, lambda a_seq: fitness_function(env, a_seq)))

        population.extend(offspring)
        population.sort(key=lambda ind: ind.f, reverse=True)
        population = population[:pop_size]

        best = population[0]
        print(f"Generation {gen+1}: Best Reward = {best.f:.4f}")

    duration = time.time() - start_time  # ← 计算耗时
    print(f"NSGA-II run finished in {duration:.2f} seconds.")

    return population[0]



def evaluate_action_sequence(env, action_sequence, repeat=10):
    """重复执行某个动作序列，计算平均奖励"""
    rewards = []
    for _ in range(repeat):
        rewards.append(fitness_function(env, action_sequence))
    return np.mean(rewards), np.std(rewards)


if __name__ == "__main__":
    arrival_rate = 3
    data_path = './datasets/EVCD{}.csv'.format(arrival_rate)
    env = Env(1, 0, 0, arrival_rate, data_path)

    NUM_TRIALS = 5
    TEST_REPEAT = 100

    all_means = []
    all_stds = []

    print("\nRunning NSGA-II multiple times to evaluate average performance...\n")

    total_start_time = time.time()  # ← 添加总开始时间

    for i in range(NUM_TRIALS):
        print(f"\nTrial {i+1}")
        best_ind = run_nsga2_single_objective(
            fitness_function=fitness_function,
            env=env,
            action_dim=env.n_cs,
            seq_len=24,
            pop_size=30,
            generations=100
        )

        mean_reward, std_reward = evaluate_action_sequence(env, best_ind.x, repeat=TEST_REPEAT)
        print(f">>> Trial {i+1} | Mean reward: {mean_reward:.2f}, Std: {std_reward:.2f}")
        all_means.append(mean_reward)
        all_stds.append(std_reward)

    total_duration = time.time() - total_start_time  # ← 总耗时

    overall_mean = np.mean(all_means)
    overall_std = np.std(all_means)

    print("\n===== NSGA-II Evaluation Summary =====")
    print(f"Average of mean rewards over {NUM_TRIALS} trials: {overall_mean:.4f}")
    print(f"Standard deviation across trials: {overall_std:.4f}")
    print(f"Total time for all {NUM_TRIALS} trials: {total_duration:.2f} seconds")
    print(f"Average time per trial: {total_duration / NUM_TRIALS:.2f} seconds")



Running NSGA-II multiple times to evaluate average performance...


Trial 1
[7063.24s] Best Reward = 469.2646
[7064.24s] Best Reward = 469.2646
[7065.24s] Best Reward = 469.2646
[7066.24s] Best Reward = 469.2646
[7067.25s] Best Reward = 469.2646
Generation 1: Best Reward = 751.4858
[7068.25s] Best Reward = 469.2646
[7069.26s] Best Reward = 469.2646


KeyboardInterrupt: 

[7070.26s] Best Reward = 469.2646
[7071.26s] Best Reward = 469.2646
[7072.26s] Best Reward = 469.2646
[7073.26s] Best Reward = 469.2646
[7074.26s] Best Reward = 469.2646
[7075.26s] Best Reward = 469.2646
[7076.27s] Best Reward = 469.2646
[7077.27s] Best Reward = 469.2646
[7078.27s] Best Reward = 469.2646
[7079.27s] Best Reward = 469.2646
[7080.27s] Best Reward = 469.2646
[7081.27s] Best Reward = 469.2646
[7082.27s] Best Reward = 469.2646
[7083.27s] Best Reward = 469.2646
[7084.27s] Best Reward = 469.2646
[7085.27s] Best Reward = 469.2646
[7086.27s] Best Reward = 469.2646
[7087.27s] Best Reward = 469.2646
[7088.27s] Best Reward = 469.2646
[7089.27s] Best Reward = 469.2646
[7090.28s] Best Reward = 469.2646
[7091.28s] Best Reward = 469.2646
[7092.28s] Best Reward = 469.2646
[7093.28s] Best Reward = 469.2646
[7094.28s] Best Reward = 469.2646
[7095.28s] Best Reward = 469.2646


In [1]:
import numpy as np
import random
from chargenv import Env
import time
import threading
def fitness_function(environment, action_sequence):
    total_reward = 0
    environment.reset()
    for i in range(len(action_sequence)):
        state, reward, done = environment.step(action_sequence[i])
        total_reward += reward.item()  # 确保 reward 是标量
        if done:
            break
    return total_reward


class Individual:
    def __init__(self, x, fitness_func):
        self.x = x
        self.f = fitness_func(x)
        self.rank = None
        self.crowding_distance = 0

def tournament_selection(pop, k=2):
    participants = random.sample(pop, k)
    return max(participants, key=lambda ind: ind.f)

def sbx_crossover(parent1, parent2, eta=30):
    child1 = np.copy(parent1.x)
    child2 = np.copy(parent2.x)
    for i in range(child1.shape[0]):
        for j in range(child1.shape[1]):
            if random.random() <= 0.5:
                x1, x2 = parent1.x[i, j], parent2.x[i, j]
                if abs(x1 - x2) > 1e-14:
                    x_min, x_max = min(x1, x2), max(x1, x2)
                    rand = random.random()
                    beta = 1.0 + 2.0 * (x_min - 0.0) / (x_max - x_min)
                    alpha = 2.0 - beta**-(eta + 1)
                    if rand <= 1.0 / alpha:
                        betaq = (rand * alpha)**(1.0 / (eta + 1))
                    else:
                        betaq = (1.0 / (2.0 - rand * alpha))**(1.0 / (eta + 1))
                    c1 = 0.5 * ((x_min + x_max) - betaq * (x_max - x_min))
                    c2 = 0.5 * ((x_min + x_max) + betaq * (x_max - x_min))
                    child1[i, j] = np.clip(c1, 0.0, 0.5)
                    child2[i, j] = np.clip(c2, 0.0, 0.5)
    return child1, child2

def polynomial_mutation(x, mutation_rate=0.1, eta=20):
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            if random.random() < mutation_rate:
                r = random.random()
                delta = (
                    (2 * r)**(1.0 / (eta + 1)) - 1 if r < 0.5
                    else 1 - (2 * (1 - r))**(1.0 / (eta + 1))
                )
                x[i, j] = np.clip(x[i, j] + delta, 0.0, 0.5)
    return x
def run_nsga2_single_objective(fitness_function, env, action_dim, seq_len=10, pop_size=50, generations=50):
    start_time = time.time()
    best_holder = {"best": None, "stop": False}

    def create_individual():
        x = np.random.uniform(0, 0.5, (seq_len, action_dim))
        return Individual(x, lambda a_seq: fitness_function(env, a_seq))

    population = [create_individual() for _ in range(pop_size)]

    # 定义每秒输出函数
    def print_best_reward():
        if best_holder["stop"]:
            return
        if best_holder["best"] is not None:
            now = time.time() - start_time
            print(f"[{now:.2f}s] Best Reward = {best_holder['best'].f:.4f}")
        threading.Timer(1.0, print_best_reward).start()

    print_best_reward()  # 启动定时器

    for gen in range(generations):
        offspring = []
        while len(offspring) < pop_size:
            p1 = tournament_selection(population)
            p2 = tournament_selection(population)
            c1_x, c2_x = sbx_crossover(p1, p2)
            c1_x = polynomial_mutation(c1_x)
            c2_x = polynomial_mutation(c2_x)
            offspring.append(Individual(c1_x, lambda a_seq: fitness_function(env, a_seq)))
            offspring.append(Individual(c2_x, lambda a_seq: fitness_function(env, a_seq)))

        population.extend(offspring)
        population.sort(key=lambda ind: ind.f, reverse=True)
        population = population[:pop_size]
        best_holder["best"] = population[0]  # 实时更新当前最优

    best_holder["stop"] = True  # 通知停止输出
    duration = time.time() - start_time
    print(f"NSGA-II run finished in {duration:.2f} seconds.")
    return population[0]



def evaluate_action_sequence(env, action_sequence, repeat=10):
    """重复执行某个动作序列，计算平均奖励"""
    rewards = []
    for _ in range(repeat):
        rewards.append(fitness_function(env, action_sequence))
    return np.mean(rewards), np.std(rewards)


if __name__ == "__main__":
    arrival_rate = 6
    data_path = './datasets/EVCD{}.csv'.format(arrival_rate)
    env = Env(1, 0, 0, arrival_rate, data_path)

    NUM_TRIALS = 5
    TEST_REPEAT = 100

    all_means = []
    all_stds = []

    print("\nRunning NSGA-II multiple times to evaluate average performance...\n")

    total_start_time = time.time()  # ← 添加总开始时间

    for i in range(NUM_TRIALS):
        print(f"\nTrial {i+1}")
        best_ind = run_nsga2_single_objective(
            fitness_function=fitness_function,
            env=env,
            action_dim=env.n_cs,
            seq_len=24,
            pop_size=30,
            generations=100
        )

        mean_reward, std_reward = evaluate_action_sequence(env, best_ind.x, repeat=TEST_REPEAT)
        print(f">>> Trial {i+1} | Mean reward: {mean_reward:.2f}, Std: {std_reward:.2f}")
        all_means.append(mean_reward)
        all_stds.append(std_reward)

    total_duration = time.time() - total_start_time  # ← 总耗时

    overall_mean = np.mean(all_means)
    overall_std = np.std(all_means)

    print("\n===== NSGA-II Evaluation Summary =====")
    print(f"Average of mean rewards over {NUM_TRIALS} trials: {overall_mean:.4f}")
    print(f"Standard deviation across trials: {overall_std:.4f}")
    print(f"Total time for all {NUM_TRIALS} trials: {total_duration:.2f} seconds")
    print(f"Average time per trial: {total_duration / NUM_TRIALS:.2f} seconds")



Running NSGA-II multiple times to evaluate average performance...


Trial 1
[7.69s] Best Reward = 472.1435
[8.69s] Best Reward = 472.1435
[9.69s] Best Reward = 472.1435
[10.70s] Best Reward = 472.1435
[11.70s] Best Reward = 523.3223
[12.70s] Best Reward = 523.3223
[13.70s] Best Reward = 523.3223
[14.70s] Best Reward = 523.3223
[15.70s] Best Reward = 523.3223
[16.70s] Best Reward = 523.3223
[17.70s] Best Reward = 536.1617
[18.70s] Best Reward = 536.1617
[19.70s] Best Reward = 536.1617
[20.70s] Best Reward = 536.1617
[21.71s] Best Reward = 536.1617
[22.76s] Best Reward = 536.1617
[23.76s] Best Reward = 536.1617
[24.76s] Best Reward = 536.1617
[25.76s] Best Reward = 536.1617
[26.76s] Best Reward = 536.1617
[27.76s] Best Reward = 542.2917
[28.76s] Best Reward = 542.2917
[29.76s] Best Reward = 542.2917
[30.77s] Best Reward = 542.2917
[31.77s] Best Reward = 543.1985
[32.77s] Best Reward = 543.1985
[33.77s] Best Reward = 543.1985
[34.78s] Best Reward = 543.1985
[35.78s] Best Reward = 543.198

KeyboardInterrupt: 

[193.60s] Best Reward = 522.5909
[194.60s] Best Reward = 522.5909
[195.60s] Best Reward = 522.5909
[196.60s] Best Reward = 522.5909
[197.60s] Best Reward = 522.5909
[198.60s] Best Reward = 522.5909
[199.60s] Best Reward = 522.5909
[200.60s] Best Reward = 522.5909
[201.60s] Best Reward = 522.5909
[202.61s] Best Reward = 522.5909
[203.61s] Best Reward = 522.5909
[204.61s] Best Reward = 522.5909
[205.61s] Best Reward = 522.5909
[206.61s] Best Reward = 522.5909
[207.61s] Best Reward = 522.5909
[208.61s] Best Reward = 522.5909
[209.61s] Best Reward = 522.5909
[210.61s] Best Reward = 522.5909
[211.61s] Best Reward = 522.5909
[212.61s] Best Reward = 522.5909
[213.61s] Best Reward = 522.5909
[214.61s] Best Reward = 522.5909
[215.61s] Best Reward = 522.5909
[216.61s] Best Reward = 522.5909
[217.62s] Best Reward = 522.5909
[218.62s] Best Reward = 522.5909
[219.62s] Best Reward = 522.5909
[220.62s] Best Reward = 522.5909
[221.62s] Best Reward = 522.5909
[222.62s] Best Reward = 522.5909
[223.62s] 