In [4]:
import os
import time

import numpy as np

from chargenv import Env


class GreyWolfOptimizer:
    """Grey Wolf Optimizer that searches for the action maximising episode reward.

    Every wolf is one candidate action vector. A candidate is scored by
    replaying it for a whole episode in ``env`` and summing the rewards; the
    three best candidates (alpha, beta, delta) then pull the rest of the pack
    towards them.

    Args:
        alpha_pos: Initial position of the best wolf (overwritten by the first
            evaluation of the pack).
        alpha_score: Initial score of the best wolf (overwritten likewise).
        dim: Dimensionality of an action vector.
        wolves_num: Number of wolves; at least 3 are needed to pick the
            alpha, beta and delta leaders.
        iter_num: Number of optimisation iterations.
        action_bound: ``(low, high)`` limits of an action. Used to draw the
            initial pack and to clip every updated position. ``None`` disables
            clipping and falls back to the historical ``[-0.5, 0.5]`` initial
            range.
        env: Environment exposing ``reset()`` and ``step(action)``; ``step``
            must return ``(state, reward, done)``.
        episode_steps: Maximum number of steps replayed per episode.

    Raises:
        ValueError: If ``wolves_num`` is smaller than 3.
    """

    def __init__(self, alpha_pos, alpha_score, dim, wolves_num, iter_num, action_bound, env,
                 episode_steps=24):
        if wolves_num < 3:
            raise ValueError("wolves_num must be at least 3 (alpha, beta and delta leaders)")
        self.dim = dim  # dimensionality of an action
        self.wolves_num = wolves_num  # size of the pack
        self.iter_num = iter_num  # number of iterations
        self.action_bound = action_bound  # (low, high) limits of an action
        self.alpha_pos = alpha_pos
        self.alpha_score = alpha_score
        self.beta_pos = np.zeros(dim)
        self.delta_pos = np.zeros(dim)
        self.env = env  # environment used to score candidate actions
        self.episode_steps = episode_steps
        self.MR = 0  # best score seen so far; only scores above this baseline count
        self.score_history = []  # alpha score recorded after every iteration
        low, high = (-0.5, 0.5) if action_bound is None else action_bound
        self.positions = np.random.uniform(low=low, high=high, size=(self.wolves_num, self.dim))

    def _clip_to_bounds(self, positions):
        """Return ``positions`` limited to ``action_bound`` (unchanged if no bound is set)."""
        if self.action_bound is None:
            return positions
        low, high = self.action_bound
        return np.clip(positions, low, high)

    def simulate_env(self, action):
        """Replay ``action`` for one episode and return the accumulated reward.

        The environment is reset first, then the same action is applied for up
        to ``episode_steps`` steps, stopping early once ``done`` is reported.
        """
        total_reward = 0
        self.env.reset()
        for _ in range(self.episode_steps):
            _state, reward, done = self.env.step(action)
            # Accept array/tensor-like scalars as well as plain Python numbers.
            total_reward += reward.item() if hasattr(reward, "item") else reward
            if done:
                break
        return total_reward

    def update_alpha_beta_delta(self):
        """Score every wolf and record the three best as alpha, beta and delta."""
        scores = np.array([self.simulate_env(action) for action in self.positions])
        ranking = np.argsort(scores)  # ascending, so the best wolves are last
        best, second, third = ranking[-1], ranking[-2], ranking[-3]
        # Copy the rows: indexing returns views, and update_positions() rewrites
        # self.positions in place, which would otherwise move the leaders
        # while the pack is still being updated.
        self.alpha_pos = self.positions[best].copy()
        self.alpha_score = scores[best]
        self.beta_pos = self.positions[second].copy()
        self.delta_pos = self.positions[third].copy()

    def update_positions(self, iteration):
        """Move every wolf towards the average of the three leader-guided targets.

        Args:
            iteration: Zero-based index of the current iteration; controls the
                exploration factor ``a``.
        """
        a = 2 - 2 * (iteration / self.iter_num)  # decreases linearly from 2 towards 0
        # One draw per (wolf, dimension, coefficient); the last axis is ordered
        # A1, C1, A2, C2, A3, C3, which matches the order of element-wise draws.
        rand = np.random.random(self.positions.shape + (6,))
        target_sum = np.zeros_like(self.positions)
        for k, leader in enumerate((self.alpha_pos, self.beta_pos, self.delta_pos)):
            A = 2 * a * rand[..., 2 * k] - a
            C = 2 * rand[..., 2 * k + 1]
            distance = np.abs(C * leader - self.positions)
            target_sum += leader - A * distance
        self.positions[...] = self._clip_to_bounds(target_sum / 3)

    def optimize(self, start_time):
        """Run the optimisation loop and report the best score found.

        Args:
            start_time: Reference timestamp (as returned by ``time.time()``)
                used for the elapsed-time printout.

        Returns:
            Tuple ``(MR, end_time)``: the best score seen and the timestamp at
            which it was reached. If no score ever exceeded the baseline,
            ``end_time`` is the time the loop finished.
        """
        # Backward compatibility: older driver code reads a module-level
        # ``return_list``; keep feeding it when it exists.
        legacy_log = globals().get("return_list")
        end_time = None
        for iteration in range(self.iter_num):
            self.update_alpha_beta_delta()  # re-rank the pack and pick the leaders
            print("Iteration:", iteration+1, "Score:", self.alpha_score)
            self.update_positions(iteration)  # move the pack towards the leaders
            self.score_history.append(self.alpha_score)
            if legacy_log is not None:
                legacy_log.append(self.alpha_score)
            if self.alpha_score > self.MR:
                self.MR = self.alpha_score
                end_time = time.time()
                print('MR:', self.MR, 'Time:', end_time - start_time)
        if end_time is None:
            # No improvement was ever recorded; report when the search ended.
            end_time = time.time()
        return self.MR, end_time


# Environment setup
arrival_rate = 1
data_path = './datasets/EVCD{}.csv'.format(arrival_rate)
env = Env(1, 0, 0, arrival_rate, data_path)
dim = 1  # dimensionality of the action vector

# GWO hyper-parameters
alpha_pos = np.zeros(dim)  # placeholder position of the best wolf
# Scores are maximised, so the placeholder best score is the lowest possible
# value; the optimizer overwrites it on its first evaluation of the pack.
alpha_score = float("-inf")
return_list = []  # alpha score of every iteration, filled in by the optimizer
wolves_num = 10  # number of wolves in the pack
iter_num = 1000  # number of iterations
action_bound = [-0.5, 0.5]  # lower / upper limit of an action

# Create the output directory before the long run so that a missing folder
# cannot discard the results after the optimisation has finished.
result_path = './result/GWO{}.csv'.format(arrival_rate)
os.makedirs(os.path.dirname(result_path), exist_ok=True)

start_time = time.time()
gwo = GreyWolfOptimizer(alpha_pos, alpha_score, dim, wolves_num, iter_num, action_bound, env)


MR, end_time = gwo.optimize(start_time)

print('MR:', MR, 'Time:', end_time-start_time)
# One score per line, in iteration order.
with open(result_path, 'w', encoding='utf-8') as file:
    file.writelines(f"{item}\n" for item in return_list)

Iteration: 1 Score: 632.2799850106239
MR: 632.2799850106239 Time: 0.2656283378601074
Iteration: 2 Score: 699.1056889295578
MR: 699.1056889295578 Time: 0.5310978889465332
Iteration: 3 Score: 691.8618742227554
Iteration: 4 Score: 725.7581456899643
MR: 725.7581456899643 Time: 1.0887739658355713
Iteration: 5 Score: 727.8027340173721
MR: 727.8027340173721 Time: 1.4323110580444336
Iteration: 6 Score: 689.48355281353
Iteration: 7 Score: 708.7074770927429
Iteration: 8 Score: 746.5671373605728
MR: 746.5671373605728 Time: 2.2588601112365723
Iteration: 9 Score: 753.5533212423325
MR: 753.5533212423325 Time: 2.5393905639648438
Iteration: 10 Score: 744.1987003087997
Iteration: 11 Score: 741.6996408700943
Iteration: 12 Score: 741.0560567378998
Iteration: 13 Score: 718.1610351800919
Iteration: 14 Score: 725.4612931013107
Iteration: 15 Score: 745.0794032812119
Iteration: 16 Score: 690.3874032497406
Iteration: 17 Score: 735.2440937757492
Iteration: 18 Score: 709.8598631620407
Iteration: 19 Score: 754.27