In [1]:
import gymnasium as gym
import random
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
import pandas as pd
import torch
%matplotlib inline

In [2]:
from dqn_agent import Agent
from OPA import OPA
import static_data as sd

# --- Parking-lot layout, read from the shared static_data module ---
total_slot = sd.total_slot                            # total number of parking slots
park_slot_index = sd.ops_index                        # indices of the ordinary parking slots
charge_slot_index = sd.cps_index                      # indices of the charging slots
slot_index = [park_slot_index, charge_slot_index]     # both index collections: [ordinary, charging]
window_time = sd.window_time                          # number of discretised time intervals (195 here, 15-minute units)
total_request = 2120                                  # 2000 ordinary requests + 120 charging requests

# --- Request data ---
req_info = pd.read_csv(sd.req_info_path)
# Per-request value: 'parking_t' + 'char_t', with missing entries treated as 0, cast to int.
req_revenue = np.array(
    req_info['parking_t'].fillna(0).add(req_info['char_t'].fillna(0)).values,
    dtype=int,
)
req_type = np.array(req_info["charge_label"], dtype=int)
rmk = np.array(pd.read_csv(sd.r_mk_path))

# --- Problem dimensions (immediate, one-request-at-a-time decisions) ---
# Observation = slot supply state + one demand + the demand type, i.e.
# total_slot * window_time  +  window_time  +  1
# (assumes supply is total_slot x window_time and demand is one window_time row — TODO confirm against OPA).
state_size = total_slot * window_time + window_time + 1
# The action space is the index of the parking slot to assign.
action_size = total_slot

In [3]:
# Build the DQN agent sized for the flattened observation (state_size) and the
# slot-index action space (action_size). seed=0 is forwarded to the Agent
# constructor — presumably for reproducibility; confirm in dqn_agent.
agent = Agent(state_size=state_size,action_size=action_size,seed=0)
# Create the environment instance (OPA comes from the project-local OPA module).
# NOTE(review): kept after the Agent construction on purpose — either constructor
# may consume random state, so the order is left untouched.
env = OPA()

In [5]:
from copy import deepcopy

def new_state(env_state):
    """Flatten an environment observation into a single 1-D tensor.

    :param env_state: mapping with the keys "supply", "demand" and "type";
        each value must provide ``.flatten()`` (e.g. a numpy array).
    :return: 1-D tensor holding the flattened "supply" values, followed by
        the flattened "demand" values, followed by the flattened "type" values.
    """
    pieces = []
    for key in ("supply", "demand", "type"):
        # torch.tensor() copies, so the result does not alias env_state.
        pieces.append(torch.tensor(env_state[key].flatten()))
    return torch.cat(pieces)

def dqn(n_episode=2000,episode_length=total_request,eps_start=1.0,eps_end=0.01,eps_decay=0.995):
    """Train the module-level ``agent`` on the module-level ``env`` with epsilon-greedy exploration.

    Every 20 episodes the local Q-network weights are written to
    ``checkpoint.pth``; every 100 episodes the latest score is printed.

    :param n_episode: max number of training episodes
    :param episode_length: max number of environment steps per episode
    :param eps_start: initial epsilon handed to ``agent.act``
    :param eps_end: lower bound for epsilon
    :param eps_decay: factor epsilon is multiplied by after each episode
    :return: list with the summed reward of each episode, in training order
    """
    # NOTE(review): the recorded notebook output shows
    # "TypeError: unsqueeze(): argument 'input' ... must be Tensor, not int".
    # The failing call is not in this notebook (presumably inside dqn_agent);
    # confirm which types Agent.step expects for action / reward / done.
    scores = []
    eps = eps_start
    for i_episode in range(1, n_episode + 1):
        state = env.reset()
        # The state is a dict; deep-copy it so building the agent's view can
        # never modify the environment's own state.
        agent_state = new_state(deepcopy(state))
        score = 0
        for _ in range(episode_length):
            action = agent.act(agent_state, eps)
            # env.step returns a 4-tuple; the cumulative reward is not needed here.
            next_state, reward, _cum_rewards, done = env.step(action)
            agent_next_state = new_state(deepcopy(next_state))
            agent.step(agent_state, action, reward, agent_next_state, done)
            agent_state = agent_next_state
            score += reward
            if done:
                break
        scores.append(score)
        eps = max(eps_end, eps_decay * eps)
        if i_episode % 100 == 0:
            print('\rEpisode {}\t Score: {:.2f}'.format(i_episode,score), end="")
        if i_episode % 20 == 0:
            # Periodic checkpoint only; training continues up to n_episode
            # (a stray `break` here used to stop training after episode 20).
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint.pth')
    # Hand the per-episode scores back to the caller, which plots them.
    return scores

# Run training with the default hyper-parameters and keep what dqn() returns.
scores = dqn()

# Plot the score of every episode against its (0-based) episode index.
fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

TypeError: unsqueeze(): argument 'input' (position 1) must be Tensor, not int