In [7]:
import pandas as pd

def find_row(data, alldata, status):
    # 将status字典转换为DataFrame中行的形式
    status_series = pd.Series(status)
    
    # 检查data中每一行的前8列
    for index, row in data.iterrows():
        if row[:8].equals(status_series):
            return row  # 找到匹配的行，返回这一行
    
    # 如果在data中没有找到，检查alldata
    for index, row in alldata.iterrows():
        if row[:8].equals(status_series):
            return row  # 找到匹配的行，返回这一行
    
    # 如果两个DataFrame都没有找到匹配的行，返回None
    return None

# 假设你已经有两个DataFrame data和alldata
data = pd.DataFrame({
    'core': [1, 2, 3],
    'l1i_size': [32, 64, 128],
    'l1d_size': [32, 64, 128],
    'l2_size': [256, 512, 1024],
    'l1d_assoc': [4, 4, 4],
    'l1i_assoc': [4, 4, 4],
    'l2_assoc': [8, 8, 8],
    'sys_clock': [2.4, 2.5, 2.6],
    'latency': [10, 20, 30],
    'area': [100, 200, 300],
    'power': [50, 60, 70]
})

alldata = pd.DataFrame({
    'core': [1, 5, 6],
    'l1i_size': [32, 512, 1024],
    'l1d_size': [32, 512, 1024],
    'l2_size': [256, 2048, 4096],
    'l1d_assoc': [4, 4, 4],
    'l1i_assoc': [4, 4, 4],
    'l2_assoc': [8, 8, 8],
    'sys_clock': [2.4, 2.8, 2.9],
    'latency': [10, 50, 60],
    'area': [100, 500, 600],
    'power': [50, 90, 100]
})

# status结构体，现假定为一个字典，与DataFrame的列结构相同
status = {
    'core': 1,
    'l1i_size': 32,
    'l1d_size': 32,
    'l2_size': 256,
    'l1d_assoc': 4,
    'l1i_assoc': 4,
    'l2_assoc': 8,
    'sys_clock': 2.4
}

result = find_row(data, alldata, status)
row = find_row(data,alldata,status)
# 检查结果是否非空，然后添加到data中
if row is not None:
    data = pd.concat([data, pd.DataFrame([row])], ignore_index=True)
    print("Row added successfully!")
else:
    print("No matching row found")
    
# data

result_dict = row[-3:].to_dict()

result_dict

Row added successfully!


{'latency': 10.0, 'area': 100.0, 'power': 50.0}

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical

# 超参数
NUM_AGENTS = 2
STATE_DIM = 24
ACTION_DIM = 2
HIDDEN_SIZE = 128
LR = 3e-4
GAMMA = 0.99
CLIP_EPS = 0.2
PPO_EPOCHS = 10
BATCH_SIZE = 64
MAX_STEPS = 1000

# 简单的多智能体环境
class SimpleMultiAgentEnv:
    def reset(self):
        # 返回每个智能体的初始状态
        return [np.random.randn(STATE_DIM) for _ in range(NUM_AGENTS)]
    
    def step(self, actions):
        # 返回下一个状态、奖励、完成标志和额外信息
        next_states = [np.random.randn(STATE_DIM) for _ in range(NUM_AGENTS)]
        rewards = [1.0 for _ in range(NUM_AGENTS)]  # 简单的固定奖励
        done = False
        return next_states, rewards, done, {}

# 策略网络
class PolicyNetwork(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_size):
        super(PolicyNetwork, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(state_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU()
        )
        self.action_head = nn.Linear(hidden_size, action_dim)
    
    def forward(self, x):
        x = self.fc(x)
        action_logits = self.action_head(x)
        return action_logits

# 价值网络
class ValueNetwork(nn.Module):
    def __init__(self, state_dim, hidden_size):
        super(ValueNetwork, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(state_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1)
        )
    
    def forward(self, x):
        value = self.fc(x)
        return value

# 经验回放缓冲区
class RolloutBuffer:
    def __init__(self):
        self.states = []
        self.actions = []
        self.log_probs = []
        self.rewards = []
        self.dones = []
        self.values = []
    
    def clear(self):
        self.states = []
        self.actions = []
        self.log_probs = []
        self.rewards = []
        self.dones = []
        self.values = []

# 初始化环境和网络
env = SimpleMultiAgentEnv()
policies = [PolicyNetwork(STATE_DIM, ACTION_DIM, HIDDEN_SIZE) for _ in range(NUM_AGENTS)]
value_net = ValueNetwork(STATE_DIM, HIDDEN_SIZE)
optimizer_policy = optim.Adam([param for policy in policies for param in policy.parameters()], lr=LR)
optimizer_value = optim.Adam(value_net.parameters(), lr=LR)

buffer = RolloutBuffer()

# 选择动作
def select_action(state, policy):
    state = torch.FloatTensor(state).unsqueeze(0)
    logits = policy(state)
    dist = Categorical(logits=logits.softmax(dim=-1))
    action = dist.sample()
    return action.item(), dist.log_prob(action), dist.entropy()

# 计算优势
def compute_advantages(rewards, dones, values, next_value):
    advantages = []
    gae = 0
    for step in reversed(range(len(rewards))):
        delta = rewards[step] + GAMMA * next_value * (1 - dones[step]) - values[step]
        gae = delta + GAMMA * 0.95 * (1 - dones[step]) * gae
        advantages.insert(0, gae)
        next_value = values[step]
    returns = [adv + val for adv, val in zip(advantages, values)]
    return advantages, returns

# 主训练循环
for episode in range(1000):
    states = env.reset()
    buffer.clear()
    for step in range(MAX_STEPS):
        actions = []
        log_probs = []
        values = []
        for i, state in enumerate(states):
            action, log_prob, _ = select_action(state, policies[i])
            actions.append(action)
            log_probs.append(log_prob)
            state_tensor = torch.FloatTensor(state).unsqueeze(0)
            value = value_net(state_tensor)
            values.append(value.item())
        
        buffer.states.append(states)
        buffer.actions.append(actions)
        buffer.log_probs.append(log_probs)
        buffer.values.append(values)
        
        next_states, rewards, done, _ = env.step(actions)
        buffer.rewards.append(rewards)
        buffer.dones.append(done)
        states = next_states
        
        if done:
            break
    
    # 计算最后一个状态的价值
    next_values = []
    for i, state in enumerate(states):
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        value = value_net(state_tensor)
        next_values.append(value.item())
    
    advantages, returns = compute_advantages(buffer.rewards, buffer.dones, buffer.values, next_values[0])
    
    # 转换为张量
    states = [s for episode_states in buffer.states for s in episode_states]
    actions = [a for episode_actions in buffer.actions for a in episode_actions]
    old_log_probs = [lp for episode_lps in buffer.log_probs for lp in episode_lps]
    advantages = torch.FloatTensor(advantages)
    returns = torch.FloatTensor(returns)
    
    # 更新策略网络
    for _ in range(PPO_EPOCHS):
        # 随机采样批次
        indices = np.random.randint(0, len(states), size=BATCH_SIZE)
        sampled_states = torch.FloatTensor([states[i] for i in indices])
        sampled_actions = torch.LongTensor([actions[i] for i in indices])
        sampled_old_log_probs = torch.stack([old_log_probs[i] for i in indices])
        sampled_advantages = advantages[indices]
        sampled_returns = returns[indices]
        
        # 计算新的log_probs
        new_log_probs = []
        for i, policy in enumerate(policies):
            logits = policy(sampled_states)
            dist = Categorical(logits=logits.softmax(dim=-1))
            log_prob = dist.log_prob(sampled_actions[i])
            new_log_probs.append(log_prob)
        
        # 计算策略损失
        ratios = torch.exp(torch.stack(new_log_probs) - sampled_old_log_probs)
        surr1 = ratios * sampled_advantages
        surr2 = torch.clamp(ratios, 1.0 - CLIP_EPS, 1.0 + CLIP_EPS) * sampled_advantages
        policy_loss = -torch.min(surr1, surr2).mean()
        
        # 计算价值损失
        values = value_net(sampled_states).squeeze()
        value_loss = nn.MSELoss()(values, sampled_returns)
        
        # 总损失
        loss = policy_loss + 0.5 * value_loss
        
        # 反向传播
        optimizer_policy.zero_grad()
        optimizer_value.zero_grad()
        loss.backward()
        optimizer_policy.step()
        optimizer_value.step()
    
    if episode % 10 == 0:
        print(f"Episode {episode}, Loss: {loss.item()}")

print("训练完成！")


TypeError: can only concatenate list (not "float") to list