In [None]:
import gym
import torch
import torch.nn as nn
import torch.multiprocessing as mp
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Simple Actor-Critic network
class ActorCritic(nn.Module):
    """Two-headed network: a shared hidden layer feeding a policy and a value head.

    ``forward`` returns ``(action_probabilities, state_value)`` where the
    probabilities are a softmax over the last dimension and the value has a
    trailing dimension of size 1.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        hidden_dim = 128
        # Shared feature layer, followed by the two output heads.
        self.fc = nn.Linear(state_dim, hidden_dim)
        self.pi = nn.Linear(hidden_dim, action_dim)
        self.v = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        hidden = F.relu(self.fc(x))
        action_probs = F.softmax(self.pi(hidden), dim=-1)
        state_value = self.v(hidden)
        return action_probs, state_value

# Worker process
def worker(worker_id, global_net, optimizer, env_name, result_queue, gamma=0.99,
           rollout_steps=5, max_episodes=200):
    """Run one actor-critic worker and report its per-episode rewards.

    The worker repeatedly collects a short rollout with a local copy of the
    network, computes discounted n-step returns, back-propagates the loss
    through the local network, hands the resulting gradients to
    ``global_net``, steps ``optimizer`` and re-syncs the local weights.

    Args:
        worker_id: Identifier of this worker (not used by the training logic).
        global_net: Network that receives the gradients and whose
            ``state_dict`` is copied into the local network.
        optimizer: Optimizer whose ``step()`` applies the gradients that were
            attached to ``global_net``'s parameters.
        env_name: Environment id passed to ``gym.make``.
        result_queue: Queue that receives the list of total rewards, one
            entry per finished episode.
        gamma: Discount factor used for the n-step returns.
        rollout_steps: Maximum number of environment steps per update.
        max_episodes: Number of finished episodes after which the worker stops.

    Raises:
        ValueError: If ``rollout_steps`` is smaller than 1.
    """
    if rollout_steps < 1:
        raise ValueError("rollout_steps must be at least 1")

    env = gym.make(env_name)
    try:
        local_net = ActorCritic(env.observation_space.shape[0], env.action_space.n)
        # Start from the shared weights; otherwise the first rollout and the
        # first gradient come from an unrelated random initialisation.
        local_net.load_state_dict(global_net.state_dict())

        state = _reset_env(env)
        total_reward = 0
        cumulative_rewards = []

        while len(cumulative_rewards) < max_episodes:
            buffer_s, buffer_a, buffer_r = [], [], []
            done = False
            for _step in range(rollout_steps):
                # Sampling an action needs no autograd graph.
                with torch.no_grad():
                    prob, _value = local_net(torch.tensor(state, dtype=torch.float32))
                action = torch.multinomial(prob, 1).item()
                next_state, reward, done = _step_env(env, action)
                total_reward += reward
                buffer_s.append(state)
                buffer_a.append(action)
                buffer_r.append(reward)

                state = next_state
                if done:
                    cumulative_rewards.append(total_reward)
                    state = _reset_env(env)
                    total_reward = 0
                    break

            # Bootstrap from the critic unless the rollout ended the episode.
            if done:
                R = 0.0
            else:
                with torch.no_grad():
                    R = local_net(torch.tensor(state, dtype=torch.float32))[1].item()

            # Discounted returns, built back-to-front and reversed once.
            buffer_v_target = []
            for r in reversed(buffer_r):
                R = r + gamma * R
                buffer_v_target.append(R)
            buffer_v_target.reverse()

            loss = compute_loss(local_net, buffer_s, buffer_a, buffer_v_target)
            # Clear the LOCAL gradients: the optimizer only knows the global
            # parameters, so optimizer.zero_grad() would leave the local
            # .grad tensors in place and backward() would keep accumulating
            # into them across rollouts.
            local_net.zero_grad()
            loss.backward()
            for global_param, local_param in zip(global_net.parameters(), local_net.parameters()):
                global_param._grad = local_param.grad
            optimizer.step()
            local_net.load_state_dict(global_net.state_dict())

        result_queue.put(cumulative_rewards)
    finally:
        env.close()


def _reset_env(env):
    """Reset ``env`` and return only the observation for old and new gym APIs."""
    result = env.reset()
    # gym >= 0.26 returns (observation, info); older versions return the
    # observation alone.
    if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
        return result[0]
    return result


def _step_env(env, action):
    """Step ``env`` and return ``(next_state, reward, done)`` for old and new gym APIs."""
    result = env.step(action)
    if len(result) == 5:
        # gym >= 0.26: (obs, reward, terminated, truncated, info).
        next_state, reward, terminated, truncated, _info = result
        return next_state, reward, bool(terminated or truncated)
    next_state, reward, done, _info = result
    return next_state, reward, done

# Compute combined actor-critic loss
def compute_loss(net, states, actions, rewards):
    """Return the combined actor-critic loss for one rollout.

    Args:
        net: Callable mapping a batch of states to ``(probs, values)``, where
            ``probs`` holds one row of action probabilities per state and
            ``values`` one value estimate per state.
        states: Sequence of per-step states (one entry per step).
        actions: Sequence of integer action indices, one per step.
        rewards: Sequence of value targets, one per step. Despite the name,
            the caller in this file passes discounted returns here.

    Returns:
        A scalar tensor: policy-gradient loss (advantage treated as a
        constant) plus the mean-squared error between values and targets.
    """
    # Stacking per-step tensors avoids the slow list-of-arrays conversion.
    states = torch.stack([torch.as_tensor(s, dtype=torch.float32) for s in states])
    actions = torch.as_tensor(actions, dtype=torch.int64)
    targets = torch.as_tensor(rewards, dtype=torch.float32)

    probs, values = net(states)
    # Flatten explicitly: a bare squeeze() turns a one-step batch into a
    # 0-dim tensor, which no longer matches the shape of the targets.
    values = values.reshape(-1)
    advantage = targets - values.detach()

    chosen_probs = probs.gather(1, actions.view(-1, 1)).reshape(-1)
    # Clamp so a probability that underflowed to 0 cannot produce log(0) = -inf.
    action_log_probs = torch.log(chosen_probs.clamp_min(1e-8))
    actor_loss = -(action_log_probs * advantage).mean()
    critic_loss = F.mse_loss(values, targets)
    return actor_loss + critic_loss

# Training main function
def train(num_workers=None):
    """Train a shared actor-critic network with parallel workers and plot rewards.

    Builds the shared network and its optimizer, starts one ``worker``
    process per requested worker, gathers the per-episode rewards each worker
    reports through a queue, and plots them in arrival order.

    Args:
        num_workers: Number of worker processes to start. Defaults to
            ``mp.cpu_count()`` when ``None``.

    Raises:
        ValueError: If ``num_workers`` is smaller than 1.
        RuntimeError: If the workers exit without reporting their results.
    """
    if num_workers is None:
        num_workers = mp.cpu_count()
    if num_workers < 1:
        raise ValueError("num_workers must be at least 1")

    env_name = 'CartPole-v1'  # Replace with your trading env
    # The environment is only needed here to read the space dimensions.
    env = gym.make(env_name)
    try:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.n
    finally:
        env.close()

    global_net = ActorCritic(state_dim, action_dim)
    global_net.share_memory()
    optimizer = torch.optim.Adam(global_net.parameters(), lr=1e-3)
    result_queue = mp.Queue()
    workers = []

    try:
        for i in range(num_workers):
            p = mp.Process(target=worker, args=(i, global_net, optimizer, env_name, result_queue))
            p.start()
            workers.append(p)

        all_rewards = []
        for rewards in _collect_results(result_queue, workers):
            all_rewards += rewards
    except BaseException:
        # Do not leave orphaned workers behind when collection fails.
        for p in workers:
            if p.is_alive():
                p.terminate()
        raise
    finally:
        for p in workers:
            p.join()

    # Plot cumulative profit
    plt.plot(all_rewards)
    plt.xlabel('Episode')
    plt.ylabel('Cumulative Profit')
    plt.grid(True)
    plt.show()


def _collect_results(result_queue, workers, poll_seconds=1.0):
    """Return one queue item per worker, failing instead of hanging if they die."""
    from queue import Empty

    results = []
    while len(results) < len(workers):
        # Sample liveness BEFORE waiting: if nobody was alive and the wait
        # still times out, no further result can arrive.
        any_alive = any(p.is_alive() for p in workers)
        try:
            results.append(result_queue.get(timeout=poll_seconds))
        except Empty:
            if not any_alive:
                raise RuntimeError(
                    "worker processes exited before returning results "
                    f"({len(results)} of {len(workers)} received)"
                ) from None
    return results

if __name__ == '__main__':
    # With 'spawn', each child starts a fresh interpreter and must be able to
    # look up `worker` on the __main__ module, so run this file as a script;
    # launched from an interactive cell the children fail with
    # "Can't get attribute 'worker' on <module '__main__'>".
    # force=True keeps a second invocation in the same interpreter from
    # raising RuntimeError because the start method was already set.
    mp.set_start_method('spawn', force=True)
    train()


Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_parent)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'worker' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/multiprocessing/spawn.py", line 13