In [1]:
import torch.nn as nn
import torch

In [2]:
def cartpole_model(observation_space, action_space):
    """Build the policy network: a single-hidden-layer MLP ending in a softmax.

    Args:
        observation_space: Size of the input feature vector (``in_features``
            of the first linear layer).
        action_space: Number of outputs (``out_features`` of the last linear
            layer); one probability is produced per output.

    Returns:
        nn.Sequential: ``Linear -> ReLU -> Linear -> Softmax(dim=1)``.
    """
    hidden_units = 128
    layers = [
        nn.Linear(observation_space, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, action_space),
        # Softmax normalises along dim 1, so inputs must carry a leading
        # batch dimension, i.e. be shaped (batch, features).
        nn.Softmax(dim=1),
    ]
    return nn.Sequential(*layers)

In [4]:
def init_weight(module):
    """Initialise a linear layer in place; intended for ``nn.Module.apply``.

    Linear layers get Xavier-uniform weights and a zero bias. Every other
    module type is left untouched, so this can be applied to a whole network.

    Args:
        module: Any ``nn.Module``; only ``nn.Linear`` instances are modified.
    """
    if not isinstance(module, nn.Linear):
        return

    # The nn.init helpers run under no_grad internally, so there is no need
    # to reach through ``.data``.
    nn.init.xavier_uniform_(module.weight)

    # A layer built with ``bias=False`` has ``bias is None``; skip it rather
    # than crash with an AttributeError.
    if module.bias is not None:
        nn.init.zeros_(module.bias)

In [5]:
def create_agents(num_agents, observation_space, action_space):
    """Create the initial population of policy networks.

    Each agent is a fresh ``cartpole_model`` whose layers are initialised with
    ``init_weight``. Gradients are switched off on every parameter and the
    network is put in eval mode before it is added to the population.

    Args:
        num_agents: Number of networks to create.
        observation_space: Forwarded to ``cartpole_model``.
        action_space: Forwarded to ``cartpole_model``.

    Returns:
        list: ``num_agents`` independently initialised networks.
    """
    def _spawn():
        # Build and initialise one frozen, eval-mode network.
        network = cartpole_model(observation_space, action_space)
        network.apply(init_weight)
        for weights in network.parameters():
            weights.requires_grad = False
        network.eval()
        return network

    return [_spawn() for _ in range(num_agents)]

In [6]:
def eval_agent(agent, env):
    """Run one episode with ``agent`` and return the summed reward.

    At every step the observation is converted to a ``(1, -1)`` float tensor,
    fed through ``agent``, and an action is sampled from the resulting
    probability vector. The episode stops when the environment reports a
    terminal state or after ``MAX_STEP`` steps (a module-level constant
    defined outside this function), whichever comes first.

    Args:
        agent: Callable network returning one row of action probabilities for
            a batch of one observation.
        env: Environment whose ``reset()`` returns the observation and whose
            ``step(action)`` returns a 4-tuple
            ``(observation, reward, terminal, info)``.

    Returns:
        The sum of rewards collected during the episode.
    """
    state = env.reset()
    episode_return = 0

    for _step in range(MAX_STEP):
        # Add a batch dimension: the network is called on a (1, features) tensor.
        state_batch = torch.tensor(state).type('torch.FloatTensor').view(1,-1)
        action_probs = agent(state_batch).detach().numpy()[0]

        # Sample an action index according to the policy's probabilities.
        sampled = np.random.choice(range(env.action_space.n), 1, p=action_probs)
        state, reward, done, _info = env.step(sampled.item())

        episode_return += reward
        if done:
            break

    return episode_return

In [7]:
def agent_score(agent, env, runs):
    """Return the mean episode reward of ``agent`` over ``runs`` episodes.

    Args:
        agent: Network to evaluate; passed through to ``eval_agent``.
        env: Environment to evaluate in; passed through to ``eval_agent``.
        runs: Number of episodes to average over. Zero raises
            ``ZeroDivisionError``.

    Returns:
        The sum of the per-episode rewards divided by ``runs``.
    """
    episode_rewards = (eval_agent(agent, env) for _ in range(runs))
    return sum(episode_rewards) / runs

In [8]:
def all_agent_score(agents, env, runs):
    """Score every agent in the population.

    Args:
        agents: Iterable of networks.
        env: Environment forwarded to ``agent_score``.
        runs: Episodes per agent, forwarded to ``agent_score``.

    Returns:
        list: One ``agent_score`` result per agent, in the same order as
        ``agents``.
    """
    return [agent_score(candidate, env, runs) for candidate in agents]

In [10]:
def mutate(agent, mutation_power=None):
    """Return a mutated copy of ``agent``; the parent is left untouched.

    The parent is deep-copied and zero-mean Gaussian noise, scaled by
    ``mutation_power``, is added to every parameter of the copy.

    Args:
        agent: Parent network to derive the child from.
        mutation_power: Scale applied to the standard-normal noise. When
            ``None`` (the default) the module-level ``MUTATION_POWER``
            constant is used.

    Returns:
        A new network with perturbed parameters.
    """
    if mutation_power is None:
        mutation_power = MUTATION_POWER

    child_agent = copy.deepcopy(agent)

    # Perturb the CHILD's parameters. Adding the noise to ``agent`` would
    # corrupt the parent and hand back an unmutated clone.
    # no_grad lets the in-place update also work on parameters that still
    # have requires_grad=True.
    with torch.no_grad():
        for param in child_agent.parameters():
            param.add_(torch.randn_like(param) * mutation_power)

    return child_agent

In [11]:
def elite(agents, top_parents_id, env, elite_id=None, top=10, runs=5):
    """Re-evaluate the leading candidates and return a copy of the best one.

    The first ``top`` ids of ``top_parents_id``, plus ``elite_id`` when one is
    given, are each scored with ``agent_score`` over ``runs`` episodes. A deep
    copy of the highest-scoring agent is returned.

    Args:
        agents: Indexable population of networks.
        top_parents_id: Sequence of indices into ``agents``; only the first
            ``top`` entries are considered.
        env: Environment forwarded to ``agent_score``.
        elite_id: Optional index of the previous elite to include in the
            comparison. ``None`` means there is no previous elite.
        top: Number of leading parent ids to re-evaluate.
        runs: Episodes used to score each candidate.

    Returns:
        A deep copy of the best-scoring candidate.

    Raises:
        ValueError: If there is no candidate to evaluate.
    """
    # Materialise a real list: if the ids arrive as a NumPy array the slice
    # has no ``append`` method.
    candidate_ids = list(top_parents_id[:top])

    # Compare against None explicitly; index 0 is a valid elite id and must
    # not be dropped by a truthiness check.
    if elite_id is not None:
        candidate_ids.append(elite_id)

    if not candidate_ids:
        raise ValueError("elite() needs at least one candidate agent id")

    best_id = None
    best_score = float("-inf")  # np.NINF no longer exists in NumPy 2.0

    for agent_id in candidate_ids:
        score = agent_score(agents[agent_id], env, runs=runs)
        # Always accept the first candidate so best_id is never left as None
        # (e.g. when a score is -inf or NaN).
        if best_id is None or score > best_score:
            best_score = score
            best_id = agent_id

    return copy.deepcopy(agents[best_id])

In [None]:
def child_agents(agents, top_parents_id, env, elite_id=None):
    children = []
    
    agent_count = len(agents)-1
    
    selected_agents_id = np.random.choice(top_parents_id, agent_count)
    selected_agents = [agents[id] for id in selected_agents_id]
    child_agents = [mutate(agent) for agent in selected_agents]