In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import torch
import sys, os
import pystk
import ray
# Prefer CUDA when torch reports it as available; otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device = ', device)

# 50 is the numeric value of logging.CRITICAL in the standard library.
ray.init(logging_level=50)

In [None]:
from state_agent.agents.subnets.actors import SteeringActor, DriftActor, SpeedActor
from state_agent.agents.subnets.agents import Agent, BaseTeam
from state_agent.agents.subnets.utils import run_soccer_agent, rollout_many, show_trajectory_histogram, load_model, save_model
from state_agent.agents.subnets.rewards import SoccerBallDistanceObjective
from state_agent.trainers.train_policy_gradient import reinforce, SoccerReinforcementConfiguration
import numpy as np

In [None]:
# Smoke run: pass an Agent built around a default-constructed SteeringActor
# (train=True is forwarded to Agent) to run_soccer_agent and keep whatever it returns.
data = run_soccer_agent(Agent(SteeringActor(), train=True))

In [None]:
def get_initializations(actor_class, n_actors=100, n_steps=600):
    """Sample default-constructed actors and return the best and worst scoring ones.

    ``n_actors`` instances of ``actor_class`` are created with no arguments, each is
    wrapped as ``Agent(actor, accel=0.05)`` and all of them are rolled out together
    through ``rollout_many`` for ``n_steps`` steps. The last element of every returned
    trajectory is scored with ``SoccerBallDistanceObjective(150).calculate_state_score``.

    Args:
        actor_class: Zero-argument callable producing an actor (e.g. ``SteeringActor``).
        n_actors: How many candidate actors to sample. Defaults to 100.
        n_steps: Rollout length forwarded to ``rollout_many``. Defaults to 600.

    Returns:
        A ``(good_initialization, bad_initialization)`` tuple: the actor whose final
        state scored highest and the one whose final state scored lowest. On ties the
        earliest-created actor wins (``np.argmax`` / ``np.argmin`` semantics).

    Raises:
        ValueError: If ``n_actors`` is smaller than 1.
    """
    if n_actors < 1:
        raise ValueError('n_actors must be at least 1')

    distance_objective = SoccerBallDistanceObjective(150)
    candidates = [actor_class() for _ in range(n_actors)]

    trajectories = rollout_many(
        [Agent(actor, accel=0.05) for actor in candidates],
        n_steps=n_steps,
    )

    # Score each trajectory exactly once and reuse the list for both extremes,
    # instead of re-evaluating the objective separately for argmax and argmin.
    final_scores = [
        distance_objective.calculate_state_score(trajectory[-1])
        for trajectory in trajectories
    ]

    good_initialization = candidates[np.argmax(final_scores)]
    bad_initialization = candidates[np.argmin(final_scores)]

    return good_initialization, bad_initialization

# Keep only the best-scoring SteeringActor as the starting point; the worst-scoring
# one returned alongside it is discarded.
good_initialization, _ = get_initializations(SteeringActor)

In [None]:
import copy

# Disabled alternative starting point, kept as found.
# NOTE(review): presumably meant for re-running this cell from an already trained
# net -- confirm it is still wanted or delete it.
#good_initialization = best_steering_net
# Train on a deep copy so the selected initialization's own network object is not
# the one handed to the new SteeringActor.
action_net = copy.deepcopy(good_initialization.action_net)
# The steering actor is the only actor in this training stage.
actors = [SteeringActor(action_net)]

def gen_agent(*args, **kwargs):
    """Agent factory for steering training.

    Forwards every positional and keyword argument to ``Agent`` and additionally
    pins ``accel=0.05`` and ``target_speed=10.0``. Passing either of those two
    keywords explicitly raises ``TypeError`` (duplicate keyword argument).
    """
    fixed_settings = {'accel': 0.05, 'target_speed': 10.0}
    return Agent(*args, **fixed_settings, **kwargs)

# Training configuration: agents are built through the gen_agent factory.
config = SoccerReinforcementConfiguration()
config.agent = gen_agent

# The iteration count is deliberately on the high side so that training can still
# reach a good outcome when it starts from a poor initialization.
best_steering_net = reinforce(
    actors[0],
    actors,
    config,
    n_epochs=5,
    n_iterations=500,
    n_trajectories=200,
    n_validations=100,
    T=1,
)

In [None]:
# Save the steering actor: hand the net returned by reinforce() to save_model.
# The target path is relative, so it depends on the notebook's working directory.
save_model(best_steering_net, 'modules/steering/agent.pt')

In [None]:
# Train the speed actor.

# Pick the best-scoring SpeedActor out of the sampled candidates; the worst is discarded.
good_initialization_speed, _ = get_initializations(SpeedActor)

# Deep-copy the selected network so the new SpeedActor does not share the
# initialization's own network object.
action_net = copy.deepcopy(good_initialization_speed.action_net)
# The steering actor is built from the already trained net with train=False;
# the SpeedActor (index 1) is the one passed to reinforce() as the actor to train.
actors = [SteeringActor(best_steering_net, train=False), SpeedActor(action_net)]

def gen_agent(*args, **kwargs):
    """Agent factory for speed training with a randomised target speed.

    A uniform draw decides (with probability 0.1) whether the sign is flipped, then
    a value is drawn from a normal distribution with mean 10 and standard deviation 5.
    The signed product is passed to ``Agent`` as ``target_speed``; all other
    arguments are forwarded unchanged.
    """
    # Draw order matters for reproducibility under a fixed seed: uniform first, then normal.
    flip_sign = np.random.uniform(0, 1) < 0.1
    sampled_speed = np.random.normal(10, 5)
    direction = -1.0 if flip_sign else 1.0
    return Agent(*args, target_speed=sampled_speed * direction, **kwargs)

# Training configuration: agents are built through the gen_agent factory.
config = SoccerReinforcementConfiguration()
config.agent = gen_agent

# The iteration count is deliberately on the high side so that training can still
# reach a good outcome when it starts from a poor initialization.
best_speed_net = reinforce(
    actors[1],
    actors,
    config,
    n_epochs=5,
    n_iterations=500,
    n_trajectories=200,
    n_validations=100,
    T=1,
)

In [None]:
# Run the combined agent with both trained nets at a fixed target speed.
# The speed actor is built from best_speed_net (the value reinforce() returned),
# mirroring how the steering actor uses best_steering_net; previously the
# pre-selection copy `action_net` was used and best_speed_net was never read.
data = run_soccer_agent(
    Agent(
        SteeringActor(best_steering_net),
        SpeedActor(best_speed_net),
        target_speed=6.0,
    ),
    randomize=True,
)