In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import torch
import sys, os
import pystk
import ray
# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device = ', device)
# logging_level=50 is the numeric value of logging.CRITICAL.
# ignore_reinit_error=True keeps this cell re-runnable: without it, executing the
# cell a second time in the same kernel raises because Ray is already initialised.
ray.init(logging_level=50, ignore_reinit_error=True)

In [None]:
from utils.actors import new_action_net, Agent, TrainingAgent, SteeringActor, DriftActor
from utils.utils import run_soccer_agent, rollout_many, show_trajectory_histogram
from utils.rewards import SoccerBallDistanceObjective
import numpy as np

In [None]:
# Create a fresh action net, wrap it in a SteeringActor and a TrainingAgent, and
# hand that agent to run_soccer_agent; whatever it returns is kept in `data`.
action_net = new_action_net()
data = run_soccer_agent(TrainingAgent(SteeringActor(action_net)))

In [None]:
# Objective used to score a state; 150 is passed straight to its constructor.
distance_objective = SoccerBallDistanceObjective(150)
# Candidate pool: 100 nets, each produced by its own new_action_net() call.
many_action_nets = [new_action_net() for _ in range(100)]

# Roll out one TrainingAgent per candidate net.
# Assumes rollout_many returns one trajectory per agent, in the same order as the
# agents were passed in -- the index-based selection below depends on it.
data = rollout_many(
    [TrainingAgent(SteeringActor(net), accel=0.05) for net in many_action_nets],
    mode="soccer",
    n_steps=600,
)

# Score the last entry of every trajectory exactly once, then reuse the list for
# printing and for picking the best and worst candidates.
final_scores = [distance_objective.calculate_state_score(d[-1]) for d in data]
print(final_scores)

good_initialization = many_action_nets[np.argmax(final_scores)]
bad_initialization = many_action_nets[np.argmin(final_scores)]

In [None]:
# Pass the net stored in `good_initialization` to run_soccer_agent, wrapped in a
# plain Agent (not a TrainingAgent) with accel=0.05; the result replaces `data`.
data = run_soccer_agent(Agent(SteeringActor(good_initialization), accel=0.05))

In [None]:
from utils.reinforce import reinforce
from utils.utils import SoccerReinforcementConfiguration

import copy

# Optional: uncomment the next line to start from the net returned by an earlier
# reinforce() run in this kernel instead of the current `good_initialization`.
#good_initialization = best_steering_net
# Work on a deep copy so `good_initialization` keeps referring to a separate object.
action_net = copy.deepcopy(good_initialization)
# A single steering actor; reward_type="angle" is forwarded to SteeringActor as-is.
actors = [SteeringActor(action_net, reward_type="angle")]

def gen_agent(*args, **kwargs):
    """Build an ``Agent`` from the given arguments with ``accel`` fixed at 0.05.

    All positional and keyword arguments are forwarded unchanged. Because
    ``accel`` is passed explicitly, supplying ``accel`` in ``kwargs`` raises
    ``TypeError`` (duplicate keyword argument).
    """
    agent = Agent(*args, accel=0.05, **kwargs)
    return agent

def gen_training_agent(*args, **kwargs):
    """Build a ``TrainingAgent`` from the given arguments with ``accel`` fixed at 0.05.

    All positional and keyword arguments are forwarded unchanged. Because
    ``accel`` is passed explicitly, supplying ``accel`` in ``kwargs`` raises
    ``TypeError`` (duplicate keyword argument).
    """
    training_agent = TrainingAgent(*args, accel=0.05, **kwargs)
    return training_agent

# Configuration: attach the two agent factories to a fresh configuration object.
config = SoccerReinforcementConfiguration()
config.agent, config.training_agent = gen_agent, gen_training_agent

# The iteration count is relatively high here to help force a good outcome
# even from a bad initialization.
best_steering_net = reinforce(
    actors[0],
    actors,
    config,
    n_epochs=5,
    n_iterations=500,
    n_trajectories=200,
    n_validations=100,
    T=1,
)

In [None]:
# Pass the net returned by reinforce() to run_soccer_agent with randomize=True.
# NOTE(review): accel=0.2 here, whereas the agent factories used for training fix
# accel=0.05 -- confirm the difference is intentional.
data = run_soccer_agent(Agent(SteeringActor(best_steering_net), accel=0.2), randomize=True)