# run_car

In [1]:
import datetime
import os
import random

import numpy as np

In [2]:
from cars.world import SimpleCarWorld
from cars.agent import SimpleCarAgent
from cars.physics import SimplePhysics
from cars.track import generate_map

# Базовый пример кода
```
if filename:
    agent = SimpleCarAgent.from_file(filename)
    w = SimpleCarWorld(1, m, SimplePhysics, SimpleCarAgent, timedelta=0.2)
    if evaluate:
        print(w.evaluate_agent(agent, steps))
    else:
        w.set_agents([agent])
        w.run(steps)
else:
    w = SimpleCarWorld(1, m, SimplePhysics, SimpleCarAgent, timedelta=0.2)
    w.run(steps)
```

In [3]:
def create_map(seed, agent, map_params=(8, 5, 3, 3), timedelta=0.2):
    """Build a world with a freshly generated map that contains ``agent``.

    Both the NumPy global generator and the stdlib ``random`` generator are
    re-seeded with ``seed`` right before the map is generated, so repeated
    calls with the same seed start map generation from the same RNG state.

    Args:
        seed: Value passed to ``np.random.seed`` and ``random.seed``.
        agent: Agent to place in the world; it is handed to
            ``SimpleCarWorld`` wrapped in a single-element list.
        map_params: Positional arguments forwarded to ``generate_map``.
            Defaults to the previously hard-coded ``(8, 5, 3, 3)``.
            NOTE(review): the meaning of each value is defined by
            ``cars.track.generate_map`` -- confirm there.
        timedelta: Forwarded to ``SimpleCarWorld`` as its ``timedelta``
            keyword. Defaults to the previously hard-coded ``0.2``.

    Returns:
        The newly constructed ``SimpleCarWorld`` instance.
    """
    # Seed both generators so map generation is reproducible for a given seed.
    np.random.seed(seed)
    random.seed(seed)

    track_map = generate_map(*map_params)

    # NOTE(review): the fourth argument is None here, whereas the base example
    # passes the agent class in that position -- presumably it is not needed
    # when agent instances are supplied directly; confirm in SimpleCarWorld.
    return SimpleCarWorld([agent], track_map, SimplePhysics, None, timedelta=timedelta)

# Агент

In [4]:
# Seed both generators before the agent is created.
random.seed(42)
np.random.seed(42)

# Checkpoint file to resume from; the training cells report saving the agent
# parameters under this same name.
AGENT_CONFIG_FILE = 'network_config_agent_0_layers_9_6_1.txt'

# Resume from the checkpoint when it exists, otherwise start from a new agent.
# Previously a new agent was always built and then immediately replaced by the
# loaded one, and the notebook depended on the checkpoint file being present.
if os.path.exists(AGENT_CONFIG_FILE):
    agent = SimpleCarAgent.from_file(AGENT_CONFIG_FILE)
else:
    agent = SimpleCarAgent(name="Ivan")

# Параметры

In [5]:
# Training schedule: one (RANDOM_ACTION_P, steps) pair per training pass.
# The training loop assigns the first value to agent.RANDOM_ACTION_P and
# passes the second one to w.run(steps=...).
train_params = [
    (0.20, 1000),
    # (0.05, 1000),  # optional extra pass with a lower RANDOM_ACTION_P (disabled)
]

# Seeds handed to create_map(); each seed produces one map.
map_seeds = [23, 15, 21, 42]

# Учим модель

In [6]:
# Each entry of train_params is one training pass:
# (value for agent.RANDOM_ACTION_P, number of steps passed to w.run).
for rap, steps in train_params:
    agent.RANDOM_ACTION_P = rap

    for map_seed in map_seeds:
        print("train on map = {}".format(map_seed))

        # Switch to a new map
        w = create_map(seed=map_seed, agent=agent)

        # Train
        w.run(steps=steps, visual=False)

        # Clear the history: do not re-train on the old map, and speed things up
        agent.clear_history()

train on map = 23
revard -0.064
revard -1.35801062695
revard -1.69651846736
revard -0.661587141241
revard -0.704495426504
revard -1.22663147661
revard -1.03628264085
revard -0.687414405118
revard -1.02693676507
revard -1.13404161575
revard -1.16615853989
revard -0.873867338079
revard -1.12617201095
revard -2.25394622864
revard -1.85885891923
revard -1.17164185309
revard -1.1181635634
revard -0.485261721807
revard -0.754479980969
revard -0.883449010199
Saved agent parameters to 'network_config_agent_0_layers_9_6_1.txt'
train on map = 15
revard -0.899356090711
revard -0.652099246144
revard -0.298421200788
revard -0.832
revard -1.12
revard -0.896
revard -0.544
revard -0.064
revard -0.0866323258665
revard -0.630632325866
revard -0.8
revard -0.365982992418
revard -0.307889491875
revard -0.524790464769
revard -0.358883965312
revard -0.367221315967
revard -1.29799495197
revard -1.36243445457
revard -0.578664532156
revard -0.365577635071
Saved agent parameters to 'network_config_agent_0_layers

# Смотрим вживую и продолжаем учиться

In [8]:
# Set agent.RANDOM_ACTION_P to zero for this run.
agent.RANDOM_ACTION_P = 0.00

# Only the first map seed is used here.
for map_seed in map_seeds[:1]:
    print("train on map = {}".format(map_seed))

    # Switch to a new map
    w = create_map(seed=map_seed, agent=agent)

    # Train with visualisation enabled.
    # NOTE(review): steps=None presumably means "run until stopped manually" --
    # confirm in SimpleCarWorld.run.
    w.run(steps=None, visual=True)

train on map = 23
Saved agent parameters to 'network_config_agent_0_layers_9_6_1.txt'


In [None]:
# Clear the history: do not re-train on the old map, and speed things up
agent.clear_history()

# Оцениваем

In [7]:
# Timestamped header so that successive evaluation runs can be told apart.
print("- time = {}, train_params = {}".format(datetime.datetime.now(), train_params))

for map_seed in map_seeds:
    # Build a new world on this map, then evaluate the agent on it.
    w = create_map(seed=map_seed, agent=agent)
    reward = w.evaluate_agent(agent, steps=800, visual=False)
    print("  - evaluate_agent on map = {}, reward = {}".format(map_seed, reward))

- time = 2018-01-24 00:41:18.886220, train_params = [(0.2, 1000)]
  - evaluate_agent on map = 23, reward = -1.7741872447429086
  - evaluate_agent on map = 15, reward = -1.5800781959803243
  - evaluate_agent on map = 21, reward = -0.8135290975234429
  - evaluate_agent on map = 42, reward = -1.7857630295989895


- time = 2018-01-24 00:37:51.303347, train_params = [(0.2, 1000)]
  - evaluate_agent on map = 23, reward = -1.7741872447429086
  - evaluate_agent on map = 15, reward = -1.5800781959803243
  - evaluate_agent on map = 21, reward = -0.8135290975234429
  - evaluate_agent on map = 42, reward = -1.7857630295989895


# Немного покатаемся для отладки

In [None]:
for map_seed in map_seeds:
    # Build a new world on this map, then run a short visual evaluation on it.
    w = create_map(seed=map_seed, agent=agent)
    reward = w.evaluate_agent(agent, steps=200, visual=True)
    print("evaluate_agent on map = {}, reward = {}".format(map_seed, reward))

# Debug

In [None]:
# Rebind `agent` to the first agent held by the most recently created world `w`.
agent = w.agents[0]

In [None]:
# Show the recorded history as one table: the first column of the sensor data,
# the chosen actions, and the rewards reshaped into a single column.
# NOTE(review): assumes sensor_data_history and chosen_actions_history convert
# to 2-D arrays with one row per recorded step, and that all three histories
# have the same length -- confirm against SimpleCarAgent.
np.concatenate([
    np.array(agent.sensor_data_history)[:,:1],
    np.array(agent.chosen_actions_history),
    np.array(agent.reward_history).reshape(-1,1)
], axis=1)

In [None]:
# Display the current value of RANDOM_ACTION_P on the agent.
agent.RANDOM_ACTION_P