# run_car

In [1]:
import datetime
import numpy as np
import random

In [2]:
from cars.world import SimpleCarWorld
from cars.agent import SimpleCarAgent
from cars.physics import SimplePhysics
from cars.track import generate_map

# базовый пример кода
```
if filename:
    agent = SimpleCarAgent.from_file(filename)
    w = SimpleCarWorld(1, m, SimplePhysics, SimpleCarAgent, timedelta=0.2)
    if evaluate:
        print(w.evaluate_agent(agent, steps))
    else:
        w.set_agents([agent])
        w.run(steps)
else:
    w = SimpleCarWorld(1, m, SimplePhysics, SimpleCarAgent, timedelta=0.2)
    w.run(steps)
```

In [3]:
def create_map(seed, agent):
    np.random.seed(seed)
    random.seed(seed)
    m = generate_map(8, 5, 3, 3)
    w = SimpleCarWorld(1, m, SimplePhysics, SimpleCarAgent, timedelta=0.2)
    w.set_agents([agent])
    return w

# Агент

In [4]:
agent = SimpleCarAgent()

agent = SimpleCarAgent.from_file('network_config_agent_0_layers_9_6_1.txt')

# Параметры

In [5]:
train_params = [
    (0.20, 1000,),
    (0.05, 1000,),
]

map_seeds = [23, 15, 21, 42]

# Учим модель

In [6]:
for rap, steps in train_params:
    agent.RANDOM_ACTION_P = rap

    for map_seed in map_seeds:
        print("train on map = {}".format(map_seed))

        # Меняем карту
        w = create_map(seed=map_seed, agent=agent)

        # Учимся
        w.run(steps=steps, visual=False)

        # чистим историю: не учимся повторно на старой карте, ускоряемся
        agent.clear_history()

train on map = 23
revard -1.6876659686
revard -1.20006878203
revard -0.547891006284
revard -0.95166024457
revard -2.0895342805
revard -2.86840708484
revard -1.73541390595
revard -0.415076768006
revard -0.566337963729
revard -0.862029498398
revard -0.962232237086
revard -0.764993703214
revard -0.732291607959
revard -1.0738954538
revard -1.88587089107
revard -2.29244218491
revard -2.59093935174
revard -2.0544639881
revard -0.551559431508
revard -0.045709427616
Saved agent parameters to 'network_config_agent_0_layers_9_6_1.txt'
train on map = 15
revard -0.110746600897
revard -1.11867390173
revard -1.08791238443
revard -0.54745064058
revard -1.35750062921
revard -1.36030105315
revard -0.717639281373
revard -0.719973632371
revard -1.35920980407
revard -2.90377072342
revard -3.99898055588
revard -3.14637493338
revard -1.62224940876
revard -1.00956536552
revard -2.04824629072
revard -2.71723670442
revard -2.58111721422
revard -2.79015032281
revard -3.168666933
revard -3.28708534319
Saved agen

AssertionError: phase((-1.0969788613784897+6.196558717535725j)) = 1.746011 was not found anywhere in the m

### d

In [11]:
from cmath import rect, phase, pi

In [15]:
seed = 42
np.random.seed(seed)
random.seed(seed)
m = generate_map(8, 5, 3, 3)

In [9]:
position = -1.0969788613784897+6.196558717535725j

In [16]:
cur_phase = phase(m[-1][0]) - 2 * pi
for i in range(len(m)):
    prev_phase = cur_phase
    cur_phase = phase(m[i][0])
    print(prev_phase, cur_phase,  "-", min(prev_phase, cur_phase), max(prev_phase, cur_phase))
    if min(prev_phase, cur_phase) < phase(position) <= max(prev_phase, cur_phase):
        # position does not lie between i-1-th and i-th points of m
        print(i)

-9.424777960769378 -2.536968738261595 - -9.424777960769378 -2.536968738261595
-2.536968738261595 -1.0022209839513132 - -2.536968738261595 -1.0022209839513132
-1.0022209839513132 0.17944426583972284 - -1.0022209839513132 0.17944426583972284
0.17944426583972284 1.145864733087902 - 0.17944426583972284 1.145864733087902
1.145864733087902 1.3977272078977956 - 1.145864733087902 1.3977272078977956
1.3977272078977956 1.6495507453753833 - 1.3977272078977956 1.6495507453753833
1.6495507453753833 1.7433157100302203 - 1.6495507453753833 1.7433157100302203
1.7433157100302203 -3.141592653589792 - -3.141592653589792 1.7433157100302203


# Смотрим вживую и продолжаем учиться

In [None]:
agent.RANDOM_ACTION_P = 0.00

for map_seed in map_seeds:
    print("train on map = {}".format(map_seed))

    # Меняем карту
    w = create_map(seed=map_seed, agent=agent)

    # Учимся
    w.run(steps=None, visual=True)

    # чистим историю: не учимся повторно на старой карте, ускоряемся
    agent.clear_history()

# Оцениваем

In [None]:
print("- time = {}, train_params = {}".format(datetime.datetime.now(), train_params))

for map_seed in map_seeds:
    # Меняем карту
    w = create_map(seed=map_seed, agent=agent)
    
    # оцениваем
    revard = w.evaluate_agent(agent, steps=800, visual=False)
    print("  - evaluate_agent on map = {}, reward = {}".format(map_seed, revard))

- train_params = [(0.2, 1000), (0.05, 1000)]
  - evaluate_agent on map = 23, reward = -2.096
  - evaluate_agent on map = 15, reward = -1.1392033828952342
  - evaluate_agent on map = 21, reward = -0.008
  - evaluate_agent on map = 42, reward = -0.016

# Немного покатаемся для отладки

In [None]:
for map_seed in map_seeds:
    # Меняем карту
    w = create_map(seed=map_seed, agent=agent)
    
    # оцениваем
    revard = w.evaluate_agent(agent, steps=200, visual=True)
    print("evaluate_agent on map = {}, reward = {}".format(map_seed, revard))

# Debug

In [None]:
agent = w.agents[0]

In [None]:
np.concatenate([
    np.array(agent.sensor_data_history)[:,:1],
    np.array(agent.chosen_actions_history),
    np.array(agent.reward_history).reshape(-1,1)
], axis=1)

In [None]:
agent.RANDOM_ACTION_P