# run_car

In [None]:
import datetime
import numpy as np
import random

In [None]:
from cars.world import SimpleCarWorld
from cars.agent import SimpleCarAgent
from cars.physics import SimplePhysics
from cars.track import generate_map

In [None]:
from tqdm import tqdm_notebook

In [None]:
from functools import reduce

In [None]:
import pandas as pd

In [None]:
def create_map(seed, agent):
    np.random.seed(seed)
    random.seed(seed)
    m = generate_map(8, 5, 3, 3)
    w = SimpleCarWorld([agent], m, SimplePhysics, None, timedelta=0.2)
    return w

In [1]:
%matplotlib notebook
import matplotlib.pyplot as plt

class DynamicPlot:
    def __init__(self, xlim=(None, None)):
        self.xlim = xlim
        self.X = []
        self.Y = []
        self.on_launch()

    def on_launch(self):
        self.figure, ax = plt.subplots()
        self.ax = ax
        self.lines, = ax.plot(self.X, self.Y)
        #Autoscale on unknown axis and known lims on the other
        ax.set_autoscaley_on(True)
        ax.set_autoscalex_on(True)
        #ax.set_xlim(*self.xlim)
        #Other stuff
        #ax.grid()

    def clear(self):
        #Update data (with the new _and_ the old points)
        self.X = []
        self.Y = []
        self._update()
        
    def add_point(self, x, y):
        #Update data (with the new _and_ the old points)
        self.X.append(x)
        self.Y.append(y)
        self._update()
    
    def _update(self):
        self.lines.set_xdata(self.X)
        self.lines.set_ydata(self.Y)
        #Need both of these in order to rescale
        self.ax.relim()
        self.ax.autoscale_view()
        #We need to draw *and* flush
        self.figure.canvas.draw()
        self.figure.canvas.flush_events()

In [None]:
import seaborn as sns

In [2]:
plt.ion()
dyp = DynamicPlot()

<IPython.core.display.Javascript object>

In [3]:
for i in range(10):
    dyp.add_point(i, 9)

NotImplementedError: 

# Параметры

In [None]:
map_seeds = [3, 13, 23]

In [None]:
# За сколько шагов модель начинает учитывать вознаграждение
map_seed = map_seeds[0]

# Нарабатываем данные, учимся и оцениваемся

In [None]:
def norm_y(y):
    return y / 15.0

In [None]:
def mine_data(agent, agent_random_action,
              clear_history, map_seed, steps,
              epochs, mini_batch_size, eta, reward_shift, alpha):
    agent.RANDOM_ACTION_P = agent_random_action
    if clear_history:
        agent.clear_history()
    
    #mine data
    w = create_map(seed=map_seed, agent=agent)
    w.run(steps=tqdm_notebook(range(steps), desc="train", leave=False), visual=False, save=False)

    # prepare train data
    X_train = np.concatenate([agent.sensor_data_history, agent.chosen_actions_history], axis=1)
    y_train = np.array(agent.reward_history)
    # сглаживаем пики, чтобы сеть небольшая сеть могла дать адекватное предсказание
    mean_train_revard = y_train.mean()
    y_train = norm_y(y_train)
    
    y_train = np.pad(y_train, (0, reward_shift + 1), mode="constant")
    y_train = reduce(lambda a, b: a + b, [y_train[i:(i - reward_shift - 1)] * (alpha ** (i - 1))
                                          for i in range(1, reward_shift+1)])

    #train NN
    train_data = [(x[:, np.newaxis], y) for x, y in zip(X_train, y_train)]
    agent.neural_net.SGD(training_data=train_data,
                         epochs=epochs,
                         mini_batch_size=mini_batch_size,
                         eta=eta)
    train_rmse = agent.neural_net.evaluate(X_train.T, y_train)

    # evaluate
    w = create_map(seed=map_seed, agent=agent)
    mean_test_reward = w.evaluate_agent(agent, steps=tqdm_notebook(range(800), desc="test", leave=False), visual=False)
    log_message = f"""map_seed = {map_seed}
agent_random_action = {agent_random_action:.2f}
train_rmse = {train_rmse:.9f}
mean_train_revard = {mean_train_revard:.3f}
mean_test_reward  = {mean_test_reward:.3f}"""

    chart_count = agent.neural_net.num_layers
    plt.figure(figsize=(5 * chart_count, 2))
    for l in range(chart_count - 1):
        plt.subplot(1, chart_count, l+1)
        ax = sns.heatmap(agent.neural_net.weights[l])
        
    plt.subplot(1, chart_count, chart_count)    
    plt.text(0.05, 0.95, log_message, size=12, ha='left', va='top', family='monospace')
    
    plt.show()

In [None]:
kvparams = {
    "clear_history": False,
    "map_seed": map_seed,
    "epochs": 40,
    "mini_batch_size": 100,
    "eta": 0.05,
    "reward_shift": 6,
    "alpha": 0.8
}

random.seed(42)
np.random.seed(42)
agent1 = SimpleCarAgent(name="Ivan", history_data=8000)

for agent_random_action in tqdm_notebook(np.arange(1, 0, -.19), desc="main_1"):
    mine_data(agent1, agent_random_action, steps=2000, **kvparams)

In [None]:
for _ in tqdm_notebook(range(5), desc="main_2"):
    mine_data(agent1, agent_random_action=0.05, steps=8000, **kvparams)

На первом слое: скорость, направление, лидары, руль, ускорение 

# Смотрим вживую

In [None]:
def visualize(agent, map_seed):
    agent.evaluate_mode=True
    print("visualize on map = {}".format(map_seed))
    agent.clear_history()
    w = create_map(seed=map_seed, agent=agent)
    w.run(steps=None, visual=True, save=False)

In [None]:
visualize(agent1, map_seed)

# Отладка

In [None]:
raise Exception("Здесь отладка")

In [None]:
agent_dbg = agent1

In [None]:
agent_dbg.clear_history()
#mine data
w = create_map(seed=map_seed, agent=agent_dbg)
w.run(steps=tqdm_notebook(range(10000), desc="train", leave=False), visual=False, save=False)

In [None]:
# prepare train data
X_train = np.concatenate([agent_dbg.sensor_data_history, agent_dbg.chosen_actions_history], axis=1)
y_train = np.array(agent_dbg.reward_history)
y_train = norm_y(y_train)
y_clean = y_train.copy()
mean_train_revard = y_train.mean()
mean_train_revard

In [None]:
reward_shift = 6
alpha = 0.8

In [None]:
y_train = np.pad(y_train, (0, 2 * reward_shift), mode="constant")
y_train = reduce(lambda a, b: a + b, [y_train[i:(-2 * reward_shift + i)] * (alpha ** (i - 1))
                                      for i in range(1, reward_shift+1)])

In [None]:
y_predict = agent_dbg.neural_net.feedforward(X_train.T)

In [None]:
pd_visual = pd.DataFrame(X_train,
                         columns=["velocity", "angle"] +
                         ["lid{}".format(i) for i in range(agent_dbg.rays)] +
                         ["steering", "acceleration"])
pd_visual["y_clean"] = y_clean
pd_visual["y_train"] = y_train
pd_visual["y_predict"] = y_predict.flatten()

In [None]:
pd_visual.head(15)

In [None]:
pd_visual[:2000].plot(y=["y_clean", "y_train", "y_predict"], figsize=(64, 6))

In [None]:
rmse_before = ((pd_visual.y_train - pd_visual.y_predict)**2).mean() ** 0.5
rmse_before

In [None]:
train_data = [(x[:, np.newaxis], y) for x, y in zip(X_train, y_train)]

In [None]:
plt.ion()
dyp = DynamicPlot()

In [None]:
for en, res in enumerate(agent_dbg.neural_net.SGD(
        training_data=train_data,
        epochs=2000,
        mini_batch_size=100,
        eta=0.5,
        test_data=(X_train.T, y_train))):
    dyp.add_point(en, res)

In [None]:

              epochs, mini_batch_size, eta, reward_shift, alpha):

    




    # evaluate
    w = create_map(seed=map_seed, agent=agent)
    mean_test_reward = w.evaluate_agent(agent, steps=tqdm_notebook(range(800), desc="test", leave=False), visual=False)
    log_message = f"""map_seed = {map_seed}
agent_random_action = {agent_random_action:.2f}
train_rmse = {train_rmse:.9f}
mean_train_revard = {mean_train_revard:.3f}
mean_test_reward  = {mean_test_reward:.3f}"""

    chart_count = agent.neural_net.num_layers
    plt.figure(figsize=(5 * chart_count, 2))
    for l in range(chart_count - 1):
        plt.subplot(1, chart_count, l+1)
        ax = sns.heatmap(agent.neural_net.weights[l])
        
    plt.subplot(1, chart_count, chart_count)    
    plt.text(0.05, 0.95, log_message, size=12, ha='left', va='top', family='monospace')
    
    plt.show()