# run_car

In [1]:
%matplotlib notebook
import matplotlib.pyplot as plt
dyn_plot = True
class DynamicUpdate:
    """Line plot that can be redrawn in place while new points keep arriving.

    Call ``on_launch`` once to create the figure, then ``on_running`` with the
    complete x/y series each time the plot should be refreshed.
    """

    def on_launch(self):
        """Create the figure, an empty line artist and a gridded, autoscaling axes."""
        figure, axes = plt.subplots()
        self.figure = figure
        self.ax = axes
        # ``plot`` returns a list of artists; exactly one line is expected here.
        (self.lines,) = axes.plot([], [])
        # Let both axes follow the range of the data.
        axes.set_autoscaley_on(True)
        axes.set_autoscalex_on(True)
        axes.grid()

    def on_running(self, xdata, ydata):
        """Replace the plotted series with ``xdata``/``ydata`` and repaint.

        The data passed in must contain the old points as well as the new ones.
        """
        line, axes, canvas = self.lines, self.ax, self.figure.canvas
        line.set_xdata(xdata)
        line.set_ydata(ydata)
        # Both calls are required for the axes limits to follow the new data.
        axes.relim()
        axes.autoscale_view()
        # Draw and then flush pending GUI events so the update becomes visible.
        canvas.draw()
        canvas.flush_events()

In [2]:
import datetime
import numpy as np
import random

In [3]:
from cars.world import SimpleCarWorld
from cars.agent import SimpleCarAgent
from cars.physics import SimplePhysics
from cars.track import generate_map

In [4]:
from tqdm import tqdm_notebook

In [5]:
from functools import reduce

In [6]:
import pandas as pd

In [None]:
def create_map(seed, agent):
    """Build a freshly seeded world containing only ``agent``.

    NumPy's and Python's global RNGs are both seeded with ``seed`` before the
    map is generated, so equal seeds give equal RNG state at generation time.

    :param seed: value passed to ``np.random.seed`` and ``random.seed``.
    :param agent: the single agent placed into the world.
    :return: the constructed ``SimpleCarWorld``.
    """
    for seed_rng in (np.random.seed, random.seed):
        seed_rng(seed)
    track = generate_map(8, 5, 3, 3)
    return SimpleCarWorld([agent], track, SimplePhysics, None, timedelta=0.2)

In [None]:
import seaborn as sns

# Параметры

In [None]:
# Candidate map seeds; the cell below selects the first entry as map_seed.
map_seeds = [3, 13, 23]

In [None]:
# Seed of the map used by the training, visualisation and debug cells below
# (first entry of map_seeds).
map_seed = map_seeds[0]

# Нарабатываем данные, учимся и оцениваемся

In [None]:
# Collects the log message of every mine_data call; the training cells print it
# before starting a new run.
log_list = []

In [None]:
def norm_y(y):
    """Scale reward value(s) down by a constant factor of 15.

    :param y: a number or array supporting true division by a float.
    :return: ``y / 15.0``.
    """
    reward_scale = 15.0
    return y / reward_scale

In [None]:
def mine_data(agent, action_trainer_params,
              clear_history, map_seed, steps,
              epochs, mini_batch_size, eta, reward_shift, alpha):
    """Collect driving data with ``agent``, fit its network on it and evaluate.

    The function (1) configures the agent's action trainer, (2) runs the agent
    on a map built from ``map_seed`` to fill its history, (3) builds training
    targets as a discounted sum of the next ``reward_shift`` normalised rewards,
    (4) trains ``agent.neural_net`` and (5) evaluates the agent on the same map.
    A text summary is appended to the module-level ``log_list`` and then either
    printed (``dyn_plot`` true) or drawn next to heatmaps of the network weights.

    :param agent: agent exposing ``action_trainer``, ``neural_net`` and the
        ``*_history`` attributes read below.
    :param action_trainer_params: keyword arguments for ``action_trainer.set_params``.
    :param clear_history: when true, the agent's history is cleared first.
    :param map_seed: seed forwarded to ``create_map``.
    :param steps: length of the step iterator used for data mining.
    :param epochs: forwarded to ``neural_net.SGD``.
    :param mini_batch_size: forwarded to ``neural_net.SGD``.
    :param eta: forwarded to ``neural_net.SGD``.
    :param reward_shift: number of future rewards summed into each target (>= 1).
    :param alpha: per-step discount applied to those future rewards.
    :raises ValueError: if ``reward_shift`` is smaller than 1.
    :return: None.
    """
    # Fail fast: an empty horizon would otherwise surface only after the whole
    # (expensive) simulation, as an opaque reduce()/np.pad error.
    if reward_shift < 1:
        raise ValueError("reward_shift must be >= 1, got {!r}".format(reward_shift))

    agent.action_trainer.set_params(**action_trainer_params)
    if clear_history:
        agent.clear_history()

    # Reset the step statistics kept by the action trainer.
    agent.action_trainer.reset_steps()

    # Mine data: run the agent without visualisation and without saving.
    w = create_map(seed=map_seed, agent=agent)
    w.run(steps=tqdm_notebook(range(steps), desc="train", leave=False), visual=False, save=False)

    # steps[0] is reported as the total; steps[1..3] are reported as fractions of it.
    step_counts = agent.action_trainer.steps
    total_steps = step_counts[0]
    steps_str = "total: {:d}, bad: {:.4f}, good: {:.4f}, diff: {:.4f}, other: {:.4f}".format(
        total_steps,
        step_counts[1] / total_steps,
        step_counts[2] / total_steps,
        step_counts[3] / total_steps,
        1 - sum(step_counts[1:]) / total_steps)

    # Prepare train data: sensor readings and chosen actions joined column-wise.
    X_train = np.concatenate([agent.sensor_data_history, agent.chosen_actions_history], axis=1)
    y_train = np.array(agent.reward_history)
    # The mean is taken over the raw rewards, before normalisation.
    mean_train_revard = y_train.mean()
    # Smooth out the peaks so that a small network can give a reasonable prediction.
    y_train = _discounted_future_rewards(norm_y(y_train), reward_shift, alpha)

    # Train the network; every sample is turned into a column vector.
    train_data = [(x[:, np.newaxis], y) for x, y in zip(X_train, y_train)]
    train_rmse_before = agent.neural_net.evaluate(X_train.T, y_train)
    # NOTE(review): SGD is consumed as an iterator and only advanced once. If it
    # yields after every epoch (as the debug loop at the end of the notebook
    # suggests), this trains a single epoch rather than `epochs` - confirm.
    next(agent.neural_net.SGD(training_data=train_data,
                              epochs=epochs,
                              mini_batch_size=mini_batch_size,
                              eta=eta))
    train_rmse_after = agent.neural_net.evaluate(X_train.T, y_train)

    # Evaluate on a world rebuilt from the same seed.
    w = create_map(seed=map_seed, agent=agent)
    mean_test_reward = w.evaluate_agent(agent, steps=tqdm_notebook(range(800), desc="test", leave=False), visual=False)
    log_message = f"""map_seed = {map_seed}
train_rmse_before = {train_rmse_before:.9f}, train_rmse_after = {train_rmse_after:.9f}, 
mean_train_revard = {mean_train_revard:.3f}, mean_test_reward  = {mean_test_reward:.3f},
steps = {steps_str}"""

    log_list.append(log_message)
    if dyn_plot:
        print(log_message)
        return

    # One heatmap per weight matrix, plus a final panel holding the text summary.
    chart_count = agent.neural_net.num_layers
    plt.figure(figsize=(5 * chart_count, 2))
    for l in range(chart_count - 1):
        plt.subplot(1, chart_count, l + 1)
        sns.heatmap(agent.neural_net.weights[l])

    plt.subplot(1, chart_count, chart_count)
    plt.text(0.05, 0.95, log_message, size=12, ha='left', va='top', family='monospace')

    plt.show()


def _discounted_future_rewards(rewards, reward_shift, alpha):
    """Return, per step t, sum over i=1..reward_shift of alpha**(i-1) * rewards[t+i].

    Rewards past the end of the array count as zero (constant zero padding).
    """
    padded = np.pad(rewards, (0, reward_shift + 1), mode="constant")
    discounted = None
    for offset in range(1, reward_shift + 1):
        # Window shifted `offset` steps into the future, same length as `rewards`.
        term = padded[offset:offset - reward_shift - 1] * (alpha ** (offset - 1))
        discounted = term if discounted is None else discounted + term
    return discounted

In [None]:
# Keyword arguments shared by every mine_data call below.
kvparams = dict(
    clear_history=False,
    map_seed=map_seed,
    epochs=100,
    mini_batch_size=50,
    eta=0.5,
    reward_shift=6,
    alpha=0.8,
)

# Initial settings handed to the agent's action trainer via set_params.
act_trainer = dict(
    low=0.0,
    high=0.0,
    random_default=0.8,
    random_bad=0.8,
    random_good=0.8,
    random_diff=0.8,
    bad_choice_p=0.01,
)

# Seed both global RNGs before the agent is constructed.
random.seed(42)
np.random.seed(42)
agent1 = SimpleCarAgent(name="Ivan", history_data=10000)

In [None]:
# Show the log collected so far, then do a single run with the initial
# act_trainer settings.
print("\n".join(log_list))
for _ in tqdm_notebook(range(1), desc="main"):
    mine_data(agent1, act_trainer, steps=4000, **kvparams)

In [None]:
# Re-derive the trainer thresholds from the normalised rewards gathered so far.
y_train_setting = norm_y(np.array(agent1.reward_history))

mn, mx = y_train_setting.min(), y_train_setting.max()

act_trainer.update({
    # 60 % and 80 % of the way from the smallest to the largest reward.
    "low": mn + (mx - mn) * 0.60,
    "high": mn + (mx - mn) * 0.80,
    "random_default": 0.50,
    "random_bad": 0.80,
    "random_good": 0.05,
    "random_diff": 0.05,
    "bad_choice_p": 0.20,
})

In [None]:
# Show the log collected so far, then run five mining/training rounds.
print("\n".join(log_list))

for _ in tqdm_notebook(range(5), desc="main"):
    mine_data(agent1, act_trainer, steps=4000, **kvparams)

    # Re-derive the trainer thresholds from the rewards currently in the history.
    y_train_setting = norm_y(np.array(agent1.reward_history))
    mn = y_train_setting.min()
    mx = y_train_setting.max()
    low = mn + (mx - mn) * 0.60
    high = mn + (mx - mn) * 0.80
    # Report exactly the thresholds that are applied below (the earlier print
    # showed the 30 %/70 % points while 60 %/80 % were being used).
    print("y: [{:.3f} - {:.3f} - {:.3f} - {:.3f}]".format(mn, low, high, mx))

    act_trainer["low"] = low
    act_trainer["high"] = high
    print("")

# Смотрим вживую

In [None]:
def visualize(agent, map_seed):
    """Run ``agent`` with visualisation on a map built from ``map_seed``.

    Side effects: sets ``agent.evaluate_mode`` to True and clears the agent's
    history before the world is created.
    """
    agent.evaluate_mode = True
    print("visualize on map = {}".format(map_seed))
    agent.clear_history()
    world = create_map(seed=map_seed, agent=agent)
    world.run(steps=None, visual=True, save=False)

In [None]:
# Run agent1 with visualisation on the map that was used for training.
visualize(agent1, map_seed)

# Отладка

In [None]:
# Presumably a deliberate stop: raising here aborts "Run All", so the debug
# cells below only run when executed by hand. The message reads "Debugging here".
raise Exception("Здесь отладка")

In [None]:
# Agent inspected by the debug cells below (same object as agent1, not a copy).
agent_dbg = agent1

In [None]:
# Start from an empty history so the debug data comes from this run only.
agent_dbg.clear_history()
# Mine data: drive the world with a 10000-item step iterator, without
# visualisation and without saving.
w = create_map(seed=map_seed, agent=agent_dbg)
w.run(steps=tqdm_notebook(range(10000), desc="train", leave=False), visual=False, save=False)

In [None]:
# prepare train data
# Features: sensor readings and chosen actions joined column-wise.
X_train = np.concatenate([agent_dbg.sensor_data_history, agent_dbg.chosen_actions_history], axis=1)
y_train = np.array(agent_dbg.reward_history)
# Mean of the raw rewards (taken before normalisation); shown as the cell output.
mean_train_revard = y_train.mean()
y_train = norm_y(y_train)
# Untouched copy of the normalised rewards, kept because y_train is smoothed
# in a later cell.
y_clean = y_train.copy()

mean_train_revard

In [None]:
# Number of future rewards summed into each training target.
reward_shift = 6
# Discount applied per step of look-ahead (weight alpha ** (i - 1) for step i).
alpha = 0.8

In [None]:
# Training target: discounted sum of the next `reward_shift` normalised rewards.
# It is derived from y_clean (the untouched normalised rewards) rather than from
# y_train itself, so re-running this cell does not smooth the targets twice.
horizon_pad = 2 * reward_shift
y_padded = np.pad(y_clean, (0, horizon_pad), mode="constant")
y_train = reduce(lambda a, b: a + b, [y_padded[i:(i - horizon_pad)] * (alpha ** (i - 1))
                                      for i in range(1, reward_shift + 1)])

In [None]:
# Network output for all recorded samples; X_train is transposed so that each
# sample is passed as a column.
y_predict = agent_dbg.neural_net.feedforward(X_train.T)

In [None]:
# One row per recorded step: sensor columns, action columns, then the raw
# normalised reward, the smoothed target and the network prediction.
sensor_columns = ["velocity", "angle"] + ["lid{}".format(i) for i in range(agent_dbg.rays)]
action_columns = ["steering", "acceleration"]
pd_visual = pd.DataFrame(X_train, columns=sensor_columns + action_columns).assign(
    y_clean=y_clean,
    y_train=y_train,
    y_predict=y_predict.flatten(),
)

In [None]:
# Row label of the first negative reward at or after row 200. idxmax returns the
# label, which matches the .loc lookups below; Series.argmax returns a position
# relative to the slice in pandas >= 1.0.
(pd_visual.y_clean[200:] < 0).idxmax()

In [None]:
# Inspect the rows labelled 215 through 235 (.loc slices include both ends).
pd_visual.loc[215:235]

In [None]:
from cars.utils import Action

In [None]:
# Score every candidate action for the sensor state recorded in row 225.
# Sensor columns are velocity, angle and one column per lidar ray (see the
# pd_visual column list), so the width is derived rather than hard-coded as 7.
n_sensor_features = 2 + agent_dbg.rays
sensor_info = pd_visual.loc[225].values[:n_sensor_features]
rewards_to_controls_map = []
# Discretise the set of values, since we cannot predict every possible one exactly.
for steering in np.linspace(-1, 1, 3):  # another sampling resolution could be chosen, but
    for acceleration in np.linspace(-0.75, 0.75, 3):  # our tests use exactly this one
        action = Action(steering, acceleration)
        agent_vector_representation = np.append(sensor_info, action)
        agent_vector_representation = agent_vector_representation.flatten()[:, np.newaxis]
        prediction = agent_dbg.neural_net.feedforward(agent_vector_representation)
        # .item() extracts the single value; float() on an array with ndim > 0
        # is deprecated since NumPy 1.25.
        predicted_reward = np.asarray(prediction).item()
        rewards_to_controls_map.append((predicted_reward, action))

rewards_to_controls_map

In [None]:
# Score every candidate action for the sensor state recorded in row 217.
# Sensor columns are velocity, angle and one column per lidar ray (see the
# pd_visual column list), so the width is derived rather than hard-coded as 7.
n_sensor_features = 2 + agent_dbg.rays
sensor_info = pd_visual.loc[217].values[:n_sensor_features]
rewards_to_controls_map = []
# Discretise the set of values, since we cannot predict every possible one exactly.
for steering in np.linspace(-1, 1, 3):  # another sampling resolution could be chosen, but
    for acceleration in np.linspace(-0.75, 0.75, 3):  # our tests use exactly this one
        action = Action(steering, acceleration)
        agent_vector_representation = np.append(sensor_info, action)
        agent_vector_representation = agent_vector_representation.flatten()[:, np.newaxis]
        prediction = agent_dbg.neural_net.feedforward(agent_vector_representation)
        # .item() extracts the single value; float() on an array with ndim > 0
        # is deprecated since NumPy 1.25.
        predicted_reward = np.asarray(prediction).item()
        rewards_to_controls_map.append((predicted_reward, action))

rewards_to_controls_map

In [None]:
# Turn on matplotlib's interactive mode for the plots below.
plt.ion()

In [None]:
# Plot raw reward, smoothed target and prediction for the first 2000 rows.
pd_visual[:2000].plot(y=["y_clean", "y_train", "y_predict"], figsize=(64, 4))

In [None]:
# Root-mean-square error between the smoothed targets and the predictions
# stored in pd_visual.
rmse_before = ((pd_visual.y_train - pd_visual.y_predict)**2).mean() ** 0.5
rmse_before

In [None]:
# (input, target) pairs for SGD; every input row is turned into a column vector.
train_data = [(x[:, np.newaxis], y) for x, y in zip(X_train, y_train)]

In [None]:
from learning_algorithms.network import Network
# Replace the agent's network with a newly constructed one that uses an identity
# output function (derivative 1). [9, 12, 1] are presumably the layer sizes -
# TODO confirm against Network.
agent_dbg.neural_net = Network([9, 12, 1], output_function=lambda x: x, output_derivative=lambda x: 1)

In [None]:
# State for the live training plot: next epoch number and the plotted series.
# It lives in its own cell, so re-running the training cell keeps appending to it.
epoch_num = 1
dyp_x = []
dyp_y = []

In [None]:
# Train the debug network while plotting the value yielded after each step of
# SGD against a running epoch counter.
plt.ion()
dyp = DynamicUpdate()
dyp.on_launch()

try:
    for res in agent_dbg.neural_net.SGD(
            training_data=train_data,
            epochs=100,
            mini_batch_size=50,
            eta=0.5,
            test_data=(X_train.T, y_train)):
        dyp_x.append(epoch_num)
        dyp_y.append(res)
        # Redraw after every 10th recorded epoch. The check runs before the
        # increment, so epochs 10, 20, ... trigger it (it used to fire one
        # epoch early and never showed the final epochs).
        if epoch_num % 10 == 0:
            dyp.on_running(dyp_x, dyp_y)
        epoch_num += 1
except KeyboardInterrupt:
    # Interrupting the kernel stops training early; report the best value so far.
    if dyp_y:
        print("min rmse: {}".format(min(dyp_y)))

# Final refresh so the plot also shows the points recorded since the last redraw.
if dyp_x:
    dyp.on_running(dyp_x, dyp_y)

In [None]:

              epochs, mini_batch_size, eta, reward_shift, alpha):

    




    # evaluate
    w = create_map(seed=map_seed, agent=agent)
    mean_test_reward = w.evaluate_agent(agent, steps=tqdm_notebook(range(800), desc="test", leave=False), visual=False)
    log_message = f"""map_seed = {map_seed}
agent_random_action = {agent_random_action:.2f}
train_rmse = {train_rmse:.9f}
mean_train_revard = {mean_train_revard:.3f}
mean_test_reward  = {mean_test_reward:.3f}"""

    chart_count = agent.neural_net.num_layers
    plt.figure(figsize=(5 * chart_count, 2))
    for l in range(chart_count - 1):
        plt.subplot(1, chart_count, l+1)
        ax = sns.heatmap(agent.neural_net.weights[l])
        
    plt.subplot(1, chart_count, chart_count)    
    plt.text(0.05, 0.95, log_message, size=12, ha='left', va='top', family='monospace')
    
    plt.show()