In [1]:
from torch import nn
import torch
import torch.nn.functional as F
# Project specific inputs
from environment import Environment
from dgnAgent import Agent
from trainer import Trainer
import data_creator as dc
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import plotly
import plotly.graph_objects as go
import plotly.express as px
plotly.offline.init_notebook_mode(connected=True)
from mcts import MCTS
import functions as fc
import operator
import copy
from las import Look_ahead_search as Las
from collections import deque

In [2]:
# Constants
# Problem dimensions; they are passed to Environment and determine the
# network input/output sizes computed in add_agents.
KIND_CARS = 8
INPUT_SEQUENCE_LENGTH = 100
INPUT_WINDOW = 3
OUTPUT_SEQUENCE_LENGTH = 5
NUM_LINES = 2
CAPACITY_LINES = 3


# Constants Agent
# Hyperparameters handed to every Agent constructed in this notebook.
BUFFER_SIZE = int(1e4)  # replay buffer size
BATCH_SIZE = 64         # minibatch size
GAMMA = 1          # discount factor (also passed to Trainer)
TAU = 0.001             # for soft update of target parameters (i.e. 1e-3)
LR = 5e-4               # learning rate
UPDATE_EVERY = 4        # how often to update the network
SEED = 0                # seed argument passed to Agent

In [3]:
# Shared Trainer instance; its test_agent method is used by later cells to
# evaluate agents on the test sequences.
trainer = Trainer('', KIND_CARS, GAMMA)

In [4]:
# Module-level registries filled by add_agents(): each call appends one
# "series" (one entry per loaded checkpoint) to agents/envs/labels and the
# series name to meta_labels.
meta_labels = []  # series names
agents = []       # per series: list of Agent objects
labels = []       # per series: checkpoint file names without the '.pth' suffix
envs = []         # per series: list of Environment objects

In [5]:
def add_agents(intermediate_layers, series_name, input_windows=range(1, 10)):
    """Load every saved checkpoint of one network architecture as a new series.

    For each input window size the matching results directory is scanned for
    ``.pth`` files. Every checkpoint is loaded into a fresh ``Network`` (the
    class bound to that name at call time), wrapped in an ``Agent`` and paired
    with an ``Environment`` that uses the same window size.

    The collected agents, environments and labels are appended as one series to
    the module-level lists ``agents``, ``envs`` and ``labels``; ``series_name``
    is appended to ``meta_labels``.

    Args:
        intermediate_layers: hidden layer widths, e.g. ``[64, 64]``.
        series_name: label stored in ``meta_labels`` for this series.
        input_windows: iterable of input window sizes to load (default 1..9).

    Raises:
        OSError: if the results directory for a window size does not exist
            (raised by ``os.listdir``).
    """
    current_agents = []
    current_labels = []
    current_envs = []
    for input_window in input_windows:
        # The input layer width depends on the window size, so both the layer
        # list and the directory name derived from it are rebuilt per window.
        input_size = (input_window + OUTPUT_SEQUENCE_LENGTH + NUM_LINES * CAPACITY_LINES) * (KIND_CARS + 1)
        layers = [input_size] + list(intermediate_layers) + [NUM_LINES]

        # Directory name encodes the full configuration of the training run.
        name = (
            'I:' + str(input_window) + "_O:" + str(OUTPUT_SEQUENCE_LENGTH)
            + "_N:" + "-".join(str(layer) for layer in layers)
            + "_NL:" + str(NUM_LINES) + "_CL:" + str(CAPACITY_LINES)
            + "_W:" + str(INPUT_SEQUENCE_LENGTH) + "_KC:" + str(KIND_CARS)
        )
        checkpoint_dir = os.path.join(fc.get_path(), 'results', name)

        # os.listdir returns entries in arbitrary order; sort them so that the
        # positional indexing done by later cells is reproducible.
        for filename in sorted(os.listdir(checkpoint_dir)):
            if not filename.endswith('.pth'):
                continue

            net = Network(layers)
            net.load_state_dict(torch.load(os.path.join(checkpoint_dir, filename)))
            net.eval()
            agent = Agent(net, BUFFER_SIZE, BATCH_SIZE, UPDATE_EVERY, GAMMA, TAU, LR, SEED)
            env = Environment(INPUT_SEQUENCE_LENGTH, KIND_CARS, NUM_LINES, CAPACITY_LINES, OUTPUT_SEQUENCE_LENGTH, input_window)
            current_labels.append(filename[:-4])
            current_agents.append(agent)
            current_envs.append(env)

    agents.append(current_agents)
    envs.append(current_envs)
    labels.append(current_labels)
    meta_labels.append(series_name)
        

In [None]:
class Network(nn.Module):
    """Fully connected network with one hidden layer: Linear -> ReLU -> Linear.

    ``layer_numbers`` supplies the layer widths as
    ``[input, hidden, output]``; only the first three entries are read.
    """

    def __init__(self, layer_numbers):
        super().__init__()
        n_in = layer_numbers[0]
        n_hidden = layer_numbers[1]
        n_out = layer_numbers[2]
        # Attribute names fc1/fc2 are the keys of the saved state dicts.
        self.fc1 = nn.Linear(n_in, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_out)

    def forward(self, x):
        """Return the raw (non-activated) output of the last linear layer."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)


# Load the series with a single hidden layer. add_agents instantiates whatever
# class is currently bound to `Network`, so this must run with the two-layer
# Network of this cell in scope.
add_agents([64], "64")
add_agents([32], "32")

In [6]:
class Network(nn.Module):
    """Fully connected network with two hidden layers.

    Structure: Linear -> ReLU -> Linear -> ReLU -> Linear. ``layer_numbers``
    supplies the widths as ``[input, hidden1, hidden2, output]``; only the
    first four entries are read.
    """

    def __init__(self, layer_numbers):
        super().__init__()
        n_in = layer_numbers[0]
        n_hidden1 = layer_numbers[1]
        n_hidden2 = layer_numbers[2]
        n_out = layer_numbers[3]
        # Attribute names fc1..fc3 are the keys of the saved state dicts.
        self.fc1 = nn.Linear(n_in, n_hidden1)
        self.fc2 = nn.Linear(n_hidden1, n_hidden2)
        self.fc3 = nn.Linear(n_hidden2, n_out)

    def forward(self, x):
        """Return the raw (non-activated) output of the last linear layer."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Load the series with two hidden layers. add_agents instantiates whatever
# class is currently bound to `Network`, so this must run with the three-layer
# Network of this cell in scope.
add_agents([64,64], "64-64")
add_agents([256,256], "256-256")
# add_agents([32,32], "32-32")
# add_agents([256,256], "256-256")

In [None]:
class Network(nn.Module):
    """Fully connected network with five hidden layers (six linear layers).

    ReLU is applied after each of the first five linear layers; the sixth
    layer's output is returned unchanged. ``layer_numbers`` supplies the seven
    layer widths from input to output.
    """

    def __init__(self, layer_numbers):
        super().__init__()
        # Registers fc1..fc6 in order; these names are the state dict keys.
        for idx in range(1, 7):
            layer = nn.Linear(layer_numbers[idx - 1], layer_numbers[idx])
            setattr(self, "fc{}".format(idx), layer)

    def forward(self, x):
        """Return the raw (non-activated) output of the last linear layer."""
        for idx in range(1, 6):
            hidden_layer = getattr(self, "fc{}".format(idx))
            x = F.relu(hidden_layer(x))
        return self.fc6(x)


# Load the series with five hidden layers; requires the six-layer Network of
# this cell to be the class currently bound to `Network`.
add_agents([64,64,64,64,64], "64-64-64-64-64")

In [None]:
# N and L are used by later cells as the normaliser N*L; they also appear as
# arguments of the commented-out create_test_sequences call.
# presumably N = number of test sequences and L = their length — TODO confirm
N = 1000
L = 100
# test_sequences = dc.create_test_sequences('1000-100.test', N, L, KIND_CARS)
test_sequences = dc.load_test_sequences('1000-100.test')

In [None]:
# cut_off = 10
# NOTE(review): `cut_off` is read by a later plotting cell, but its only
# definition in this notebook is the commented-out line above.
# Collects one list of trainer.test_agent results per agent series.
results = []

In [None]:
# Evaluate the loaded agents with trainer.test_agent. Every agent is given the
# same randomly drawn initial buffer content and output sequence.
# NOTE(review): no np.random.seed call is visible in this notebook, so these
# draws are presumably different on every run — confirm.
buffer = np.random.randint(0, KIND_CARS, NUM_LINES * CAPACITY_LINES)
output_sequence = np.random.randint(0, KIND_CARS, OUTPUT_SEQUENCE_LENGTH)
for i, (agents_series, envs_series, labels_series) in enumerate(zip(agents, envs, labels)):
    current_results = []
    for  j,(agent, env, label) in enumerate(zip(agents_series, envs_series, labels_series)):
        current_results.append(trainer.test_agent(env, agent, test_sequences, 250, 90, buffer = buffer, output_sequence = output_sequence, show_picture=False))
        # Debug shortcut: stop after the first agent of the series.
        # NOTE(review): later cells index results_series[2*i+1] and
        # results[0][2], which need more than one result per series.
        if j == 0:
            break
    results.append(current_results)
    # Debug shortcut: stop after the first series.
    if i == 0:
        break

In [None]:
# Baseline: choose every action with MCTS and record the summed reward of each
# test sequence in mc_results.
mc_results = []
for i,test_sequence in enumerate(test_sequences):
    ret = 0
    env = Environment(INPUT_SEQUENCE_LENGTH, KIND_CARS, NUM_LINES, CAPACITY_LINES, OUTPUT_SEQUENCE_LENGTH, INPUT_WINDOW)
    state = env.reset(test_sequence, buffer, output_sequence)
    done = False
    while not done:
        # A new MCTS object is built for every single decision.
        # NOTE(review): this call passes (env, 100) while another cell calls
        # MCTS(env, agent, KIND_CARS, 100) — confirm which signature matches
        # the current mcts.MCTS.
        mcts = MCTS(env, 100)
        act = mcts.execute()
        reward, next_state, done = env.step(act)
        ret += reward
    mc_results.append(ret)
    print(str(i) + '/' +str(len(test_sequences)) + " : " + str(ret))

In [None]:
INPUT_WINDOW = 3

class Network(nn.Module):
    """Fully connected network with two hidden layers.

    Structure: Linear -> ReLU -> Linear -> ReLU -> Linear. ``layer_numbers``
    supplies the widths as ``[input, hidden1, hidden2, output]``; only the
    first four entries are read.
    """

    def __init__(self, layer_numbers):
        super().__init__()
        n_in = layer_numbers[0]
        n_hidden1 = layer_numbers[1]
        n_hidden2 = layer_numbers[2]
        n_out = layer_numbers[3]
        # Attribute names fc1..fc3 are the keys of the saved state dicts.
        self.fc1 = nn.Linear(n_in, n_hidden1)
        self.fc2 = nn.Linear(n_hidden1, n_hidden2)
        self.fc3 = nn.Linear(n_hidden2, n_out)

    def forward(self, x):
        """Return the raw (non-activated) output of the last linear layer."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Expert MCTS: MCTS that is additionally given a trained DQN agent. Records the
# summed reward of each evaluated test sequence in mce_results.
mce_results = []
for i,test_sequence in enumerate(test_sequences):
    ret = 0
    env = Environment(INPUT_SEQUENCE_LENGTH, KIND_CARS, NUM_LINES, CAPACITY_LINES, OUTPUT_SEQUENCE_LENGTH, INPUT_WINDOW)
    state = env.reset(test_sequence, buffer, output_sequence)
    # The network and agent are deliberately rebuilt for every sequence, as in
    # the original cell: constructing them may consume random state, so
    # hoisting them out of the loop could change the results.
    pathname = fc.get_path()
    policy = pathname + '/results/I:3_O:5_N:126-64-64-2_NL:2_CL:3_W:100_KC:8/I:3_O:5_N:126-64-64-2_NL:2_CL:3_W:100_KC:8_0.pth'
    layers = [(INPUT_WINDOW + OUTPUT_SEQUENCE_LENGTH + NUM_LINES * CAPACITY_LINES) * (KIND_CARS + 1), 64, 64, NUM_LINES]
    net = Network(layers)
    net.load_state_dict(torch.load(policy))
    net.eval()
    agent = Agent(net, BUFFER_SIZE, BATCH_SIZE, UPDATE_EVERY, GAMMA, TAU, LR, SEED)
    done = False
    while not done:
        mcts = MCTS(env, agent, KIND_CARS, 100)
        act = mcts.execute()
        reward, next_state, done = env.step(act)
        ret += reward
    # Store in mce_results (the original appended to mc_results, which left
    # mce_results empty and mixed these returns into the plain-MCTS baseline).
    mce_results.append(ret)
    print(str(i) + '/' +str(len(test_sequences)) + " : " + str(ret))
    # Only the first 201 sequences are evaluated.
    if i == 200:
        break

In [None]:
# Reduce each series to nine scores, one per consecutive pair of results.
# assumes results_series holds two entries per input window size (1..9), in
# window order — TODO confirm against the checkpoint directories
eval_numbers = []
for meta_label, labels_series, results_series in zip(meta_labels, labels,  results):
    current_eval_numbers = []
    for i in range(9):
        # Keep the larger of the two summed results of this pair.
        x  =  np.max([np.sum(results_series[2*i]), np.sum(results_series[2*i+1])])
        # Flip the sign and normalise by N*L.
        x  = x * (-1) / (N*L)
        current_eval_numbers.append(x)
    eval_numbers.append(current_eval_numbers)

In [None]:
# One line per series: score (y) against position 1..len(eval_numbers[0]) (x).
# The x axis is derived from the first series for every trace.
fig = go.Figure()
for i, (label, numbers) in enumerate(zip(meta_labels, eval_numbers)):
    fig.add_trace(go.Scatter(x=np.linspace(1, len(eval_numbers[0]), len(eval_numbers[0])), y=numbers, mode='lines', name=label))
fig.show()

In [None]:
# Resize whatever figure is currently bound to `fig` and write it to
# 'compare-agents.html' with plotly's offline plot function.
fig.update_layout(height=600, width=1200, title_text="Subplots")
plotly.offline.plot(fig, filename='compare-agents.html')

In [None]:
# results of 64 64 network 
# NOTE(review): `cut_off` is only defined in a commented-out line of an
# earlier cell, so this cell raises NameError on a fresh kernel.
# NOTE(review): x has N points while y is sliced with [cut_off:], so the two
# lengths differ whenever cut_off > 0; `name=labels[i]` passes a list of labels
# rather than a single string — confirm the intended data before fixing.
fig = go.Figure()
for i, (label, res) in enumerate(zip(labels, results)):
    fig.add_trace(go.Scatter(x=np.linspace(1, N-cut_off, N), y=results[i][1][cut_off:], mode='lines', name=labels[i]))
fig.show()

In [None]:
# Grouped bar chart comparing two kinds of entries: those whose label ends in
# 'e' ("high score") and all others, plus the better of each pair.
# NOTE(review): `labels`/`results` are nested per series elsewhere in this
# notebook; this cell treats each entry as a single label/result — confirm the
# expected structure.
bar_results = []
high_score_bar_results = []
bar_labels = []
high_score_bar_labels = []
for label, result in zip(labels,results):
    # Flip the sign and normalise by N*L.
    x = np.sum(result)*(-1) / (N*L)
    if label[-1] == 'e':
        high_score_bar_results.append(x)
        high_score_bar_labels.append(label)
    else:
        bar_results.append(x)
        bar_labels.append(label)


# Best of each (regular, high score) pair. Kept in its own variable: the
# original rebound the global `results` here, which destroyed the evaluation
# results that other cells still index into.
best_bar_results = [np.max([regular, high]) for regular, high in zip(bar_results, high_score_bar_results)]

fig = go.Figure(data=[
    go.Bar(name='best', x=bar_labels, y=best_bar_results),
    go.Bar(name='bar_results', x=bar_labels, y=bar_results),
    go.Bar(name='high_score_bar_results', x = bar_labels, y=high_score_bar_results),
])
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()

In [None]:
# Scratch cell: list concatenation with `+`; the result is shown as the cell
# output and not used elsewhere in the visible notebook.
a = [1]
b = [2,3]
c = [4]
a + b + c

In [None]:
# Copy the first 201 values of results[0][2][0] into a flat list for the
# comparison plots below.
# presumably results[0][2] is the test_agent result of the third agent of the
# first series and [0] its per-sequence scores — TODO confirm
comp_results = []
for i in range(201):
    comp_results.append(results[0][2][0][i])

In [None]:
# Scatter plot of the per-sequence returns of the three methods.
# NOTE(review): x always has 200 points, whereas comp_results is filled with
# 201 values and the loops producing mc_results / mce_results stop at other
# counts — confirm how the length mismatch should be handled.
fig = go.Figure()

fig.add_trace(go.Scatter(x=np.linspace(1, 200, 200), y=comp_results, mode='markers', name='DQN compare'))
fig.add_trace(go.Scatter(x=np.linspace(1, 200, 200), y=mc_results, mode='markers', name='MCTS'))
fig.add_trace(go.Scatter(x=np.linspace(1, 200, 200), y=mce_results, mode='markers', name='Expert MCTS'))


fig.show()

In [None]:
# Moving averages over a window of 50 results: for i = 50..200 the mean of
# entries [i-50, i) is taken, giving 151 values per method.
# NOTE(review): `depth_results` is created in a cell further down, so this cell
# fails on a fresh top-to-bottom run.
dqn_means = []
mc_means = []
mce_means = []
depth_means = []
for i in range(201):
    # Skip positions that do not yet have 50 preceding results.
    if i < 50:
        continue
    upper = i
    under = upper - 50
    dqn_means.append(np.mean(comp_results[under:upper]))
    mc_means.append(np.mean(mc_results[under:upper]))
    mce_means.append(np.mean(mce_results[under:upper]))
    depth_means.append(np.mean(depth_results[under:upper]))

In [None]:
# Line plot of the 151 moving-average values of each method.
fig = go.Figure()

fig.add_trace(go.Scatter(x=np.linspace(1, 151, 151), y=dqn_means, mode='lines', name='DQN compare'))
fig.add_trace(go.Scatter(x=np.linspace(1, 151, 151), y=mc_means, mode='lines', name='MCTS'))
fig.add_trace(go.Scatter(x=np.linspace(1, 151, 151), y=mce_means, mode='lines', name='Expert MCTS'))


fig.show()

In [None]:
# Overall mean return of each method over all collected results.
# NOTE(review): the three lists are filled by loops that stop at different
# counts, so the means may be taken over different numbers of sequences.
dqn_eval_num = np.mean(comp_results)
mc_eval_num = np.mean(mc_results)
mce_eval_num = np.mean(mce_results)
print("DQN", "MC", "MCE")
print(dqn_eval_num, mc_eval_num, mce_eval_num)

In [None]:
MAX_DEPTH = 20
class Search_node():
    """Node of the depth-limited search tree used by ``optimal``.

    Attributes:
        env: environment state this node represents.
        visited: list of actions taken from the root to reach this node.
        value: sum of the rewards collected along that path.
        done: ``env.done`` at construction time.
        depth: depth of the node; the root has depth 1.
    """

    def __init__(self, env, value = 0, depth = 1, visited = None):
        self.env = env
        # A fresh list per node: a mutable default argument would be shared by
        # every node created without an explicit ``visited``.
        self.visited = [] if visited is None else visited
        self.value = value
        self.done = env.done
        self.depth = depth

    def explore(self):
        """Return one child node per line, created with ``env.light_step``."""
        res = []
        for i in range(NUM_LINES):
            # light_step returns the successor environment together with the
            # (reward, next_state, done) triple; only the reward is needed.
            child_env, (reward, _, _) = self.env.light_step(i)
            # Build a new action list so siblings never share a path.
            child_visited = [*self.visited, i]
            child = Search_node(child_env, self.value + reward, self.depth+1, child_visited)
            res.append(child)

        return res

    def show(self):
        """Print the accumulated value and the action path of this node."""
        print(self.value, self.visited)

    def terminal(self):
        """Return True if the episode is done or MAX_DEPTH has been reached."""
        return (self.done or self.depth == MAX_DEPTH)

def optimal(env):
    """Return the first action on the path to the first terminal node found.

    Runs a best-first search from ``env``: the open node with the highest
    accumulated value is expanded next, and the search stops as soon as a
    popped node is terminal (episode done or depth limit reached).
    """
    frontier = [Search_node(env)]
    best = None
    while frontier:
        # The list is kept sorted ascending by value, so the last element is
        # the most promising node.
        node = frontier.pop()
        if node.terminal():
            best = node
            break
        frontier.extend(node.explore())
        frontier.sort(key=lambda candidate: candidate.value)

    return best.visited[0]

In [None]:
# Baseline: choose every action with the depth-limited best-first search
# `optimal` and record the summed reward and the actions of each sequence.
depth_results = []
actions = []
for i,test_sequence in enumerate(test_sequences):
    ret = 0
    current_actions = []
    # The original line was a mangled, duplicated Environment(...) call that
    # did not parse; this is the single intended construction.
    env = Environment(INPUT_SEQUENCE_LENGTH, KIND_CARS, NUM_LINES, CAPACITY_LINES, OUTPUT_SEQUENCE_LENGTH, INPUT_WINDOW)
    state = env.reset(test_sequence, buffer, output_sequence)
    done = False
    while not done:
        act = optimal(env)
        current_actions.append(act)
        reward, next_state, done = env.step(act)
        ret += reward
    actions.append(current_actions)
    depth_results.append(ret)
    print(str(i) + '/' +str(len(test_sequences)) + " : " + str(ret))
    # Debug shortcut: only the first test sequence is evaluated.
    if i == 0:
        break

In [None]:
# Same moving-average plot as before, with the depth-search baseline added.
fig = go.Figure()

fig.add_trace(go.Scatter(x=np.linspace(1, 151, 151), y=dqn_means, mode='lines', name='DQN compare'))
fig.add_trace(go.Scatter(x=np.linspace(1, 151, 151), y=mc_means, mode='lines', name='MCTS'))
fig.add_trace(go.Scatter(x=np.linspace(1, 151, 151), y=mce_means, mode='lines', name='Expert MCTS'))
fig.add_trace(go.Scatter(x=np.linspace(1, 151, 151), y=depth_means, mode='lines', name='depth'))



fig.show()

In [None]:
# Hand-written evaluation loop for a DQN agent: act greedily via agent.act on
# the linearized state and record the summed reward and actions per sequence.
# Uses whatever `agent` is currently bound in the kernel (it is not defined in
# this cell).
dqn_hand_results = []
actions = []
count = 0  # number of decisions taken; was incremented without being initialised
for i,test_sequence in enumerate(test_sequences):
    ret = 0
    current_actions = []
    env = Environment(INPUT_SEQUENCE_LENGTH, KIND_CARS, NUM_LINES, CAPACITY_LINES, OUTPUT_SEQUENCE_LENGTH, INPUT_WINDOW)
    state = env.reset(test_sequence, buffer, output_sequence)
    done = False
    while not done:
        count += 1
        # Linearize into a separate variable so the raw state is not
        # overwritten by its own encoding.
        linear_state = fc.linearize([state], KIND_CARS)
        act = agent.act(linear_state)
        current_actions.append(act)
        reward, next_state, done = env.step(act)
        # Advance to the new observation; the original kept re-linearizing the
        # already linearized initial state on every step.
        state = next_state
        ret += reward
    actions.append(current_actions)
    # Store in dqn_hand_results (the original appended to depth_results).
    dqn_hand_results.append(ret)
    print(str(i) + '/' +str(len(test_sequences)) + " : " + str(ret))
    # Debug shortcut: only the first test sequence is evaluated.
    if i == 0:
        break

In [None]:
# Run the fifth agent of the first series on the first test sequence only;
# just the third element of test_agent's return value is kept (in `ca`).
agent = agents[0][4]
env = envs[0][4]
_, _, ca = trainer.test_agent(env, agent, test_sequences[0:1], 250, 90, buffer = buffer, output_sequence = output_sequence, show_picture=False)

In [None]:
# Show the checkpoint label of the seventh agent of the first series.
label = labels[0][6]
label

In [None]:
# Select the seventh agent/environment of the first series for the comparison
# below and draw a new random buffer content and output sequence.
# NOTE(review): this rebinds `buffer` and `output_sequence`, so results from
# cells run before and after this one start from different initial conditions.
agent = agents[0][6]
env = envs[0][6]
buffer = np.random.randint(0, KIND_CARS, NUM_LINES * CAPACITY_LINES)
output_sequence = np.random.randint(0, KIND_CARS, OUTPUT_SEQUENCE_LENGTH)

In [None]:
# Evaluate the selected agent on all test sequences; test_agent returns a
# triple that is unpacked into scores, means and actions.
scores, means, actions = trainer.test_agent(env, agent, test_sequences, 250, 1000, buffer, output_sequence)

In [None]:
# Baseline: look-ahead search (Las) with a fixed depth on all test sequences,
# reusing the `env` currently bound in the kernel.
ls = []          # summed reward per test sequence
depth = 2
las = Las(depth)
car_count = 0    # total number of env.step calls over all sequences
sum_ret = 0      # summed reward over all sequences
for i, test_sequence in enumerate(test_sequences):
    ret = 0 
    done = False
    env.reset(test_sequence, buffer, output_sequence)
    while not done:
        car_count += 1
        action = las.act(env)
        reward, _, done = env.step(action)
        ret += reward 
#     print(i, ':', ret)
    ls.append(ret)
    sum_ret += ret
# Mean return per sequence and mean reward per step.
relative = sum_ret/len(test_sequences)
relative_t = sum_ret/car_count
print("For Depth "  + str(depth) , sum_ret, car_count, relative, relative_t)

In [None]:
# Running mean of the look-ahead returns over the most recent (at most 250)
# sequences; the deque drops the oldest score once it is full.
la2_means = []
scores_window = deque(maxlen=250)
for score in ls:
    scores_window.append(score)
    la2_means.append(np.mean(scores_window))

In [None]:
# Compare the agent's running means with the look-ahead baselines.
# NOTE(review): `la_means` is not defined in any visible cell (only
# `la2_means` is) — presumably left over from a run with depth 3; confirm.
fig = go.Figure()
length = len(means)
fig.add_trace(go.Scatter(x=np.linspace(1, length, length), y=means, mode='markers', name='Learning'))
fig.add_trace(go.Scatter(x=np.linspace(1, length, length), y=la_means, mode='markers', name='LA3'))
fig.add_trace(go.Scatter(x=np.linspace(1, length, length), y=la2_means, mode='markers', name='LA2'))



In [None]:
# Mean of the running means in `la_means`.
# NOTE(review): `la_means` is not defined in any visible cell — confirm.
np.mean(la_means)