# String GA - Hill climbing
## With Round-robin / FIFO

In [None]:
import numpy as np
from SC_v12 import simple_conveyor_2
import yaml
import random
from copy import copy
import time
import plotly.express as px
import statistics as sts

def single_point_crossover(input_list):
    """Return a copy of ``input_list`` with two differing entries swapped.

    Despite the name this is a swap mutation rather than a crossover: two
    positions are drawn at random and their values are exchanged. Positions
    are redrawn until the two values differ, so the result always differs
    from the input whenever a swap can change it.

    Args:
        input_list: Sequence of actions to mutate; it is not modified.

    Returns:
        A shallow copy of ``input_list`` with two entries swapped, or an
        unchanged copy when no swap could alter the sequence.
    """
    new_list = copy(input_list)
    # NOTE(review): positions are drawn from 1..len-1, so index 0 is never
    # swapped -- confirm whether pinning the first action is intentional.
    tail = new_list[1:]
    # If the swappable positions do not hold two different values, the redraw
    # loop below could never finish (and for fewer than two entries randint
    # would get an empty range), so hand back the plain copy instead.
    if all(value == tail[0] for value in tail):
        return new_list
    last = len(new_list) - 1
    while True:
        i1 = random.randint(1, last)
        i2 = random.randint(1, last)
        if new_list[i1] != new_list[i2]:
            break
    new_list[i1], new_list[i2] = new_list[i2], new_list[i1]
    return new_list

def return_fitness(order_list):
    """Replay an action sequence on the global ``env`` and return its reward.

    The environment is reset, every action in ``order_list`` is stepped in
    order, and action ``0`` is then stepped repeatedly until
    ``env.demand_queues`` equals a list of ``env.amount_of_gtps`` empty lists.

    Args:
        order_list: Iterable of actions passed one by one to ``env.step``.

    Returns:
        The value of ``env.reward`` once all demand queues are empty.
    """
    env.reset()
    for action in order_list:
        env.step(action)
    # Keep stepping action 0 until every demand queue has been emptied.
    while env.demand_queues != [[] for _ in range(env.amount_of_gtps)]:
        env.step(0)
    return env.reward

#### Initialize the sequence and the environment
config_path = 'rl/config/simple_conveyor_2.yml'
with open(config_path, 'r') as f:
    # A bare yaml.load(f) is deprecated because its default Loader is unsafe,
    # and newer PyYAML releases require an explicit Loader. safe_load builds
    # only plain Python objects.
    # NOTE(review): assumes the config uses standard YAML tags only -- confirm.
    config = yaml.safe_load(f)

# Fixed demand queues are used below; the commented-out line draws random ones.
#queues = [random.choices(np.arange(1,config['environment']['amount_gtp']+1), [config['environment']['percentage_small_carriers'], config['environment']['percentage_medium_carriers'], config['environment']['percentage_large_carriers']], k=config['environment']['gtp_buffer_size']) for item in range(config['environment']['amount_gtp'])] # generate random queues
queues = [[2, 3, 2, 1, 3, 2, 3, 3, 3, 2], [3, 2, 3, 2, 2, 3, 1, 2, 2, 3], [2, 2, 1, 2, 2, 3, 2, 3, 2, 3]]
env = simple_conveyor_2(config, queues)


#### build order list
# Interleave the queues round-robin: position 0 of every queue in env.queues,
# then position 1 of every queue, and so on for len(queues[0]) rounds.
order_list = []
for index in range(len(queues[0])):
    order_list.extend(queue[index] for queue in env.queues)

# Pad the sequence with three times as many 0 actions as it has entries.
order_list.extend([0] * (3 * len(order_list)))


#### Calculate best sequence
# Hill climbing: repeatedly mutate the current sequence and keep a mutation
# only when it strictly improves the reward. fitness holds the best reward
# after the initial evaluation and after every iteration.
best_run = return_fitness(order_list)
fitness = [best_run]
t = 0
start = time.time()
# The first 100 iterations always run; after that the search continues until
# the mean of the last 100 recorded rewards equals the highest recorded one.
while t < 100 or sts.mean(fitness[-100:]) != max(fitness):
    t += 1
    new_order_list = single_point_crossover(order_list)
    this_run = return_fitness(new_order_list)
    if this_run > best_run:
        order_list, best_run = new_order_list, this_run
        # end='\r' lets the next progress message overwrite this one.
        print('Order list updated, best reward: {}'.format(best_run), end='\r')
    fitness.append(best_run)
delta = time.time() - start
print('Running time: {} minutes'.format(delta/60))
print('Best reward: {} \n Resulting sequence of actions: \n {}'.format(best_run, order_list))


calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.



Ep:   886, steps: 479, R: 437.000577

In [None]:
# Plot the recorded best reward against the index at which it was recorded.
fig = px.line(x=list(range(len(fitness))), y=fitness, title='Reward')
fig.show()

In [None]:
# Replay the best sequence found, rendering the environment after every step.
env.reset()
for action in order_list:
    env.step(action)
    env.render()
# Keep stepping action 0 (and rendering) until every demand queue is empty.
while env.demand_queues != [[] for _ in range(env.amount_of_gtps)]:
    env.step(0)
    env.render()

## With Random shuffle

In [None]:
import random

# Fixed demand queues are used below; the commented-out line draws random ones.
#queues = [random.choices(np.arange(1,config['environment']['amount_gtp']+1), [config['environment']['percentage_small_carriers'], config['environment']['percentage_medium_carriers'], config['environment']['percentage_large_carriers']], k=config['environment']['gtp_buffer_size']) for item in range(config['environment']['amount_gtp'])] # generate random queues
queues = [
    [2, 3, 2, 1, 3, 2, 3, 3, 3, 2],
    [3, 2, 3, 2, 2, 3, 1, 2, 2, 3],
    [2, 2, 1, 2, 2, 3, 2, 3, 2, 3],
]
env = simple_conveyor_2(config, queues)


#### build order list
# Concatenate the queues, pad with three times as many 0 actions, then
# shuffle everything in place to obtain a random starting sequence.
order_list = [action for queue in queues for action in queue]
order_list.extend([0] * (3 * len(order_list)))
random.shuffle(order_list)


#### Calculate best sequence
# Hill climbing: repeatedly mutate the current sequence and keep a mutation
# only when it strictly improves the reward. fitness holds the best reward
# after the initial evaluation and after every iteration.
best_run = return_fitness(order_list)
fitness = [best_run]
t = 0
start = time.time()
# The first 1000 iterations always run; after that the search continues until
# the mean of the last 1000 recorded rewards equals the highest recorded one.
while t < 1000 or sts.mean(fitness[-1000:]) != max(fitness):
    t += 1
    new_order_list = single_point_crossover(order_list)
    this_run = return_fitness(new_order_list)
    if this_run > best_run:
        order_list, best_run = new_order_list, this_run
        # end='\r' lets the next progress message overwrite this one.
        print('Order list updated, best reward: {}'.format(best_run), end='\r')
    fitness.append(best_run)
delta = time.time() - start
print('Running time: {} minutes'.format(delta/60))
print('Best reward: {} \n Resulting sequence of actions: \n {}'.format(best_run, order_list))

In [None]:
# Plot the recorded best reward against the index at which it was recorded.
fig = px.line(x=list(range(len(fitness))), y=fitness, title='Reward')
fig.show()

# Random restart + Hill climbing

# Tabu Search: hill climbing by local search

# Other

In [12]:
# Interleave the queues round-robin: position 0 of every queue in env.queues,
# then position 1 of every queue, and so on for len(queues[0]) rounds.
order_list = []
for index in range(len(queues[0])):
    order_list.extend(queue[index] for queue in env.queues)

# Pad the sequence with twice as many 0 actions as it has entries.
order_list.extend([0] * (2 * len(order_list)))

In [13]:
import time
# Hill climbing for a fixed 1000 iterations. Unlike a per-iteration log,
# fitness here only receives an [iteration, reward] pair when a mutation is
# accepted as an improvement.
fitness = []
best_run = return_fitness(order_list)
start = time.time()
for t in range(1, 1001):
    new_order_list = single_point_crossover(order_list)
    this_run = return_fitness(new_order_list)
    if this_run > best_run:
        order_list, best_run = new_order_list, this_run
        # end='\r' lets the next progress message overwrite this one.
        print('Order list updated, best reward: {}'.format(best_run), end='\r')
        fitness.append([t, best_run])
delta = time.time() - start
print('Running time: {} minutes'.format(delta/60))
print('Best reward: {} \n Resulting sequence of actions: \n {}'.format(best_run, order_list))

Running time: 7.839385946591695 minutes
Best reward: 468 
 Resulting sequence of actions: 
 [2, 2, 2, 3, 3, 2, 0, 3, 0, 3, 1, 2, 0, 2, 3, 0, 2, 2, 2, 0, 0, 1, 2, 2, 2, 0, 3, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 2, 0, 0, 2]


In [67]:
import numpy as np
from SC_v12 import simple_conveyor_2
import yaml
import random
from copy import copy


def single_point_crossover(input_list):
    """Return a copy of ``input_list`` with two differing entries swapped.

    Debug variant that prints the drawn values. Despite the name this is a
    swap mutation rather than a crossover: two positions are drawn at random
    and redrawn until their values differ, then the values are exchanged.

    Args:
        input_list: Sequence of actions to mutate; it is not modified.

    Returns:
        A shallow copy of ``input_list`` with two entries swapped, or an
        unchanged copy when no swap could alter the sequence.
    """
    new_list = copy(input_list)
    # NOTE(review): positions are drawn from 1..len-1, so index 0 is never
    # swapped -- confirm whether pinning the first action is intentional.
    tail = new_list[1:]
    # If the swappable positions do not hold two different values, the redraw
    # loop below could never finish (and for fewer than two entries randint
    # would get an empty range), so hand back the plain copy instead.
    if all(value == tail[0] for value in tail):
        return new_list
    last = len(new_list) - 1
    i1 = random.randint(1, last)
    i2 = random.randint(1, last)
    print('t1 = {} and t2 = {}'.format(new_list[i1], new_list[i2]))
    while new_list[i1] == new_list[i2]:
        print('in while loop')
        i1 = random.randint(1, last)
        i2 = random.randint(1, last)
        print('t1 = {} and t2 = {}'.format(new_list[i1], new_list[i2]))
    new_list[i1], new_list[i2] = new_list[i2], new_list[i1]
    return new_list

# Small hand-made sequence for trying out the mutation: three 1s, seven 0s.
order_list = [1] * 3 + [0] * 7

In [74]:
# Display the current action sequence.
print(order_list)

[1, 1, 1, 0, 0, 0, 0, 0, 0, 0]


In [73]:
# Print a mutated copy of the sequence; single_point_crossover works on a
# copy, so order_list itself is left unchanged.
print(single_point_crossover(order_list))

t1 = 1 and t2 = 0
[1, 0, 1, 0, 0, 0, 0, 1, 0, 0]


In [5]:
def return_fitness(order_list):
    """Replay an action sequence on the global ``env`` and return its reward.

    The environment is reset, every action in ``order_list`` is stepped in
    order, and action ``0`` is then stepped repeatedly until
    ``env.demand_queues`` equals a list of ``env.amount_of_gtps`` empty lists.

    Args:
        order_list: Iterable of actions passed one by one to ``env.step``.

    Returns:
        The value of ``env.reward`` once all demand queues are empty.
    """
    env.reset()
    for item in order_list:
        env.step(item)
    # Keep stepping action 0 until every demand queue has been emptied.
    while env.demand_queues != [[] for _ in range(env.amount_of_gtps)]:
        env.step(0)
    return env.reward

INFO:root:setup used: [[2, 3, 2, 1, 3, 2, 3, 3, 3, 2], [3, 2, 3, 2, 2, 3, 1, 2, 2, 3], [2, 2, 1, 2, 2, 3, 2, 3, 2, 3]]


Ep:     2, steps: 479, R: 297.000

297

In [9]:
# Inspect the environment's negative_reward attribute (shown as cell output).
env.negative_reward

-340

In [None]:
# Replay the current sequence, rendering the environment after every step.
env.reset()
for action in order_list:
    env.step(action)
    env.render()
# Keep stepping action 0 (and rendering) until every demand queue is empty.
while env.demand_queues != [[] for _ in range(env.amount_of_gtps)]:
    env.step(0)
    env.render()
# Final expression: show the environment's negative_reward as cell output.
env.negative_reward

INFO:root:0


Ep:     1, steps:   0, R: 0.000

INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
INFO:root:0
