In [34]:
import rl.environments
from os.path import join
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
import yaml, pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

path = pathlib.Path().absolute()

#for the different designs without imitation learning upfront
to_combine=[
    ['ConveyorEnv12','20210112_1530'], #1x2
    ['ConveyorEnv12','20210112_1445'], #2x2
    ['ConveyorEnv12','20210112_1600'], #2x3
    ['ConveyorEnv12','20210112_1700'], #3x3
    ['ConveyorEnv12','20210112_1800'], #4x3
    ['ConveyorEnv12','20210112_1900']  #5x3
]

#  different designs, with imitation learning upfront
# to_combine=[
#     ['ConveyorEnv12','20210113_0000'], #2x2
#     ['ConveyorEnv12','20210113_0100'], #2x3
#     ['ConveyorEnv12','20210113_0200'], #2x3
#     ['ConveyorEnv12','20210113_0300'], #3x3
#     ['ConveyorEnv12','20210113_0400'], #4x3
#     ['ConveyorEnv12','20210113_0500']  #5x3
# ]

## For the buffer size
# to_combine=[
#     ['ConveyorEnv1','20201217_1400'], #1
#     ['ConveyorEnv1','20201217_1500'], #2
#     ['ConveyorEnv1','20201217_1600'], #3
#     ['ConveyorEnv1','20201217_1700'], #4
#     ['ConveyorEnv1','20201217_1800'], #5
#     ['ConveyorEnv1','20201217_1900'], #6
#     ['ConveyorEnv1','20201217_2000'], #7
#     ['ConveyorEnv1','20201217_2100'], #8
#     ['ConveyorEnv1','20201217_2200'], #9
#     ['ConveyorEnv1','20201217_2300'] #10
    
# ]

# For the pipeline
# to_combine=[
#     ['ConveyorEnv121', '20210113_0500'],  # pipe10
#     ['ConveyorEnv121', '20210113_0530'],  # pipe15
#     ['ConveyorEnv121', '20210113_0600'],  # pipe20
#     ['ConveyorEnv121', '20210113_0630'],  # pipe25
#     ['ConveyorEnv121', '20210113_0700'],  # pipe30
#     ['ConveyorEnv121', '20210113_0730'],  # pipe35
#     ['ConveyorEnv121', '20210113_0800'],  # pipe40
#     ['ConveyorEnv121', '20210113_0830'],  # pipe45
#     ['ConveyorEnv121', '20210113_0900']   # pipe50
# ]

In [35]:
env1, subdir = to_combine[0]
episodes = 100
#combinations = ['1x2', '2x2', '2x3', '3x3', '4x3', '5x3']
#combinations = ['buffersize_{:02d}'.format(i+1) for i in range(11)]
#combinations = ['pipeline_{}'.format(i) for i in range(10,55,5)]
combinations = [item[1] for item in to_combine]

results = {}
for idx, combination in enumerate(to_combine):
    env1, subdir = combination
    # load config and variables needed
    location_path= join(path, 'rl', 'trained_models', env1, subdir)
    with open(join(location_path, 'config.yml'), 'r') as c:
        config = yaml.load(c)
        print('\nLoaded config file from: {}\n'.format(join(location_path, 'config.yml')))
    model_config = config['models']['PPO2']

    #initialize env with the config file
    env_obj = getattr(rl.environments, env1)
    env = env_obj(config)
 

    # load best model from path
    model = PPO2.load(join(location_path, 'best_model.zip'), env=DummyVecEnv([lambda: env]))


    results[combinations[idx]] = {}
    results[combinations[idx]]['configuration'] = '{}x{}'.format(config['environment']['amount_of_gtps'], config['environment']['amount_of_outputs'])
    results[combinations[idx]]['gamma'] = config['models']['PPO2']['gamma']
    results[combinations[idx]]['idle_time'] = 0
    results[combinations[idx]]['cycle_count'] = 0
    results[combinations[idx]]['steps'] = 0
    results[combinations[idx]]['items_processed'] = 0
    results[combinations[idx]]['reward'] = 0

    for episode in range(episodes):
        # Run an episode
        state = env.reset()
        done = False
        while not done:
            action, _ = model.predict(state, deterministic=True)
            state, reward, done, tc = env.step(action)
            results[combinations[idx]]['reward'] += reward
        results[combinations[idx]]['idle_time'] += sum(env.idle_times_operator.values())
        results[combinations[idx]]['cycle_count'] += env.cycle_count
        results[combinations[idx]]['steps'] += env.steps
        results[combinations[idx]]['items_processed'] += env.items_processed
        
    results[combinations[idx]]['idle_time'] = results[combinations[idx]]['idle_time'] / episodes
    results[combinations[idx]]['cycle_count'] = results[combinations[idx]]['cycle_count'] /episodes
    results[combinations[idx]]['steps'] = results[combinations[idx]]['steps'] /episodes
    results[combinations[idx]]['items_processed'] = results[combinations[idx]]['items_processed'] /episodes
    results[combinations[idx]]['reward'] = results[combinations[idx]]['reward'] /episodes


calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.




Loaded config file from: D:\Drive\git\RL\rl\trained_models\ConveyorEnv12\20210112_0100\config.yml





Ep:   100, steps:  82, R: 9.0000


calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.




Loaded config file from: D:\Drive\git\RL\rl\trained_models\ConveyorEnv12\20210112_0200\config.yml





Ep:   100, steps:  94, R: 18.000
Loaded config file from: D:\Drive\git\RL\rl\trained_models\ConveyorEnv12\20210112_0300\config.yml




calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.



Ep:   100, steps: 118, R: 33.000
Loaded config file from: D:\Drive\git\RL\rl\trained_models\ConveyorEnv12\20210112_0400\config.yml




calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.



Ep:   100, steps:  81, R: 27.000


calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.




Loaded config file from: D:\Drive\git\RL\rl\trained_models\ConveyorEnv12\20210112_0500\config.yml





Ep:   100, steps: 1441, R: 462.000


calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.




Loaded config file from: D:\Drive\git\RL\rl\trained_models\ConveyorEnv12\20210112_1200\config.yml





Ep:   100, steps: 1424, R: 444.000


calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.




Loaded config file from: D:\Drive\git\RL\rl\trained_models\ConveyorEnv12\20210112_1300\config.yml





Ep:   100, steps:  51, R: -78.0000
Loaded config file from: D:\Drive\git\RL\rl\trained_models\ConveyorEnv12\20210112_1400\config.yml




calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.



Ep:   100, steps: 292, R: 24.0000


calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.




Loaded config file from: D:\Drive\git\RL\rl\trained_models\ConveyorEnv12\20210112_1430\config.yml





Ep:   100, steps: 1345, R: 444.000

In [43]:
resultcsv = pd.DataFrame.from_dict(results).T
#resultcsv.to_csv('evaluation_results/results_DRL.csv'
#resultcsv.to_csv('evaluation_results/results_DRL_without_imit.csv')
resultcsv.to_csv('evaluation_results/results_DRL_buffer.csv')
#resultcsv.to_csv('evaluation_results/results_DRL_pipe.csv')
resultcsv['idle_percent'] = resultcsv.idle_time/resultcsv.steps 
resultcsv['cycle_percent'] = resultcsv.cycle_count/resultcsv.items_processed 
resultcsv

Unnamed: 0,idle_time,cycle_count,steps,items_processed
buffersize_1,161.13,155.39,1527.55,151.02
buffersize_2,183.34,125.95,1521.66,151.02
buffersize_3,39.03,42.35,1460.88,151.03
buffersize_4,3.82,1.53,1424.48,151.02
buffersize_5,16.85,4.78,1442.42,151.04
buffersize_6,0.33,0.57,1430.71,151.04
buffersize_7,0.0,2.88,1436.79,151.03
buffersize_8,0.02,2.34,1423.12,151.04
buffersize_9,0.0,0.55,1444.86,151.01
buffersize_10,0.0,0.78,1442.44,151.01
