In [1]:
import simpy
import numpy as np

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

from evaluation.plot import evolution
from elements.workflow_process_elements import StartEvent, UserTask, connect
from evaluation.statistics import calculate_statistics
from policies.monte_carlo import MC
from simulations import *
%matplotlib notebook


# Reinforcement-learning state shared by every epoch of the loop below.
# The Q-table starts as all zeros: two axes of length 100 plus a last axis
# with one entry per user (NUMBER_OF_USERS).
q_table = np.zeros(shape=(100, 100, NUMBER_OF_USERS))
# Both values are handed to the MC policy unchanged; presumably the
# exploration rate and the discount factor -- TODO confirm against MC.
epsilon, gamma = 0.1, 0.9
# Number of simulation runs to perform.
epochs = 1

# Run one full simulation per epoch, feed the resulting Q-table into the next
# epoch, and draw a 3D scatter of the table's diagonal entries after each run.
for epoch in range(epochs):
    # creates simulation environment
    env = simpy.Environment()

    # open the policy/statistics output files (create_files also returns their names)
    file_policy, file_statistics, file_policy_name, file_statistics_name = create_files("run{}_mc".format(epoch))

    # Everything that can fail while the files are open lives inside the try,
    # so the handles are released even if the simulation or plotting raises.
    try:
        # initialize policy with the Q-table carried over from the previous epoch
        policy = MC(env, NUMBER_OF_USERS, WORKER_VARAIBILITY, file_policy, file_statistics, q_table, epsilon, gamma)

        # start event
        start_event = StartEvent(env, GENERATION_INTERVAL)

        # user tasks
        user_task = UserTask(env, policy, "User task 1", SERVICE_INTERVAL, TASK_VARIABILITY)

        # connections
        connect(start_event, user_task)

        # calls generation tokens process
        env.process(start_event.generate_tokens())

        # runs simulation
        env.run(until=SIM_TIME)

        # update q_table; the result is reused by the next epoch's policy
        q_table = MC.update_q_table(policy)

        fig = plt.figure()
        ax = fig.add_subplot(111, projection="3d")

        print(len(q_table))

        # One scatter call per diagonal entry (kept as separate calls so each
        # point still takes the next colour of the colour cycle):
        #   x = value stored at index 0, y = value stored at index 1,
        #   z = index of the largest value in that entry.
        # A dedicated loop name is used so the epoch index is not overwritten.
        for state in range(len(q_table)):
            ax.scatter(q_table[state, state, 0], q_table[state, state, 1], np.argmax(q_table[state, state]))

        # ax.set_zticks(1.0,2.0)

        ax.set_xlabel('X Label')
        ax.set_ylabel('Y Label')
        ax.set_zlabel('Z Label')

        plt.show()
    finally:
        # close file -- nested so the second file is closed even if closing
        # the first one fails
        try:
            file_policy.close()
        finally:
            file_statistics.close()

    # calculate statistics and plots
    # calculate_statistics(file_policy_name, outfile="{}.pdf".format(file_policy_name[:-4]))
    # evolution(file_statistics_name, outfile="{}.pdf".format(file_statistics_name[:-4]))

<IPython.core.display.Javascript object>

100
