In [1]:
import sys
sys.path.append("..")
import ScheduleFlow

import numpy as np

In [2]:
# Number of processing units (nodes) of the simulated system. It is passed to
# ScheduleFlow.System further down and is used by create_job_list as the upper
# bound for the randomly drawn per-job node counts.
num_processing_units = 10

In [3]:
# create workload
def create_job_list(num_processing_units, num_jobs):
    """Create a random workload for a system with `num_processing_units` nodes.

    `num_jobs` regular jobs are generated, all submitted at time 0. Each one
    gets a random execution time in [11, 99] and a random node count in
    [1, num_processing_units]. Job ``i`` requests its execution time plus
    ``(i // 2) * 10``, so a regular job never requests less than it needs.

    One extra job is always appended. It runs for 100 time units but lists
    the requests [90, 135]; its first request is therefore shorter than its
    execution time (presumably the second value is used when the job is
    resubmitted -- confirm against ScheduleFlow.Application). It asks for
    (almost) the whole machine: `num_processing_units - 1` or
    `num_processing_units` nodes, never fewer than 1.

    Args:
        num_processing_units: total number of nodes of the target system;
            must be at least 1 (np.random.randint raises ValueError otherwise).
        num_jobs: number of regular jobs to generate.

    Returns:
        A set with the generated ScheduleFlow.Application objects.
    """
    job_list = set()

    for i in range(num_jobs):
        execution_time = np.random.randint(11, 100)
        # every second job gets 10 more time units of slack in its request
        request_time = execution_time + (i // 2) * 10
        processing_units = np.random.randint(1, num_processing_units + 1)
        submission_time = 0
        job_list.add(ScheduleFlow.Application(
            processing_units,
            submission_time,
            execution_time,
            [request_time]))

    # The node count of the extra job is derived from the system size instead
    # of the former hard-coded randint(9, 11), which only fits a system with
    # at least 10 nodes. For num_processing_units == 10 the range is the same.
    large_job_units = np.random.randint(max(1, num_processing_units - 1),
                                        num_processing_units + 1)
    job_list.add(ScheduleFlow.Application(large_job_units, 0,
                                          100, [90, 135]))
    return job_list

# build a small sample workload: 3 random jobs plus the extra job that
# create_job_list always appends
job_list = create_job_list(num_processing_units, 3)
# print one job in its human-readable form ...
print(next(iter(job_list)))
# ... and let the notebook display the representation of the whole set
job_list

Job -1: 8 nodes; 0.0 submission time; 39.0 total execution time (49.0 requested)


{Job(Nodes: 8, Submission: 0.0, Walltime: 39.0, Request: 49.0),
 Job(Nodes: 5, Submission: 0.0, Walltime: 81.0, Request: 81.0),
 Job(Nodes: 10, Submission: 0.0, Walltime: 100.0, Request: 90.0),
 Job(Nodes: 1, Submission: 0.0, Walltime: 68.0, Request: 68.0)}

In [4]:
# generate the workload used for the simulation: 10 random jobs plus the
# extra job that create_job_list always appends
job_list = create_job_list(num_processing_units, 10)
# create the simulator; the metrics of each run are written to stdout and the
# simulation is repeated 10 times (loops). check_correctness presumably
# enables the schedule sanity checks -- confirm in ScheduleFlow.Simulator
simulator = ScheduleFlow.Simulator(check_correctness=True,
                                   output_file_handler=sys.stdout,
                                   loops = 10)
# display the simulator configuration (no jobs are attached at this point)
simulator

Simulator(GIF: False, Check_correctness: True, Loops: 10, Output: <ipykernel.iostream.OutStream object at 0x7fa574b793c8>, Jobs: 0)

In [5]:
# batch scheduler working on a system with num_processing_units nodes
sch = ScheduleFlow.BatchScheduler(ScheduleFlow.System(num_processing_units))
print(sch)

Batch Scheduler: System: 10 total nodes (10 currently free); 0 jobs in queue; 0 jobs running


In [6]:
# hand the scheduler and the workload over to the simulator
simulator.create_scenario(sch, job_list=job_list)
# display the simulator again; its representation now includes the job count
simulator

Simulator(GIF: False, Check_correctness: True, Loops: 10, Output: <ipykernel.iostream.OutStream object at 0x7fa574b793c8>, Jobs: 11)

In [7]:
# run the prepared scenario and request only the "system" metrics; the
# per-loop values are written to the simulator's output handler
results = simulator.run(metrics=["system"])

Scenario name : system makespan : system utilization : 
ScheduleFlow : 809.00 : 0.64 : 
ScheduleFlow : 809.00 : 0.64 : 
ScheduleFlow : 809.00 : 0.64 : 
ScheduleFlow : 809.00 : 0.64 : 
ScheduleFlow : 809.00 : 0.64 : 
ScheduleFlow : 809.00 : 0.64 : 
ScheduleFlow : 809.00 : 0.64 : 
ScheduleFlow : 809.00 : 0.64 : 
ScheduleFlow : 809.00 : 0.64 : 
ScheduleFlow : 809.00 : 0.64 : 


In [8]:
# display the value returned by simulator.run (metric name -> value)
results

{'system makespan': 809.0, 'system utilization': 0.6372064276885042}

In [9]:
# replace the simulator with one that relies on the default loop count and
# the default output handler
simulator = ScheduleFlow.Simulator(check_correctness=True)
simulator

Simulator(GIF: False, Check_correctness: True, Loops: 1, Output: None, Jobs: 0)

In [10]:
# average the requested metrics over several independent random workloads
loops = 2
avg_results = {}
for i in range(loops):
    # every iteration simulates a freshly generated workload
    job_list = create_job_list(num_processing_units, 10)
    results = simulator.run_scenario(
        sch,
        job_list,
        metrics=["utilization", "response time", "wait time"])
    if avg_results:
        # add the metrics of this run to the running totals
        avg_results = {metric: total + results[metric]
                       for metric, total in avg_results.items()}
    else:
        # the first run initialises the totals
        avg_results = results
# turn the totals into per-run averages
avg_results = {metric: total / loops for metric, total in avg_results.items()}
print(avg_results)

{'job response time': 327.59090909090907, 'job utilization': 0.730040741024333, 'job wait time': 236.45833333333334, 'system utilization': 0.6490631919923395}


In [11]:
# get all statistics for the last simulation only (the values printed by the
# previous cell are averages over all loop iterations)
simulator.get_stats_metrics(["all"])

{'job failures': 1,
 'job response time': 347.09090909090907,
 'job stretch': 6.0089726156858605,
 'job utilization': 0.7633254713026606,
 'job wait time': 246.08333333333334,
 'system makespan': 539,
 'system utilization': 0.6890538033395176}

In [12]:
# fetch the execution log of the simulator; the cells below index it by job
# and treat every entry as a list of [start, end] pairs, one per execution
# instance of that job
execution_log = simulator.get_execution_log()
# check the untouched log (the return value is presumably the number of
# failed checks -- confirm in Simulator.test_correctness)
simulator.test_correctness(execution_log=execution_log)

0

In [13]:
# corrupt the log of every job that was executed more than once, so that its
# execution instances are no longer in a valid order
for job in execution_log:
    # jobs with a single execution instance are left untouched
    if len(execution_log[job]) <= 1:
        continue
    # push all instances of the job 1000 time units into the future
    shifted = [[run[0] + 1000, run[1] + 1000] for run in execution_log[job]]
    # let the second instance begin one time unit before the first one ends
    # and last for the full walltime of the job
    overlap_start = shifted[0][1] - 1
    shifted[1] = [overlap_start, overlap_start + job.walltime]
    execution_log[job] = shifted

simulator.test_correctness(execution_log=execution_log)

Job 0: 10 nodes; 0.0 submission time; 100.0 total execution time (90.0 requested) did not pass the sanity check: [[1000, 1090], [1089, 1189]]


1

In [14]:
# create time intervals in which more than the total number of nodes is
# allocated
start = 0
for job in execution_log:
    # move the first instance of the failed job to the beginning of the simulation
    if len(execution_log[job])>1:
        # a first instance that starts at exactly 1000 is placed at t=100
        # instead of t=0 (presumably this identifies an instance that the
        # previous cell shifted away from t=0 -- TODO confirm the intent)
        if execution_log[job][0][0] == 1000:
            start = 100
        # the relocated instance lasts for the requested walltime of the job
        end = start + job.request_walltime
        execution_log[job][0][0] = start
        execution_log[job][0][1] = end
# start and end remain defined at notebook level and are read by the next
# cell; end is only assigned if some job has more than one execution instance
simulator.test_correctness(execution_log=execution_log)

Full schedule did not pass sanity check


3

In [15]:
# print the jobs whose first execution instance overlaps the [start, end]
# interval of the relocated instance, i.e. the executions involved in the
# failed full-schedule sanity check
for job in execution_log:
    # Compare against the interval recorded in the log. The requested walltime
    # can be longer than the time the job really ran, so start + request
    # would also report jobs that had already finished before `start`.
    first_start, first_end = execution_log[job][0][0], execution_log[job][0][1]
    if first_start <= end and first_end >= start:
        print(job)
        print(execution_log[job])

Job 0: 10 nodes; 0.0 submission time; 100.0 total execution time (90.0 requested)
[[100, 190], [1089, 1189]]
Job 6: 6 nodes; 0.0 submission time; 96.0 total execution time (116.0 requested)
[[90, 186]]
Job 10: 4 nodes; 0.0 submission time; 59.0 total execution time (99.0 requested)
[[90, 149]]
Job 9: 2 nodes; 0.0 submission time; 49.0 total execution time (89.0 requested)
[[189, 238]]
