In [10]:
import os
import numpy as np
import random
import scipy as sp
from matplotlib import pyplot as plt
import seaborn as sns
import itertools
import copy
import pandas as pd

In [2]:
def generate_sched(stages, transition_proba, length=15):
    """Generate a random trace of stages (the schedule of one core).

    The first entry is drawn uniformly from ``stages``. Every later entry is
    drawn with ``random.choices`` over ``stages``, using
    ``transition_proba[prev]`` as the weights, where ``prev`` is the entry
    drawn just before.

    Args:
        stages: Non-empty sequence of stage identifiers. Each identifier is
            also used to index ``transition_proba``.
        transition_proba: ``transition_proba[s]`` holds one weight per entry
            of ``stages``; it is used when the previous stage is ``s``.
        length: Number of entries in the returned trace.

    Returns:
        A list with ``length`` stages; an empty list when ``length <= 0``.
    """
    if length <= 0:
        # Without this guard a one-element trace came back even for length 0.
        return []

    current = random.choice(stages)
    trace = [current]
    for _ in range(length - 1):
        current = random.choices(stages, weights=transition_proba[current], k=1)[0]
        trace.append(current)
    return trace

def gen_job(stages):
    """Create a blank job record: one key per stage, every value ``None``."""
    return dict.fromkeys(stages)

def count_done(works, stages):
    """Count the leading run of finished jobs and collect their latencies.

    Jobs are scanned in order; the scan stops at the first job whose last
    stage is still ``None`` (or at the end of ``works``). Finished jobs that
    come after an unfinished one are therefore not counted.

    Args:
        works: Sequence of job records mapping each stage to the timestamp at
            which it was processed, or ``None`` if it has not been processed.
        stages: Non-empty sequence of stages; ``stages[0]`` and ``stages[-1]``
            are the first and last stage of a job.

    Returns:
        ``(count, latencies)``: the number of jobs counted and, for each of
        them, ``last_stage_timestamp - first_stage_timestamp + 1``.
    """
    first_stage, last_stage = stages[0], stages[-1]
    latencies = []
    # Iterating over the sequence (rather than indexing until a None shows up)
    # keeps the scan in bounds when every job is finished or the list is empty.
    for job in works:
        finished_at = job[last_stage]
        if finished_at is None:
            break
        latencies.append(finished_at - job[first_stage] + 1)
    return len(latencies), latencies

In [67]:
def run_simulation_(stages, transitions, total_time, n_cores):
    """Run one simulation of ``n_cores`` cores processing multi-stage jobs.

    Every core gets its own random stage trace from ``generate_sched``. At
    each time step, a core scheduled on stage ``s`` stamps the oldest job that
    stage ``s`` has not stamped yet, provided the job's previous stage is
    already stamped and the job has not been touched during the current time
    step. The first stage can always stamp a fresh job.

    Args:
        stages: Stage identifiers. They are used as list indices and the
            predecessor of a stage is taken as ``stage - 1``, so they need to
            be the consecutive integers ``0 .. len(stages) - 1`` in order.
        transitions: Transition weights forwarded to ``generate_sched``.
        total_time: Number of simulated time steps; also the divisor of the
            throughput, so it must be non-zero.
        n_cores: Number of cores, i.e. number of traces generated.

    Returns:
        ``(throughput, median_latency, mean_latency, tail_latency)`` where
        throughput is the number of jobs reported by ``count_done`` divided by
        ``total_time`` and the tail latency is the 99th percentile. The three
        latency values are NaN when no job was completed.
    """
    trace = [
        generate_sched(stages=stages, transition_proba=transitions, length=total_time)
        for _ in range(n_cores)
    ]
    first_stage = stages[0]
    # lasts[s] is the index in `todo` of the oldest job not yet stamped by stage s.
    lasts = [0 for _ in stages]

    def new_job():
        job = gen_job(stages)
        job['last_update'] = -1
        return job

    # Jobs are created on demand instead of from a fixed-size pool, so the
    # first stage can never run past the end of the list. There is always
    # exactly one untouched job after the last started one, which is also what
    # stops the scan in count_done.
    todo = [new_job()]

    for timestamp, workers in enumerate(zip(*trace)):
        # Lower stages are served first within a time step.
        for worker in sorted(workers):
            job = todo[lasts[worker]]
            if worker == first_stage:
                # Starting this job: make the next blank job available.
                todo.append(new_job())
            elif job[worker - 1] is None or job['last_update'] == timestamp:
                # Previous stage not done yet, or the job already advanced
                # during this time step: this core idles.
                continue
            job[worker] = timestamp
            job['last_update'] = timestamp
            lasts[worker] += 1

    total_work, latencies = count_done(todo, stages)
    throughput = total_work / total_time

    if not latencies:
        # No job completed: report NaN latencies instead of asking numpy for
        # statistics of an empty list.
        nan = float('nan')
        return throughput, nan, nan, nan

    tail_latency = np.percentile(latencies, 99)
    mean_latency = np.mean(latencies)
    median_latency = np.median(latencies)

    return throughput, median_latency, mean_latency, tail_latency

def run_simulation(stages, transitions, total_time, n_cores, runs=1):
    """Repeat the single-run simulation and tabulate the metrics.

    Calls ``run_simulation_`` ``runs`` times with the same arguments and
    returns a DataFrame with one row per run and the columns ``throughput``,
    ``median_latency``, ``mean_latency`` and ``tail_latency``.
    """
    rows = []
    for _ in range(runs):
        rows.append(run_simulation_(stages, transitions, total_time, n_cores))
    columns = ['throughput', 'median_latency', 'mean_latency', 'tail_latency']
    return pd.DataFrame(data=rows, columns=columns)

In [None]:
def rand(x, y, n):
    """Uniform transition weight from stage ``x`` to stage ``y`` among ``n`` stages.

    Staying on the same stage gets weight 0; any other stage gets
    ``1 / (n - 1)``.
    """
    return 0 if x == y else 1 / (n - 1)

def ordered(x, y, n, weight=1):
    """Transition weight from stage ``x`` to stage ``y`` favouring the next stage.

    The distance from ``x`` to ``y`` is ``y - x``, with ``n`` added when that
    difference is negative (so the last stage is followed by the first one).

    Returns:
        0 when ``x == y``; ``weight`` when ``y`` is exactly one step after
        ``x``; otherwise ``(1 - weight) / (n - 2)``.
    """
    if x == y:
        return 0
    step = y - x if y >= x else y - x + n
    return weight if step == 1 else (1 - weight) / (n - 2)

def generate_order_transition(weight, stages):
    """Build the transition matrix given by ``ordered`` for every stage pair.

    The result is a ``len(stages)`` x ``len(stages)`` list of lists, indexed
    by stage value, where entry ``[x][y]`` is
    ``ordered(x, y, len(stages), weight=weight)``.
    """
    count = len(stages)
    matrix = [[0] * count for _ in range(count)]

    for src in stages:
        for dst in stages:
            matrix[src][dst] = ordered(src, dst, count, weight=weight)
    return matrix

In [69]:
# Simulation parameters shared by the cells below.
n_stages = 4
stages = list(range(n_stages))
n_cores = 32
total_time = 1000

# All-zero n_stages x n_stages template for transition matrices.
proba = [[0] * n_stages for _ in range(n_stages)]

# Hand-written 3x3 example matrices (three stages, whereas n_stages is 4).
# `random_transitions` is rebuilt from `proba` in a later cell.
random_transitions = [
    [0, 0.5, 0.5],
    [0.5, 0, 0.5],
    [0.5, 0.5, 0],
]
ordered_transitions = [
    [0, 1, 0],
    [0, 0, 1],
    [1, 0, 0],
]
semi_ordered_transitions = [
    [0, 0.99, 0.01],
    [0.01, 0, 0.99],
    [0.99, 0.01, 0],
]

# Weights given to the "next stage" transition when building ordered matrices.
rand_weights = [0.50, 0.75, 0.90, 0.99, 1]

In [70]:
# Uniform transition matrix over the current stages: weight 0 for staying on
# the same stage and 1 / (n_stages - 1) for every other stage.
random_transitions = copy.deepcopy(proba)
for x, y in itertools.product(stages, stages):
    random_transitions[x][y] = rand(x, y, n_stages)

# Ordered transition matrices pre-computed for each weight in rand_weights.
# Stored under its own name: the following cell defines a function called
# `order_transition`, which would otherwise replace this dict.
order_transitions_by_weight = {i: generate_order_transition(i, stages) for i in rand_weights}

In [71]:
def order_transition(i):
    """Return the ordered transition matrix for weight ``i`` over the notebook-level ``stages``."""
    matrix = generate_order_transition(weight=i, stages=stages)
    return matrix

In [72]:
# Reference values to compare the simulated medians against: each core handles
# at most one stage of one job per time step and a job advances at most one
# stage per step, so at most n_cores / n_stages jobs finish per step and a job
# takes at least n_stages steps.
print("Optimal throughput:", n_cores/n_stages)
print("Optimal latency:", n_stages)

Optimal throughput: 8.0
Optimal latency: 4


In [73]:
# 1000 runs with the `random_transitions` matrix; the cell shows the median of each metric.
# NOTE(review): the result is stored as `df_semi_ordered` although the matrix
# passed in is `random_transitions` — consider renaming.
df_semi_ordered = run_simulation(stages, random_transitions, total_time, n_cores, runs=1000)
df_semi_ordered.median()

throughput         7.824000
median_latency    17.000000
mean_latency      16.421812
tail_latency      26.000000
dtype: float64

In [74]:
# 1000 runs with the ordered matrix for weight 1 (all weight on the next stage).
# NOTE(review): stored as `df_random` although the transitions are the ordered
# ones; the same name is reused by the following cells, so each run overwrites
# the previous result.
df_random = run_simulation(stages, order_transition(1), total_time, n_cores, runs=1000)
df_random.median()

throughput        7.976
median_latency    4.000
mean_latency      4.000
tail_latency      4.000
dtype: float64

In [75]:
# 1000 runs with the ordered matrix for weight .5; overwrites `df_random`.
df_random = run_simulation(stages, order_transition(.5), total_time, n_cores, runs=1000)
df_random.median()

throughput         7.832000
median_latency    16.000000
mean_latency      15.865836
tail_latency      25.000000
dtype: float64

In [76]:
# 1000 runs with the ordered matrix for weight .9; overwrites `df_random`.
df_random = run_simulation(stages, order_transition(.9), total_time, n_cores, runs=1000)
df_random.median()

throughput         7.906000
median_latency    10.000000
mean_latency       9.697151
tail_latency      14.000000
dtype: float64

In [77]:
# 1000 runs with the ordered matrix for weight .99; overwrites `df_random`.
df_random = run_simulation(stages, order_transition(.99), total_time, n_cores, runs=1000)
df_random.median()

throughput        7.954000
median_latency    6.000000
mean_latency      5.678755
tail_latency      8.000000
dtype: float64

In [78]:
# 1000 runs with the ordered matrix for weight .999; overwrites `df_random`.
df_random = run_simulation(stages, order_transition(.999), total_time, n_cores, runs=1000)
df_random.median()

throughput        7.970000
median_latency    4.000000
mean_latency      4.455207
tail_latency      5.500000
dtype: float64

In [79]:
# 1000 runs with the ordered matrix for weight .3; overwrites `df_random`.
df_random = run_simulation(stages, order_transition(.3), total_time, n_cores, runs=1000)
df_random.median()

throughput         7.822000
median_latency    16.000000
mean_latency      16.357977
tail_latency      26.000000
dtype: float64

In [80]:
# 1000 runs with the ordered matrix for weight .2; overwrites `df_random`.
df_random = run_simulation(stages, order_transition(.2), total_time, n_cores, runs=1000)
df_random.median()

throughput         7.823000
median_latency    17.000000
mean_latency      16.837681
tail_latency      26.000000
dtype: float64

In [81]:
# 1000 runs with the ordered matrix for weight .1; overwrites `df_random`.
df_random = run_simulation(stages, order_transition(.1), total_time, n_cores, runs=1000)
df_random.median()

throughput         7.819000
median_latency    17.000000
mean_latency      16.867546
tail_latency      26.000000
dtype: float64