# Graphs generation

In [1]:
from sampo.generator.base import SimpleSynthetic

In [2]:
GRAPHS_TOP_BORDER = 100
GRAPHS_COUNT = 50

In [3]:
ss = SimpleSynthetic(256)

# Metrics calculation

In [4]:
from sampo.scheduler.selection.neural_net import encode_graph
from sampo.schemas.time import Time
from sampo.scheduler.topological.base import TopologicalScheduler
from sampo.scheduler.heft.base import HEFTScheduler, HEFTBetweenScheduler

import pandas as pd

contractors = [ss.contractor(10)]
schedulers = [HEFTScheduler(), HEFTBetweenScheduler(), TopologicalScheduler()]

def argmin(array) -> int:
    res = 0
    res_v = int(Time.inf())
    for i, v in enumerate(array):
        if v < res_v:
            res_v = v
            res = i
    return res

def generate_graphs(labels_count: int, bin_size: int) -> list[list[int], int]:
    bins = [0 for _ in range(labels_count)]
    result = []

    while any((bin < bin_size for bin in bins)):
        wg = ss.work_graph(top_border=GRAPHS_TOP_BORDER)
        encoding = encode_graph(wg)
        schedulers_results = [int(scheduler.schedule(wg, contractors).execution_time) for scheduler in schedulers]
        generated_label = argmin(schedulers_results)

        if bins[generated_label] < bin_size:
            bins[generated_label] += 1
            result.append((encoding, generated_label))
            if bins[generated_label] % 10 == 0:
                print(f'{generated_label}: {bins[generated_label]}/{bin_size} processed')
    return result

dataset_raw = generate_graphs(len(schedulers), GRAPHS_COUNT)
dataset_raw

0: 10/50 processed
0: 20/50 processed
0: 30/50 processed
0: 40/50 processed
0: 50/50 processed
1: 10/50 processed
1: 20/50 processed
1: 30/50 processed
2: 10/50 processed
1: 40/50 processed
1: 50/50 processed
2: 20/50 processed
2: 30/50 processed
2: 40/50 processed
2: 50/50 processed


[([171, 0.7880104460005154, 66, 29.34502923976608, 66], 0),
 ([122, 0.8480191345736486, 45, 28.811475409836067, 45], 0),
 ([122, 0.8197457566326756, 36, 29.950819672131146, 36], 1),
 ([42, 1.0390333724681309, 24, 33.20238095238095, 24], 0),
 ([113, 1.011349432435296, 37, 32.11504424778761, 37], 1),
 ([128, 0.9561229379408731, 37, 31.98046875, 37], 0),
 ([175, 0.7664429674261881, 60, 29.002857142857142, 60], 0),
 ([37, 1.44096060006218, 23, 39.13513513513514, 23], 0),
 ([168, 0.8384573878381748, 72, 28.577380952380953, 72], 0),
 ([47, 1.4044640112352238, 29, 39.11702127659574, 29], 0),
 ([116, 1.0673168645250388, 41, 32.40948275862069, 41], 0),
 ([39, 1.405978028788074, 20, 38.833333333333336, 20], 0),
 ([122, 0.9544665669155522, 41, 31.668032786885245, 41], 0),
 ([115, 1.2330955892622233, 41, 36.46521739130435, 41], 0),
 ([155, 0.9049337858882347, 47, 30.88064516129032, 47], 0),
 ([125, 0.7572827985194056, 48, 27.504, 48], 0),
 ([125, 0.9227506528732107, 41, 31.212, 41], 0),
 ([112, 1.

In [5]:
import numpy as np

dataset_transposed = np.array(dataset_raw).T
df = pd.DataFrame.from_records(dataset_transposed[0])
df['label'] = dataset_transposed[1]
df

  dataset_transposed = np.array(dataset_raw).T


Unnamed: 0,0,1,2,3,4,label
0,171,0.788010,66,29.345029,66,0
1,122,0.848019,45,28.811475,45,0
2,122,0.819746,36,29.950820,36,1
3,42,1.039033,24,33.202381,24,0
4,113,1.011349,37,32.115044,37,1
...,...,...,...,...,...,...
145,187,0.697383,70,28.374332,70,2
146,139,0.930178,47,30.953237,47,2
147,156,0.896968,68,30.506410,68,2
148,197,0.637514,78,27.444162,78,2


In [6]:
# calculate the minimum uniform sampling size
dataset_size = min(df.groupby('label', group_keys=False).apply(lambda x: len(x)))
dataset_size

50

In [7]:
df = df.groupby('label', group_keys=False).apply(lambda x: x.sample(dataset_size))
df

Unnamed: 0,0,1,2,3,4,label
19,44,1.057673,27,32.613636,27,0
41,37,1.761701,21,44.310811,21,0
47,124,1.055308,44,33.677419,44,0
40,163,0.875346,70,30.714724,70,0
43,167,0.730318,74,26.955090,74,0
...,...,...,...,...,...,...
131,164,0.757259,66,28.460366,66,2
85,229,0.765796,88,29.262009,88,2
125,188,0.726381,66,29.090426,66,2
147,156,0.896968,68,30.506410,68,2


In [8]:
df.to_csv('dataset.csv')