# Graph generation

In [1]:
from sampo.generator.base import SimpleSynthetic

In [2]:
# Passed as `top_border` to `ss.work_graph` for every generated graph.
GRAPHS_TOP_BORDER = 100
# Number of samples to collect per label (the `bin_size` given to `generate_graphs`).
GRAPHS_COUNT = 3

In [3]:
# Shared synthetic generator; used below to create the contractor and the work graphs.
# NOTE(review): 256 is presumably the random seed — confirm against SimpleSynthetic.
ss = SimpleSynthetic(256)

# Metrics calculation

In [4]:
from sampo.scheduler.selection.neural_net import encode_graph
from sampo.schemas.time import Time
from sampo.scheduler.topological.base import TopologicalScheduler
from sampo.scheduler.heft.base import HEFTScheduler, HEFTBetweenScheduler

import pandas as pd

# A single synthetic contractor, reused for every scheduling run.
# NOTE(review): the meaning of the argument 10 is not visible here — confirm against SimpleSynthetic.contractor.
contractors = [ss.contractor(10)]
# The position of a scheduler in this list is the class label given to the
# graphs on which it achieves the smallest execution time.
schedulers = [HEFTScheduler(), HEFTBetweenScheduler(), TopologicalScheduler()]

def argmin(array) -> int:
    """Return the index of the smallest element of ``array``.

    Ties are resolved in favour of the earliest index. The comparison no
    longer depends on a sentinel value, so the result is correct for any
    mutually comparable values, however large.

    :param array: iterable of comparable values
    :return: index of the first minimal element, or 0 if ``array`` is empty
    """
    # min() keeps the first of several equal minima, which preserves the
    # "first best wins" behaviour; the default keeps the 0 result for empty input.
    index, _ = min(enumerate(array), key=lambda pair: pair[1], default=(0, None))
    return index

def generate_graphs(labels_count: int, bin_size: int) -> list[tuple[list[float], int]]:
    """Generate a label-balanced list of encoded work graphs.

    Work graphs are drawn from the module-level ``ss`` generator. Each graph is
    encoded with ``encode_graph`` and scheduled by every entry of
    ``schedulers``; the index of the scheduler with the smallest execution time
    becomes the graph's label. A graph is kept only while the bin of its label
    still has room, and generation stops once every bin holds ``bin_size``
    samples.

    NOTE: the loop only ends when all bins are full, so it does not terminate
    if some label is never produced (e.g. a scheduler that never wins).

    :param labels_count: number of labels (bins) to fill; must cover every
        index ``argmin`` can return, i.e. ``len(schedulers)``
    :param bin_size: number of samples to collect for each label
    :return: ``(encoding, label)`` pairs in the order they were generated
    """
    filled = [0] * labels_count
    samples = []

    while any(count < bin_size for count in filled):
        wg = ss.work_graph(top_border=GRAPHS_TOP_BORDER)
        encoding = encode_graph(wg)
        execution_times = [int(scheduler.schedule(wg, contractors).execution_time)
                           for scheduler in schedulers]
        label = argmin(execution_times)

        # Graphs whose label bin is already full are discarded to keep classes balanced.
        if filled[label] < bin_size:
            filled[label] += 1
            samples.append((encoding, label))
            if filled[label] % 10 == 0:
                print(f'{label}: {filled[label]}/{bin_size} processed')
    return samples

# Collect GRAPHS_COUNT (encoding, label) samples for each scheduler label.
dataset_raw = generate_graphs(len(schedulers), GRAPHS_COUNT)

In [5]:
# Inspect the collected samples; each entry is an (encoding, label) tuple.
dataset_raw

[([171,
   0.7880104460005154,
   66,
   29.34502923976608,
   66,
   1,
   1,
   4,
   133,
   453,
   933,
   993,
   402],
  0),
 ([122,
   0.8480191345736486,
   45,
   28.811475409836067,
   45,
   1,
   1,
   3,
   73,
   306,
   613,
   665,
   274],
  0),
 ([122,
   0.8197457566326756,
   36,
   29.950819672131146,
   36,
   1,
   1,
   3,
   71,
   350,
   751,
   871,
   370],
  1),
 ([37, 1.627292502779792, 26, 41.7027027027027, 26, 1, 1, 2, 51, 101, 101, 66],
  0),
 ([113,
   1.011349432435296,
   37,
   32.11504424778761,
   37,
   1,
   1,
   3,
   61,
   262,
   653,
   713,
   370],
  1),
 ([141,
   1.0761191667103711,
   49,
   33.56382978723404,
   49,
   1,
   1,
   3,
   99,
   526,
   811,
   1031,
   226],
  1),
 ([198,
   0.7400455033235528,
   82,
   27.800505050505052,
   82,
   1,
   1,
   3,
   90,
   412,
   1148,
   1184,
   708],
  2),
 ([164,
   0.8698144942333436,
   68,
   30.048780487804876,
   68,
   1,
   1,
   4,
   137,
   449,
   853,
   905,
   3

In [6]:
import numpy as np

# Each sample is an (encoding, label) pair, i.e. a list next to an int, so the
# nested structure is ragged. NumPy only builds such arrays when dtype=object
# is given explicitly: older releases emit a deprecation warning without it and
# NumPy >= 1.24 raises ValueError.
dataset_transposed = np.array(dataset_raw, dtype=object).T
# Row 0 holds the encodings (one list per sample), row 1 the labels.
df = pd.DataFrame.from_records(dataset_transposed[0])
df['label'] = dataset_transposed[1]
# Encodings can differ in length; missing trailing features are filled with 0.
df.fillna(value=0, inplace=True)
df

  dataset_transposed = np.array(dataset_raw).T


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,label
0,171,0.78801,66,29.345029,66,1,1,4,133,453,933,993,402.0,0
1,122,0.848019,45,28.811475,45,1,1,3,73,306,613,665,274.0,0
2,122,0.819746,36,29.95082,36,1,1,3,71,350,751,871,370.0,1
3,37,1.627293,26,41.702703,26,1,1,2,51,101,101,66,0.0,0
4,113,1.011349,37,32.115044,37,1,1,3,61,262,653,713,370.0,1
5,141,1.076119,49,33.56383,49,1,1,3,99,526,811,1031,226.0,1
6,198,0.740046,82,27.800505,82,1,1,3,90,412,1148,1184,708.0,2
7,164,0.869814,68,30.04878,68,1,1,4,137,449,853,905,322.0,2
8,122,0.900641,39,31.172131,39,1,1,3,79,550,767,1091,178.0,2


In [7]:
# Uniform per-label sample size: the number of rows in the smallest label group,
# which is the most every label can contribute equally.
# GroupBy.size() counts rows per group directly, without applying a Python
# lambda to each group.
dataset_size = int(df.groupby('label').size().min())
dataset_size

3

In [8]:
# Draw `dataset_size` random rows from every label group so that all classes
# are equally represented. GroupBy.sample does this directly and keeps the
# original row index and the `label` column, without relying on
# GroupBy.apply operating on the grouping column (deprecated since pandas 2.2).
df = df.groupby('label').sample(n=dataset_size)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,label
0,171,0.78801,66,29.345029,66,1,1,4,133,453,933,993,402.0,0
3,37,1.627293,26,41.702703,26,1,1,2,51,101,101,66,0.0,0
1,122,0.848019,45,28.811475,45,1,1,3,73,306,613,665,274.0,0
4,113,1.011349,37,32.115044,37,1,1,3,61,262,653,713,370.0,1
2,122,0.819746,36,29.95082,36,1,1,3,71,350,751,871,370.0,1
5,141,1.076119,49,33.56383,49,1,1,3,99,526,811,1031,226.0,1
7,164,0.869814,68,30.04878,68,1,1,4,137,449,853,905,322.0,2
8,122,0.900641,39,31.172131,39,1,1,3,79,550,767,1091,178.0,2
6,198,0.740046,82,27.800505,82,1,1,3,90,412,1148,1184,708.0,2


In [13]:
# Persist the balanced dataset to 'dataset.csv' in the current working directory.
df.to_csv('dataset.csv')