In [55]:
# (min, max) bandwidth ranges: one low band and one high band.
bandwidths_ranges = [
    (12, 128),
    (600, 800),
]

# (min, max) vCPU ranges: one small band and one large band.
vcpus_ranges = [
    (1, 2),
    (6, 10),
]

# Inclusive bounds on the number of tasks drawn for each generated job.
min_nb_tasks_per_job, max_nb_tasks_per_job = 1, 20

# Bounds on dataset sizes (MB, per the names).
# NOTE(review): 1025 may have been meant as 1024 -- confirm.
min_dataset_size_MB = 1025
max_dataset_size_MB = 40960



import numpy as np 
import json
import sys, os
import random
import time


def arrival_times(nb_nodes=10, lambda_ = 100):
    """Return ``nb_nodes`` cumulative arrival instants.

    Consecutive instants are separated by gaps drawn from
    ``random.expovariate`` with rate ``1 / lambda_``.

    The module-level ``random`` generator is reseeded with 42 on every call,
    so the returned sequence is reproducible and any caller sharing that
    generator continues from the state left behind here.

    Args:
        nb_nodes: Number of arrival instants to produce.
        lambda_: Inverse of the rate passed to ``random.expovariate``.

    Returns:
        A list of increasing floats, one per arrival.
    """
    random.seed(42)
    elapsed = 0
    instants = []
    for _ in range(nb_nodes):
        gap = random.expovariate(1 / lambda_)
        elapsed = elapsed + gap
        instants.append(elapsed)
    return instants

def _job_type(task_duration, dataset_size):
    """Map a (task duration, dataset size) pair to its job category (0-3)."""
    if task_duration <= 50 and dataset_size >= 10240:
        return 0
    if task_duration > 50 and dataset_size <= 10240:
        return 1
    if task_duration > 50 and dataset_size > 10240:
        return 2
    return 3


def generatSetOfJobs(path_to_file, nb_jobs=200):
    """Generate a reproducible set of synthetic jobs and dump it as JSON.

    The number of jobs is taken from ``nb_jobs``. (It was previously read
    from a module-level ``num_jobs`` variable, which silently ignored this
    argument and raised ``NameError`` when that global was not defined.)

    Args:
        path_to_file: Path of the JSON file to write (overwritten).
        nb_jobs: Number of jobs to generate.

    Returns:
        The list of job dicts that was written. Each dict has the keys
        ``nb_tasks``, ``task_duration``, ``dataset_size``, ``id_dataset``,
        ``arriving_time``, ``type_of_job`` and ``job_id``.
    """
    random.seed(42)

    # arrival_times() reseeds the shared generator itself; the draws below
    # continue from whatever state it leaves behind.
    arriving_times_list = arrival_times(nb_nodes=nb_jobs, lambda_=10)

    jobs = []
    for job_id in range(nb_jobs):
        # Draw order (tasks, duration, dataset size) is kept fixed so that
        # the generated workload stays reproducible.
        nb_tasks = random.randint(min_nb_tasks_per_job, max_nb_tasks_per_job)
        task_duration = random.choice([1, 10, 50, 70, 80, 100])
        dataset_size = random.choice([1024, 2048, 5120, 10240, 20480, 40960])

        jobs.append({
            "nb_tasks": nb_tasks,
            "task_duration": task_duration,
            "dataset_size": dataset_size,
            "id_dataset": job_id,  # one dataset per job
            "arriving_time": arriving_times_list[job_id],
            "type_of_job": _job_type(task_duration, dataset_size),
            "job_id": job_id,
        })

    with open(path_to_file, "w") as file:
        json.dump(jobs, file, indent=4)

    return jobs




import pandas as pd
import random



def generateHeterogeneousInfrastructureEquilibre(nb_node, path):
    """Generate a balanced heterogeneous infrastructure and save it as CSV.

    Nodes are spread over four categories (two vCPU ranges crossed with two
    bandwidth ranges) with shares 24% / 26% / 24% / 26%. Per-category counts
    are the truncated shares; any nodes lost to truncation are handed back
    to the categories with the largest fractional parts, so exactly
    ``nb_node`` nodes are produced (plain truncation gave only 8 nodes for
    ``nb_node=10``). When the truncated counts already add up to ``nb_node``
    the result is unchanged.

    The module-level ``random`` generator is reseeded with 42, so the output
    is reproducible.

    Args:
        nb_node: Total number of nodes to generate.
        path: Destination CSV file (columns ``bandwidth``,
            ``computation_nodes``, ``energy_consumption``).

    Returns:
        A list of ``[bandwidth, vcpu, energy]`` lists, one per node, grouped
        by category in increasing category order.
    """
    random.seed(42)

    category_shares = {
        0: 0.24,
        1: 0.26,
        2: 0.24,
        3: 0.26,
    }

    category_ranges = {
        0: {"VCPU": (1, 2.1), "BW": (12, 128)},
        1: {"VCPU": (6, 10),  "BW": (12, 128)},
        2: {"VCPU": (1, 2.1), "BW": (600, 800)},
        3: {"VCPU": (6, 10),  "BW": (600, 800)},
    }

    categories = sorted(category_shares)
    exact = {cat: category_shares[cat] * nb_node for cat in categories}
    counts = {cat: int(exact[cat]) for cat in categories}

    # Largest-remainder allocation of the nodes dropped by truncation.
    # sorted() is stable, so ties go to the lowest category index.
    missing = max(nb_node - sum(counts.values()), 0)
    by_fraction = sorted(
        categories, key=lambda cat: exact[cat] - counts[cat], reverse=True
    )
    for cat in by_fraction[:missing]:
        counts[cat] += 1

    nodes_config = []
    for cat in categories:
        vcpu_range = category_ranges[cat]["VCPU"]
        bw_range = category_ranges[cat]["BW"]
        for _ in range(counts[cat]):
            # Draw order (vcpu, bandwidth, energy) is kept fixed for
            # reproducibility.
            vcpu = random.uniform(*vcpu_range)
            bw = random.randint(*bw_range)
            energy = random.uniform(0.1, 2.1)
            nodes_config.append([bw, vcpu, energy])

    # Convert to a DataFrame and persist it.
    df = pd.DataFrame(
        nodes_config,
        columns=["bandwidth", "computation_nodes", "energy_consumption"],
    )
    df.to_csv(path, index=False)

    return nodes_config


def translatToMinizinc(nodes_config, jobs, path_to_file):
    """Write the nodes and jobs as ``name = value;`` lines to a .dzn file.

    Args:
        nodes_config: Sequence of nodes; index 0 of each node is used as its
            bandwidth and index 1 as its cpu count.
        jobs: Sequence of job dicts providing ``dataset_size``,
            ``task_duration``, ``nb_tasks`` and ``arriving_time``.
        path_to_file: Destination file (overwritten).

    ``makespan``, ``energy_consumptions`` and ``node_free_timespan`` are
    written with fixed placeholder values.
    """
    bandwidths = []
    cpus = []
    for node in nodes_config:
        bandwidths.append(node[0])
        cpus.append(node[1] * 1)

    params = dict(
        nb_nodes=len(nodes_config),
        nb_data=len(jobs),
        makespan=221506,
        data_sizes=[job["dataset_size"] for job in jobs],
        work_duration=[job["task_duration"] for job in jobs],
        bandwidths=bandwidths,
        cpus=cpus,
        energy_consumptions=[[]],
        nb_works=[job["nb_tasks"] for job in jobs],
        node_free_timespan=[],
        # Arrival times are truncated to whole units.
        starting_times=[int(job["arriving_time"]) for job in jobs],
    )

    def render(name, value):
        # A 2-D "transfers_time" table would be written row by row between
        # pipes; the params built above contain no such key at present.
        if name == "transfers_time":
            rows = "".join(
                "|" + ", ".join(str(cell) for cell in row) + ","
                for row in value
            )
            return "transfers_time = [" + rows + "|];\n"
        # Everything else relies on Python's own repr of the value.
        return f"{name} = {value};\n"

    with open(path_to_file, "w") as dzn_file:
        dzn_file.writelines(render(name, value) for name, value in params.items())

# Instance sizes to generate, unpacked below as (num_jobs, num_nodes) pairs.
insts = [(5,10),(10,50),(20,50),(20,100),(50,100)]

# NOTE: this loop binds `num_jobs` and `num_nodes` at module level, so they
# are visible as globals to every function in this file; rename with care.
for num_jobs, num_nodes in insts:
    # Machine-specific absolute output directory for this instance. Nothing
    # here creates it, so it must already exist for the writes below to work.
    path = f'/Users/cherif/Documents/Traveaux/simulator-for-CSP-model/simulator/workloads/GeneratedJobs/inst1-{num_jobs}j-{num_nodes}Nodes/'
    # Writes infrastructure.csv and returns the node list.
    nodes_config = generateHeterogeneousInfrastructureEquilibre(num_nodes, path+"infrastructure.csv" )
    # Writes jobs.json and returns the job list.
    jobs = generatSetOfJobs(path+"jobs.json", nb_jobs=num_jobs)
    # Writes the MiniZinc data file built from the nodes and jobs above.
    translatToMinizinc( nodes_config, jobs, path+f"data{num_jobs}_{num_nodes}.dzn")

In [59]:

# Preview the first 50 arrival instants (lambda_ = 100), truncated to ints.
print(list(map(int, arrival_times(50, 100))))




[102, 104, 136, 161, 295, 408, 630, 640, 694, 697, 722, 792, 795, 817, 922, 1001, 1026, 1115, 1281, 1281, 1445, 1565, 1607, 1623, 1939, 1980, 1989, 2000, 2188, 2280, 2445, 2576, 2652, 3014, 3062, 3142, 3319, 3415, 3613, 3699, 3821, 3826, 3852, 3886, 3894, 3921, 3931, 3964, 4065, 4110]
