In [10]:
import json
import numpy as np
import pandas as pd

def generate_jobs(num_jobs, output_file):
    jobs = []

    # Poisson distribution parameter (average interval between arrivals)
    lambda_poisson = 1.0

    # Initialize time for Poisson arrivals
    current_time = 0

    for job_id in range(num_jobs):
        # Simulate arriving time with Poisson distribution
        arriving_interval = np.random.poisson(lambda_poisson)
        current_time += arriving_interval

        # Generate random job attributes
        nb_tasks = np.random.randint(1, 6)  # Random number of tasks between 1 and 5
        time = np.random.uniform(0.1, 2)  # Random time between 0.1 and 1.0
        dataset_size = np.random.randint(2000, 10000)  # Random dataset size between 1000 and 10000
        id_dataset = job_id  # Using job_id as id_dataset for simplicity

        # Create job entry
        job = {
            "job_id": job_id,
            "nb_tasks": nb_tasks,
            "time": time,
            "dataset_size": dataset_size,
            "id_dataset": id_dataset,
            "arriving_time": current_time
        }

        jobs.append(job)

    # Write jobs to a JSON file
    with open(output_file, "w") as file:
        json.dump(jobs, file, indent=4)

# Generate 1000 jobs and save to "jobs.json"
generate_jobs(1000, "jobs.json")


In [11]:
df = pd.read_json("./jobs.json")
print(df.head(20))

    job_id  nb_tasks      time  dataset_size  id_dataset  arriving_time
0        0         2  1.543383          2547           0              0
1        1         2  1.245508          3318           1              0
2        2         5  0.551782          5753           2              1
3        3         2  0.487239          2267           3              2
4        4         5  1.063043          7683           4              4
5        5         4  0.347470          7109           5              5
6        6         2  0.278882          7653           6              7
7        7         1  1.082009          3618           7              9
8        8         4  1.447383          3486           8             13
9        9         2  0.605223          8871           9             15
10      10         3  1.415752          2484          10             17
11      11         1  1.692647          2934          11             18
12      12         5  1.091883          4967          12        

In [12]:
import pandas as pd


jobs = df[df["arriving_time"] == 6]
jobs

Unnamed: 0,job_id,nb_tasks,time,dataset_size,id_dataset,arriving_time


In [13]:
count = df[df["arriving_time"] == 6].shape[0]
print(f"Number of jobs with arriving_time == 6: {count}")


Number of jobs with arriving_time == 6: 0


In [None]:
import random
import numpy as np

def generate_jobs(n_nodes, target_load=(0.7, 0.8), time_horizon=100, lambda_arrival=2):
    """
    Génère des jobs avec des caractéristiques aléatoires tout en maintenant la charge cible.
    
    :param n_nodes: Nombre de nœuds dans l'infrastructure.
    :param target_load: Tuple (min_load, max_load) représentant la charge cible en pourcentage.
    :param time_horizon: Temps total pour la simulation.
    :param lambda_arrival: Paramètre lambda pour la loi de Poisson (arrivée des jobs).
    :return: Liste des jobs générés.
    """
    # Capacité totale de traitement (nombre total de tâches par unité de temps)
    total_capacity = n_nodes

    # Liste pour stocker les jobs générés
    jobs = []

    # Temps courant dans la simulation
    current_time = 0

    # Charge actuelle (nombre de tâches en cours d'exécution)
    active_tasks = 0

    while current_time < time_horizon:

        inter_arrival_time = np.random.exponential(1 / lambda_arrival)
        current_time += inter_arrival_time
        
        if current_time > time_horizon:
            break

        nb_tasks = random.randint(1, 10)  # Par exemple, entre 1 et 10 tâches
        time_per_task = random.uniform(0.1, 1.0)  # Temps pour chaque tâche (entre 0.1 et 1.0 unités)
        dataset_size = random.randint(100, 10000)  # Taille du dataset (entre 100 et 10,000)
        job_time = time_per_task * nb_tasks

        task_load = nb_tasks / total_capacity
        predicted_load = (active_tasks / total_capacity) + task_load

        if predicted_load > target_load[1]:
            continue  # Ignorer ce job pour maintenir la charge dans la limite

        job = {
            "job_id": len(jobs),
            "nb_tasks": nb_tasks,
            "time": job_time,
            "dataset_size": dataset_size,
            "id_dataset": random.randint(0, 100),  # Identifiant du dataset
            "arriving_time": current_time
        }
        jobs.append(job)

        # Mettre à jour la charge active
        active_tasks += nb_tasks

        # Libérer les tâches complétées (simplification : libération instantanée après le temps)
        active_tasks = max(0, active_tasks - int(job_time))

    return jobs


# Exemple d'utilisation
n_nodes = 10  # Nombre de nœuds
jobs = generate_jobs(n_nodes, target_load=(0.7, 0.8), time_horizon=5000, lambda_arrival=2)

# Afficher les jobs générés
for job in jobs:
    print(job)


{'job_id': 0, 'nb_tasks': 4, 'time': 1.3923553213608204, 'dataset_size': 5179, 'id_dataset': 3, 'arriving_time': 0.2828685235281022}
{'job_id': 1, 'nb_tasks': 2, 'time': 0.2765788359559312, 'dataset_size': 3607, 'id_dataset': 6, 'arriving_time': 0.9485319640619768}
{'job_id': 2, 'nb_tasks': 3, 'time': 2.076659619604669, 'dataset_size': 3455, 'id_dataset': 82, 'arriving_time': 1.2860256594728794}
{'job_id': 3, 'nb_tasks': 1, 'time': 0.36194577590724064, 'dataset_size': 2849, 'id_dataset': 30, 'arriving_time': 2.497960052885593}
{'job_id': 4, 'nb_tasks': 1, 'time': 0.7928296130676595, 'dataset_size': 7413, 'id_dataset': 33, 'arriving_time': 3.0353254544184045}


In [None]:
import random
import numpy as np

def generate_jobs(n_nodes, num_jobs, target_load=(0.7, 0.8), lambda_arrival=2):

    total_capacity = n_nodes
    jobs = []
    current_time = 0
    active_tasks = 0
    id_dataset = 0

    while len(jobs) < num_jobs:
        
        inter_arrival_time = np.random.exponential(1 / lambda_arrival)
        current_time += inter_arrival_time

        nb_tasks = random.randint(1, 10)
        time_per_task = random.uniform(0.1, 1.0) 
        dataset_size = random.randint(100, 10000)
        job_time = time_per_task * nb_tasks

        
        task_load = nb_tasks / total_capacity
        predicted_load = (active_tasks / total_capacity) + task_load

        
        if predicted_load > target_load[1]:
            continue 

        job = {
            "job_id": len(jobs),
            "nb_tasks": nb_tasks,
            "time": job_time,
            "dataset_size": dataset_size,
            "id_dataset": id_dataset,
            "arriving_time": current_time
        }
        jobs.append(job)
        id_dataset += 1
        active_tasks += nb_tasks
        active_tasks = max(0, active_tasks - int(job_time))

    return jobs


# Exemple d'utilisation
n_nodes = 10  # Nombre de nœuds
num_jobs = 20  # Nombre de jobs à générer
jobs = generate_jobs(n_nodes, num_jobs, target_load=(0.7, 0.8), lambda_arrival=2)

# Afficher les jobs générés
for job in jobs:
    print(job)


In [None]:
import random

def generate_jobs(num_jobs, total_time, load_factor=0.75, task_duration_range=(5, 15)):
    """
    Génère un ensemble de jobs composés de tasks pour garantir une charge de 75%.

    Args:
        num_jobs (int): Nombre total de jobs à générer.
        total_time (int): Temps total disponible (e.g., en secondes ou minutes).
        load_factor (float): Facteur de charge (par défaut 0.75, soit 75%).
        task_duration_range (tuple): Intervalle des durées des tasks (min, max).

    Returns:
        list: Liste de jobs où chaque job est un dictionnaire contenant des tasks et leurs durées.
    """
    jobs = []
    effective_time = total_time * load_factor  # Temps total à utiliser pour atteindre la charge cible

    for job_id in range(num_jobs):
        job = {
            "job_id": job_id,
            "tasks": []
        }

        remaining_time = effective_time / num_jobs  # Temps alloué à ce job
        while remaining_time > 0:
            task_duration = random.randint(*task_duration_range)

            # Ajouter la tâche seulement si elle respecte le temps restant
            if task_duration <= remaining_time:
                job["tasks"].append({
                    "task_id": len(job["tasks"]),
                    "duration": task_duration
                })
                remaining_time -= task_duration
            else:
                break

        jobs.append(job)

    return jobs

# Exemple d'utilisation
if __name__ == "__main__":
    num_jobs = 20
    total_time = 1000  # Temps total disponible
    jobs = generate_jobs(num_jobs, total_time)

    for job in jobs:
        print(f"Job ID: {job['job_id']}, Tasks: {len(job['tasks'])}")


Job ID: 0, Tasks: 4
Job ID: 1, Tasks: 4
Job ID: 2, Tasks: 4
Job ID: 3, Tasks: 3
Job ID: 4, Tasks: 3
Job ID: 5, Tasks: 3
Job ID: 6, Tasks: 4
Job ID: 7, Tasks: 4
Job ID: 8, Tasks: 3
Job ID: 9, Tasks: 3
Job ID: 10, Tasks: 3
Job ID: 11, Tasks: 3
Job ID: 12, Tasks: 2
Job ID: 13, Tasks: 2
Job ID: 14, Tasks: 3
Job ID: 15, Tasks: 3
Job ID: 16, Tasks: 3
Job ID: 17, Tasks: 3
Job ID: 18, Tasks: 4
Job ID: 19, Tasks: 3


In [13]:
import numpy as np 
import json
def generate_jobs_withThreshold(num_jobs,nb_nods, output_file):
    jobs = []

    # Poisson distribution parameter (average interval between arrivals)
    lambda_poisson = 1.0

    current_time = 1
    nb_task = 0
    tasks_duration = 0
    starting_time = 0
    job_id = 0
    index = 1
    while index <= num_jobs:


        arriving_interval = np.random.poisson(lambda_poisson)
        current_time += arriving_interval
        

        taux = (((tasks_duration / nb_nods)* 100) / (current_time*nb_nods))
        while taux < 70:
            job_id +=1
            nb_tasks = np.random.randint(1, 10)  
            time = np.random.uniform(0.5, 11)  
            dataset_size = np.random.randint(10000, 100000) 
            id_dataset = job_id  # Using job_id as id_dataset for simplicity

            # Create job entry
            job = {
                "job_id": job_id,
                "nb_tasks": nb_tasks,
                "time": time,
                "dataset_size": dataset_size,
                "id_dataset": id_dataset,
                "arriving_time": current_time
            }
            tasks_duration += job['nb_tasks']*job['time']
            jobs.append(job)
            taux = (((tasks_duration / nb_nods)* 100) / (current_time*nb_nods))
        index +=1
        print(f'le taux est de {taux}')
    with open(output_file, "w") as file:
        json.dump(jobs, file, indent=4)
    
# Generate 1000 jobs and save to "jobs.json"
generate_jobs_withThreshold(100,10, "jobs.json")

le taux est de 82.66612871780401
le taux est de 78.92640486806395
le taux est de 78.92640486806395
le taux est de 81.39752227440417
le taux est de 73.83790863099064
le taux est de 70.34021403134578
le taux est de 73.13413727706299
le taux est de 73.13413727706299
le taux est de 70.91053402634634
le taux est de 70.91053402634634
le taux est de 71.86387929737202
le taux est de 70.56904214318679
le taux est de 70.56904214318679
le taux est de 70.56904214318679
le taux est de 71.21788537168705
le taux est de 71.21788537168705
le taux est de 71.85317302946854
le taux est de 71.85317302946854
le taux est de 70.18806025049017
le taux est de 71.56610749086546
le taux est de 70.6139871814487
le taux est de 70.48447480341902
le taux est de 70.48447480341902
le taux est de 70.48447480341902
le taux est de 71.04334904920034
le taux est de 72.82172761883683
le taux est de 72.82172761883683
le taux est de 70.61500860008421
le taux est de 70.61500860008421
le taux est de 70.61500860008421
le taux est

In [12]:
import pandas as pd
import numpy as np
df = pd.read_json("../JobsFiles/jobs1000.json")
depart_time = 0
total_time = 0
taux = 0

for a_time in range(1,df['arriving_time'].max()+1):

    jobs = df[df["arriving_time"] == a_time]
    if jobs.shape[0] != 0:
        for i, row in jobs.iterrows():
            total_time += row['time']*row['nb_tasks']

    #total_time +=a_time
    jobs = df[df["arriving_time"] != a_time]
    taux = (((total_time / 10)* 100) / (a_time*10))

    print(f'Charge du reseau a {a_time} est {taux} ')

Charge du reseau a 1 est 0.0 
Charge du reseau a 2 est 1.623236137598519 
Charge du reseau a 3 est 1.0821574250656794 
Charge du reseau a 4 est 0.9794313444582938 
Charge du reseau a 5 est 0.7835450755666351 
Charge du reseau a 6 est 0.6529542296388625 
Charge du reseau a 7 est 0.5886402540120871 
Charge du reseau a 8 est 0.747040776633143 
Charge du reseau a 9 est 1.1411728755656154 
Charge du reseau a 10 est 1.0270555880090537 
Charge du reseau a 11 est 0.9336868981900489 
Charge du reseau a 12 est 0.8558796566742115 
Charge du reseau a 13 est 1.0925652519480344 
Charge du reseau a 14 est 1.014524876808889 
Charge du reseau a 15 est 0.9468898850216299 
Charge du reseau a 16 est 1.2002462748360547 
Charge du reseau a 17 est 1.2719954584716462 
Charge du reseau a 18 est 1.2013290441121103 
Charge du reseau a 19 est 1.5993551480536488 
Charge du reseau a 20 est 1.8856874608788456 
Charge du reseau a 21 est 1.8185475335162158 
Charge du reseau a 22 est 1.7813369635026883 
Charge du resea