In [1]:
import json
from itertools import product

In [None]:
# Experiment grid. The loop below evaluates the Cartesian product of these
# lists; each list currently holds the single setting under study, except
# the side-task names. "mix" is not a task of its own: it is handled by a
# dedicated branch that iterates over `mix_tasks`.
side_task_names = ["resnet18", "resnet50", "vgg19", "sgd", "pr", "image", "mix"]
deepspeed_sizes = ["xxxlarge"]
side_task_type = ["training"]
implementation_types = ["iterative"]
side_task_batch_sizes = [64]
main_task_epoch_sizes = [128]
mini_batch_sizes = [4]

# Reference throughput per side task, used to convert measured side-task
# progress into an equivalent amount of time on the cheaper GPU.
# NOTE(review): presumably measured on an RTX 3080 in progress units per
# unit of main-task time -- confirm the units against the measurement runs.
side_task_throughputs_3080 = dict(
    resnet18=998.7,
    resnet50=393.4,
    vgg19=161.8,
    pr=126.3,
    sgd=1.5,
    image=7.8,
)

# GPU prices; only their ratio (ada6000 / 3080) enters the calculations.
# NOTE(review): presumably USD per hour -- confirm the source of the figures.
price_3080, price_ada6000 = 0.18, 3.96

# Side tasks that run together in the "mix" experiment. The position of a
# task in this list is the index used in its progress-file name.
mix_tasks = [
    "pr",
    "resnet18",
    "image",
    "vgg19",
]

def read_elapsed_time(path):
    """Return the ``"time"`` entry of the JSON file at *path*.

    Raises:
        FileNotFoundError: if *path* does not exist.
        KeyError: if the JSON object has no ``"time"`` key.
    """
    with open(path, "r") as f:
        return json.load(f)["time"]


def read_side_task_progress(path):
    """Return the sum of the integers stored one per line in *path*.

    A missing file counts as zero progress, so a stage that produced no
    progress file contributes nothing. Blank lines are ignored.

    Raises:
        ValueError: if a non-blank line is not an integer. This used to be
            swallowed, which silently dropped the whole file's progress.
    """
    try:
        with open(path, "r") as f:
            return sum(int(line.strip()) for line in f if line.strip())
    except FileNotFoundError:
        return 0


def mix_dollar_saving(
    progresses, reference_throughputs, price_ratio, main_task_time, vanilla_time
):
    """Return the relative dollar saving of a run with several side tasks.

    Args:
        progresses: measured progress of each side task.
        reference_throughputs: reference throughput of each side task, in
            the same order as *progresses*.
        price_ratio: price of the main-task GPU divided by the price of the
            reference GPU.
        main_task_time: main-task time with side tasks running.
        vanilla_time: main-task time of the baseline without side tasks.

    Returns:
        ``(sum(progress / throughput) / price_ratio
        - (main_task_time - vanilla_time)) / vanilla_time``.
    """
    # Accumulate left to right so the floating-point result matches a
    # hand-written ``a0 + a1 + ...`` expression.
    reference_time = 0
    for progress, throughput in zip(progresses, reference_throughputs):
        reference_time += progress / throughput
    return (
        reference_time / price_ratio - (main_task_time - vanilla_time)
    ) / vanilla_time


def single_task_costs(
    side_task_progress, reference_throughput, price_ratio, main_task_time, vanilla_time
):
    """Return ``(hourly_cost, dollar_saving)`` for a run with one side task.

    Both values are unrounded. Arguments have the same meaning as in
    ``mix_dollar_saving``, for a single side task.
    """
    side_task_throughput = side_task_progress / main_task_time
    hourly_cost = (
        (main_task_time * price_ratio)
        - (side_task_progress / reference_throughput)
    ) / main_task_time
    # NOTE(review): ``1 - x - 1`` cancels to ``-x``, so this evaluates to
    # (main_task_time / vanilla_time) * x and, unlike mix_dollar_saving, does
    # not subtract the main-task slowdown. The trailing ``- 1`` may have been
    # meant to sit outside the parentheses. The expression is kept as written
    # because previously recorded results were produced with it -- confirm
    # the intended formula before changing it.
    dollar_saving = -(main_task_time / vanilla_time) * (
        1
        - (side_task_throughput / reference_throughput) / price_ratio
        - 1
    )
    return hourly_cost, dollar_saving


# ``__name__`` is "__main__" both in a notebook kernel and when run as a
# script, so the report is still produced there; the guard only keeps the
# file-reading loop from running when the helpers above are imported.
if __name__ == "__main__":
    price_ratio = price_ada6000 / price_3080

    # The loop variable is called ``task_type`` so that it does not overwrite
    # the module-level ``side_task_type`` list it is drawn from.
    for (
        side_task_name,
        task_type,
        implementation_type,
        deepspeed_size,
        side_task_batch_size,
        main_task_epoch_size,
        mini_batch_size,
    ) in product(
        side_task_names,
        side_task_type,
        implementation_types,
        deepspeed_sizes,
        side_task_batch_sizes,
        main_task_epoch_sizes,
        mini_batch_sizes,
    ):
        # The baseline path does not depend on the side task.
        vanilla_output_prefix = f"../../out/e2e_vanilla_deepspeed_baseline_ada6000_{deepspeed_size}_{main_task_epoch_size}_{mini_batch_size}"
        if side_task_name in ("resnet18", "resnet50", "vgg19"):
            free_ride_output_prefix = f"../../e2e_freeride_ada6000_{deepspeed_size}_{side_task_name}_{task_type}_{implementation_type}_{side_task_batch_size}_{mini_batch_size}"
        elif side_task_name in ("pr", "sgd", "image", "mix"):
            free_ride_output_prefix = f"../../e2e_freeride_ada6000_{deepspeed_size}_{side_task_name}_{mini_batch_size}"
        else:
            # Without this, an unknown name would silently reuse the prefix
            # left over from the previous iteration.
            raise ValueError(f"unknown side task: {side_task_name!r}")

        main_task_time = read_elapsed_time(
            f"{free_ride_output_prefix}_{main_task_epoch_size}_stage0.json"
        )
        vanilla_time = read_elapsed_time(f"{vanilla_output_prefix}_stage0.json")
        main_time_change = (main_task_time - vanilla_time) / vanilla_time
        main_time_change_percent = round(main_time_change * 100, 2)

        if side_task_name == "mix":
            # One progress file per mixed task, named with the task's index.
            # Every task gets an entry (0 when its file is missing) so the
            # list stays aligned with ``mix_tasks``.
            side_task_progresses = []
            for i, mix_task in enumerate(mix_tasks):
                side_task_progress = read_side_task_progress(
                    f"{free_ride_output_prefix}_{i}_{i}_side_task.txt"
                )
                side_task_progresses.append(side_task_progress)
                print(
                    f"Mix_task({mix_task}): side_task_progress: {side_task_progress}"
                )

            dollar_saving = mix_dollar_saving(
                side_task_progresses,
                [side_task_throughputs_3080[task] for task in mix_tasks],
                price_ratio,
                main_task_time,
                vanilla_time,
            )
            dollar_saving = round(dollar_saving, 4)
            dollar_saving_percent = round(dollar_saving * 100, 2)
            print(f"Mix_task(all): main-task overhead: {main_time_change_percent}%, dollar saving: {dollar_saving_percent}%")
        else:
            # Progress is summed over the per-stage files of stages 0-3.
            side_task_progress = sum(
                read_side_task_progress(
                    f"{free_ride_output_prefix}_{stage}_side_task.txt"
                )
                for stage in (0, 1, 2, 3)
            )
            hourly_cost, dollar_saving = single_task_costs(
                side_task_progress,
                side_task_throughputs_3080[side_task_name],
                price_ratio,
                main_task_time,
                vanilla_time,
            )
            hourly_cost = round(hourly_cost, 4)
            dollar_saving = round(dollar_saving, 4)
            dollar_saving_percent = round(dollar_saving * 100, 2)

            print(
                f"{side_task_name}: hourly_cost: {hourly_cost}, main-task overhead: {main_time_change_percent}%, dollar saving: {dollar_saving_percent}%"
            )
    


sgd: hourly_cost: 17.6782, main-task overhead: 1.16%, dollar saving: 19.87%
pr: hourly_cost: 21.3282, main-task overhead: 0.05%, dollar saving: 3.06%
image: hourly_cost: 20.9415, main-task overhead: 0.74%, dollar saving: 4.85%
