# General

In [None]:
import matplotlib.pyplot as plt
import re
from matplotlib import rc
import numpy as np
from scipy.optimize import curve_fit
from numpy.polynomial.polynomial import Polynomial
from sklearn.metrics import r2_score

In [None]:
# Global typography: large fonts and LaTeX-rendered serif text for every figure.
fontsize = 36
legend_fontsize = 32
plt.rcParams.update({'font.size': fontsize})
rc('font', **{'family': 'serif', 'serif': ['Computer Modern']})
rc('text', usetex=True)

# Per-algorithm styling, keyed by the algorithm names searched for in the logs.
colors_orig = {"Naive": '#FFA500', "Optimized": '#008000', "Mid": '#00008B'}
markers_orig = {"Naive": 'x', "Optimized": 'o', "Mid": "s"}
linewidth = 3
markersize = 12
# Raw strings: "\m" is not a valid Python escape sequence, so the non-raw form
# triggers an invalid-escape warning; the resulting text is unchanged.
algo_names_orig = {
    "Naive": r"$\mathtt{standard}$-$\mathtt{DP}$",
    "Optimized": r"$\mathtt{statistical}$-$\mathtt{DP}$",
    "Mid": r"$\mathtt{hybrid}$",
}

# Tick positions; None leaves the ticks chosen by matplotlib untouched.
# xticks = [1, 10, 100, 1000]
xticks = None
yticks_mem = [10, 1e2, 1e3, 1e4, 1e5]
yticks_time = [1e-2, 1e-1, 1, 10, 1e2, 1e3]
# yticks_mem = None
# yticks_time = None
xscale = 'linear'  # can also be 'log'
yscale = 'log'

# Layout switches read by the plotting cells to decide which figures carry
# axis labels and a legend.
only_one_vertical_label = True
only_one_horizontal_label = True
legend_to_the_left = False

In [None]:
def find_optimal_polynomial_degree(x, y, max_degree=8):
    """
    Find the optimal polynomial degree for fitting the given data.

    A least-squares polynomial of every degree from 1 to ``max_degree`` is
    fitted, and each fit is scored with R² on the same points it was fitted to.
    The score of every candidate degree is printed.

    NOTE(review): in-sample R² normally does not decrease as the degree grows,
    so this criterion tends to favour the largest degree tested.

    Args:
    x (array): Independent variable data.
    y (array): Dependent variable data.
    max_degree (int): Maximum degree of polynomial to test.

    Returns:
    int: Degree with the highest R² (the lowest such degree on ties), or 0
    when no degree was tested or no score was comparable.
    """
    best_degree = 0
    best_r2 = -np.inf

    for degree in range(1, max_degree + 1):
        # Fit the polynomial
        coefs = np.polyfit(x, y, degree)
        # np.polyfit returns the highest power first; Polynomial expects the
        # lowest power first, hence the reversal.
        p = Polynomial(coefs[::-1])

        # Compute the predicted y values
        y_pred = p(x)

        # Compute the R² score
        r2 = r2_score(y, y_pred)

        # Update the best degree if the current one is strictly better
        print(f'degree {degree}, r2 = {r2}')
        if r2 > best_r2:
            best_r2 = r2
            best_degree = degree
    return best_degree

In [None]:
def exponential_model(x, a, b):
    """Evaluate the exponential curve ``a * exp(b * x)``."""
    return a * np.exp(b * x)


def get_next_value(insec):
    """
    Extrapolate the value that would follow the sequence ``insec``.

    The first element of ``insec`` is left out of the fit. The remaining
    elements are placed at x = 0, 1, 2, ... and fitted with
    ``exponential_model``; the fitted curve is then evaluated one step past
    the last fitted point.

    Args:
    insec (array): Observed values; it must support slicing.

    Returns:
    float: Value of the fitted exponential at the next position.
    """
    observed = insec[1:]
    x_values = np.arange(len(observed))
    params, _ = curve_fit(exponential_model, x_values, observed)
    # observed[k] sits at x = k, so the first unseen position is
    # x = len(observed); using len(insec) would overshoot by one step.
    predicted_next_value = exponential_model(len(observed), *params)
    return predicted_next_value




def get_time_and_memory_data(data_string):
    """
    Parse a benchmark log into execution-time and memory samples.

    For every algorithm name ("Optimized", "Naive", plus "Mid" when that word
    occurs anywhere in the log) the text is searched for a ``T: <int>`` marker
    followed, before the next ``T:`` marker, by
    ``<name> Elapsed time: <number> seconds`` or
    ``<name> Memory Usage: <number> MB``.

    Args:
    data_string (str): Full text of the log.

    Returns:
    tuple: ``(data_time, data_memory)``. Each maps T (int) to a dict that maps
    every algorithm name to the list of values (float) found for that T; the
    list is empty when nothing was found for that algorithm.
    """
    algorithms = ["Optimized", "Naive"] + (["Mid"] if "Mid" in data_string else [])
    # Lazily skip ahead from a "T:" marker without crossing into the next one.
    header = r"T: (\d+)(?:(?!T:).)*?"

    def collect(suffix):
        """Gather the samples whose line ends with the given pattern suffix."""
        table = {}
        for algorithm in algorithms:
            regex = header + re.escape(algorithm) + suffix
            for t_text, value_text in re.findall(regex, data_string, re.DOTALL):
                t_key = int(t_text)
                value = float(value_text)
                if t_key not in table:
                    # A new T gets an (initially empty) list per algorithm.
                    table[t_key] = {name: [] for name in algorithms}
                table[t_key][algorithm].append(value)
        return table

    data_time = collect(r" Elapsed time: ([\de\.-]+) seconds")
    data_memory = collect(r" Memory Usage: ([\de\.-]+) MB")
    return data_time, data_memory

# Experiment: Balanced server

In [None]:
# Log file holding the raw measurements of this experiment
experimental_data = 'collected_data/slurm-6756911.out'

with open(experimental_data, 'r') as fp:
    data_string = fp.read()

# Extract the execution-time and memory samples from the log text
data_time, data_memory = get_time_and_memory_data(data_string)

# Work on copies so that the shared default styling stays untouched
algo_names = dict(algo_names_orig)
colors = dict(colors_orig)
markers = dict(markers_orig)

# When the log contains "Mid" runs, the "Mid" and "Optimized" series exchange
# their display name, marker and colour
if "Mid" in data_string:
    algo_names["Mid"], algo_names["Optimized"] = algo_names_orig["Optimized"], algo_names_orig["Mid"]
    markers["Mid"], markers["Optimized"] = markers_orig["Optimized"], markers_orig["Mid"]
    colors["Mid"], colors["Optimized"] = colors_orig["Optimized"], colors_orig["Mid"]

In [None]:
data = data_time

# Mean and standard deviation of the execution-time samples for every T
T_values = sorted(data.keys())
averages_elapsed = np.array([np.mean(data[T]['Optimized']) for T in T_values])
stddevs_elapsed = np.array([np.std(data[T]['Optimized']) for T in T_values])
# T values without "Naive" samples are left out of the "Naive" series
averages_naive = np.array([np.mean(data[T]['Naive']) for T in T_values if data[T]['Naive']])
stddevs_naive = np.array([np.std(data[T]['Naive']) for T in T_values if data[T]['Naive']])
# The "Mid" key only exists when the log mentions "Mid"; .get() keeps this
# aggregation from raising KeyError for logs without such runs
averages_mid = np.array([np.mean(data[T]['Mid']) for T in T_values if data[T].get('Mid')])
stddevs_mid = np.array([np.std(data[T]['Mid']) for T in T_values if data[T].get('Mid')])
T_values_naive = np.array([T for T in T_values if data[T]['Naive']])
T_values_mid = np.array([T for T in T_values if data[T].get('Mid')])

# Create the plot
fig, ax = plt.subplots(figsize=(8, 5))

# Each series is drawn as a mean line with a +/- one standard deviation band
plt.plot(T_values_naive, averages_naive, label=algo_names['Naive'], marker=markers["Naive"],
         color=colors["Naive"], linewidth=linewidth, markersize=markersize)
plt.fill_between(T_values_naive, averages_naive - stddevs_naive, averages_naive + stddevs_naive,
                 color=colors["Naive"], alpha=0.2)

if "Mid" in data_string:
    plt.plot(T_values_mid, averages_mid, label=algo_names['Mid'], marker=markers["Mid"],
             color=colors["Mid"], linewidth=linewidth, markersize=markersize)
    plt.fill_between(T_values_mid, averages_mid - stddevs_mid, averages_mid + stddevs_mid,
                     color=colors["Mid"], alpha=0.2)

plt.plot(T_values, averages_elapsed, label=algo_names['Optimized'], marker=markers["Optimized"],
         color=colors["Optimized"], linewidth=linewidth, markersize=markersize)
plt.fill_between(T_values, averages_elapsed - stddevs_elapsed, averages_elapsed + stddevs_elapsed,
                 color=colors["Optimized"], alpha=0.2)

for spine in ['right', 'top']:
    ax.spines[spine].set_visible(False)

# Axis scales, limits and ticks come from the shared configuration
plt.yscale(yscale)
plt.xscale(xscale)

plt.ylim(ymin=0.001, ymax=5000)

plt.xticks(xticks)
plt.yticks(yticks_time)

# Add labels and legend
if not only_one_horizontal_label:
    plt.xlabel('Horizon length')
plt.ylabel('Exec. time (sec.)')
if legend_to_the_left:
    plt.legend(fontsize=legend_fontsize)

# Save and show the plot
plt.savefig('results_images/balanced_server_time.pdf', bbox_inches='tight')
plt.show()

# Sample data that is not used by anything in this cell
x_values = np.array([1, 2, 3, 4, 5, 6])
y_values = np.array([1, 4, 9, 16, 25, 36])  # Quadratic relationship

# Polynomial degree that best describes the "Optimized" series
optimal_degree = find_optimal_polynomial_degree(np.array(T_values), np.array(averages_elapsed))
print(f'{optimal_degree=}')

In [None]:
data = data_memory

# Mean and standard deviation of the memory samples for every T
T_values = sorted(data.keys())
averages_elapsed = np.array([np.mean(data[T]['Optimized']) for T in T_values])
stddevs_elapsed = np.array([np.std(data[T]['Optimized']) for T in T_values])
# T values without "Naive" samples are left out of the "Naive" series
averages_naive = np.array([np.mean(data[T]['Naive']) for T in T_values if data[T]['Naive']])
stddevs_naive = np.array([np.std(data[T]['Naive']) for T in T_values if data[T]['Naive']])
# The "Mid" key only exists when the log mentions "Mid"; .get() keeps this
# aggregation from raising KeyError for logs without such runs
averages_mid = np.array([np.mean(data[T]['Mid']) for T in T_values if data[T].get('Mid')])
stddevs_mid = np.array([np.std(data[T]['Mid']) for T in T_values if data[T].get('Mid')])
T_values_naive = np.array([T for T in T_values if data[T]['Naive']])
T_values_mid = np.array([T for T in T_values if data[T].get('Mid')])

# Create the plot
fig, ax = plt.subplots(figsize=(8, 5))

# Each series is drawn as a mean line with a +/- one standard deviation band
plt.plot(T_values_naive, averages_naive, label=algo_names['Naive'], marker=markers["Naive"],
         color=colors["Naive"], linewidth=linewidth, markersize=markersize)
plt.fill_between(T_values_naive, averages_naive - stddevs_naive, averages_naive + stddevs_naive,
                 color=colors["Naive"], alpha=0.2)

if "Mid" in data_string:
    plt.plot(T_values_mid, averages_mid, label=algo_names['Mid'], marker=markers["Mid"],
             color=colors["Mid"], linewidth=linewidth, markersize=markersize)
    plt.fill_between(T_values_mid, averages_mid - stddevs_mid, averages_mid + stddevs_mid,
                     color=colors["Mid"], alpha=0.2)

plt.plot(T_values, averages_elapsed, label=algo_names['Optimized'], marker=markers["Optimized"],
         color=colors["Optimized"], linewidth=linewidth, markersize=markersize)
plt.fill_between(T_values, averages_elapsed - stddevs_elapsed, averages_elapsed + stddevs_elapsed,
                 color=colors["Optimized"], alpha=0.2)

# Horizontal reference line labelled as the memory limit
plt.axhline(200*1e3, linestyle='dashed', label="Memory limit")

# Dashed segment from the last measured "Naive" point to the value that
# get_next_value extrapolates for the following T
T_val_extra = [T_values_naive[-1], T_values_naive[-1]+1]
naive_extra = [averages_naive[-1], get_next_value(averages_naive)]
plt.plot(T_val_extra, naive_extra, label=None, marker=markers["Naive"],
         color=colors["Naive"], linewidth=linewidth, linestyle='dashed',
         markersize=markersize)

for spine in ['right', 'top']:
    ax.spines[spine].set_visible(False)

# Axis scales, ticks and limits come from the shared configuration
plt.yscale(yscale)
plt.xscale(xscale)

plt.xticks(xticks)
plt.yticks(yticks_mem)

plt.ylim(ymin=5, ymax=300000)

# Add labels and legend
plt.xlabel('Horizon length')
plt.ylabel('Memory use (MB)')
if legend_to_the_left:
    plt.legend(fontsize=legend_fontsize, loc=(0.3,0.03))

# Save and show the plot
plt.savefig('results_images/balanced_server_memory.pdf', bbox_inches='tight')
plt.show()

# Polynomial degree that best describes the "Mid" series; skipped when the
# series is empty because a polynomial cannot be fitted to no points
if T_values_mid.size:
    optimal_degree = find_optimal_polynomial_degree(np.array(T_values_mid), np.array(averages_mid))
    print(f'{optimal_degree=}')

# Experiment: Maximally responsive

In [None]:
# Log file holding the raw measurements of this experiment
experimental_data = 'collected_data/slurm-6749527.out'

with open(experimental_data, 'r') as fp:
    data_string = fp.read()

# Extract the execution-time and memory samples from the log text
data_time, data_memory = get_time_and_memory_data(data_string)

# Work on copies so that the shared default styling stays untouched
algo_names = dict(algo_names_orig)
colors = dict(colors_orig)
markers = dict(markers_orig)

# When the log contains "Mid" runs, the "Mid" and "Optimized" series exchange
# their display name, marker and colour
if "Mid" in data_string:
    algo_names["Mid"], algo_names["Optimized"] = algo_names_orig["Optimized"], algo_names_orig["Mid"]
    markers["Mid"], markers["Optimized"] = markers_orig["Optimized"], markers_orig["Mid"]
    colors["Mid"], colors["Optimized"] = colors_orig["Optimized"], colors_orig["Mid"]

In [None]:
data = data_time

# Mean and standard deviation of the execution-time samples for every T;
# T values without "Naive" samples are left out of the "Naive" series
T_values = sorted(data)
averages_elapsed = np.array([np.mean(data[T]['Optimized']) for T in T_values])
stddevs_elapsed = np.array([np.std(data[T]['Optimized']) for T in T_values])
T_values_naive = np.array([T for T in T_values if data[T]['Naive']])
averages_naive = np.array([np.mean(data[T]['Naive']) for T in T_values if data[T]['Naive']])
stddevs_naive = np.array([np.std(data[T]['Naive']) for T in T_values if data[T]['Naive']])

fig, ax = plt.subplots(figsize=(8, 5))

# "Naive" first, then "Optimized": a mean line plus a band of one standard
# deviation on either side
naive_band = (averages_naive - stddevs_naive, averages_naive + stddevs_naive)
plt.plot(
    T_values_naive,
    averages_naive,
    label=algo_names['Naive'],
    marker=markers["Naive"],
    color=colors["Naive"],
    linewidth=linewidth,
    markersize=markersize,
)
plt.fill_between(T_values_naive, naive_band[0], naive_band[1], color=colors["Naive"], alpha=0.2)

optimized_band = (averages_elapsed - stddevs_elapsed, averages_elapsed + stddevs_elapsed)
plt.plot(
    T_values,
    averages_elapsed,
    label=algo_names['Optimized'],
    marker=markers["Optimized"],
    color=colors["Optimized"],
    linewidth=linewidth,
    markersize=markersize,
)
plt.fill_between(T_values, optimized_band[0], optimized_band[1], color=colors["Optimized"], alpha=0.2)

# Hide the right and top borders of the axes
for spine in ('right', 'top'):
    ax.spines[spine].set_visible(False)

# Axis scales, limits and ticks come from the shared configuration
plt.yscale(yscale)
plt.xscale(xscale)
plt.ylim(ymin=0.001, ymax=5000)
plt.xticks(xticks)
plt.yticks(yticks_time)

# No y label is set in this figure; the x label and legend depend on the
# layout switches
if not only_one_horizontal_label:
    plt.xlabel('Horizon length')
if legend_to_the_left:
    plt.legend(fontsize=legend_fontsize)

# Write the figure to disk, then display it
plt.savefig('results_images/maximally_responsive_time.pdf', bbox_inches='tight')
plt.show()

# Sample data that is not used by anything in this cell
x_values = np.array([1, 2, 3, 4, 5, 6])
y_values = np.array([1, 4, 9, 16, 25, 36])  # Quadratic relationship

# Polynomial degree that best describes the "Optimized" series
optimal_degree = find_optimal_polynomial_degree(np.array(T_values), np.array(averages_elapsed))
print(f'{optimal_degree=}')

In [None]:
data = data_memory

# Mean and standard deviation of the memory samples for every T;
# T values without "Naive" samples are left out of the "Naive" series
T_values = sorted(data)
averages_elapsed = np.array([np.mean(data[T]['Optimized']) for T in T_values])
stddevs_elapsed = np.array([np.std(data[T]['Optimized']) for T in T_values])
T_values_naive = np.array([T for T in T_values if data[T]['Naive']])
averages_naive = np.array([np.mean(data[T]['Naive']) for T in T_values if data[T]['Naive']])
stddevs_naive = np.array([np.std(data[T]['Naive']) for T in T_values if data[T]['Naive']])

fig, ax = plt.subplots(figsize=(8, 5))

# "Optimized" first, then "Naive": a mean line plus a band of one standard
# deviation on either side
optimized_band = (averages_elapsed - stddevs_elapsed, averages_elapsed + stddevs_elapsed)
plt.plot(
    T_values,
    averages_elapsed,
    label=algo_names['Optimized'],
    marker=markers["Optimized"],
    color=colors["Optimized"],
    linewidth=linewidth,
    markersize=markersize,
)
plt.fill_between(T_values, optimized_band[0], optimized_band[1], color=colors["Optimized"], alpha=0.2)

naive_band = (averages_naive - stddevs_naive, averages_naive + stddevs_naive)
plt.plot(
    T_values_naive,
    averages_naive,
    label=algo_names['Naive'],
    marker=markers["Naive"],
    color=colors["Naive"],
    linewidth=linewidth,
    markersize=markersize,
)
plt.fill_between(T_values_naive, naive_band[0], naive_band[1], color=colors["Naive"], alpha=0.2)

# Dashed segment from the last measured "Naive" point to the value that
# get_next_value extrapolates for the following T
T_val_extra = [T_values_naive[-1] + step for step in (0, 1)]
naive_extra = [averages_naive[-1], get_next_value(averages_naive)]
plt.plot(
    T_val_extra,
    naive_extra,
    label=None,
    marker=markers["Naive"],
    color=colors["Naive"],
    linewidth=linewidth,
    linestyle='dashed',
    markersize=markersize,
)

# Horizontal reference line labelled as the memory limit
plt.axhline(200e3, linestyle='dashed', label="Memory limit")

# Hide the right and top borders of the axes
for spine in ('right', 'top'):
    ax.spines[spine].set_visible(False)

# Axis scales, ticks and limits come from the shared configuration
plt.yscale(yscale)
plt.xscale(xscale)
plt.xticks(xticks)
plt.yticks(yticks_mem)
plt.ylim(ymin=5, ymax=300000)

# The x label is always shown; the y label depends on the layout switch
plt.xlabel('Horizon length')
if not only_one_vertical_label:
    plt.ylabel('Memory usage (MB)')

# Write the figure to disk, then display it
plt.savefig('results_images/maximally_responsive_memory.pdf', bbox_inches='tight')
plt.show()

# Polynomial degree that best describes the "Optimized" series
optimal_degree = find_optimal_polynomial_degree(np.array(T_values), np.array(averages_elapsed))
print(f'{optimal_degree=}')

# Experiment: Distribution Change

In [None]:
# Log file holding the raw measurements of this experiment.
# Paths that were previously assigned here and immediately overwritten,
# kept for reference:
#   'collected_data/dist_change.txt'
#   'collected_data/slurm-5978328_dist_change.out'
#   'collected_data/slurm-5978335_dist_change.out'
experimental_data = 'collected_data/slurm-5994642_dist_change.out'


with open(experimental_data, 'r') as fp:
    data_string = fp.read()

# Work on copies so that the shared default styling stays untouched
algo_names = algo_names_orig.copy()
colors = colors_orig.copy()
markers = markers_orig.copy()

# When the log contains "Mid" runs, the "Mid" and "Optimized" series exchange
# their display name, marker and colour
if "Mid" in data_string:
    algo_names["Mid"] = algo_names_orig["Optimized"]
    algo_names["Optimized"] = algo_names_orig["Mid"]
    markers["Mid"] = markers_orig["Optimized"]
    markers["Optimized"] = markers_orig["Mid"]
    colors["Mid"] = colors_orig["Optimized"]
    colors["Optimized"] = colors_orig["Mid"]

# Extract the execution-time and memory samples from the log text
data_time, data_memory = get_time_and_memory_data(data_string)
        

In [None]:
data = data_time

# Mean and standard deviation of the execution-time samples for every T
T_values = sorted(data.keys())
averages_elapsed = np.array([np.mean(data[T]['Optimized']) for T in T_values])
stddevs_elapsed = np.array([np.std(data[T]['Optimized']) for T in T_values])
# T values without "Naive" samples are left out of the "Naive" series
averages_naive = np.array([np.mean(data[T]['Naive']) for T in T_values if data[T]['Naive']])
stddevs_naive = np.array([np.std(data[T]['Naive']) for T in T_values if data[T]['Naive']])
# The "Mid" key only exists when the log mentions "Mid"; .get() keeps this
# aggregation from raising KeyError for logs without such runs
averages_mid = np.array([np.mean(data[T]['Mid']) for T in T_values if data[T].get('Mid')])
stddevs_mid = np.array([np.std(data[T]['Mid']) for T in T_values if data[T].get('Mid')])
T_values_naive = np.array([T for T in T_values if data[T]['Naive']])
T_values_mid = np.array([T for T in T_values if data[T].get('Mid')])

# Create the plot
fig, ax = plt.subplots(figsize=(8, 5))

# Each series is drawn as a mean line with a +/- one standard deviation band
plt.plot(T_values, averages_elapsed, label=algo_names['Optimized'], marker=markers["Optimized"],
         color=colors["Optimized"], linewidth=linewidth, markersize=markersize)
plt.fill_between(T_values, averages_elapsed - stddevs_elapsed, averages_elapsed + stddevs_elapsed,
                 color=colors["Optimized"], alpha=0.2)

if "Mid" in data_string:
    plt.plot(T_values_mid, averages_mid, label=algo_names['Mid'], marker=markers["Mid"],
             color=colors["Mid"], linewidth=linewidth, markersize=markersize)
    plt.fill_between(T_values_mid, averages_mid - stddevs_mid, averages_mid + stddevs_mid,
                     color=colors["Mid"], alpha=0.2)

plt.plot(T_values_naive, averages_naive, label=algo_names['Naive'], marker=markers["Naive"],
         color=colors["Naive"], linewidth=linewidth, markersize=markersize)
plt.fill_between(T_values_naive, averages_naive - stddevs_naive, averages_naive + stddevs_naive,
                 color=colors["Naive"], alpha=0.2)

# plt.axhline(1800, linestyle='dashed', label="Timeout")

for spine in ['right', 'top']:
    ax.spines[spine].set_visible(False)

# Axis scales, limits and ticks come from the shared configuration
plt.yscale(yscale)
plt.xscale(xscale)

plt.ylim(ymin=0.001, ymax=5000)

plt.xticks(xticks)
plt.yticks(yticks_time)

# Add labels and legend
if not only_one_horizontal_label:
    plt.xlabel('Horizon length')
if not only_one_vertical_label:
    plt.ylabel('Execution time (sec.)')
if not legend_to_the_left:
    plt.legend(fontsize=30, loc=(0.3,0.03))

# Save and show the plot
plt.savefig('results_images/dist_change_time.pdf', bbox_inches='tight')
plt.show()

# Polynomial degree that best describes the "Optimized" series
optimal_degree = find_optimal_polynomial_degree(np.array(T_values), np.array(averages_elapsed))
print(f'{optimal_degree=}')

In [None]:
data = data_memory

# Mean and standard deviation of the memory samples for every T
T_values = sorted(data.keys())
averages_elapsed = np.array([np.mean(data[T]['Optimized']) for T in T_values])
stddevs_elapsed = np.array([np.std(data[T]['Optimized']) for T in T_values])
# T values without "Naive" samples are left out of the "Naive" series
averages_naive = np.array([np.mean(data[T]['Naive']) for T in T_values if data[T]['Naive']])
stddevs_naive = np.array([np.std(data[T]['Naive']) for T in T_values if data[T]['Naive']])
# The "Mid" key only exists when the log mentions "Mid"; .get() keeps this
# aggregation from raising KeyError for logs without such runs
averages_mid = np.array([np.mean(data[T]['Mid']) for T in T_values if data[T].get('Mid')])
stddevs_mid = np.array([np.std(data[T]['Mid']) for T in T_values if data[T].get('Mid')])
T_values_naive = np.array([T for T in T_values if data[T]['Naive']])
T_values_mid = np.array([T for T in T_values if data[T].get('Mid')])

# Create the plot
fig, ax = plt.subplots(figsize=(8, 5))

# Each series is drawn as a mean line with a +/- one standard deviation band
plt.plot(T_values, averages_elapsed, label=algo_names['Optimized'], marker=markers["Optimized"],
         color=colors["Optimized"], linewidth=linewidth, markersize=markersize)
plt.fill_between(T_values, averages_elapsed - stddevs_elapsed, averages_elapsed + stddevs_elapsed,
                 color=colors["Optimized"], alpha=0.2)

if "Mid" in data_string:
    plt.plot(T_values_mid, averages_mid, label=algo_names['Mid'], marker=markers["Mid"],
             color=colors["Mid"], linewidth=linewidth, markersize=markersize)
    plt.fill_between(T_values_mid, averages_mid - stddevs_mid, averages_mid + stddevs_mid,
                     color=colors["Mid"], alpha=0.2)

plt.plot(T_values_naive, averages_naive, label=algo_names['Naive'], marker=markers["Naive"],
         color=colors["Naive"], linewidth=linewidth, markersize=markersize)
plt.fill_between(T_values_naive, averages_naive - stddevs_naive, averages_naive + stddevs_naive,
                 color=colors["Naive"], alpha=0.2)

# Dashed segment from the last measured "Naive" point to the value that
# get_next_value extrapolates for the following T
T_val_extra = [T_values_naive[-1], T_values_naive[-1]+1]
naive_extra = [averages_naive[-1], get_next_value(averages_naive)]
plt.plot(T_val_extra, naive_extra, label=None, marker=markers["Naive"],
         color=colors["Naive"], linewidth=linewidth, linestyle='dashed',
         markersize=markersize)

# Horizontal reference line labelled as the memory limit
plt.axhline(200*1e3, linestyle='dashed', label="Memory limit")

for spine in ['right', 'top']:
    ax.spines[spine].set_visible(False)

# Axis scales, ticks and limits come from the shared configuration
plt.yscale(yscale)
plt.xscale(xscale)

plt.xticks(xticks)
plt.yticks(yticks_mem)

plt.ylim(ymin=5, ymax=300000)

# Add labels and legend
plt.xlabel('Horizon length')
if not only_one_vertical_label:
    plt.ylabel('Memory usage (MB)')

if not legend_to_the_left:
    plt.legend(fontsize=30, loc=(0.3,0.03))

# Save and show the plot
plt.savefig('results_images/dist_change_memory.pdf', bbox_inches='tight')
plt.show()

# Polynomial degree that best describes the "Optimized" series
optimal_degree = find_optimal_polynomial_degree(np.array(T_values), np.array(averages_elapsed))
print(f'{optimal_degree=}')