In [None]:
import os
import pandas as pd
import numpy as np

# Location of the merged result CSVs (relative to the notebook's working
# directory) and the two per-workload files read further down.
folder_path = "3-merged-results"
job_file = os.path.join(folder_path, "merged_job.csv")
light_file = os.path.join(folder_path, "merged_light.csv")

def conformal_score_lambda(cost, latency, cost_scale=110):
    """Return the conformal score ``|cost / cost_scale - latency|``.

    Works element-wise on anything that supports ``/``, ``-`` and ``abs``
    (plain numbers, NumPy arrays, pandas Series).

    Args:
        cost: Cost value(s).
        latency: Latency/runtime value(s), same shape as ``cost``.
        cost_scale: Divisor applied to ``cost`` before comparing it with
            ``latency``. Defaults to 110, the constant this notebook has
            always used.  # NOTE(review): origin of 110 is not shown here.

    Returns:
        The absolute difference between the scaled cost and the latency.
    """
    return abs(cost / cost_scale - latency)

def extract_last_column(file_path):
    """Compute the conformal score of every row in the CSV at ``file_path``.

    Despite its name, this does not return the file's last column: it reads
    the ``cost`` and ``runtime`` columns and passes them to
    ``conformal_score_lambda``.

    Args:
        file_path: Path to a CSV file containing ``cost`` and ``runtime``.

    Returns:
        A list with one score per CSV row, or an empty list when
        ``file_path`` does not exist.
    """
    # A missing file is treated as "no data" rather than an error.
    if not os.path.exists(file_path):
        return []

    frame = pd.read_csv(file_path)
    scores = conformal_score_lambda(frame['cost'], frame['runtime'])
    return scores.tolist()


# Per-row conformal scores for each workload. Either list is empty when the
# corresponding CSV file does not exist.
job_values = extract_last_column(job_file)
light_values = extract_last_column(light_file)


In [None]:
import math
def getC(R,delta):
    """Return the conformal threshold of the scores ``R`` at miscoverage ``delta``.

    The threshold is the ``ceil((n + 1) * (1 - delta))``-th smallest score
    (1-indexed), where ``n = len(R)``. The rank is clamped to ``[1, n]``:
    for small ``n`` or small ``delta`` it can exceed ``n`` (previously an
    ``IndexError``), and for ``delta`` close to 1 it can be 0 (previously
    wrapped around to the largest score via index ``-1``).

    Args:
        R: Iterable of conformal scores; it is not modified.
        delta: Target miscoverage level.

    Returns:
        The selected score from ``R``.

    Raises:
        ValueError: If ``R`` is empty.
    """
    sortedR = sorted(R)
    n = len(sortedR)
    print("total_number: ",n)
    if n == 0:
        raise ValueError("getC requires at least one score in R")
    q_hat_index = math.ceil(((n+1)*(1-delta)))
    # Keep the 1-indexed rank inside the sample.
    q_hat_index = min(max(q_hat_index, 1), n)
    return sortedR[q_hat_index-1]

In [None]:
# Default offset used by g / g_inverse / shift.
# NOTE(review): presumably the assumed distribution-shift budget - confirm.
E = 0.268

def g(x, e=E):
    """Return ``x - e`` floored at 0."""
    return max(0, x - e)

def g_inverse(x, e=E):
    """Return ``x + e`` capped at 1."""
    return min(1, x + e)

def shift(c, K, e=E):
    """Return the adjusted quantile level for nominal level ``c``.

    Computes ``g_inverse(g((1 + 1/K) * g_inverse(c)))`` with the same
    offset ``e`` used in every step.

    Args:
        c: Nominal quantile level (callers pass ``1 - delta``).
        K: Number of calibration points; must be non-zero.
        e: Offset forwarded to ``g`` and ``g_inverse``. Defaults to ``E``.

    Returns:
        The adjusted level, never larger than 1.
    """
    inflated = (1 + 1 / K) * g_inverse(c, e)
    return g_inverse(g(inflated, e), e)

In [None]:
# Informational only: the LIGHT threshold at delta=0.2 is printed for
# reference and is not used again in the visible part of the notebook.
light_C = getC(light_values,0.2)
print("light_C: ",light_C)

In [None]:
# Threshold computed from all JOB scores at delta=0.2, printed for reference.
job_C = getC(job_values,0.2)
print("job_C: ",job_C)

In [None]:
def test_robustCP_one_iteration(job_values, light_values, delta, debug=True):
    K = 300 # Split Index

    np.random.shuffle(job_values)

    # JOB Calibration -> Sorted
    job_calibration = job_values[:K] # K -> calibration data points
    job_calibration = sorted(job_calibration)

    # JOB Test -> Remaining values
    job_test = job_values[K:] # N-K -> test data points

    # LIGHT Test -> All the data points
    job_calibration_C = getC(job_calibration, delta)
    if debug: print("Original C: ", job_calibration_C)
    
    # Test on JOB
    job_count = 0
    for val in job_test:
        if val<=job_calibration_C:
            job_count+=1
    job_test_valid_rate = job_count / len(job_test)
    if debug: print("=> JOB Test: ",job_test_valid_rate)

    robustCP_shift_quantile=shift(1-delta,K)
    print("RobustCP_shift_quantile: ", robustCP_shift_quantile)
    robustCP_index = math.ceil(((K+1)*robustCP_shift_quantile))
    robustCP_index = min(robustCP_index, len(job_calibration))

    robustCP_new_C = job_calibration[robustCP_index-1]
    if debug: print("RobustCP_new_C: ",robustCP_new_C)

    # Test on LIGHT
    light_count = 0
    for val in light_values:
        if val < robustCP_new_C:
            light_count +=1
    light_test_valid_rate = light_count / len(light_values)
    print("=> LIGHT Test: ",light_test_valid_rate)
    return light_test_valid_rate


In [None]:
# Single random split at delta=0.4 with the default debug=True.
test_robustCP_one_iteration(job_values, light_values, delta = 0.4)

In [None]:
# Single random split at delta=0.3 with debug output switched off.
test_robustCP_one_iteration(job_values, light_values, delta = 0.3, debug = False)

In [None]:
def test_robustCP_multiples_iterations(job_values, light_values, delta = 0.2, iterations=20000):
    import matplotlib.pyplot as plt
    import sys
    import statistics
    from scipy.stats import gaussian_kde
    class HiddenPrints:
        def __enter__(self):
            self._original_stdout = sys.stdout
            sys.stdout = open('/dev/null', 'w')  # macOS/Linux

        def __exit__(self, exc_type, exc_val, exc_tb):
            sys.stdout.close()
            sys.stdout = self._original_stdout 

    light_test_valid_rates = []

    with HiddenPrints(): 
        light_test_valid_rates = []
        for i in range(iterations):
            ans = test_robustCP_one_iteration(job_values, light_values, delta, debug = False)
            light_test_valid_rates.append(ans)

    avg_valid_rate = sum(light_test_valid_rates) / len(light_test_valid_rates) if light_test_valid_rates else 0
    print("Average LIGHT Valid Rate:", avg_valid_rate)
    median_valid_rate = statistics.median(light_test_valid_rates) if light_test_valid_rates else 0
    print("Median LIGHT Valid Rate:", median_valid_rate)

In [None]:
# Repeated random splits at delta=0.1; prints the average and median LIGHT valid rate.
test_robustCP_multiples_iterations(job_values,light_values,delta=0.1)

In [None]:
# Repeated random splits at delta=0.2; prints the average and median LIGHT valid rate.
test_robustCP_multiples_iterations(job_values,light_values,delta=0.2)

In [None]:
# Repeated random splits at delta=0.3; prints the average and median LIGHT valid rate.
test_robustCP_multiples_iterations(job_values,light_values,delta=0.3)

In [None]:
# Repeated random splits at delta=0.4; prints the average and median LIGHT valid rate.
test_robustCP_multiples_iterations(job_values,light_values,delta=0.4)

In [None]:
def test_robustCP_multiples_iterations(job_values, light_values, delta = 0.2, iterations=10000):
    import matplotlib.pyplot as plt
    import sys
    import statistics
    from scipy.stats import gaussian_kde
    class HiddenPrints:
        def __enter__(self):
            self._original_stdout = sys.stdout
            sys.stdout = open('/dev/null', 'w')  

        def __exit__(self, exc_type, exc_val, exc_tb):
            sys.stdout.close()
            sys.stdout = self._original_stdout 

    light_test_valid_rates = []

    with HiddenPrints():  
        light_test_valid_rates = []
        for i in range(iterations):
            ans = test_robustCP_one_iteration(job_values, light_values, delta, debug = False)
            light_test_valid_rates.append(ans)

    avg_valid_rate = sum(light_test_valid_rates) / len(light_test_valid_rates) if light_test_valid_rates else 0
    print("Average LIGHT Valid Rate:", avg_valid_rate)
    median_valid_rate = statistics.median(light_test_valid_rates) if light_test_valid_rates else 0
    print("Median LIGHT Valid Rate:", median_valid_rate)

    plt.figure(figsize=(8, 5))
    graph_ceb_test_valid_rates = [i*100 for i in light_test_valid_rates]
    kde = gaussian_kde(graph_ceb_test_valid_rates, bw_method='scott')
    kde.set_bandwidth(bw_method=kde.factor * 3.0) 
    x = np.linspace(min(graph_ceb_test_valid_rates) - 5, max(graph_ceb_test_valid_rates) + 8, 10000)
    y_density = kde(x)
    n = len(graph_ceb_test_valid_rates)
    y_frequency = y_density * n * (x[1] - x[0])
    plt.fill_between(x, y_frequency, color='blue', alpha=0.3)
    plt.plot(x, y_frequency, color='blue', linewidth=4, alpha=0.3, label ="")
    plt.xlabel("Coverage(%)", fontsize=28)
    plt.ylabel("Frequency Density", fontsize=28)
    plt.xticks(fontsize=28)
    plt.yticks(fontsize=28)
    plt.vlines(avg_valid_rate * 100, ymin=0, ymax=7, color='red', linestyle='--', linewidth=2)
    plt.vlines(median_valid_rate * 100, ymin=0, ymax=7, color='green', linestyle='-.', linewidth=2)
    plt.plot([], [], color='red', linestyle='--', linewidth=2, label="Avg: {:.2f}%".format(avg_valid_rate * 100))
    plt.plot([], [], color='green', linestyle='-.', linewidth=2, label="Med: {:.2f}%".format(median_valid_rate * 100))
    plt.legend(fontsize=16, loc="upper right")
    plt.xlim(45, 70)
    plt.axhline(0, color='black', linewidth=0.8, linestyle='--')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("distribution_shift_LIGHT.pdf")
    plt.show()  

    plt.figure(figsize=(8, 5))
    plt.hist(light_test_valid_rates, bins=15, range=(1-delta-0.1, 1), edgecolor='black')
    plt.title("Histogram of LIGHT Test Valid Rate")
    plt.xlabel("Valid Rate")
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.show()  
    
# Run the plotting variant defined in this cell at delta=0.45.
test_robustCP_multiples_iterations(job_values,light_values,delta=0.45)

In [None]:
### Compare with the original C
from scipy.stats import gaussian_kde
def original_C_test_robustCP_one_iteration(job_values, ceb_values, delta, debug=True, K=300):
    """Run one random split and return the valid rate of the plain threshold.

    A shuffled copy of ``job_values`` supplies ``K`` calibration scores. The
    threshold returned by ``getC`` for those scores is applied to every
    entry of ``ceb_values``, without any shift adjustment.

    Args:
        job_values: Sequence of JOB conformal scores. It is not modified;
            the shuffle is applied to a copy.
        ceb_values: Sequence of scores to evaluate the threshold on.
        delta: Target miscoverage level.
        debug: When true, print the resulting valid rate.
        K: Number of calibration points (defaults to 300).

    Returns:
        Fraction of ``ceb_values`` that are <= the calibration threshold.

    Raises:
        ValueError: If ``K < 1``, if ``job_values`` has fewer than ``K``
            entries, or if ``ceb_values`` is empty.
    """
    if K < 1:
        raise ValueError("K must be at least 1")
    if len(job_values) < K:
        raise ValueError("job_values must contain at least K entries")
    if len(ceb_values) == 0:
        raise ValueError("ceb_values must not be empty")

    # Shuffle a copy so repeated calls do not reorder the caller's list.
    shuffled_job = list(job_values)
    np.random.shuffle(shuffled_job)

    # JOB calibration: first K shuffled scores, sorted.
    job_calibration = sorted(shuffled_job[:K])

    job_calibration_C = getC(job_calibration, delta)

    # Evaluate on all given points; <= so ties with the threshold count as covered.
    ceb_count = sum(1 for val in ceb_values if val <= job_calibration_C)
    ceb_test_valid_rate = ceb_count / len(ceb_values)
    if debug: print("=> CEB Test: ",ceb_test_valid_rate)
    return ceb_test_valid_rate

def original_C_test_robustCP_multiples_iterations(job_values, ceb_values, delta, iterations=10000):
    """Repeat ``original_C_test_robustCP_one_iteration`` and print statistics.

    Output printed during the individual iterations is discarded. The
    average and median valid rates are printed, then a smoothed
    frequency-density curve of the coverage in percent is computed.

    Args:
        job_values: JOB conformal scores, forwarded to each iteration.
        ceb_values: Scores the threshold is evaluated on.
        delta: Target miscoverage level.
        iterations: Number of random splits to evaluate.

    Returns:
        None. With ``iterations=0`` both statistics are reported as 0.
    """
    print("delta = ",delta)
    import contextlib
    import os
    import statistics

    ceb_test_valid_rates = []

    # os.devnull is the portable spelling of '/dev/null'; redirect_stdout
    # restores sys.stdout even if an iteration raises.
    with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
        for _ in range(iterations):
            ans = original_C_test_robustCP_one_iteration(job_values, ceb_values, delta, debug = False)
            ceb_test_valid_rates.append(ans)

    avg_valid_rate = sum(ceb_test_valid_rates) / len(ceb_test_valid_rates) if ceb_test_valid_rates else 0
    print("Average Lighttt Valid Rate:", avg_valid_rate)
    median_valid_rate = statistics.median(ceb_test_valid_rates) if ceb_test_valid_rates else 0
    print("Median Lighttt Valid Rate:", median_valid_rate)

    graph_ceb_test_valid_rates = [i*100 for i in ceb_test_valid_rates]
    # gaussian_kde cannot be fitted to fewer than two distinct values, and
    # min()/max() fail on an empty list.
    if len(set(graph_ceb_test_valid_rates)) < 2:
        return

    # Plotting dependency is only needed once there is data to work with.
    import matplotlib.pyplot as plt

    # Frequency Density Version
    # NOTE(review): the curve is computed but never drawn or returned, so the
    # figure created here stays empty - confirm whether plotting code is missing.
    plt.figure(figsize=(8, 5))
    kde = gaussian_kde(graph_ceb_test_valid_rates, bw_method='scott')
    # Widen Scott's bandwidth threefold for a smoother curve.
    kde.set_bandwidth(bw_method=kde.factor * 3.0)
    x = np.linspace(min(graph_ceb_test_valid_rates) - 3, max(graph_ceb_test_valid_rates) + 3, 10000)
    y_density = kde(x)
    n = len(graph_ceb_test_valid_rates)
    # Scale the density by sample count and grid step.
    y_frequency = y_density * n * (x[1] - x[0])
  
# Baseline without the shift adjustment: evaluate the plain threshold on the LIGHT scores at delta=0.45.
original_C_test_robustCP_multiples_iterations(job_values,light_values,delta=0.45) 