# Load Results


In [6]:
import os
import re
import numpy as np

class SCPInstance:
    """One Set Covering Problem instance read from a text file.

    The instance is selected by its position in the alphabetically sorted
    list of ``scp*.txt`` files found in ``folder`` (the match is
    case-insensitive).

    Attributes set by the constructor:
        folder, sol_file, index: the constructor arguments, stored as given.
        filename: name of the selected instance file.
        name: ``filename`` without its extension.
        path: ``folder`` joined with ``filename``.
        m: number of rows (called "attributes" in this file).
        n: number of columns (called "sets" / "airplanes" in this file).
        costs: list with the first ``n`` integers after the header.
        attr_of_set: list of ``n`` sets; entry ``j`` holds the 0-based rows
            covered by column ``j``.
        sets_of_attr: list of ``m`` sets; entry ``i`` holds the 0-based
            columns covering row ``i``.
        opt_value: known optimal cost as a float, or ``None`` if it could
            not be found.
    """

    def __init__(self, index, folder="SCP-Instances", sol_file="Solutions.txt"):
        self.folder = folder
        self.sol_file = sol_file
        self.index = index

        self.filename = self._get_filename(index)
        # splitext drops only the trailing extension, whatever its letter
        # case, so "SCP41.TXT" also yields "SCP41".
        self.name = os.path.splitext(self.filename)[0]

        self.path = os.path.join(folder, self.filename)

        (self.m, self.n, self.costs,
         self.attr_of_set, self.sets_of_attr) = self._load_instance()
        self.opt_value = self._load_opt_value()

    def _get_filename(self, index):
        """
        Return the name of the ``index``-th SCP file in ``self.folder``.

        Raises:
            FileNotFoundError: no ``scp*.txt`` file exists in the folder.
            TypeError: ``index`` is not an integer.
            IndexError: ``index`` is outside the list of files (negative
                indices count from the end, as with any Python list).
        """
        files = sorted(
            f for f in os.listdir(self.folder)
            if f.lower().startswith("scp") and f.lower().endswith(".txt")
        )
        if not files:
            raise FileNotFoundError(f"No SCP files found in {self.folder}.")
        if not isinstance(index, int):
            raise TypeError("Index must be an integer.")
        if not -len(files) <= index < len(files):
            raise IndexError(f"Index {index} out of range (found {len(files)} files).")
        return files[index]

    def _load_instance(self):
        """
        Parse the instance file into ``(m, n, costs, attr_of_set, sets_of_attr)``.

        The file is read as a flat stream of whitespace-separated integers:
        ``m``, ``n``, then ``n`` costs, then for each of the ``m`` rows a
        count followed by that many 1-based column indices.
        """
        with open(self.path, "r") as f:
            data = [int(token) for token in f.read().split()]

        m, n = data[0], data[1]
        costs = data[2:2 + n]

        attr_of_set = [set() for _ in range(n)]
        sets_of_attr = [set() for _ in range(m)]

        idx = 2 + n
        for attr in range(m):
            k_i = data[idx]
            idx += 1
            columns = data[idx:idx + k_i]
            idx += k_i
            for j in columns:
                # File indices are 1-based; internal indices are 0-based.
                attr_of_set[j - 1].add(attr)
                sets_of_attr[attr].add(j - 1)

        return m, n, costs, attr_of_set, sets_of_attr

    def _load_opt_value(self):
        """
        Look up the known optimal value in the solutions file.

        The lookup key is built from the file name: the ``scp`` prefix and
        ``.txt`` suffix are removed and a dot is inserted after the first
        remaining character (``scp41.txt`` -> ``4.1``, ``scpa1.txt`` ->
        ``A.1``). The key is compared case-insensitively with the first
        token of each line; the second token of the first matching line is
        returned as a float.

        Returns ``None`` when the solutions file is missing (a warning is
        printed), when no key can be derived, or when no line matches.
        """
        if not os.path.exists(self.sol_file):
            print(f"⚠️ Solutions file '{self.sol_file}' not found in current directory.")
            return None

        # Strip exactly the prefix and suffix that _get_filename guarantees,
        # instead of removing every "scp" / ".txt" occurrence in the name.
        stem = self.filename.lower()[len("scp"):-len(".txt")]
        if not stem:
            return None

        # Upper-case the whole key: the file token is upper-cased as well,
        # so a partially lower-case key could never match.
        sol_id = f"{stem[0]}.{stem[1:]}".upper()

        with open(self.sol_file, "r") as f:
            for line in f:
                parts = line.split()
                if len(parts) >= 2 and parts[0].upper() == sol_id:
                    return float(parts[1])
        return None

    def summary(self, max_show=4):
        """
        Print a summary of the SCP instance.

        ``max_show`` limits how many rows and how many columns are listed
        in the two coverage examples.
        """
        # A known optimum of 0 is still "known": test against None.
        known = self.opt_value if self.opt_value is not None else 'Unknown'

        print("=" * 70)
        print(f"📘 Instance: {self.filename}")
        print(f"  Attributes (m): {self.m}")
        print(f"  Airplanes (n):  {self.n}")
        print(f"  Known optimal cost: {known}")
        # Show the ellipsis exactly when costs were left out of the sample.
        print("Costs sample:\n", self.costs[:6], "..." if len(self.costs) > 6 else "")
        print("Example coverage:")
        for i in range(min(max_show, self.m)):
            print(f"  Attribute {i}: covered by {list(self.sets_of_attr[i])[:8]}")
        print("Example airplane coverage:")
        for j in range(min(max_show, self.n)):
            print(f"  Airplane {j}: covers {list(self.attr_of_set[j])[:8]}")
        print("=" * 70)

In [8]:
import os
import pandas as pd

def load_results(csv_filename, results_folder="results", instances_folder="SCP-Instances", verbose=True):
    """
    Load a results CSV file and check the stored solution costs.

    For every row, the indices listed in ``solution_sets`` are looked up in
    the costs of the instance named ``instance_name`` (file
    ``<instance_name>.txt`` in ``instances_folder``) and their sum is
    compared with ``solution_cost``.

    Args:
        csv_filename: file name of the CSV inside ``results_folder``.
        results_folder: folder containing the CSV.
        instances_folder: folder containing the SCP instance files.
        verbose: when true, print the row count and, if the columns exist,
            the mean of ``deviation_%`` and the sum of ``time_sec``.

    Returns:
        The loaded ``pandas.DataFrame``, unmodified.

    Raises:
        FileNotFoundError: the CSV file does not exist.

    The outcome of the cost check is always printed: a warning with the
    number of mismatched rows, a success line when every row was checked
    and matched, and a separate notice for rows that could not be checked
    (instance file missing or unreadable, or an empty solution). If the
    CSV lacks one of the three columns the check is skipped and said so.
    """
    csv_path = os.path.join(results_folder, csv_filename)
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"Results file not found: {csv_path}")

    df = pd.read_csv(csv_path)

    if verbose:
        print(f" Loaded {len(df)} rows from '{csv_path}'")
        if "deviation_%" in df.columns:
            avg_dev = df["deviation_%"].dropna().mean()
            print(f"📊 Average deviation: {avg_dev:+.2f}%")
        if "time_sec" in df.columns:
            total_time = df["time_sec"].sum()
            print(f"⏱️  Total runtime: {total_time:.2f}s")

    # --- cost sanity check -------------------------------------------------
    required = ("instance_name", "solution_sets", "solution_cost")
    if any(col not in df.columns for col in required):
        print("⚠️ Cost check skipped: results file lacks "
              "instance_name / solution_sets / solution_cost columns.")
        return df

    # SCPInstance selects a file by its position in the sorted list of
    # scp*.txt files, so positions must be computed on that same filtered
    # list; indexing the raw directory listing is off by one for every
    # unrelated file that sorts before the instance.
    try:
        listing = os.listdir(instances_folder)
    except OSError:
        listing = []
    instance_files = sorted(
        f for f in listing
        if f.lower().startswith("scp") and f.lower().endswith(".txt")
    )
    position = {fname: pos for pos, fname in enumerate(instance_files)}

    costs_cache = {}  # file name -> cost list, or None when unavailable
    bad = 0
    skipped = 0
    rows = zip(df["instance_name"], df["solution_sets"], df["solution_cost"])
    for name, raw_sets, stored_cost in rows:
        fname = f"{name}.txt"
        if fname not in costs_cache:
            costs = None
            if fname in position:
                try:
                    costs = SCPInstance(position[fname], folder=instances_folder).costs
                except (OSError, ValueError, IndexError):
                    costs = None  # unreadable or malformed instance file
            costs_cache[fname] = costs
        costs = costs_cache[fname]
        if costs is None:
            skipped += 1
            continue

        raw = str(raw_sets).strip("[] ")
        if not raw:
            skipped += 1
            continue
        try:
            selected = [int(x) for x in raw.split(",") if x.strip()]
        except ValueError:
            bad += 1
            continue

        # An index outside the instance cannot belong to a valid solution;
        # negative values would otherwise wrap around silently.
        if any(not 0 <= i < len(costs) for i in selected):
            bad += 1
            continue

        if sum(costs[i] for i in selected) != stored_cost:
            bad += 1

    if bad:
        print(f"⚠️ !WARNING INCONSISTENT VALUES ({bad} mismatched rows)")
    elif not skipped:
        print("✅ All solution costs verified successfully.")
    if skipped:
        print(f"⚠️ {skipped} rows could not be checked "
              "(instance file missing/unreadable or empty solution).")

    return df


# Greedy Cost-Efficient: Effect of Local Search

In [16]:
from IPython.display import display
import numpy as np
import pandas as pd
from scipy.stats import ttest_rel, wilcoxon

# ------------------------------------------------------
# Load the three result files of the greedy variant
# ------------------------------------------------------
df_greedy_RE = load_results("greedy_RE.csv")
df_greedy_FI_drop_or_swap = load_results("greedy_FI_drop_or_swap.csv")
df_greedy_BI_drop_or_swap = load_results("greedy_BI_drop_or_swap.csv")

# ------------------------------------------------------
# One row per instance_name, with cost/deviation columns
# renamed per variant; default (inner) merges are used.
# ------------------------------------------------------
merged = pd.merge(
    pd.merge(
        df_greedy_RE.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
            columns={"solution_cost": "cost_greedy_RE",
                     "deviation_%": "dev_greedy_RE"}
        ),
        df_greedy_FI_drop_or_swap.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
            columns={"solution_cost": "cost_greedy_FI_drop_or_swap",
                     "deviation_%": "dev_greedy_FI_drop_or_swap"}
        ),
        on="instance_name",
    ),
    df_greedy_BI_drop_or_swap.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
        columns={"solution_cost": "cost_greedy_BI_drop_or_swap",
                 "deviation_%": "dev_greedy_BI_drop_or_swap"}
    ),
    on="instance_name",
)

#display(merged.head())

# ------------------------------------------------------
# 🔹 Statistical Comparison Function
# ------------------------------------------------------
def compare_variants(df, col_ref, col_new, label):
    """
    Compare two algorithm variants statistically and print the result.

    Rows where either column is missing are dropped, so both variants are
    compared on the same instances.

    Args:
        df: DataFrame holding one row per instance.
        col_ref: name of the baseline cost column.
        col_new: name of the cost column of the variant being evaluated.
        label: heading printed above the statistics.

    Prints the fraction of rows where the new cost is lower / equal /
    higher, the mean relative cost reduction in percent (positive means the
    new variant is cheaper), and the p-values of a paired t-test and a
    Wilcoxon signed-rank test, each flagged as significant below 0.05.
    Returns nothing.
    """
    paired = df.dropna(subset=[col_ref, col_new])

    x = paired[col_ref].values
    y = paired[col_new].values

    if len(x) == 0:
        # Nothing to compare: say so instead of printing nan statistics.
        print(f"\n🔸 {label}:")
        print("  → No paired observations to compare.")
        return

    diff = y - x

    improved = (diff < 0).mean()
    equal = (diff == 0).mean()
    worse = (diff > 0).mean()

    # The relative difference is undefined for a zero baseline; those rows
    # are left out of the average rather than turning it into inf/nan.
    nonzero = x != 0
    if nonzero.any():
        rel_diff = diff[nonzero] / x[nonzero] * 100  # % relative to baseline
        avg_rel_improv_all = -rel_diff.mean()  # positive = improvement
    else:
        avg_rel_improv_all = np.nan

    # Student's paired t-test
    t_stat, t_p = ttest_rel(x, y)

    # Wilcoxon signed-rank test (nonparametric)
    try:
        w_stat, w_p = wilcoxon(x, y)
    except ValueError:
        # Raised e.g. when every difference is zero.
        w_stat, w_p = np.nan, np.nan

    print(f"\n🔸 {label}:")
    print(f"  → Improved: {improved:.2%}")
    print(f"  → Equal:    {equal:.2%}")
    print(f"  → Worse:    {worse:.2%}")
    print(f"  → Avg. % improvement (overall): {avg_rel_improv_all:.2f}%")
    print(f"  → t-test p-value:       {t_p:.4f} ({'significant ✅' if t_p < 0.05 else 'ns'})")
    print(f"  → Wilcoxon p-value:     {w_p:.4f} ({'significant ✅' if w_p < 0.05 else 'ns'})")

# ------------------------------------------------------
# Compare each local-search variant against Greedy+RE
# ------------------------------------------------------
print("🔹 Detailed Comparison vs Greedy+RE:")
compare_variants(
    df=merged,
    col_ref="cost_greedy_RE",
    col_new="cost_greedy_FI_drop_or_swap",
    label="Greedy+RE → +FI_drop_or_swap",
)

compare_variants(
    df=merged,
    col_ref="cost_greedy_RE",
    col_new="cost_greedy_BI_drop_or_swap",
    label="Greedy+RE → +BI_drop_or_swap",
)


 Loaded 42 rows from 'results/greedy_RE.csv'
📊 Average deviation: +5.52%
⏱️  Total runtime: 13.29s
✅ All solution costs verified successfully.
 Loaded 42 rows from 'results/greedy_FI_drop_or_swap.csv'
📊 Average deviation: +4.92%
⏱️  Total runtime: 120.86s
✅ All solution costs verified successfully.
 Loaded 42 rows from 'results/greedy_BI_drop_or_swap.csv'
📊 Average deviation: +4.92%
⏱️  Total runtime: 355.71s
✅ All solution costs verified successfully.
🔹 Detailed Comparison vs Greedy+RE:

🔸 Greedy+RE → +FI_drop_or_swap:
  → Improved: 52.38%
  → Equal:    45.24%
  → Worse:    2.38%
  → Avg. % improvement (overall): 0.56%
  → t-test p-value:       0.0010 (significant ✅)
  → Wilcoxon p-value:     0.0000 (significant ✅)

🔸 Greedy+RE → +BI_drop_or_swap:
  → Improved: 47.62%
  → Equal:    52.38%
  → Worse:    0.00%
  → Avg. % improvement (overall): 0.56%
  → t-test p-value:       0.0025 (significant ✅)
  → Wilcoxon p-value:     0.0001 (significant ✅)


# Greedy Cost Squared: Effect of Local Search

In [17]:
from IPython.display import display
import numpy as np
import pandas as pd
from scipy.stats import ttest_rel, wilcoxon

# ------------------------------------------------------
# Load the three result files of the squared variant
# ------------------------------------------------------
df_squared_RE = load_results("squared_RE.csv")
df_squared_FI_drop_or_swap = load_results("squared_FI_drop_or_swap.csv")
df_squared_BI_drop_or_swap = load_results("squared_BI_drop_or_swap.csv")

# ------------------------------------------------------
# One row per instance_name, with cost/deviation columns
# renamed per variant; default (inner) merges are used.
# ------------------------------------------------------
merged_squared = pd.merge(
    pd.merge(
        df_squared_RE.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
            columns={"solution_cost": "cost_squared_RE",
                     "deviation_%": "dev_squared_RE"}
        ),
        df_squared_FI_drop_or_swap.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
            columns={"solution_cost": "cost_squared_FI_drop_or_swap",
                     "deviation_%": "dev_squared_FI_drop_or_swap"}
        ),
        on="instance_name",
    ),
    df_squared_BI_drop_or_swap.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
        columns={"solution_cost": "cost_squared_BI_drop_or_swap",
                 "deviation_%": "dev_squared_BI_drop_or_swap"}
    ),
    on="instance_name",
)

#display(merged_squared.head())

# ------------------------------------------------------
# 🔹 Statistical Comparison Function
# ------------------------------------------------------
def compare_variants(df, col_ref, col_new, label):
    """
    Compare two algorithm variants statistically and print the result.

    Rows where either column is missing are dropped, so both variants are
    compared on the same instances.

    Args:
        df: DataFrame holding one row per instance.
        col_ref: name of the baseline cost column.
        col_new: name of the cost column of the variant being evaluated.
        label: heading printed above the statistics.

    Prints the fraction of rows where the new cost is lower / equal /
    higher, the mean relative cost reduction in percent (positive means the
    new variant is cheaper), and the p-values of a paired t-test and a
    Wilcoxon signed-rank test, each flagged as significant below 0.05.
    Returns nothing.
    """
    paired = df.dropna(subset=[col_ref, col_new])

    x = paired[col_ref].values
    y = paired[col_new].values

    if len(x) == 0:
        # Nothing to compare: say so instead of printing nan statistics.
        print(f"\n🔸 {label}:")
        print("  → No paired observations to compare.")
        return

    diff = y - x

    improved = (diff < 0).mean()
    equal = (diff == 0).mean()
    worse = (diff > 0).mean()

    # The relative difference is undefined for a zero baseline; those rows
    # are left out of the average rather than turning it into inf/nan.
    nonzero = x != 0
    if nonzero.any():
        rel_diff = diff[nonzero] / x[nonzero] * 100  # % relative to baseline
        avg_rel_improv_all = -rel_diff.mean()  # positive = improvement
    else:
        avg_rel_improv_all = np.nan

    # Student's paired t-test
    t_stat, t_p = ttest_rel(x, y)

    # Wilcoxon signed-rank test (nonparametric)
    try:
        w_stat, w_p = wilcoxon(x, y)
    except ValueError:
        # Raised e.g. when every difference is zero.
        w_stat, w_p = np.nan, np.nan

    print(f"\n🔸 {label}:")
    print(f"  → Improved: {improved:.2%}")
    print(f"  → Equal:    {equal:.2%}")
    print(f"  → Worse:    {worse:.2%}")
    print(f"  → Avg. % improvement (overall): {avg_rel_improv_all:.2f}%")
    print(f"  → t-test p-value:       {t_p:.4f} ({'significant ✅' if t_p < 0.05 else 'ns'})")
    print(f"  → Wilcoxon p-value:     {w_p:.4f} ({'significant ✅' if w_p < 0.05 else 'ns'})")

# ------------------------------------------------------
# Compare each local-search variant against Squared+RE
# ------------------------------------------------------
print("🔹 Detailed Comparison vs Squared+RE:")
compare_variants(
    df=merged_squared,
    col_ref="cost_squared_RE",
    col_new="cost_squared_FI_drop_or_swap",
    label="Squared+RE → +FI_drop_or_swap",
)

compare_variants(
    df=merged_squared,
    col_ref="cost_squared_RE",
    col_new="cost_squared_BI_drop_or_swap",
    label="Squared+RE → +BI_drop_or_swap",
)


 Loaded 42 rows from 'results/squared_RE.csv'
📊 Average deviation: +7.51%
⏱️  Total runtime: 14.22s
✅ All solution costs verified successfully.
 Loaded 42 rows from 'results/squared_FI_drop_or_swap.csv'
📊 Average deviation: +6.89%
⏱️  Total runtime: 139.88s
✅ All solution costs verified successfully.
 Loaded 42 rows from 'results/squared_BI_drop_or_swap.csv'
📊 Average deviation: +6.92%
⏱️  Total runtime: 672.03s
✅ All solution costs verified successfully.
🔹 Detailed Comparison vs Squared+RE:

🔸 Squared+RE → +FI_drop_or_swap:
  → Improved: 45.24%
  → Equal:    40.48%
  → Worse:    14.29%
  → Avg. % improvement (overall): 0.55%
  → t-test p-value:       0.0350 (significant ✅)
  → Wilcoxon p-value:     0.0070 (significant ✅)

🔸 Squared+RE → +BI_drop_or_swap:
  → Improved: 35.71%
  → Equal:    64.29%
  → Worse:    0.00%
  → Avg. % improvement (overall): 0.53%
  → t-test p-value:       0.0011 (significant ✅)
  → Wilcoxon p-value:     0.0006 (significant ✅)


# Randomized: Effect of Local Search


In [23]:
from IPython.display import display
import numpy as np
import pandas as pd
from scipy.stats import ttest_rel, wilcoxon

# ------------------------------------------------------
# Load the three result files of the randomized variant
# ------------------------------------------------------
df_randomized_RE = load_results("randomized_RE.csv")
df_randomized_FI_drop_or_swap = load_results("randomized_FI_drop_or_swap.csv")
df_randomized_BI_drop_or_swap = load_results("randomized_BI_drop_or_swap.csv")

# ------------------------------------------------------
# One row per instance_name, with cost/deviation columns
# renamed per variant; default (inner) merges are used.
# ------------------------------------------------------
merged_randomized = pd.merge(
    pd.merge(
        df_randomized_RE.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
            columns={"solution_cost": "cost_randomized_RE",
                     "deviation_%": "dev_randomized_RE"}
        ),
        df_randomized_FI_drop_or_swap.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
            columns={"solution_cost": "cost_randomized_FI_drop_or_swap",
                     "deviation_%": "dev_randomized_FI_drop_or_swap"}
        ),
        on="instance_name",
    ),
    df_randomized_BI_drop_or_swap.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
        columns={"solution_cost": "cost_randomized_BI_drop_or_swap",
                 "deviation_%": "dev_randomized_BI_drop_or_swap"}
    ),
    on="instance_name",
)

#display(merged_randomized.head())

# ------------------------------------------------------
# 🔹 Statistical Comparison Function
# ------------------------------------------------------
def compare_variants(df, col_ref, col_new, label):
    """
    Compare two algorithm variants statistically and print the result.

    Rows where either column is missing are dropped, so both variants are
    compared on the same instances.

    Args:
        df: DataFrame holding one row per instance.
        col_ref: name of the baseline cost column.
        col_new: name of the cost column of the variant being evaluated.
        label: heading printed above the statistics.

    Prints the fraction of rows where the new cost is lower / equal /
    higher, the mean relative cost reduction in percent (positive means the
    new variant is cheaper), and the p-values of a paired t-test and a
    Wilcoxon signed-rank test, each flagged as significant below 0.05.
    Returns nothing.
    """
    paired = df.dropna(subset=[col_ref, col_new])

    x = paired[col_ref].values
    y = paired[col_new].values

    if len(x) == 0:
        # Nothing to compare: say so instead of printing nan statistics.
        print(f"\n🔸 {label}:")
        print("  → No paired observations to compare.")
        return

    diff = y - x

    improved = (diff < 0).mean()
    equal = (diff == 0).mean()
    worse = (diff > 0).mean()

    # The relative difference is undefined for a zero baseline; those rows
    # are left out of the average rather than turning it into inf/nan.
    nonzero = x != 0
    if nonzero.any():
        rel_diff = diff[nonzero] / x[nonzero] * 100  # % relative to baseline
        avg_rel_improv_all = -rel_diff.mean()  # positive = improvement
    else:
        avg_rel_improv_all = np.nan

    # Student's paired t-test
    t_stat, t_p = ttest_rel(x, y)

    # Wilcoxon signed-rank test (nonparametric)
    try:
        w_stat, w_p = wilcoxon(x, y)
    except ValueError:
        # Raised e.g. when every difference is zero.
        w_stat, w_p = np.nan, np.nan

    print(f"\n🔸 {label}:")
    print(f"  → Improved: {improved:.2%}")
    print(f"  → Equal:    {equal:.2%}")
    print(f"  → Worse:    {worse:.2%}")
    print(f"  → Avg. % improvement (overall): {avg_rel_improv_all:.2f}%")
    print(f"  → t-test p-value:       {t_p:.4f} ({'significant ✅' if t_p < 0.05 else 'ns'})")
    print(f"  → Wilcoxon p-value:     {w_p:.4f} ({'significant ✅' if w_p < 0.05 else 'ns'})")

# ------------------------------------------------------
# Compare each local-search variant against Randomized+RE
# ------------------------------------------------------
print("🔹 Detailed Comparison vs Randomized+RE:")
compare_variants(
    df=merged_randomized,
    col_ref="cost_randomized_RE",
    col_new="cost_randomized_FI_drop_or_swap",
    label="Randomized+RE → +FI_drop_or_swap",
)

compare_variants(
    df=merged_randomized,
    col_ref="cost_randomized_RE",
    col_new="cost_randomized_BI_drop_or_swap",
    label="Randomized+RE → +BI_drop_or_swap",
)


 Loaded 42 rows from 'results/randomized_RE.csv'
📊 Average deviation: +5.52%
⏱️  Total runtime: 14.04s
✅ All solution costs verified successfully.
 Loaded 42 rows from 'results/randomized_FI_drop_or_swap.csv'
📊 Average deviation: +4.99%
⏱️  Total runtime: 126.11s
✅ All solution costs verified successfully.
 Loaded 42 rows from 'results/randomized_BI_drop_or_swap.csv'
📊 Average deviation: +4.92%
⏱️  Total runtime: 372.03s
✅ All solution costs verified successfully.
🔹 Detailed Comparison vs Randomized+RE:

🔸 Randomized+RE → +FI_drop_or_swap:
  → Improved: 47.62%
  → Equal:    45.24%
  → Worse:    7.14%
  → Avg. % improvement (overall): 0.50%
  → t-test p-value:       0.0024 (significant ✅)
  → Wilcoxon p-value:     0.0002 (significant ✅)

🔸 Randomized+RE → +BI_drop_or_swap:
  → Improved: 47.62%
  → Equal:    52.38%
  → Worse:    0.00%
  → Avg. % improvement (overall): 0.56%
  → t-test p-value:       0.0025 (significant ✅)
  → Wilcoxon p-value:     0.0001 (significant ✅)


# Greedy → RE → Local Search: RE applied before FI and BI

In [21]:
from IPython.display import display
import numpy as np
import pandas as pd
from scipy.stats import ttest_rel, wilcoxon

# ------------------------------------------------------
# Load the result files of the GREEDY → RE → LocalSearch pipeline
# ------------------------------------------------------
df_greedy_RE = load_results("greedy_RE.csv")
df_greedy_RE_FI_drop_or_swap = load_results("greedy_RE_FI_drop_or_swap.csv")
df_greedy_RE_BI_drop_or_swap = load_results("greedy_RE_BI_drop_or_swap.csv")

# ------------------------------------------------------
# One row per instance_name, with cost/deviation columns
# renamed per variant; default (inner) merges are used.
# ------------------------------------------------------
merged_greedy_RE_pipeline = pd.merge(
    pd.merge(
        df_greedy_RE.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
            columns={"solution_cost": "cost_greedy_RE",
                     "deviation_%": "dev_greedy_RE"}
        ),
        df_greedy_RE_FI_drop_or_swap.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
            columns={"solution_cost": "cost_greedy_RE_FI_drop_or_swap",
                     "deviation_%": "dev_greedy_RE_FI_drop_or_swap"}
        ),
        on="instance_name",
    ),
    df_greedy_RE_BI_drop_or_swap.loc[:, ["instance_name", "solution_cost", "deviation_%"]].rename(
        columns={"solution_cost": "cost_greedy_RE_BI_drop_or_swap",
                 "deviation_%": "dev_greedy_RE_BI_drop_or_swap"}
    ),
    on="instance_name",
)

#display(merged_greedy_RE_pipeline.head())

# ------------------------------------------------------
# 🔹 Statistical Comparison Function
# ------------------------------------------------------
def compare_variants(df, col_ref, col_new, label):
    """
    Compare two algorithm variants statistically and print the result.

    Rows where either column is missing are dropped, so both variants are
    compared on the same instances.

    Args:
        df: DataFrame holding one row per instance.
        col_ref: name of the baseline cost column.
        col_new: name of the cost column of the variant being evaluated.
        label: heading printed above the statistics.

    Prints the fraction of rows where the new cost is lower / equal /
    higher, the mean relative cost reduction in percent (positive means the
    new variant is cheaper), and the p-values of a paired t-test and a
    Wilcoxon signed-rank test, each flagged as significant below 0.05.
    Returns nothing.
    """
    paired = df.dropna(subset=[col_ref, col_new])

    x = paired[col_ref].values
    y = paired[col_new].values

    if len(x) == 0:
        # Nothing to compare: say so instead of printing nan statistics.
        print(f"\n🔸 {label}:")
        print("  → No paired observations to compare.")
        return

    diff = y - x

    improved = (diff < 0).mean()
    equal = (diff == 0).mean()
    worse = (diff > 0).mean()

    # The relative difference is undefined for a zero baseline; those rows
    # are left out of the average rather than turning it into inf/nan.
    nonzero = x != 0
    if nonzero.any():
        rel_diff = diff[nonzero] / x[nonzero] * 100  # % relative to baseline
        avg_rel_improv_all = -rel_diff.mean()  # positive = improvement
    else:
        avg_rel_improv_all = np.nan

    # Student's paired t-test
    t_stat, t_p = ttest_rel(x, y)

    # Wilcoxon signed-rank test (nonparametric)
    try:
        w_stat, w_p = wilcoxon(x, y)
    except ValueError:
        # Raised e.g. when every difference is zero.
        w_stat, w_p = np.nan, np.nan

    print(f"\n🔸 {label}:")
    print(f"  → Improved: {improved:.2%}")
    print(f"  → Equal:    {equal:.2%}")
    print(f"  → Worse:    {worse:.2%}")
    print(f"  → Avg. % improvement (overall): {avg_rel_improv_all:.2f}%")
    print(f"  → t-test p-value:       {t_p:.4f} ({'significant ✅' if t_p < 0.05 else 'ns'})")
    print(f"  → Wilcoxon p-value:     {w_p:.4f} ({'significant ✅' if w_p < 0.05 else 'ns'})")

# ------------------------------------------------------
# Compare each RE-then-local-search variant against Greedy+RE
# ------------------------------------------------------
print("🔹 Detailed Comparison vs Greedy+RE:")
compare_variants(
    df=merged_greedy_RE_pipeline,
    col_ref="cost_greedy_RE",
    col_new="cost_greedy_RE_FI_drop_or_swap",
    label="Greedy+RE → +FI_drop_or_swap",
)

compare_variants(
    df=merged_greedy_RE_pipeline,
    col_ref="cost_greedy_RE",
    col_new="cost_greedy_RE_BI_drop_or_swap",
    label="Greedy+RE → +BI_drop_or_swap",
)


 Loaded 42 rows from 'results/greedy_RE.csv'
📊 Average deviation: +5.52%
⏱️  Total runtime: 13.29s
✅ All solution costs verified successfully.
 Loaded 42 rows from 'results/greedy_RE_FI_drop_or_swap.csv'
📊 Average deviation: +4.95%
⏱️  Total runtime: 56.20s
✅ All solution costs verified successfully.
 Loaded 42 rows from 'results/greedy_RE_BI_drop_or_swap.csv'
📊 Average deviation: +4.95%
⏱️  Total runtime: 66.04s
✅ All solution costs verified successfully.
🔹 Detailed Comparison vs Greedy+RE:

🔸 Greedy+RE → +FI_drop_or_swap:
  → Improved: 47.62%
  → Equal:    52.38%
  → Worse:    0.00%
  → Avg. % improvement (overall): 0.53%
  → t-test p-value:       0.0028 (significant ✅)
  → Wilcoxon p-value:     0.0001 (significant ✅)

🔸 Greedy+RE → +BI_drop_or_swap:
  → Improved: 47.62%
  → Equal:    52.38%
  → Worse:    0.00%
  → Avg. % improvement (overall): 0.53%
  → t-test p-value:       0.0028 (significant ✅)
  → Wilcoxon p-value:     0.0001 (significant ✅)
