In [1]:
import numpy as np
import json
import cvxpy as cp
from scipy import sparse as sp
import scipy.special as sps
import seaborn as sns
import matplotlib.pyplot as plt
from time import time
import math

from src.problems.utils import sample_data_for_group
from src.problems.problems import compute_producer_optimal_solution, _compute_consumer_optimal_solution_cvar_relaxed_base, _compute_consumer_optimal_solution_cvar
from src.problems.gradient_problem import compute_consumer_optimal_solution_cvar_grad

(CVXPY) May 10 07:57:23 PM: Encountered unexpected exception importing solver GLOP:
RuntimeError('Unrecognized new version of ortools (9.12.4544). Expected < 9.10.0. Please open a feature request on cvxpy to enable support for this version.')
(CVXPY) May 10 07:57:23 PM: Encountered unexpected exception importing solver PDLP:
RuntimeError('Unrecognized new version of ortools (9.12.4544). Expected < 9.10.0. Please open a feature request on cvxpy to enable support for this version.')


In [2]:
def _compute_consumer_optimal_solution_cvar_relaxed_topk_rounding(
    allocations: np.ndarray,
    k_rec: int,
) -> tuple[float, np.ndarray]:
    idxs = allocations.argsort(axis=1)[:, -k_rec:]
    alls = np.zeros_like(allocations)
    alls[np.arange(allocations.shape[0])[:, None], idxs] = 1

    return alls

def _compute_consumer_optimal_solution_cvar_relaxed_naive_rounding(
    allocations: np.ndarray,
    k_rec: int,
) -> tuple[float, np.ndarray]:
    alls = np.round(allocations, 1).astype(int)

    return alls

In [3]:
# load data
with open("./data/amazon_predictions.npy", "rb") as f:
    REL_MATRIX = np.load(f)

with open("./data/amazon_user_groups.json", "r") as f:
    GROUPS_MAP = json.load(f)

In [6]:
N_CONSUMERS = 300
N_PRODUCERS = 300
ALPHA = 0.5
GAMMA = 0.5
GROUP_KEY = "usage_group"
K_REC = 10
SOLVER = cp.GUROBI

In [7]:
def compute_perc_prod_below_threshold(
    allocation: np.ndarray,
    threshold: float,
) -> float:
    producer_allocations = allocation.sum(axis=0)
    num_of_producers_below_threshold = np.sum(producer_allocations < threshold)
    return num_of_producers_below_threshold / len(producer_allocations)

def compute_mean_prod_below_threshold(
    allocation: np.ndarray,
    threshold: float,
) -> float:
    producer_allocations = allocation.sum(axis=0)
    return np.mean(math.ceil(threshold) - producer_allocations[producer_allocations < threshold])

def compute_mean_consumer_utility(
    allocation: np.ndarray,
    rel_matrix: np.ndarray,
) -> float:
    return (allocation * rel_matrix).sum() / allocation.sum()

def compute_perc_cons_above_threshold(
    allocation: np.ndarray,
    k_rec: int
) -> float:
    consumer_allocations = allocation.sum(axis=1)
    num_of_consumers_above_threshold = np.sum(consumer_allocations > k_rec)
    return num_of_consumers_above_threshold / len(consumer_allocations)

def compute_perc_cons_below_threshold(
    allocation: np.ndarray,
    k_rec: int
) -> float:
    consumer_allocations = allocation.sum(axis=1)
    num_of_consumers_below_threshold = np.sum(consumer_allocations < k_rec)
    return num_of_consumers_below_threshold / len(consumer_allocations)

def compute_mean_cons_above_threshold(
    allocation: np.ndarray,
    k_rec: int
) -> float:
    consumer_allocations = allocation.sum(axis=1)
    return np.mean(k_rec - consumer_allocations[consumer_allocations > k_rec])


def compute_mean_cons_below_threshold(
    allocation: np.ndarray,
    k_rec: int
) -> float:
    consumer_allocations = allocation.sum(axis=1)
    return np.mean(k_rec - consumer_allocations[consumer_allocations < k_rec])

def compute_std_between_groups(allocations: np.ndarray, group_assignments: np.ndarray) -> float:
    unique_groups, group_indices = np.unique(group_assignments, return_inverse=True)
    num_groups = len(unique_groups)
    group_masks = [group_indices == i for i in range(num_groups)]
    group_sizes = np.array([mask.sum() for mask in group_masks])

    means = []
    for mask, size in zip(group_masks, group_sizes):
        group_alloc = allocations[mask]
        mean = np.mean(group_alloc.sum(axis=0))
        means.append(mean)

    return np.std(means)

In [8]:
results = []

for size in [100, 500]:
    print(f"Running for size {size}")
    for k_rec in [5, 10, 25]:
        print(f"Running for k_rec {k_rec}")
        for run in range(3):
            rel_matrix_sampled, consumer_ids, group_assignments = sample_data_for_group(
                n_consumers=size,
                n_producers=size,
                groups_map=GROUPS_MAP,
                group_key=GROUP_KEY,
                data=REL_MATRIX,
                naive_sampling=True,
                seed=run,
            )
            producer_max_min_utility, _ = compute_producer_optimal_solution(
                rel_matrix=rel_matrix_sampled,
                k_rec=k_rec,
                solver=SOLVER,
            )
            start_time = time()
            _, optimal_allocation = _compute_consumer_optimal_solution_cvar(
                rel_matrix=rel_matrix_sampled,
                k_rec=k_rec,
                producer_max_min_utility=producer_max_min_utility,
                gamma=GAMMA,
                group_assignments=group_assignments,
                alpha=ALPHA,
                solver=SOLVER,
            )
            optimal_time = time() - start_time

            start_time = time()
            _, _base_cvar_allocation = _compute_consumer_optimal_solution_cvar_relaxed_base(
                rel_matrix=rel_matrix_sampled,
                k_rec=k_rec,
                producer_max_min_utility=producer_max_min_utility,
                gamma=GAMMA,
                group_assignments=group_assignments,
                alpha=ALPHA,
                solver=SOLVER,
            )
            relaxed_time = time() - start_time

            rounded_cvar_allocation = _compute_consumer_optimal_solution_cvar_relaxed_naive_rounding(
                allocations=_base_cvar_allocation,
                k_rec=k_rec,
            )
            topk_cvar_allocation = _compute_consumer_optimal_solution_cvar_relaxed_topk_rounding(
                allocations=_base_cvar_allocation,
                k_rec=k_rec,
            )

            start_time = time()
            _base_cvar_grad_allocations = compute_consumer_optimal_solution_cvar_grad(
                rel_matrix=rel_matrix_sampled,
                k_rec=k_rec,
                producer_max_min_utility=producer_max_min_utility,
                gamma=GAMMA,
                group_assignments=group_assignments,
                alpha=ALPHA,
                hidden_dim=200,
                max_epochs=30000,
                verbose=False,
                max_patience=15,
                patience_delta=1e-2
            )
            grad_time = time() - start_time

            rounded_cvar_grad_allocation = _compute_consumer_optimal_solution_cvar_relaxed_naive_rounding(
                allocations=_base_cvar_grad_allocations,
                k_rec=k_rec,
            )
            topk_cvar_grad_allocation = _compute_consumer_optimal_solution_cvar_relaxed_topk_rounding(
                allocations=_base_cvar_grad_allocations,
                k_rec=k_rec,
            )


            stats = {}
            for allocation, method in zip(
                [optimal_allocation, rounded_cvar_allocation, topk_cvar_allocation, rounded_cvar_grad_allocation, topk_cvar_grad_allocation],
                ["Binary", "Rounded", "TopK", "Grad Rounded", "Grad TopK"]
            ):
                stats[f"Mean cons. util._{method}"] = compute_mean_consumer_utility(
                    allocation=allocation,
                    rel_matrix=rel_matrix_sampled,
                )
                stats[f"Groups mean STD_{method}"] = compute_std_between_groups(
                    allocations=allocation,
                    group_assignments=group_assignments,
                )
                stats[f"Prod. util. below min \%_{method}"] = compute_perc_prod_below_threshold(
                    allocation=allocation,
                    threshold=producer_max_min_utility * GAMMA,
                )
                stats[f"Mean prod. util. below min_{method}"] = compute_mean_prod_below_threshold(
                    allocation=allocation,
                    threshold=producer_max_min_utility * GAMMA,
                )
                stats[f"Cons. alloc. above thresh. \%_{method}"] = compute_perc_cons_above_threshold(
                    allocation=allocation,
                    k_rec=k_rec,
                )
                stats[f"Mean cons. above thresh._{method}"] = compute_mean_cons_above_threshold(
                    allocation=allocation,
                    k_rec=k_rec,
                )
                stats[f"Cons. alloc. below thresh. \%_{method}"] = compute_perc_cons_below_threshold(
                    allocation=allocation,
                    k_rec=k_rec,
                )
                stats[f"Mean cons. below thresh._{method}"] = compute_mean_cons_below_threshold(
                    allocation=allocation,
                    k_rec=k_rec,
                )

            results.append({
                "$N_{c}$, N_{p}$}": size,
                "k": k_rec,
                "optimal_time": optimal_time,
                "relaxed_time": relaxed_time,
                "grad_time": grad_time,
                **stats,
                })

Running for size 100
Running for k_rec 5
Set parameter WLSAccessID
Set parameter WLSSecret
Set parameter LicenseID to value 2653936
Academic license 2653936 - for non-commercial use only - registered to do___@student.uva.nl


  stats[f"Prod. util. below min \%_{method}"] = compute_perc_prod_below_threshold(
  stats[f"Cons. alloc. above thresh. \%_{method}"] = compute_perc_cons_above_threshold(
  stats[f"Cons. alloc. below thresh. \%_{method}"] = compute_perc_cons_below_threshold(
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = ret.dtype.type(ret / rcount)


Running for k_rec 10
Running for k_rec 25
Running for size 500
Running for k_rec 5
Running for k_rec 10
Running for k_rec 25


In [9]:
import pandas as pd
import re

In [10]:
df = pd.DataFrame(results)
df

Unnamed: 0,"$N_{c}$, N_{p}$}",k,optimal_time,relaxed_time,grad_time,Mean cons. util._Binary,Groups mean STD_Binary,Prod. util. below min \%_Binary,Mean prod. util. below min_Binary,Cons. alloc. above thresh. \%_Binary,...,Cons. alloc. below thresh. \%_Grad Rounded,Mean cons. below thresh._Grad Rounded,Mean cons. util._Grad TopK,Groups mean STD_Grad TopK,Prod. util. below min \%_Grad TopK,Mean prod. util. below min_Grad TopK,Cons. alloc. above thresh. \%_Grad TopK,Mean cons. above thresh._Grad TopK,Cons. alloc. below thresh. \%_Grad TopK,Mean cons. below thresh._Grad TopK
0,100,5,1.070012,0.267979,49.350496,0.943973,1.187668,0.0,,0.0,...,0.19,1.210526,0.93859,1.187668,0.1,1.0,0.0,,0.0,
1,100,5,0.650495,0.256599,50.922772,0.939272,1.187668,0.0,,0.0,...,0.1,1.1,0.931071,1.187668,0.1,1.0,0.0,,0.0,
2,100,5,0.342925,0.241857,35.509413,0.937025,1.187668,0.0,,0.0,...,0.07,1.142857,0.930671,1.187668,0.06,1.0,0.0,,0.0,
3,100,10,0.281543,0.23578,48.405559,0.940739,2.375336,0.0,,0.0,...,0.05,1.0,0.936093,2.375336,0.01,1.0,0.0,,0.0,
4,100,10,0.294164,0.248166,36.75604,0.935912,2.375336,0.0,,0.0,...,0.0,,0.929543,2.375336,0.0,,0.0,,0.0,
5,100,10,0.326233,0.231978,44.845002,0.93378,2.375336,0.0,,0.0,...,0.04,1.0,0.928433,2.375336,0.03,1.0,0.0,,0.0,
6,100,25,0.259466,0.237749,55.319474,0.927234,5.938341,0.0,,0.0,...,0.01,1.0,0.920482,5.938341,0.0,,0.0,,0.0,
7,100,25,0.318151,0.256646,45.370595,0.924693,5.938341,0.0,,0.0,...,0.0,,0.917837,5.938341,0.0,,0.0,,0.0,
8,100,25,0.363668,0.239523,50.938439,0.92216,5.938341,0.0,,0.0,...,0.02,1.0,0.916147,5.938341,0.01,1.0,0.0,,0.0,
9,500,5,10.757917,6.786068,65.386002,0.947156,1.199342,0.0,,0.0,...,1.0,3.0,0.903491,1.199342,0.982,3.0,0.0,,0.0,


In [11]:
df.to_csv("experiments.csv", index=False)

In [95]:
GROUP_BY_COLS = ["$N_{c}$", "$N_{p}$", "k"]
METHODS = ["Binary", "Rounded", "TopK", "Grad Rounded", "Grad TopK"]
BASE_METRICS = [
    "Mean cons. util.",
    "Groups mean STD",
    "Prod. util. below min \%",
    "Mean prod. util. below min",
    "Cons. alloc. above thresh. \%",
    "Cons. alloc. below thresh. \%",
    "Mean cons. above thresh.",
]

def aggregate_wide(df: pd.DataFrame):
    # build the dict of aggregations
    agg_dict = {
        f"{base}_{m}": ["mean", "std"]
        for m in METHODS
        for base in BASE_METRICS
    }
    # group and flatten the MultiIndex
    grouped = df.groupby(GROUP_BY_COLS).agg(agg_dict)
    # flatten columns: e.g. ("mean_consumer_utility_optimal","mean") → "mean_consumer_utility_optimal_mean"
    grouped.columns = [
        f"{col[0]}_{col[1]}" for col in grouped.columns
    ]
    return grouped.reset_index()

agg_df = aggregate_wide(df)

def aggregate_long(df: pd.DataFrame):
    # melt into a tall table
    value_vars = [
        f"{base}_{m}"
        for m in METHODS
        for base in BASE_METRICS
    ]
    df_long = df.melt(
        id_vars=GROUP_BY_COLS,
        value_vars=value_vars,
        var_name="metric_method",
        value_name="value",
    )
    method_pattern = "|".join(re.escape(m) for m in METHODS)
    extract_re = rf"(?P<metric>.*)_(?P<Method>{method_pattern})$"

    # do the extract:
    df_long[["metric", "Method"]] = df_long["metric_method"].str.extract(extract_re)
    # split “mean_consumer_utility_optimal” → (“mean_consumer_utility”, “optimal”)
    # now group
    return (
        df_long
        .groupby(GROUP_BY_COLS + ["Method", "metric"])["value"]
        .agg(mean="mean", std="std")
        .reset_index()
    )

agg_long_df = aggregate_long(df)



  "Prod. util. below min \%",
  "Cons. alloc. above thresh. \%",
  "Cons. alloc. below thresh. \%",


In [93]:
agg_long_df.to_clipboard()

In [98]:
# 1) combine mean+std into one column
def fmt(x, y):
    if pd.isna(x):
        return ""
    return f"{x:.3f} ({y:.3f})"

agg_long_df["mean_std"] = agg_long_df.apply(
    lambda row: fmt(row["mean"], row["std"]),
    axis=1
)

# 2) pivot so metrics become columns
pivot = (
    agg_long_df
    .pivot_table(
        index=["$N_{c}$", "$N_{p}$", "k", "Method"],
        columns="metric",
        values="mean_std",
        aggfunc="first"
    )
    # flatten the column Index
    .rename_axis(None, axis=1)
    .reset_index()
)

# 3) export to LaTeX
latex = pivot.to_latex(
    index=False,
    na_rep="",
    column_format="lll" + "r" * (pivot.shape[1] - 3),
    longtable=False,
    caption="Performance by method and metric",
    label="tab:results",
    escape=False,
    bold_rows=False,
    multicolumn=True,
)

print(latex)

\begin{table}
\caption{Performance by method and metric}
\label{tab:results}
\begin{tabular}{lllrrrrrrrr}
\toprule
$N_{c}$ & $N_{p}$ & k & Method & Cons. alloc. above thresh. \% & Cons. alloc. below thresh. \% & Groups mean STD & Mean cons. above thresh. & Mean cons. util. & Mean prod. util. below min & Prod. util. below min \% \\
\midrule
100 & 100 & 5 & Binary & 0.000 (0.000) & 0.000 (0.000) & 1.188 (0.000) &  & 0.920 (0.002) &  & 0.000 (0.000) \\
100 & 100 & 5 & Grad Rounded & 0.000 (0.000) & 0.185 (0.035) & 1.117 (0.012) &  & 0.909 (0.009) & 4.000 (0.000) & 0.350 (0.057) \\
100 & 100 & 5 & Grad TopK & 0.000 (0.000) & 0.000 (0.000) & 1.188 (0.000) &  & 0.908 (0.009) & 4.000 (0.000) & 0.140 (0.000) \\
100 & 100 & 5 & Rounded & 0.000 (0.000) & 0.045 (0.007) & 1.185 (0.003) &  & 0.920 (0.002) & 4.000 (0.000) & 0.045 (0.007) \\
100 & 100 & 5 & TopK & 0.000 (0.000) & 0.000 (0.000) & 1.188 (0.000) &  & 0.920 (0.002) &  & 0.000 (0.000) \\
100 & 100 & 10 & Binary & 0.000 (0.000) & 0.000 (0.