In [1]:
import numpy as np
import json
import cvxpy as cp
from scipy import sparse as sp
import scipy.special as sps
import pandas as pd
import re
import seaborn as sns
import matplotlib.pyplot as plt
from time import time
import math

from src.problems.utils import sample_data_for_group
from src.problems.problems import compute_producer_optimal_solution, _compute_consumer_optimal_solution_cvar_relaxed_base, _compute_consumer_optimal_solution_cvar
from src.problems.gradient_problem import compute_consumer_optimal_solution_cvar_grad

In [2]:
def _compute_consumer_optimal_solution_cvar_relaxed_topk_rounding(
    allocations: np.ndarray,
    k_rec: int,
) -> tuple[float, np.ndarray]:
    idxs = allocations.argsort(axis=1)[:, -k_rec:]
    alls = np.zeros_like(allocations)
    alls[np.arange(allocations.shape[0])[:, None], idxs] = 1

    return alls

def _compute_consumer_optimal_solution_cvar_relaxed_naive_rounding(
    allocations: np.ndarray,
    k_rec: int,
) -> tuple[float, np.ndarray]:
    alls = np.round(allocations, 1).astype(int)

    return alls

In [None]:
# Load the two input files used by the experiment cell below:
# REL_MATRIX is passed as `data` and GROUPS_MAP as `groups_map` to sample_data_for_group.
with open("../../data/amazon_predictions.npy", "rb") as predictions_file:
    REL_MATRIX = np.load(predictions_file)

with open("../../data/amazon_user_groups.json", "r") as groups_file:
    GROUPS_MAP = json.load(groups_file)

In [4]:
# Experiment configuration.
# NOTE(review): N_CONSUMERS, N_PRODUCERS and K_REC are not referenced by the visible
# cells; the experiment loop iterates over its own `size` and `k_rec` values.
N_CONSUMERS = 300
N_PRODUCERS = 300
ALPHA = 0.5  # passed as `alpha` to the CVaR solvers
GAMMA = 0.5  # passed as `gamma`; also scales producer_max_min_utility into the producer threshold
GROUP_KEY = "usage_group"  # passed as `group_key` to sample_data_for_group
K_REC = 10
SOLVER = cp.GUROBI  # passed as `solver` to the cvxpy-based problems

In [5]:
def compute_perc_prod_below_threshold(
    allocation: np.ndarray,
    threshold: float,
) -> float:
    """Return the fraction of columns whose total is strictly below `threshold`.

    Args:
        allocation: 2-D allocation array; column totals are taken over axis 0.
        threshold: Value each column total is compared against.

    Returns:
        Number of column totals below `threshold` divided by the number of columns.
    """
    per_producer_total = allocation.sum(axis=0)
    is_below = per_producer_total < threshold
    return is_below.sum() / len(per_producer_total)

def compute_mean_prod_below_threshold(
    allocation: np.ndarray,
    threshold: float,
) -> float:
    """Mean shortfall of the columns whose total is strictly below `threshold`.

    The shortfall of a column is `ceil(threshold)` minus its total (axis 0 sum).

    Args:
        allocation: 2-D allocation array; column totals are taken over axis 0.
        threshold: Columns with a total below this value are included.

    Returns:
        The mean shortfall, or NaN when no column is below the threshold.
    """
    producer_allocations = allocation.sum(axis=0)
    shortfalls = math.ceil(threshold) - producer_allocations[producer_allocations < threshold]
    if shortfalls.size == 0:
        # np.mean([]) also yields NaN but emits RuntimeWarnings; return it quietly.
        return float("nan")
    return float(np.mean(shortfalls))

def compute_mean_consumer_utility(
    allocation: np.ndarray,
    rel_matrix: np.ndarray,
) -> float:
    """Allocation-weighted average of `rel_matrix`.

    Args:
        allocation: Allocation weights, broadcast-compatible with `rel_matrix`.
        rel_matrix: Values being averaged.

    Returns:
        Sum of `allocation * rel_matrix` divided by the sum of `allocation`.
    """
    weighted_relevance = np.multiply(allocation, rel_matrix)
    total_allocated = allocation.sum()
    return weighted_relevance.sum() / total_allocated

def compute_perc_cons_above_threshold(
    allocation: np.ndarray,
    k_rec: int
) -> float:
    """Return the fraction of rows whose total is strictly greater than `k_rec`.

    Args:
        allocation: 2-D allocation array; row totals are taken over axis 1.
        k_rec: Value each row total is compared against.

    Returns:
        Number of row totals above `k_rec` divided by the number of rows.
    """
    per_consumer_total = allocation.sum(axis=1)
    is_above = per_consumer_total > k_rec
    return is_above.sum() / len(per_consumer_total)

def compute_perc_cons_below_threshold(
    allocation: np.ndarray,
    k_rec: int
) -> float:
    """Return the fraction of rows whose total is strictly less than `k_rec`.

    Args:
        allocation: 2-D allocation array; row totals are taken over axis 1.
        k_rec: Value each row total is compared against.

    Returns:
        Number of row totals below `k_rec` divided by the number of rows.
    """
    per_consumer_total = allocation.sum(axis=1)
    is_below = per_consumer_total < k_rec
    return is_below.sum() / len(per_consumer_total)

def compute_mean_cons_above_threshold(
    allocation: np.ndarray,
    k_rec: int
) -> float:
    """Mean excess over `k_rec` among rows whose total is strictly above it.

    The excess of a row is its total (axis 1 sum) minus `k_rec`, matching the
    `A_C - k` label this metric is reported under. The previous implementation
    computed `k_rec - total`, which yields the negated value.

    Args:
        allocation: 2-D allocation array; row totals are taken over axis 1.
        k_rec: Rows with a total above this value are included.

    Returns:
        The mean excess, or NaN when no row total exceeds `k_rec`.
    """
    consumer_allocations = allocation.sum(axis=1)
    excess = consumer_allocations[consumer_allocations > k_rec] - k_rec
    if excess.size == 0:
        # np.mean([]) also yields NaN but emits RuntimeWarnings; return it quietly.
        return float("nan")
    return float(np.mean(excess))


def compute_mean_cons_below_threshold(
    allocation: np.ndarray,
    k_rec: int
) -> float:
    """Mean shortfall below `k_rec` among rows whose total is strictly below it.

    The shortfall of a row is `k_rec` minus its total (axis 1 sum).

    Args:
        allocation: 2-D allocation array; row totals are taken over axis 1.
        k_rec: Rows with a total below this value are included.

    Returns:
        The mean shortfall, or NaN when no row total is below `k_rec`.
    """
    consumer_allocations = allocation.sum(axis=1)
    shortfalls = k_rec - consumer_allocations[consumer_allocations < k_rec]
    if shortfalls.size == 0:
        # np.mean([]) also yields NaN but emits RuntimeWarnings; return it quietly.
        return float("nan")
    return float(np.mean(shortfalls))

def compute_std_between_groups(allocations: np.ndarray, group_assignments: np.ndarray) -> float:
    """Standard deviation, across groups, of each group's mean column total.

    For every distinct label in `group_assignments`, the rows of `allocations`
    carrying that label are summed over axis 0 and the resulting column totals
    are averaged; the standard deviation of those per-group averages is returned.

    NOTE(review): the result depends only on allocation totals, not on any
    relevance values, although the caller reports it under a utility label.
    Confirm this is the intended metric.

    Args:
        allocations: 2-D allocation array with one row per entry of `group_assignments`.
        group_assignments: Group label for every row of `allocations`.

    Returns:
        `np.std` of the per-group means.
    """
    group_labels, membership = np.unique(group_assignments, return_inverse=True)

    per_group_means = [
        np.mean(allocations[membership == group_idx].sum(axis=0))
        for group_idx in range(len(group_labels))
    ]

    return np.std(per_group_means)

In [6]:
# Run every method on each sampled instance and collect one result row per repetition.
results = []

for size in [500]:
    print(f"Running for size {size}")
    for k_rec in [5]:
        print(f"Running for k_rec {k_rec}")
        for run in range(3):
            # The repetition index is passed as the sampling seed.
            rel_matrix_sampled, consumer_ids, group_assignments = sample_data_for_group(
                n_consumers=size,
                n_producers=size,
                groups_map=GROUPS_MAP,
                group_key=GROUP_KEY,
                data=REL_MATRIX,
                naive_sampling=True,
                seed=run,
            )
            producer_max_min_utility, _ = compute_producer_optimal_solution(
                rel_matrix=rel_matrix_sampled,
                k_rec=k_rec,
                solver=SOLVER,
            )
            # Threshold used by both producer-side metrics below.
            producer_threshold = producer_max_min_utility * GAMMA

            # Arguments shared by the exact, relaxed and gradient-based CVaR solvers.
            cvar_kwargs = dict(
                rel_matrix=rel_matrix_sampled,
                k_rec=k_rec,
                producer_max_min_utility=producer_max_min_utility,
                gamma=GAMMA,
                group_assignments=group_assignments,
                alpha=ALPHA,
            )

            start_time = time()
            _, optimal_allocation = _compute_consumer_optimal_solution_cvar(
                **cvar_kwargs,
                solver=SOLVER,
            )
            optimal_time = time() - start_time

            start_time = time()
            _, _base_cvar_allocation = _compute_consumer_optimal_solution_cvar_relaxed_base(
                **cvar_kwargs,
                solver=SOLVER,
            )
            relaxed_time = time() - start_time

            # Rounding is applied outside the timed section.
            rounded_cvar_allocation = _compute_consumer_optimal_solution_cvar_relaxed_naive_rounding(
                allocations=_base_cvar_allocation,
                k_rec=k_rec,
            )
            topk_cvar_allocation = _compute_consumer_optimal_solution_cvar_relaxed_topk_rounding(
                allocations=_base_cvar_allocation,
                k_rec=k_rec,
            )

            start_time = time()
            # Unlike the two solvers above, the allocation is unpacked from the first position here.
            _base_cvar_grad_allocations, _ = compute_consumer_optimal_solution_cvar_grad(
                **cvar_kwargs,
                hidden_dim=200,
                max_epochs=30000,
                verbose=False,
                max_patience=10,
            )
            grad_time = time() - start_time

            rounded_cvar_grad_allocation = _compute_consumer_optimal_solution_cvar_relaxed_naive_rounding(
                allocations=_base_cvar_grad_allocations,
                k_rec=k_rec,
            )
            topk_cvar_grad_allocation = _compute_consumer_optimal_solution_cvar_relaxed_topk_rounding(
                allocations=_base_cvar_grad_allocations,
                k_rec=k_rec,
            )

            # Insertion order fixes the column order of the resulting DataFrame.
            allocations_by_method = {
                "Baseline": optimal_allocation,
                "LP Round": rounded_cvar_allocation,
                "LP TopK": topk_cvar_allocation,
                "Grad Round": rounded_cvar_grad_allocation,
                "Grad TopK": topk_cvar_grad_allocation,
            }

            # Keys are LaTeX labels; raw strings keep the backslashes without
            # relying on invalid escape sequences.
            stats = {}
            for method, allocation in allocations_by_method.items():
                stats[r"$\mathbb{E}[{U_{C}}]$_" + method] = compute_mean_consumer_utility(
                    allocation=allocation,
                    rel_matrix=rel_matrix_sampled,
                )
                stats[r"$\sigma(\mathbb{E}[U_{G}])$_" + method] = compute_std_between_groups(
                    allocations=allocation,
                    group_assignments=group_assignments,
                )
                stats[r"$\#(U_{P} < U_{P\min})/N_{P}$_" + method] = compute_perc_prod_below_threshold(
                    allocation=allocation,
                    threshold=producer_threshold,
                )
                stats[r"$\sum(V - U_{P})/N_{P^{b}}$_" + method] = compute_mean_prod_below_threshold(
                    allocation=allocation,
                    threshold=producer_threshold,
                )
                stats[r"$\#(A_{C} > k)/N_{C}$_" + method] = compute_perc_cons_above_threshold(
                    allocation=allocation,
                    k_rec=k_rec,
                )
                stats[r"$\sum(A_{C} - k)/N_{A_{c}^{a}}$_" + method] = compute_mean_cons_above_threshold(
                    allocation=allocation,
                    k_rec=k_rec,
                )
                stats[r"$\#(A_{C} < k)/N_{C}$_" + method] = compute_perc_cons_below_threshold(
                    allocation=allocation,
                    k_rec=k_rec,
                )
                stats[r"$\sum(k - A_{C})/N_{A_{c}^{b}}$_" + method] = compute_mean_cons_below_threshold(
                    allocation=allocation,
                    k_rec=k_rec,
                )

            results.append({
                # NOTE(review): malformed LaTeX label; it is renamed in a later cell before aggregation.
                "$N_{c}$, N_{p}$}": size,
                "k": k_rec,
                "optimal_time": optimal_time,
                "relaxed_time": relaxed_time,
                "grad_time": grad_time,
                **stats,
            })

  stats["$\mathbb{E}[{U_{C}}]$_" + method] = compute_mean_consumer_utility(
  stats["$\sigma(\mathbb{E}[U_{G}])$_" + method] = compute_std_between_groups(
  stats["$\#(U_{P} < U_{P\min})/N_{P}$_" + method] = compute_perc_prod_below_threshold(
  stats["$\sum(V - U_{P})/N_{P^{b}}$_" + method] = compute_mean_prod_below_threshold(
  stats["$\#(A_{C} > k)/N_{C}$_" + method] = compute_perc_cons_above_threshold(
  stats["$\sum(A_{C} - k)/N_{A_{c}^{a}}$_" + method] = compute_mean_cons_above_threshold(
  stats["$\#(A_{C} < k)/N_{C}$_" + method] = compute_perc_cons_below_threshold(
  stats["$\sum(k - A_{C})/N_{A_{c}^{b}}$_" + method] = compute_mean_cons_below_threshold(


Running for size 500
Running for k_rec 5
Set parameter WLSAccessID
Set parameter WLSSecret
Set parameter LicenseID to value 2653936
Academic license 2653936 - for non-commercial use only - registered to do___@student.uva.nl


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = ret.dtype.type(ret / rcount)


In [12]:
# Turn the list of per-run result dicts into a DataFrame (one row per appended dict).
df = pd.DataFrame(results)

In [2]:
# Load results from "output.csv" instead of re-running the experiment.
# NOTE(review): this rebinds `df`, replacing the frame built from `results` in the
# preceding cell. Its execution count (2) is lower than that cell's (12), so in the
# recorded session it ran first; on a top-to-bottom run it wins. No cell visible
# here writes output.csv — confirm which source of `df` is intended.
df = pd.read_csv("output.csv")

In [13]:
# Display the raw per-run results.
df

Unnamed: 0,"$N_{c}$, N_{p}$}",k,optimal_time,relaxed_time,grad_time,$\mathbb{E}[{U_{C}}]$_Baseline,$\sigma(\mathbb{E}[U_{G}])$_Baseline,$\#(U_{P} < U_{P\min})/N_{P}$_Baseline,$\sum(V - U_{P})/N_{P^{b}}$_Baseline,$\#(A_{C} > k)/N_{C}$_Baseline,...,$\#(A_{C} < k)/N_{C}$_Grad Round,$\sum(k - A_{C})/N_{A_{c}^{b}}$_Grad Round,$\mathbb{E}[{U_{C}}]$_Grad TopK,$\sigma(\mathbb{E}[U_{G}])$_Grad TopK,$\#(U_{P} < U_{P\min})/N_{P}$_Grad TopK,$\sum(V - U_{P})/N_{P^{b}}$_Grad TopK,$\#(A_{C} > k)/N_{C}$_Grad TopK,$\sum(A_{C} - k)/N_{A_{c}^{a}}$_Grad TopK,$\#(A_{C} < k)/N_{C}$_Grad TopK,$\sum(k - A_{C})/N_{A_{c}^{b}}$_Grad TopK
0,500,5,2.772978,3.201725,21.592036,0.94716,1.199342,0.0,,0.0,...,0.026,1.076923,0.923718,1.199342,0.042,1.0,0.0,,0.0,
1,500,5,2.83756,3.158102,20.307686,0.948063,1.199342,0.0,,0.0,...,0.018,1.222222,0.923025,1.199342,0.044,1.0,0.0,,0.0,
2,500,5,2.711179,3.282703,26.394739,0.944104,1.199342,0.0,,0.0,...,0.044,1.090909,0.920094,1.199342,0.048,1.0,0.0,,0.0,


In [17]:
# Display the results again.
# NOTE(review): the stored output already shows the renamed size column, so this
# cell was executed after the rename cell that follows it in the notebook.
df

Unnamed: 0,"$N_{C}, N_{P}$",k,optimal_time,relaxed_time,grad_time,$\mathbb{E}[{U_{C}}]$_Baseline,$\sigma(\mathbb{E}[U_{G}])$_Baseline,$\#(U_{P} < U_{P\min})/N_{P}$_Baseline,$\sum(V - U_{P})/N_{P^{b}}$_Baseline,$\#(A_{C} > k)/N_{C}$_Baseline,...,$\#(A_{C} < k)/N_{C}$_Grad Round,$\sum(k - A_{C})/N_{A_{c}^{b}}$_Grad Round,$\mathbb{E}[{U_{C}}]$_Grad TopK,$\sigma(\mathbb{E}[U_{G}])$_Grad TopK,$\#(U_{P} < U_{P\min})/N_{P}$_Grad TopK,$\sum(V - U_{P})/N_{P^{b}}$_Grad TopK,$\#(A_{C} > k)/N_{C}$_Grad TopK,$\sum(A_{C} - k)/N_{A_{c}^{a}}$_Grad TopK,$\#(A_{C} < k)/N_{C}$_Grad TopK,$\sum(k - A_{C})/N_{A_{c}^{b}}$_Grad TopK
0,500,5,2.772978,3.201725,21.592036,0.94716,1.199342,0.0,,0.0,...,0.026,1.076923,0.923718,1.199342,0.042,1.0,0.0,,0.0,
1,500,5,2.83756,3.158102,20.307686,0.948063,1.199342,0.0,,0.0,...,0.018,1.222222,0.923025,1.199342,0.044,1.0,0.0,,0.0,
2,500,5,2.711179,3.282703,26.394739,0.944104,1.199342,0.0,,0.0,...,0.044,1.090909,0.920094,1.199342,0.048,1.0,0.0,,0.0,


In [14]:
# Normalise the malformed size-column label written by the experiment cell.
# Renaming by explicit name (not by position) keeps this cell a no-op on re-run and
# avoids relabelling an unrelated leading column, e.g. a saved index column if `df`
# was loaded from CSV.
df = df.rename(columns={"$N_{c}$, N_{p}$}": "$N_{C}, N_{P}$"})

In [15]:
# Column-name building blocks shared by the aggregation helpers below.
# Result columns are named "<metric>_<method>"; the metric labels are LaTeX, so raw
# strings are used to keep the backslashes without invalid escape sequences.

GROUP_BY_COLS = ["$N_{C}, N_{P}$", "k"]
METHODS = ["Baseline", "LP Round", "LP TopK", "Grad Round", "Grad TopK"]
BASE_METRICS = [
    r"$\mathbb{E}[{U_{C}}]$",
    r"$\sigma(\mathbb{E}[U_{G}])$",
    r"$\#(U_{P} < U_{P\min})/N_{P}$",
    r"$\sum(V - U_{P})/N_{P^{b}}$",
    r"$\#(A_{C} < k)/N_{C}$",
    r"$\sum(k - A_{C})/N_{A_{c}^{b}}$",
]

def aggregate_wide(df: pd.DataFrame):
    """Aggregate every "<metric>_<method>" column to its mean and std per group.

    Rows are grouped by GROUP_BY_COLS. The returned frame has the group columns
    followed by "<metric>_<method>_mean" / "<metric>_<method>_std" columns for
    every combination of METHODS and BASE_METRICS.
    """
    metric_columns = [f"{base}_{m}" for m in METHODS for base in BASE_METRICS]

    summary = df.groupby(GROUP_BY_COLS).agg(
        {column: ["mean", "std"] for column in metric_columns}
    )
    # Collapse the (column, statistic) MultiIndex into single "<column>_<statistic>" names.
    summary.columns = ["_".join(pair) for pair in summary.columns]
    return summary.reset_index()

# Wide summary: one row per group, one mean/std column pair per metric and method.
# NOTE(review): `agg_df` is not used again in the visible cells.
agg_df = aggregate_wide(df)

def aggregate_long(df: pd.DataFrame):
    """Aggregate metric columns to mean/std in long (tidy) form.

    The "<metric>_<method>" columns built from METHODS and BASE_METRICS are
    melted into rows, split back into separate "metric" and "Method" columns,
    and grouped by GROUP_BY_COLS + ["Method", "metric"].

    Returns:
        Frame with the group columns, "Method", "metric", "mean" and "std".
    """
    metric_columns = [f"{base}_{m}" for m in METHODS for base in BASE_METRICS]
    tall = df.melt(
        id_vars=GROUP_BY_COLS,
        value_vars=metric_columns,
        var_name="metric_method",
        value_name="value",
    )

    # The method name is the suffix after the last "_" that matches a known method;
    # anchoring on the METHODS alternation keeps underscores inside the metric label intact.
    method_pattern = "|".join(re.escape(m) for m in METHODS)
    extract_re = rf"(?P<metric>.*)_(?P<Method>{method_pattern})$"
    parts = tall["metric_method"].str.extract(extract_re)
    tall = tall.assign(metric=parts["metric"], Method=parts["Method"])

    grouped_values = tall.groupby(GROUP_BY_COLS + ["Method", "metric"])["value"]
    return grouped_values.agg(mean="mean", std="std").reset_index()

# Long summary: one row per (group, method, metric) with its mean and std.
agg_long_df = aggregate_long(df)



def fmt(x, y):
    """Format a mean/std pair as a LaTeX table cell.

    Args:
        x: Mean value; a missing value renders as "-".
        y: Standard deviation; a missing value is rendered as 0.

    Returns:
        "-" when `x` is missing, otherwise "<x>$\\scriptstyle{\\pm<y>}$" with both
        numbers formatted to two decimals.
    """
    if pd.isna(x):
        return "-"
    if pd.isna(y):
        y = 0
    # Raw f-string: keeps the LaTeX backslashes without invalid escape sequences.
    return rf"{x:.2f}$\scriptstyle{{\pm{y:.2f}}}$"

# 1) combine mean and std into one display string per row
agg_long_df["mean_std"] = [
    fmt(mean, std)
    for mean, std in zip(agg_long_df["mean"], agg_long_df["std"])
]

# 2) pivot so metrics become columns
index_cols = GROUP_BY_COLS + ["Method"]
pivot = (
    agg_long_df
    .pivot_table(
        index=index_cols,
        columns="metric",
        values="mean_std",
        # values are preformatted strings, one per cell, so just take that one
        aggfunc="first",
    )
    # order the columns; reusing BASE_METRICS keeps this in sync with the aggregation
    .reindex(columns=BASE_METRICS)
    # flatten the column Index
    .rename_axis(None, axis=1)
    .reset_index()
)

# Left-align the index columns, right-align every metric column.
latex = pivot.to_latex(
    index=False,
    na_rep="",
    column_format="l" * len(index_cols) + "r" * (pivot.shape[1] - len(index_cols)),
    longtable=False,
    caption="Performance by method and metric",
    label="tab:results",
    escape=False,  # cells and headers already contain LaTeX markup
    bold_rows=False,
    multicolumn=True,
)


  "$\mathbb{E}[{U_{C}}]$",
  "$\sigma(\mathbb{E}[U_{G}])$",
  "$\#(U_{P} < U_{P\min})/N_{P}$",
  "$\sum(V - U_{P})/N_{P^{b}}$",
  "$\#(A_{C} < k)/N_{C}$",
  "$\sum(k - A_{C})/N_{A_{c}^{b}}$",
  return f"{x:.2f}$\scriptstyle{{\pm{y:.2f}}}$"
  return f"{x:.2f}$\scriptstyle{{\pm{y:.2f}}}$"
  "$\mathbb{E}[{U_{C}}]$",
  "$\sigma(\mathbb{E}[U_{G}])$",
  "$\#(U_{P} < U_{P\min})/N_{P}$",
  "$\sum(V - U_{P})/N_{P^{b}}$",
  "$\#(A_{C} < k)/N_{C}$",
  "$\sum(k - A_{C})/N_{A_{c}^{b}}$",


In [11]:
# Wrap the tabular in \resizebox{\columnwidth}{!}{...} so the table fits the column width.
# The guard makes the cell idempotent: re-running it without regenerating `latex`
# would otherwise nest a second \resizebox around the first.
if r"\resizebox" not in latex:
    latex = latex.replace(
        r"\begin{tabular}",
        r"\resizebox{\columnwidth}{!}{\begin{tabular}",
    ).replace(
        r"\end{tabular}",
        r"\end{tabular}}",
    )
print(latex)

\begin{table}
\caption{Performance by method and metric}
\label{tab:results}
\resizebox{\columnwidth}{!}{\begin{tabular}{lllrrrrrr}
\toprule
$N_{C}, N_{P}$ & k & Method & $\mathbb{E}[{U_{C}}]$ & $\sigma(\mathbb{E}[U_{G}])$ & $\#(U_{P} < U_{P\min})/N_{P}$ & $\sum(V - U_{P})/N_{P^{b}}$ & $\#(A_{C} < k)/N_{C}$ & $\sum(k - A_{C})/N_{A_{c}^{b}}$ \\
\midrule
500 & 5 & Baseline & 0.95$\scriptstyle{\pm0.00}$ & 1.20$\scriptstyle{\pm0.00}$ & 0.00$\scriptstyle{\pm0.00}$ & - & 0.00$\scriptstyle{\pm0.00}$ & - \\
500 & 5 & Grad Round & 0.92$\scriptstyle{\pm0.00}$ & 1.19$\scriptstyle{\pm0.00}$ & 0.07$\scriptstyle{\pm0.02}$ & 1.00$\scriptstyle{\pm0.00}$ & 0.03$\scriptstyle{\pm0.01}$ & 1.13$\scriptstyle{\pm0.08}$ \\
500 & 5 & Grad TopK & 0.92$\scriptstyle{\pm0.00}$ & 1.20$\scriptstyle{\pm0.00}$ & 0.04$\scriptstyle{\pm0.00}$ & 1.00$\scriptstyle{\pm0.00}$ & 0.00$\scriptstyle{\pm0.00}$ & - \\
500 & 5 & LP Round & 0.95$\scriptstyle{\pm0.00}$ & 1.19$\scriptstyle{\pm0.00}$ & 0.05$\scriptstyle{\pm0.01}$ & 1.0