In [None]:
import numpy as np
import json
import cvxpy as cp
from pathlib import Path
from collections import defaultdict
import pandas as pd
import math

from src.problems.utils import sample_data_for_group
from src.problems.problems import compute_producer_optimal_solution, _compute_consumer_optimal_solution_cvar_relaxed_base, _compute_consumer_optimal_solution_cvar

In [2]:
# Root folder for every data file this notebook reads or writes. It is a
# relative path, so it is resolved against the kernel's working directory.
DATA_PATH_ROOT = Path("../../data")

In [3]:
# Load the two input artefacts from DATA_PATH_ROOT.
# REL_MATRIX is the array stored in amazon_predictions.npy
# (presumably consumer x producer relevance scores -- TODO confirm axis order).
with open(DATA_PATH_ROOT / "amazon_predictions.npy", "rb") as f:
    REL_MATRIX = np.load(f)

# GROUPS_MAP is the parsed content of amazon_user_groups.json.
# The encoding is pinned to UTF-8: without it, open() falls back to the
# platform locale, so the same file could decode differently (or fail) on
# another machine.
with open(DATA_PATH_ROOT / "amazon_user_groups.json", "r", encoding="utf-8") as f:
    GROUPS_MAP = json.load(f)

In [4]:
# Notebook-wide configuration.
# NOTE(review): N_CONSUMERS / N_PRODUCERS are not referenced by any cell
# visible here (the experiment grid defines its own sizes) -- confirm they are
# still needed.
N_CONSUMERS = 300
N_PRODUCERS = 300
GROUP_KEY = "top_category"  # forwarded as `group_key` when sampling
K_REC = 10  # default k_rec for experiments that do not sweep it
SOLVER = cp.SCIP  # cvxpy solver handed to the optimisation helpers

In [5]:
def get_sample_and_utils(
    n_consumers,
    n_producers,
    groups_map,
    group_key,
    data,
    solver,
    seed,
    k_rec,
):
    """Sample a sub-problem and compute the producer-side optimum on it.

    Thin wrapper: calls ``sample_data_for_group`` (always with
    ``naive_sampling=True``) and then ``compute_producer_optimal_solution``
    on the sampled relevance matrix.

    Args:
        n_consumers, n_producers, groups_map, group_key, data, seed:
            forwarded unchanged to ``sample_data_for_group``.
        solver, k_rec: forwarded unchanged to
            ``compute_producer_optimal_solution``.

    Returns:
        Tuple ``(rel_matrix_sampled, consumer_ids, group_assignments,
        producer_max_min_utility)``: the three values produced by the sampler,
        followed by the first element of the producer solver's result (its
        second element is discarded).
    """
    sampled = sample_data_for_group(
        n_consumers=n_consumers,
        n_producers=n_producers,
        groups_map=groups_map,
        group_key=group_key,
        data=data,
        naive_sampling=True,
        seed=seed,
    )
    rel_matrix_sampled, consumer_ids, group_assignments = sampled

    # Only the first element of the solver's 2-tuple is needed here.
    producer_max_min_utility, _discarded = compute_producer_optimal_solution(
        rel_matrix=rel_matrix_sampled, k_rec=k_rec, solver=solver
    )

    return (
        rel_matrix_sampled,
        consumer_ids,
        group_assignments,
        producer_max_min_utility,
    )

In [6]:
def top_k_allocations(allocations, k_rec):
    """Binarise a 2-D score matrix by keeping the ``k_rec`` largest entries per row.

    Args:
        allocations: 2-D numpy array; the selection is made independently
            for each row.
        k_rec: number of entries to select in each row. Values larger than the
            number of columns select the whole row; ``0`` selects nothing.

    Returns:
        Array with the same shape and dtype as ``allocations`` holding 1 at the
        selected positions and 0 everywhere else.

    Raises:
        ValueError: if ``k_rec`` is negative.
    """
    if k_rec < 0:
        raise ValueError(f"k_rec must be non-negative, got {k_rec}")

    alls = np.zeros_like(allocations)
    if k_rec == 0:
        # Guard needed because `[:, -0:]` is `[:, 0:]`, i.e. every column,
        # which would mark the whole matrix as selected.
        return alls

    # argsort is ascending, so the last k_rec indices are the largest entries.
    idxs = allocations.argsort(axis=1)[:, -k_rec:]
    # Column vector of row numbers broadcasts against idxs (rows x k_rec).
    alls[np.arange(allocations.shape[0])[:, None], idxs] = 1

    return alls

def naive_allocation(allocations):
    """Convert fractional allocations to integers by rounding, then truncating.

    Values are first rounded to ONE decimal place and then cast to ``int``
    (which drops the fractional part), so an entry becomes 1 only if it is at
    least about 0.95; e.g. 0.94 -> 0.9 -> 0 and 0.96 -> 1.0 -> 1.

    NOTE(review): if round-to-nearest-integer was intended, the ``decimals=1``
    argument is a bug (0.5-0.94 currently map to 0) -- confirm the intent.
    """
    one_decimal = np.round(allocations, decimals=1)
    return one_decimal.astype(int)

In [None]:
def compute_correlation(a, b):
    """Pearson correlation coefficient between two equally sized inputs.

    Both inputs are flattened first, so only the total number of elements has
    to match, not the shape.

    Args:
        a, b: array-likes with the same number of elements.

    Returns:
        The correlation as a float in [-1, 1], or ``nan`` when it is undefined
        (empty input, or either input has zero variance).

    Raises:
        ValueError: if the inputs hold a different number of elements.
    """
    a = np.asarray(a).ravel()
    b = np.asarray(b).ravel()

    # Without this check a single-element operand would silently broadcast
    # against the other one and yield a meaningless value.
    if a.size != b.size:
        raise ValueError(
            f"inputs must have the same number of elements, got {a.size} and {b.size}"
        )
    if a.size == 0:
        return np.nan

    a_centered = a - np.mean(a)
    b_centered = b - np.mean(b)

    numerator = np.sum(a_centered * b_centered)
    denominator = np.sqrt(np.sum(a_centered ** 2) * np.sum(b_centered ** 2))

    # A constant input has zero variance; return nan explicitly instead of
    # dividing 0 by 0 and emitting a RuntimeWarning.
    if denominator == 0:
        return np.nan

    return numerator / denominator

In [None]:
# Problem parameters shared by every run; both are passed to the CVaR solvers
# and copied into each result row. `gamma` also scales the producer optimum
# into the per-producer minimum used when scoring the rounded allocations.
gamma = 0.5
alpha = 0.95
default_k_rec = K_REC  # k_rec used by the sweeps that do not vary it

# Result rows, bucketed by the name of the rounding method that produced them.
results = defaultdict(list)

# One entry per experiment. The three value lists span a full grid, and every
# grid point is solved `runs` times.
experiments = [
    {
        "name": "vary_consumers",
        "n_consumers": [100, 250, 400, 600, 800, 1000],
        "n_producers": [100],
        "k_rec_vals": [default_k_rec],
        "runs": 1,
    },
    {
        "name": "vary_producers",
        "n_consumers": [100],
        "n_producers": [100, 250, 400, 600, 800, 1000],
        "k_rec_vals": [default_k_rec],
        "runs": 3,
    },
    {
        "name": "vary_k",
        "n_consumers": [300],
        "n_producers": [300],
        "k_rec_vals": [1, 2, 5, 10, 20, 30, 50],
        "runs": 3,
    },
]

# Slack added before truncating the producer optimum to an int: solvers report
# integral objectives as floats, and a value such as 2.9999999 must not be
# truncated to 2.
SOLVER_INT_TOL = 1e-6


def _shortfall_stats(per_producer_counts, min_util):
    """Count producers below `min_util` and the mean gap among those producers.

    Returns `(below, mean_diff)`; `mean_diff` is 0.0 when nobody is below.
    """
    short = per_producer_counts < min_util
    below = np.sum(short)
    mean_diff = np.mean(min_util - per_producer_counts[short]) if below > 0 else 0.0
    return below, mean_diff


for exp in experiments:
    # Each experiment is the full grid over its three value lists.
    n_consumers_list = exp["n_consumers"]
    n_producers_list = exp["n_producers"]
    k_rec_list       = exp["k_rec_vals"]
    name = exp['name']
    for n_consumers in n_consumers_list:
        for n_producers in n_producers_list:
            for k_rec in k_rec_list:
                for run in range(exp['runs']):
                    # Seed every run with its index -- including single-run
                    # experiments, which previously received seed=None
                    # (presumably unseeded sampling -- TODO confirm) and so
                    # could not be reproduced.
                    seed = run

                    # 1) sample a sub-problem and get the producer-side optimum
                    rel_matrix, consumer_ids, group_assignments, producer_max_min_utility = \
                        get_sample_and_utils(
                            n_consumers=n_consumers,
                            n_producers=n_producers,
                            groups_map=GROUPS_MAP,
                            group_key=GROUP_KEY,
                            data=REL_MATRIX,
                            solver=SOLVER,
                            seed=seed,
                            k_rec=k_rec,
                        )
                    # Truncate as before, but tolerate solver round-off just
                    # below an integer.
                    producer_max_min_utility = int(producer_max_min_utility + SOLVER_INT_TOL)

                    # 2) exact solution, used as the reference allocation.
                    # NOTE(review): unlike the relaxed solve below, this call
                    # does not pass `solver` -- confirm the default is intended.
                    # NOTE(review): `true_alloc` is correlated against
                    # per-producer totals below, so it is assumed to hold one
                    # value per producer -- confirm its shape.
                    _, true_alloc = _compute_consumer_optimal_solution_cvar(
                        rel_matrix=rel_matrix,
                        k_rec=k_rec,
                        producer_max_min_utility=producer_max_min_utility,
                        gamma=gamma,
                        group_assignments=group_assignments,
                        alpha=alpha,
                    )

                    # 3) relaxed solution whose entries still need rounding
                    problem_value, allocations = _compute_consumer_optimal_solution_cvar_relaxed_base(
                        rel_matrix, k_rec, producer_max_min_utility,
                        gamma, group_assignments,
                        alpha=alpha, solver=SOLVER
                    )
                    # Minimum number of recommendations each producer should get.
                    min_util = math.ceil(producer_max_min_utility * gamma)

                    # 4) round the relaxed solution two ways and total the
                    #    recommendations per producer (column sums):
                    #    top_k -- keep the k_rec largest entries of each row
                    #    round -- element-wise rounding (not a round-robin)
                    topk_alloc = top_k_allocations(allocations, k_rec).sum(axis=0)
                    round_alloc = naive_allocation(allocations).sum(axis=0)

                    # 5) score both roundings identically; one row per method
                    for method, per_producer in (('top_k', topk_alloc), ('round', round_alloc)):
                        corr = compute_correlation(per_producer, true_alloc)
                        below, mean_diff = _shortfall_stats(per_producer, min_util)
                        results[method].append({
                            'experiment': name,
                            'n_consumers': n_consumers,
                            'n_producers': n_producers,
                            'k_rec': k_rec,
                            'gamma': gamma,
                            'alpha': alpha,
                            'producers_below_threshold': below,
                            'mean_diff': mean_diff,
                            'correlation': corr
                        })

# Build one table per rounding method, label its rows with that method, then
# stack both tables and write them to a single CSV under DATA_PATH_ROOT.
method_frames = []
for method_name in ("top_k", "round"):
    method_frame = pd.DataFrame(results[method_name])
    method_frame["method"] = method_name
    method_frames.append(method_frame)

topk_df, round_df = method_frames
df = pd.concat([topk_df, round_df], axis=0)

# index=False: the row index only repeats per-method positions, so it is not saved.
df.to_csv(DATA_PATH_ROOT / "exps.csv", index=False)