In [None]:
#@title Install Gurobi
!pip install gurobipy

# Implementation with Multiprocessing (Process + Queue)

## Functions

In [None]:
# Libraries
import json
import multiprocessing
import operator
import os
import time
from collections import deque
from itertools import combinations

import numpy as np
import pandas as pd
from google.colab import drive
from gurobipy import GRB, Model
drive.mount('/content/drive')

In [None]:
### Helper functions

# Perform topological sort
# Returns a list of node indices in topological order. If a cycle is detected, returns an empty list.
def topological_sort(graph):
    """
    Order the nodes of a directed graph with Kahn's algorithm.

    graph: dict mapping each node to the list of its successors; every
           successor must itself be a key of the dict.

    returns a list of nodes in topological order, or an empty list when the
    graph contains a cycle (not every node could be emitted).
    """
    # Count incoming edges for every node.
    remaining = dict.fromkeys(graph, 0)
    for successors in graph.values():
        for node in successors:
            remaining[node] += 1

    # Nodes without predecessors can be emitted right away.
    ready = deque(node for node, count in remaining.items() if count == 0)
    result = []

    while ready:
        current = ready.popleft()
        result.append(current)
        # Emitting `current` releases one incoming edge of each successor.
        for succ in graph[current]:
            remaining[succ] -= 1
            if remaining[succ] == 0:
                ready.append(succ)

    # Any node left unemitted is part of a cycle.
    return result if len(result) == len(remaining) else []

# Get the ordering of products based on the sign representation of a polyhedron
# returns the topological sort (linear ordering) of the products
def get_product_ordering(n, poly_rep):
    """
    Derive the linear ordering of products implied by a polyhedron sign vector.

    n: Number of products
    poly_rep: A sign vector (+1, -1) with one entry per product pair, indexed
              in the order produced by combinations(range(n), 2).

    For the pair (s, t) a sign of +1 adds the edge s -> t, a sign of -1 adds
    the edge t -> s, and any other value adds no edge.

    returns a list, which is the topological ordering of the products based on
    the polyhedron representation (empty if the induced graph has a cycle).
    """
    successors = {product: [] for product in range(n)}
    for pair_pos, (s, t) in enumerate(combinations(range(n), 2)):
        sign = poly_rep[pair_pos]
        if sign == 1:
            successors[s].append(t)
        elif sign == -1:
            successors[t].append(s)
    return topological_sort(successors)

# Checks if a polyhedron, defined by poly_rep, is full-dimensional using a linear program (LP).
def full_dimensionality_test(model, poly_rep, a_array, m, tol=1e-6):
    """
    Check with a linear program whether the polyhedron described by poly_rep
    is full-dimensional.

    model: Gurobi model object. Its variables must be the x variables followed
           by a single epsilon variable, in that order, because the constraint
           matrix built here has one column per a_array column plus a final
           column for epsilon. run_experiment_multiprocessing builds it with
           the objective "maximize epsilon" and epsilon <= 1.
    poly_rep: A sign vector (+1, -1) defining the orientation of each hyperplane.
    a_array: The array of normal vectors for all hyperplanes, one row each.
    m: Number of hyperplanes.
    tol: Objective values at or below this threshold are treated as zero.
         The default matches Gurobi's default feasibility tolerance.

    returns True if the polyhedron is full-dimensional, False otherwise.
    """
    # Constraint per hyperplane i:  -poly_rep[i] * (a_i . x) + epsilon <= 0.
    # Row-wise scaling via broadcasting gives the same matrix as
    # -diag(poly_rep) @ a_array without materialising an m x m diagonal.
    signs = np.asarray(poly_rep, dtype=float)
    A_coeff = -(signs[:, None] * np.asarray(a_array, dtype=float))
    A_coeff_full = np.hstack([A_coeff, np.ones((m, 1))])

    # Work on a copy so the shared base model is never modified.
    model_copy = model.copy()
    try:
        model_copy.addMConstr(A_coeff_full, model_copy.getVars(), GRB.LESS_EQUAL, np.zeros(m))
        model_copy.update()
        model_copy.optimize()

        # Not full-dimensional if the LP is infeasible or no strictly positive
        # slack epsilon exists. The objective is compared against a tolerance
        # because an LP solver does not guarantee an exact 0.0.
        if model_copy.status == GRB.INFEASIBLE:
            return False
        if model_copy.status == GRB.OPTIMAL and model_copy.ObjVal <= tol:
            return False
        # NOTE(review): any other solver status is still reported as
        # full-dimensional, as before - confirm this is the intended fallback.
        return True
    finally:
        # Release the solver resources held by the per-call copy.
        model_copy.dispose()

# Generates the sign vector for a neighboring polyhedron by flipping the sign corresponding to the hyperplane between two adjacent items in the ordering.
def get_new_poly_rep(poly_rep, ordering, first_pos, a_array_index):
    """
    Build the sign vector of the neighbouring polyhedron reached by crossing
    the hyperplane between two adjacent products of the ordering.

    poly_rep (array): A sign vector (+1, -1) defining the orientation of each hyperplane.
    ordering (list): The current ordering of products.
    first_pos: Position of the first product of the adjacent pair.
    a_array_index: A mapping from a product pair (i, j) to its hyperplane index.

    returns a copy of poly_rep with the sign of that hyperplane negated; the
    input vector is left untouched.
    """
    adjacent_pair = (ordering[first_pos], ordering[first_pos + 1])
    hyperplane = a_array_index[adjacent_pair]

    flipped = poly_rep.copy()
    flipped[hyperplane] = -flipped[hyperplane]
    return flipped

# Generates a new product ordering by swapping two adjacent elements.
def get_new_ordering(ordering, first_pos):
    """
    Return a copy of `ordering` with the elements at first_pos and
    first_pos + 1 exchanged.

    When first_pos does not address an adjacent pair inside the ordering, a
    message is printed and None is returned. The input is never modified.
    """
    last_valid = len(ordering) - 2
    if not 0 <= first_pos <= last_valid:
        print("Invalid index")
        return None

    swapped = ordering.copy()
    left, right = swapped[first_pos], swapped[first_pos + 1]
    swapped[first_pos], swapped[first_pos + 1] = right, left
    return swapped

# Converts a polyhedron sign vector (e.g., [1, -1, 1, 1]) into a compact integer bitmask.
def polyhedron_to_bitmask(arr: np.ndarray) -> int:
    """
    Encode a sign vector as an integer: bit k is set exactly when arr[k] == -1.

    Example: [1, -1, 1, 1] -> 0b0010 == 2.
    """
    # Each position contributes a distinct power of two, so summing the
    # selected powers is the same as OR-ing them together.
    return sum(1 << position for position, sign in enumerate(arr) if sign == -1)

In [None]:
# Sets up a complete problem instance with n products.
# Includes generating random revenues, costs, and attractiveness values, and defining the multi-objective pseudo-revenue vectors and hyperplanes.
def initialize_parameters(n, rev_range, c_range, v_range, seed):
    """
    Build a random problem instance with n products.

    n: Number of products.
    rev_range, c_range, v_range: (low, high) bounds for the uniform draws of
        revenues, costs and attractiveness values.
    seed: Seed passed to np.random.seed; this reseeds NumPy's global generator,
        so later np.random calls made by the caller are affected as well.

    returns a dict with (among others)
        'n', 'm', 'd'        : product count, number of hyperplanes C(n, 2),
                               and pseudo-revenue dimension (3).
        'rev', 'cost', 'v'   : the draws reordered by descending revenue.
        'cost1', 'cost2'     : the cost vector split over two random groups of
                               products (zero outside the respective group).
        'coeff'              : (n, 3) matrix with rows [revenue, cost1, cost2].
        'a_array'            : (m, 3) array of hyperplane normals
                               coeff[i] - coeff[j], one per pair i < j.
        'a_array_index'      : maps both (i, j) and (j, i) to the row of
                               'a_array' belonging to that pair.
    """
    # Core parameters
    params = {}
    params['n'] = n
    params['m'] = n * (n - 1) // 2    # Number of hyperplanes = C(n,2), in exact integer arithmetic
    params['d'] = 3                   # Dimension of pseudo-revenue vectors (number of objectives)
    params['half_n'] = n // 2         # Half the products for cost splitting

    # Generate random parameters (the draw order is part of the reproducible stream)
    np.random.seed(seed)
    params['rev_Init'] = np.random.uniform(*rev_range, n)   # Raw revenue vector
    params['cost_Init'] = np.random.uniform(*c_range, n)    # Raw cost vector
    params['v_Init'] = np.random.uniform(*v_range, n)       # Raw attractiveness vector

    # Sort by descending revenue, keeping cost and attractiveness aligned
    sort_idx = params['rev_Init'].argsort()[::-1]
    params['rev'] = params['rev_Init'][sort_idx]
    params['cost'] = params['cost_Init'][sort_idx]
    params['v'] = params['v_Init'][sort_idx]
    params['sort_indices'] = sort_idx

    # Split the products randomly into two cost groups
    permuted_idx = np.random.permutation(n)
    params['group1_idx'] = permuted_idx[:params['half_n']]
    params['group2_idx'] = permuted_idx[params['half_n']:]

    # Each cost array keeps the cost of its own group and is zero elsewhere
    params['cost1'] = np.zeros_like(params['cost'])
    params['cost2'] = np.zeros_like(params['cost'])
    params['cost1'][params['group1_idx']] = params['cost'][params['group1_idx']]
    params['cost2'][params['group2_idx']] = params['cost'][params['group2_idx']]

    # Pseudo-revenue matrix, each row: [revenue, cost1, cost2] for a product
    params['coeff'] = np.column_stack((params['rev'], params['cost1'], params['cost2']))

    # Generate one hyperplane per product pair
    params['a_array'] = []
    params['a_array_index'] = {}
    for idx, (i, j) in enumerate(combinations(range(n), 2)):
        # Hyperplane normal = difference between pseudo-revenue vectors
        params['a_array'].append(params['coeff'][i] - params['coeff'][j])

        # Both orientations of the pair resolve to the same hyperplane row
        params['a_array_index'][(i, j)] = idx
        params['a_array_index'][(j, i)] = idx

    # Always yield an (m, d) matrix, also when there are no pairs (n < 2);
    # a bare np.array([]) would be 1-D and break downstream matrix products.
    params['a_array'] = np.array(params['a_array'], dtype=float).reshape(params['m'], params['d'])

    return params

In [None]:
# worker function for each parallel process.
def consumer(state_queue, visited_set, candidate_assortments, task_counter, num_consumers, a_array, a_array_index, m, model, full_d_count):
    """
    Worker loop of the parallel search; one instance runs in every process.

    state_queue: Queue of pending states, each a (poly_rep, ordering) tuple.
                 The plain integer -1 is the termination sentinel.
    visited_set: Shared dict keyed by polyhedron bitmask, marking tested states.
    candidate_assortments: Shared list collecting candidate assortments.
    task_counter: Shared counter of states that are queued or being processed.
    num_consumers: Number of worker processes (= number of sentinels to send).
    a_array, a_array_index, m: Hyperplane normals, pair -> hyperplane index
                 mapping and number of hyperplanes.
    model: Base Gurobi model handed to full_dimensionality_test.
    full_d_count: Shared counter of full-dimensional neighbours found.
    """
    while True:
        item = state_queue.get()
        # Real work items are tuples, so only the integer sentinel matches here.
        if isinstance(item, int) and item == -1:
            break

        poly_rep, ordering = item
        n = len(ordering)
        # Explore neighbors by swapping every adjacent pair in the current ordering.
        for first_pos in range(n - 1):
            neighbor_rep = get_new_poly_rep(poly_rep, ordering, first_pos, a_array_index)
            neighbor_rep_mask = polyhedron_to_bitmask(neighbor_rep)

            # Skip neighbors that were already tested to avoid redundant work.
            # NOTE(review): this check-then-set is not atomic across processes,
            # so two workers may occasionally test the same neighbor - confirm
            # whether the resulting duplicate counting is acceptable.
            if neighbor_rep_mask in visited_set:
                continue
            visited_set[neighbor_rep_mask] = True

            # Perform the expensive full-dimensionality test.
            if not full_dimensionality_test(model, neighbor_rep, a_array, m):
                continue

            new_ordering = get_new_ordering(ordering, first_pos)

            # Register the new task BEFORE it becomes visible on the queue.
            # If it were enqueued first, another worker could pick it up,
            # finish it and decrement the counter to 0 while this worker had
            # not yet counted it, broadcasting the termination sentinels
            # although work is still pending.
            with task_counter.get_lock():
                task_counter.value += 1
            state_queue.put((neighbor_rep, new_ordering))

            # From the new ordering, generate a candidate assortment.
            candidate_assortments.append(frozenset(new_ordering[:first_pos + 1]))
            with full_d_count.get_lock():
                full_d_count.value += 1

        # All neighbors of this state are checked: the task is finished.
        with task_counter.get_lock():
            task_counter.value -= 1
            # A counter of 0 means nothing is queued or in progress any more,
            # so the whole state space has been explored: stop every worker.
            if task_counter.value == 0:
                for _ in range(num_consumers):
                    state_queue.put(-1)

In [None]:
# Run the entire parallel breadth-first search of the polyhedron state space.
def run_experiment_multiprocessing(initial_points, num_consumers, a_array, a_array_index, n, m):
    """
    Run the parallel breadth-first search of the polyhedron state space.

    initial_points: List of initial points in the polyhedron state space.
    num_consumers: Number of parallel processes to run.
    a_array: Array of hyperplane normal vectors.
    a_array_index: Mapping from product pairs to hyperplane indices.
    n: Number of products.
    m: Number of hyperplanes.

    returns a dict with the visited bitmasks ("visited_set"), the set of
    candidate assortments, the counters "full_d_count", "product #",
    "visited_P_#", "candidate_assortments_#" and the wall-clock time
    "SPBFS_Total_time" in seconds.
    """
    start_time = time.time()
    with multiprocessing.Manager() as manager:
        state_queue = multiprocessing.Queue()
        visited_set = manager.dict()                      # Shared dictionary to track visited states.
        candidate_assortments = manager.list()            # Shared list to store candidate assortments.
        full_d_count = multiprocessing.Value('i', 0)      # Global counter for full-dimensional polyhedra found.
        task_counter = multiprocessing.Value('i', 0)      # Global counter for active tasks in the queue.

        # The LP needs one x variable per column of a_array. Fall back to 3
        # (the previously hard-coded value) when a_array is not two-dimensional.
        a_shape = np.shape(a_array)
        dim = a_shape[1] if len(a_shape) > 1 else 3

        # Base LP: maximize epsilon (capped at 1) over free x; the hyperplane
        # constraints are added per polyhedron in full_dimensionality_test.
        model = Model("LP")
        model.setParam("OutputFlag", 0)
        model.setParam('Presolve', 1)
        model.addMVar(dim, lb=-GRB.INFINITY, ub=GRB.INFINITY, name="x")
        epsilon = model.addVar(lb=-GRB.INFINITY, ub=1, name="epsilon")
        model.setObjective(epsilon, GRB.MAXIMIZE)
        model.update()

        for point in initial_points:
            # Identify the polyhedron containing the point, the product
            # ordering it induces, and its bitmask key.
            poly_rep = np.sign(np.dot(a_array, point))
            ordering = get_product_ordering(n, poly_rep)
            bit_mask = polyhedron_to_bitmask(poly_rep)

            # If this polyhedron has not been visited yet, mark it and queue it.
            if bit_mask not in visited_set:
                visited_set[bit_mask] = True
                state_queue.put((poly_rep, ordering))
                with task_counter.get_lock():
                    task_counter.value += 1

            # For every initial point (visited before or not), add the first k
            # products of its ordering (k = 1..n) as candidate assortments.
            current_elements = set()
            for element in ordering:
                current_elements.add(element)
                candidate_assortments.append(frozenset(current_elements))

        # Without any queued state no worker would ever bring the counter to 0
        # and send the sentinels, so every worker would block forever on
        # state_queue.get(). Send the sentinels up front in that case.
        if task_counter.value == 0:
            for _ in range(num_consumers):
                state_queue.put(-1)

        processes = []
        for _ in range(num_consumers):
            p = multiprocessing.Process(
                target=consumer,
                args=(state_queue, visited_set, candidate_assortments, task_counter, num_consumers, a_array, a_array_index, m, model, full_d_count)
            )
            processes.append(p)
            p.start()
        # Wait for all consumer processes to complete their work and terminate.
        for p in processes:
            p.join()

        # Copy the shared containers to local objects before the manager shuts down.
        visited_set_final = dict(visited_set)
        candidate_assortments_final = set(candidate_assortments)

        end_time = time.time()
        SPBFS_total_time = end_time - start_time
        return {
          "visited_set": visited_set_final,
          "candidate_assortments": candidate_assortments_final,
          "full_d_count": full_d_count.value,
          "product #": n,
          "visited_P_#": len(visited_set_final),
          "candidate_assortments_#": len(candidate_assortments_final),
          "SPBFS_Total_time": SPBFS_total_time                           # LP_total_time + Initialization_total_time = Total time of SPBFS
          }

In [None]:
def dict_to_vertical_df(d):
    """
    Lay a dictionary out as a DataFrame with one column per key.

      - A list or ndarray value is written top-down into its column and padded
        with None up to the common row count.
      - Any other value is treated as a scalar and repeated on every row.

    The row count is the length of the longest list/array value, and at
    least 1.

    Returns a pandas DataFrame.
    """
    sequence_types = (list, np.ndarray)

    # Row count: longest sequence value, never fewer than one row.
    row_count = max(
        [1] + [len(value) for value in d.values() if isinstance(value, sequence_types)]
    )

    columns = {}
    for name, value in d.items():
        if isinstance(value, sequence_types):
            cells = list(value)
            columns[name] = cells + [None] * (row_count - len(cells))
        else:
            columns[name] = [value] * row_count

    return pd.DataFrame(columns)

def expand_candidate_assortment(results, assortment_key="candidate_assortments"):
    """
    Expand a results dict into a DataFrame with one row per candidate assortment.

    results: dict whose entry under `assortment_key` is an iterable of
             assortments (an empty frozenset is used when the key is missing).
    assortment_key: name of that entry; it also names the assortment column.

    Every row repeats all other entries of `results` and holds one assortment,
    converted to a list, in the `assortment_key` column (placed last).

    Returns a pandas DataFrame.
    """
    assortments = results.get(assortment_key, frozenset())

    # Everything except the assortments themselves is repeated on each row.
    shared = {name: value for name, value in results.items() if name != assortment_key}

    records = [{**shared, assortment_key: list(members)} for members in assortments]
    return pd.DataFrame(records)

## Running

In [None]:
# Main
if __name__ == "__main__":
    google_drive_dir = "/content/drive/MyDrive/my_experiments"
    # Create the output folder up front so a missing directory cannot make
    # to_csv fail after an expensive search has already been run.
    os.makedirs(google_drive_dir, exist_ok=True)

    n = 10
    num_initial_points = 16
    num_consumers = multiprocessing.cpu_count()
    rev_range = (0.5, 1)                # Revenue value range
    c_range = (0.5, 1)                  # Cost value range
    v_range = (0, 1)                    # Attractiveness value range

    total_runs = 50                     # total number of independent experiments to run for the given n, each run uses a different random seed
    runs_per_group = 50                 # how many individual runs are in a single group
                                        # reduce the number and run with different `group_index` below if having timeout issues in Google Colab
    total_groups = total_runs // runs_per_group  # number of groups needed to cover all runs for the given n
    all_seeds = list(range(total_runs))

    # which slice of the group to run, MANUALLY CHANGE THIS VALUE TO RUN DIFFERENT GROUPS (to avoid timeout issues)
    group_index = 0                     # valid values: 0 .. total_groups - 1

    # use the group_index to select the specific slice of seeds for the current execution
    start = group_index * runs_per_group
    end = start + runs_per_group
    group_seeds = all_seeds[start:end]

    # Fields of the parameter dict and of the result dict that are exported.
    keys_to_extract_params = ['rev', 'cost', 'v', 'group1_idx', 'group2_idx', 'cost1', 'cost2']
    keys_to_extract_results = ['candidate_assortments', 'full_d_count', 'product #', 'visited_P_#', 'candidate_assortments_#', 'SPBFS_Total_time']

    for run_seed in group_seeds:
        # Initialize all parameters for the current run; the run index is the random seed.
        params = initialize_parameters(n, rev_range, c_range, v_range, seed=run_seed)
        a_array, a_array_index, m = params['a_array'], params['a_array_index'], params['m']

        # Generate random starting points (drawn from the global NumPy generator
        # that initialize_parameters just seeded) and run the parallel search.
        initial_points = [np.random.randn(3)*10 for _ in range(num_initial_points)]
        results = run_experiment_multiprocessing(initial_points, num_consumers, a_array, a_array_index, n, m)

        # Keep only the fields that are exported.
        params_record = {key: params[key] for key in keys_to_extract_params}
        results_record = {key: results[key] for key in keys_to_extract_results}

        # Convert the records into DataFrames formatted for CSV export.
        df_params = dict_to_vertical_df(params_record)
        df_results = expand_candidate_assortment(results_record)

        params_filename = f"{google_drive_dir}/n={n}_seed_{run_seed}_params.csv"
        results_filename = f"{google_drive_dir}/n={n}_seed_{run_seed}_results.csv"
        df_results.to_csv(results_filename, index=False)
        df_params.to_csv(params_filename, index=False)

    print("Done. Group", group_index, "saved to Google Drive.")

