In [1]:
import gurobipy as gp
from gurobipy import GRB
import networkx as nx
from networkx.algorithms import bipartite
import numpy as np
from collections import deque
from math import isclose
import random
import pickle
from time import time

## Input paramters

In [2]:
# input_file = f'./inputs/graph_n{n}_{conflictGraphType}_{mainProjectDist}_{secondaryProjectDist}.pickle'
# output_file = f'./results/graph_n{n}_lambda{lambdaToString}_{conflictGraphType}_{mainProjectDist}_{secondaryProjectDist}.pickle'
input_file = './bu_spark_2022/csDS519.pickle'

pipageSetting = "randomized"
# n = 100
# mainProjectDist = "trunct_exponential" # uniform or trunc_exponential
# secondaryProjectDist = "trunc_gaussian" # uniform or trunc_gaussian
# conflictGraphType = "friends" # friends or diversity
lambda_mul = 10
output_file = f'./bu_spark_results/csDS519_lambda{lambda_mul}.pickle'

times = dict() # dictionary to keep time for different stages
start = time()
# Read graph
with open(input_file, 'rb') as file:
    projects = pickle.load(file)
    capacities = pickle.load(file)
    numOfProjects = len(capacities)
    c = pickle.load(file)
    w = pickle.load(file)
    edges = pickle.load(file)
    assignment = pickle.load(file)

G = nx.Graph()
G.add_edges_from(edges)
nodes = list(G.nodes)
n = len(nodes)

lambda_ = lambda_mul * sum(w.values()) / n

print(f'number of nodes: {n}')
print(f'number of (conflict) edges: {len(edges)}')
print(f'number of projects: {numOfProjects}')
times["read_input"] = time() - start
# G_c = nx.complement(G) # friends graph
# nx.draw(G, pos=nx.spring_layout(G))

number of nodes: 28
number of (conflict) edges: 359
number of projects: 7


## Problem definition
The objective function is the following: \
$$
\max F(\mathbf{x}) = 
    \lambda \sum_{v \in V} \sum_{t=1}^k c_{vt} x_{vt}
    + \sum_{(u, v) \in E} w_{uv} (1 - \sum_{t=1}^k x_{ut}x_{vt})
$$
The constraints are:
- $\sum_{t=1}^k x_{ut} = 1$, for all $t \in [k]$
- $\sum_{u \in V} x_{ut} \leq p_t$, for all $u \in V$
- $x_{ut} \in \{0, 1\}$

In [3]:
start = time()

# Create a new model
m = gp.Model("quadratic")
m.Params.timeLimit = 120

# Create variables
X = dict()
for u in nodes:
    for p in projects:
        X[(u, p)] = m.addVar(vtype=GRB.BINARY, name=f"X({u},{p})")

# Linear (project preference) term
F1 = gp.LinExpr()
for u in nodes:
    for p in projects:
        if (u, p) in c:
            F1 += c[(u, p)] * X[(u, p)]
F1 *= lambda_
 
# max-cut term
F3 = dict()
F2 = gp.QuadExpr()
for e in edges:
    u = e[0]; v = e[1]
    F3[e] = gp.QuadExpr()
    for p in projects:
        F3[e] -=  X[(u, p)] * X[(v, p)]
    F3[e] += 1
    F3[e] *= w[(u, v)]
for e in edges:
    F2 += F3[e]
    
F = gp.QuadExpr()
F = F1 + F2
m.setObjective(F, GRB.MAXIMIZE)
    
# Add constraints

# Each student assigned to exactly one project
for u in nodes:
    expr = gp.LinExpr(numOfProjects*[1], [X[(u, t)] for t in projects])
    m.addConstr(expr == 1)
    
# Project max capacity constraints
for p in projects:
    expr = gp.LinExpr(n*[1], [X[(u, p)] for u in nodes])
    m.addConstr(expr <= capacities[p])
    
times["construct_model"] = time() - start

Set parameter GURO_PAR_SPECIAL
Set parameter TokenServer to value "sccsvc"
Set parameter TimeLimit to value 120


In [4]:
projects

['86839', '29c21', '14ebd', 'fc925', 'f5c70', '902c6', '63a0e']

In [5]:
start = time()
# Optimize model
m.optimize()

# Convert solution to dictionary format
x_opt = dict()
for u in nodes:
    for p in projects:
        x_opt[(u, p)] = X[(u, p)].X
        
print(f'Obj quadratic: {m.ObjVal}')
times["optimize_quad"] = time() - start

Gurobi Optimizer version 10.0.1 build v10.0.1rc0 (linux64)

CPU model: Intel(R) Xeon(R) Gold 6132 CPU @ 2.60GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 28 physical cores, 28 logical processors, using up to 28 threads

Optimize a model with 35 rows, 196 columns and 392 nonzeros
Model fingerprint: 0xf6ce1ce0
Model has 2513 quadratic objective terms
Variable types: 0 continuous, 196 integer (196 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+01, 1e+02]
  QObjective range [2e+00, 2e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 4e+00]
Found heuristic solution: objective 1528.1476757
Presolve time: 0.01s
Presolved: 2548 rows, 2709 columns, 7931 nonzeros
Variable types: 0 continuous, 2709 integer (2709 binary)

Root relaxation: objective 3.169083e+03, 242 iterations, 0.01 seconds (0.01 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent   

## Pipage rounding

In [6]:
start = time()
# Define objective function
def f(x):
    res = 0
    # project preference term
    for u in nodes:
        for p in projects:
            if (u, p) in c:
                res += c[(u, p)] * x[(u, p)]
    res *= lambda_
    
    # conflict term
    for e in edges:
        u, v = e
        inner_sum = 0
        for p in projects:
            inner_sum += x[(u, p)] * x[(v, p)]
        res += w[(u, v)] * (1 - inner_sum)
    return res        

In [7]:
def construct_graph(x):
    H = nx.Graph()
    H.add_nodes_from(nodes, bipartite=0)
    H.add_nodes_from(projects, bipartite=1)
    edges = [(u, p) for u in nodes for p in projects if not isclose(x[(u, p)], 0) and not isclose(x[(u, p)], 1)]
    H.add_edges_from(edges)
    return H

def find_cycle(H):
    try:
        cycle = nx.find_cycle(H)
    except Exception:
        cycle = []
    return cycle

def find_path(graph):
    def dfs_with_backtracking(vertex, path):
        nonlocal max_path
        path.append(vertex)

        for neighbor in graph[vertex]:
            if neighbor not in path:
                dfs_with_backtracking(neighbor, path)

        if len(path) > len(max_path):
            max_path = path.copy()

        path.pop()

    max_path = []
    for start_vertex in graph.nodes:
        dfs_with_backtracking(start_vertex, [])
        if max_path:
            return list(zip(max_path, max_path[1:]))
    
    print("No path found...")
    return max_path

def format_path(R):
    R_new = deque()
    for e in R:
        u, p = e if e[0] in nodes else tuple(reversed(e)) # (student, project) edge
        R_new.append((u, p))
    return list(R_new)

def calc_eps(x, R):
    # Divide R into M1 and M2 matchings
    M1 = R[::2]
    M2 = R[1::2]
    
    # Calculate eps1, eps2
    eps1 = min(min([x[e] for e in M1]), min([1 - x[e] for e in M2]))
    eps2 = min(min([1 - x[e] for e in M1]), min([x[e] for e in M2]))
    
    return eps1, eps2, M1, M2

def remove_dec_error(x):
    for e in x:
        if isclose(x[e], 0):
            x[e] = 0
        elif isclose(x[e], 1):
            x[e] = 1
    return x

def step(x, eps, M1, M2):
    x_new = x.copy()
    for e in M1:
        x_new[e] += eps
    for e in M2:
        x_new[e] -= eps
    return x_new

def round(x, eps1, eps2, M1, M2):
    x1 = step(x, -eps1, M1, M2)
    x2 = step(x, eps2, M1, M2)
    if f(x1) > f(x2):
        return x1
    return x2

def rand_round(x, eps1, eps2, M1, M2):
    rand = rand = random.uniform(0, 1)
    if rand < eps1 / (eps1 + eps2):
        x_new = step(x, -eps1, M1, M2)
    else:
        x_new = step(x, eps2, M1, M2)
    return x_new
    
def clean_edges(x, H):
    integral_edges = [e for e in H.edges if isclose(x[e], 0) or isclose(x[e], 1)]
    H.remove_edges_from(integral_edges)
    return H
        
def pipage_help(x, R):
    R = format_path(R)
    eps1, eps2, M1, M2 = calc_eps(x, R)
    x_new = remove_dec_error(round(x, eps1, eps2, M1, M2))

    return x_new

def rand_pipage_help(x, R):
    R = format_path(R)
    eps1, eps2, M1, M2 = calc_eps(x, R)
    x_new = remove_dec_error(rand_round(x, eps1, eps2, M1, M2))
    return x_new

In [8]:
def pipage(x, setting="deterministic"):
    H = construct_graph(x)    
    numOfEdges = len(H.edges)
    iterations = 1
    while True:
        R = find_cycle(H)
        R = R if R else find_path(H)

        if R:
            x = pipage_help(x, R) if setting == 'determinisitc' else rand_pipage_help(x, R)
            clean_edges(x, H)
        else:
            return remove_dec_error(x)
        
        iterations += 1
        if iterations > numOfEdges + 1:
            print("exceeded numOfEdges iterations... Error")
            break
    return "error"

##  3/4 - approximation algorithm (with "balancing" assumption)
We consider the following relaxation of $F(\mathbf{x})$: \
$$
L(\mathbf{x}) = 
    \lambda \sum_{v \in V} \sum_{t=1}^k c_{vt} x_{vt} - w(E)
    + \sum_{(u, v) \in E} \sum_{t=1}^k w_{uv} \min(1, x_{ut} + x_{vt})
$$
and we relax the intregrality constraints to fractional constraints
$$
    0 \leq x_{ut} \leq 1
$$
Finally, we assume that the following "balancing" assumption holds for any feasible solution: \
$$
    \lambda \geq \frac{w(E)}{\sum_{v \in V} \sum_{t=1}^k c_{vt} x_{vt}}
            \geq \frac{w(E)}{|V|} = \frac{d_{avg}}{2}
$$

Note that $0 \leq c_{vt} \leq 1$.

In [9]:
start = time()
# Relax variables to the fractional domain
for u in nodes:
    for t in projects:
        X[(u, t)].vtype = GRB.CONTINUOUS

# Auxiliary variables
Z = dict()
S = dict()
for e in edges:
    u = e[0]
    v = e[1]
    for t in projects:
        Z[(u, v, t)] = m.addVar(vtype=GRB.CONTINUOUS, name=f"Z({u}, {v}, {t})")
        S[(u, v, t)] = m.addVar(vtype=GRB.CONTINUOUS, name=f"S({u}, {v}, {t})")
        m.addConstr(S[(u, v, t)] == X[(u, t)] + X[(v, t)])
        m.addConstr(Z[(u, v, t)] == gp.min_(S[(u, v, t)], constant = 1))

# Relaxed objective L(x)
L = gp.QuadExpr()

# Linear (project preference) term
for u in nodes:
    for p in projects:
        if (u, p) in c:
            L += c[(u, p)] * X[(u, p)]
L *= lambda_

# Max-cut term
for e in edges:
    u = e[0]; v = e[1]
    for p in projects:
        L += w[(u, v)] * Z[(u, v, p)]      
L -= sum(w.values())

m.setObjective(L, GRB.MAXIMIZE)    
times["relax_model34"] = time() - start

In [10]:
start = time()
# Optimize model
m.optimize()

# Convert solution to dictionary format
x_frac34 = dict()
for u in nodes:
    for p in projects:
        x_frac34[(u, p)] = X[(u, p)].X
        
print(f'Obj relaxation: {m.ObjVal}')
times["optimize_relaxation34"] = time() - start


# round solution
start = time()
x_round34 = pipage(x_frac34, pipageSetting)
obj_round34 = f(x_round34)
times["rounding34"] = time() - start

Gurobi Optimizer version 10.0.1 build v10.0.1rc0 (linux64)

CPU model: Intel(R) Xeon(R) Gold 6132 CPU @ 2.60GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 28 physical cores, 28 logical processors, using up to 28 threads

Optimize a model with 2548 rows, 5222 columns and 7931 nonzeros
Model fingerprint: 0x14b8723c
Model has 2513 general constraints
Variable types: 5222 continuous, 0 integer (0 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+02]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 4e+00]
  GenCon const rng [1e+00, 1e+00]

MIP start from previous solve produced solution with objective 3169.08 (0.05s)
Loaded MIP start from previous solve with objective 3169.08

Presolve removed 951 rows and 3500 columns
Presolve time: 0.01s
Presolved: 1597 rows, 1722 columns, 5012 nonzeros
Variable types: 1722 continuous, 0 integer (0 binary)

Root relaxation: cutoff, 1461 iterations, 0.01 seconds (0.01 work units)

    Nod

### 1/2 - approximation algorithm (no assumptions)

In [11]:
start = time()
# Create a new model
m2 = gp.Model("linear")
m2.Params.timeLimit = 120

# Create variables
X = dict()
for u in nodes:
    for p in projects:
        X[(u, p)] = m2.addVar(vtype=GRB.BINARY, name=f"X({u},{p})")

# Auxiliary variables
Z = dict()
S = dict()
for e in edges:
    u = e[0]
    v = e[1]
    Z[(u, v)] = m2.addVar(vtype=GRB.CONTINUOUS, name=f"Z({u}, {v})")
    for p in projects:
        S[(u, v, p)] = m2.addVar(vtype=GRB.CONTINUOUS, name=f"S({u}, {v}, {p})")
        m2.addConstr(S[(u, v, p)] == 2 - X[(u, p)] - X[(v, p)])

    m2.addConstr(Z[(u, v)] == gp.min_([S[(u, v, p)] for p in projects], constant = 1))
        
# Add constraints

# Each student assigned to exactly one project
for u in nodes:
    expr = gp.LinExpr(numOfProjects*[1], [X[(u, t)] for t in projects])
    m2.addConstr(expr == 1)
    
# Project capacity constraints
for p in projects:
    expr = gp.LinExpr(n*[1], [X[(u, p)] for u in nodes])
    m2.addConstr(expr <= capacities[p])

# Linear (project preference) term
F1 = gp.LinExpr()
for u in nodes:
    for p in projects:
        if (u, p) in c:
            F1 += c[(u, p)] * X[(u, p)]
F1 *= lambda_

# max-cut (conflicts) term
F2 = dict()
F2 = gp.LinExpr()
for e in edges:
    F2 += w[e] * Z[e]
F = F1 + F2

times["relax_model12"] = time() - start

Set parameter TimeLimit to value 120


In [12]:
start = time()
m2.setObjective(F, GRB.MAXIMIZE)
m2.optimize()

# copy solution
x_frac12 = dict()
for u in nodes:
    for p in projects:
        x_frac12[(u, p)] = X[(u, p)].X
        
print(f'Obj relaxation: {m2.ObjVal}')

# round solution
x_round12 = pipage(x_frac12, pipageSetting)
obj_round12 = f(x_round12)
times["rounding12"] = time() - start

Gurobi Optimizer version 10.0.1 build v10.0.1rc0 (linux64)

CPU model: Intel(R) Xeon(R) Gold 6132 CPU @ 2.60GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 28 physical cores, 28 logical processors, using up to 28 threads

Optimize a model with 2548 rows, 3068 columns and 7931 nonzeros
Model fingerprint: 0x837fc8fe
Model has 359 general constraints
Variable types: 2872 continuous, 196 integer (196 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+02]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 4e+00]
  GenCon const rng [1e+00, 1e+00]
Presolve removed 0 rows and 2513 columns
Presolve time: 0.01s
Presolved: 2548 rows, 555 columns, 7931 nonzeros
Variable types: 0 continuous, 555 integer (555 binary)
Found heuristic solution: objective 1672.4747732
Found heuristic solution: objective 3123.3452381

Root relaxation: objective 3.169083e+03, 220 iterations, 0.01 seconds (0.01 work units)

    Nodes    |    Current Node    

In [13]:
obj_opt = f(x_opt)
approx12 = obj_round12 / obj_opt # approximation ratio
approx34 = obj_round34 / obj_opt 
print(f'rounded (3/4 approx) f(x) = {obj_round12}')
print(f'rounded (1/2 approx) f(x) = {obj_round34}')
print(f'optimal (integral) f(x) = {obj_opt}')
print(f'approximation ratio (3/4 approx) = {approx34}')
print(f'approximation ratio (1/2 approx) = {approx12}')

rounded (3/4 approx) f(x) = 3168.0833333333335
rounded (1/2 approx) f(x) = 3168.0833333333335
optimal (integral) f(x) = 3168.0833333333335
approximation ratio (3/4 approx) = 1.0
approximation ratio (1/2 approx) = 1.0


### Baselines

In [14]:
# Create a new model
m = gp.Model("quadratic")
m.Params.timeLimit = 120

# Create variables
X = dict()
for u in nodes:
    for p in projects:
        X[(u, p)] = m.addVar(vtype=GRB.BINARY, name=f"X({u},{p})")
        
# Add constraints

# Each student assigned to exactly one project
for u in nodes:
    expr = gp.LinExpr(numOfProjects*[1], [X[(u, t)] for t in projects])
    m.addConstr(expr == 1)
    
# Project capacity constraints
for p in projects:
    expr = gp.LinExpr(n*[1], [X[(u, p)] for u in nodes])
    m.addConstr(expr <= capacities[p])

Set parameter TimeLimit to value 120


In [15]:
# Linear (project preference) term
F1 = gp.LinExpr()
for u in nodes:
    for p in projects:
        if (u, p) in c:
            F1 += c[(u, p)] * X[(u, p)]
F1 *= lambda_

# max-cut (conflicts) term
F3 = dict()
F2 = gp.QuadExpr()
for e in edges:
    u = e[0]; v = e[1]
    F3[e] = gp.QuadExpr()
    for p in projects:
        F3[e] -=  X[(u, p)] * X[(v, p)]
    F3[e] += 1
    F3[e] *= w[(u, v)]
for e in edges:
    F2 += F3[e]

In [16]:
start = time()
# optimize using only the project preferences term
m.setObjective(F1, GRB.MAXIMIZE)
m.optimize()

# copy solution
x_project = dict()
for u in nodes:
    for p in projects:
        x_project[(u, p)] = X[(u, p)].X
        
print(f'Obj project: {m.ObjVal}')
times["project_only"] = time() - start

# optimize using only the conflicts term
m.setObjective(F2, GRB.MAXIMIZE)
m.optimize()

# copy solution
x_conflict = dict()
for u in nodes:
    for p in projects:
        print(X[(u, p)])
        x_conflict[(u, p)] = X[(u, p)].X
        
print(f'Obj conflict: {m.ObjVal}')
times["conflict_only"] = time() - start

Gurobi Optimizer version 10.0.1 build v10.0.1rc0 (linux64)

CPU model: Intel(R) Xeon(R) Gold 6132 CPU @ 2.60GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 28 physical cores, 28 logical processors, using up to 28 threads

Optimize a model with 35 rows, 196 columns and 392 nonzeros
Model fingerprint: 0x0ffced79
Variable types: 0 continuous, 196 integer (196 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+01, 1e+02]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 4e+00]
Found heuristic solution: objective 1207.1476757
Presolve time: 0.00s
Presolved: 35 rows, 196 columns, 392 nonzeros
Variable types: 0 continuous, 196 integer (196 binary)

Root relaxation: objective 2.842083e+03, 28 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

*    0     0               0    2842.0833333 2842.08

### Save results

In [17]:
results = dict()
results["quad_sol"] = x_opt
results["quad_val"] = obj_opt
results["frac_sol34"] = x_frac34
results["frac_sol12"] = x_frac12
results["rounded_sol34"] = x_round34
results["rounded_val34"] = obj_round34
results["rounded_sol12"] = x_round12
results["rounded_val12"] = obj_round12
results["project_sol"] = x_project
results["project_val"] = f(x_project)
results["conflict_sol"] = x_conflict
results["conflict_val"] = f(x_conflict)

settings = dict()
settings["nodes"] = nodes
settings["edges"] = edges
settings["w"] = w
settings["c"] = c
settings["capacities"] = capacities
settings["projects"] = projects
settings["lambda_mul"] = lambda_mul
settings["lambda_"] = lambda_


In [18]:
with open(output_file, 'wb') as file:
    pickle.dump(settings, file)
    pickle.dump(edges, file) # conflict edges
    pickle.dump(results, file)
    pickle.dump(times, file)
    file.close()