In [1]:
import gurobipy as gp
from gurobipy import GRB
import networkx as nx
from networkx.algorithms import bipartite
import numpy as np
from collections import deque
from math import isclose
import random
import pickle
from time import time

## Input parameters

In [2]:
# --- Run configuration ---
input_file = './diversity/DEI.pickle'  # pickled problem instance read below
pipageSetting = "randomized"  # passed to pipage(); anything other than 'deterministic' selects the randomized rounding
timeLimit = 4000  # applied to Gurobi models through Params.timeLimit

# Gurobi environment handed to the models created later in the notebook.
env = gp.Env(empty=True)
# env.setParam("Threads", 16)
env.setParam("OutputFlag",0) # suppress gurobi console output
env.start()

times = dict() # dictionary to keep time for different stages
start = time()
# Read graph
# The file holds several objects pickled one after another; they are read back
# sequentially with repeated pickle.load calls.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
with open(input_file, 'rb') as file:
    projects = pickle.load(file)
    capacities = pickle.load(file)
    numOfProjects = len(capacities)
    c = pickle.load(file)  # looked up with (node, project) keys elsewhere in the notebook
    w = pickle.load(file)  # looked up with conflict-edge keys elsewhere in the notebook
    edges = pickle.load(file)
    nodes = pickle.load(file)
    assignment = pickle.load(file)

# Conflict graph built from the loaded edge list.
G = nx.Graph()
G.add_edges_from(edges)
n = len(nodes)

print(f'number of nodes: {n}')
print(f'number of (conflict) edges: {len(edges)}')
print(f'number of projects: {numOfProjects}')
print(f'projects {projects}')
times["read_input"] = time() - start

number of nodes: 4000
number of (conflict) edges: 10166
number of projects: 4
projects ['PR', 'IT', 'Sales', 'HR']


In [3]:
# Show the per-project capacity mapping loaded from the input file.
capacities

{'PR': 1000, 'IT': 1000, 'Sales': 1000, 'HR': 1000}

In [4]:
# Number of entries in the preference dictionary c.
len(c)

16000

### Helper functions

In [5]:
# Define objective function
def f(x):
    """Evaluate the objective for a (possibly fractional) assignment.

    The value is ``lambda_ * sum c[(u, p)] * x[(u, p)]`` plus, for every
    conflict edge ``(u, v)``, ``w[(u, v)] * (1 - sum_p x[(u, p)] * x[(v, p)])``.
    The edge weight ``w`` is applied so that this function agrees with the
    objectives handed to the optimisation models in this notebook.

    Args:
        x: dict mapping ``(node, project)`` to a value; missing keys count as 0.

    Returns:
        The objective value.

    Reads the module-level ``nodes``, ``projects``, ``edges``, ``c``, ``w``
    and ``lambda_``.
    """
    # project preference term
    preference = 0
    for u in nodes:
        for p in projects:
            if (u, p) in c and (u, p) in x:
                preference += c[(u, p)] * x[(u, p)]
    res = preference * lambda_

    # conflict term
    for e in edges:
        u, v = e
        inner_sum = 0
        for p in projects:
            if (u, p) in x and (v, p) in x:
                inner_sum += x[(u, p)] * x[(v, p)]
        # weight the edge exactly as the solver objectives do
        res += w[(u, v)] * (1 - inner_sum)
    return res

# Pipage rounding
def construct_graph(x, tol=1e-9):
    """Build the bipartite graph of fractional assignment variables.

    Every entry of the module-level ``nodes`` and ``projects`` becomes a
    vertex; an edge ``(u, p)`` is added when ``x[(u, p)]`` is present and is
    neither (approximately) 0 nor (approximately) 1.

    Args:
        x: dict mapping ``(node, project)`` to a value.
        tol: absolute tolerance for the 0/1 tests. ``math.isclose`` with its
            default ``abs_tol=0`` only reports "close to 0" for an exact
            zero, so an explicit absolute tolerance is passed.

    Returns:
        A ``networkx.Graph`` whose edges are the fractional variables.
    """
    H = nx.Graph()
    H.add_nodes_from(nodes, bipartite=0)
    H.add_nodes_from(projects, bipartite=1)
    fractional_edges = [
        (u, p)
        for u in nodes
        for p in projects
        if (u, p) in x
        and not isclose(x[(u, p)], 0, abs_tol=tol)
        and not isclose(x[(u, p)], 1, abs_tol=tol)
    ]
    H.add_edges_from(fractional_edges)
    return H

def find_cycle(H):
    """Return one cycle of ``H`` as a list of edges, or ``[]`` if none exists.

    Only the "no cycle" condition reported by networkx is translated into an
    empty list; any other exception propagates so real failures stay visible.
    """
    try:
        return nx.find_cycle(H)
    except nx.NetworkXNoCycle:
        return []

def find_path(graph):
    """Find a path in ``graph`` and return it as a list of consecutive edges.

    The search starts from a vertex that has at least one neighbour, so
    isolated vertices never produce an empty result while edges still exist.
    A degree-1 vertex is preferred as the start; the longest simple path
    reachable from the start vertex (depth-first search with backtracking)
    is returned.

    Args:
        graph: object exposing ``graph.nodes`` (iterable of hashable vertices)
            and ``graph[v]`` (the neighbours of ``v``, supporting ``len``).

    Returns:
        ``[(v0, v1), (v1, v2), ...]`` along the path, or ``[]`` when the
        graph has no edges.
    """
    def longest_path_from(start_vertex):
        best = []
        path = []
        on_path = set()  # O(1) membership test for the current path

        def dfs(vertex):
            nonlocal best
            path.append(vertex)
            on_path.add(vertex)

            for neighbor in graph[vertex]:
                if neighbor not in on_path:
                    dfs(neighbor)

            if len(path) > len(best):
                best = path.copy()

            on_path.discard(vertex)
            path.pop()

        dfs(start_vertex)
        return best

    # Vertices without neighbours cannot be part of any path.
    candidates = [v for v in graph.nodes if len(graph[v]) > 0]
    if not candidates:
        return []

    # Prefer an endpoint-like vertex so the path does not start mid-way.
    leaves = [v for v in candidates if len(graph[v]) == 1]
    start_vertex = leaves[0] if leaves else candidates[0]

    best_path = longest_path_from(start_vertex)
    return list(zip(best_path, best_path[1:]))

def format_path(R):
    """Orient every edge of ``R`` as ``(node, project)``.

    An edge whose first endpoint is in the module-level ``nodes`` is kept
    as is; any other edge is reversed. Returns a new list.
    """
    oriented = []
    for edge in R:
        if edge[0] in nodes:
            u, p = edge
        else:
            u, p = tuple(reversed(edge))
        oriented.append((u, p))  # (student, project) edge
    return oriented

def calc_eps(x, R):
    """Split ``R`` into two alternating matchings and compute the step sizes.

    ``M1`` takes the even-indexed edges of ``R`` and ``M2`` the odd-indexed
    ones. ``eps1`` is the largest amount by which every ``M1`` value can be
    decreased and every ``M2`` value increased while staying in ``[0, 1]``;
    ``eps2`` is the same for the opposite direction.

    A single-edge ``R`` leaves ``M2`` empty; it then imposes no bound.

    Args:
        x: dict mapping edge -> value.
        R: non-empty list of edges (keys of ``x``).

    Returns:
        ``(eps1, eps2, M1, M2)``.

    Raises:
        ValueError: if ``R`` is empty.
    """
    if not R:
        raise ValueError("calc_eps requires a non-empty path or cycle")

    # Divide R into M1 and M2 matchings
    M1 = R[::2]
    M2 = R[1::2]

    unbounded = float("inf")  # bound contributed by an empty M2

    # Calculate eps1, eps2
    eps1 = min(min(x[e] for e in M1),
               min((1 - x[e] for e in M2), default=unbounded))
    eps2 = min(min(1 - x[e] for e in M1),
               min((x[e] for e in M2), default=unbounded))

    return eps1, eps2, M1, M2

def remove_dec_error(x):
    """Return a new dict holding only the keys of ``x`` whose value is close to 1.

    Every kept key maps to the integer ``1``; all other entries are dropped.
    """
    return {key: 1 for key in x if isclose(x[key], 1)}

def step(x, eps, M1, M2):
    """Return a copy of ``x`` shifted by ``eps`` along the two matchings.

    Values on ``M1`` edges are increased by ``eps`` and values on ``M2``
    edges are decreased by ``eps``; ``x`` itself is not modified.
    """
    shifted = x.copy()
    for direction, matching in ((1, M1), (-1, M2)):
        for edge in matching:
            shifted[edge] = shifted[edge] + direction * eps
    return shifted

def round(x, eps1, eps2, M1, M2):
    """Deterministic rounding step: keep the better of the two candidate moves.

    One candidate shifts by ``-eps1`` and the other by ``+eps2`` (see
    ``step``). The ``-eps1`` candidate is returned only when its objective
    ``f`` is strictly larger; ties go to the ``+eps2`` candidate.

    Note: defining this name at module level shadows the built-in ``round``.
    """
    lowered = step(x, -eps1, M1, M2)
    raised = step(x, eps2, M1, M2)
    return lowered if f(lowered) > f(raised) else raised

def rand_round(x, eps1, eps2, M1, M2):
    """Randomized rounding step that leaves every value unchanged in expectation.

    The ``-eps1`` move is taken with probability ``eps2 / (eps1 + eps2)`` and
    the ``+eps2`` move with probability ``eps1 / (eps1 + eps2)``. The expected
    shift is then ``(-eps1 * eps2 + eps2 * eps1) / (eps1 + eps2) = 0``.

    Args:
        x: dict mapping edge -> value.
        eps1, eps2: step sizes as returned by ``calc_eps``.
        M1, M2: the two alternating matchings.

    Returns:
        A new dict produced by ``step``.
    """
    rand = random.uniform(0, 1)
    # The probability of each move is proportional to the size of the *other* move.
    if rand < eps2 / (eps1 + eps2):
        return step(x, -eps1, M1, M2)
    return step(x, eps2, M1, M2)
    
def clean_edges(x, H, tol=1e-9):
    """Remove from ``H`` every edge whose value in ``x`` has become integral.

    Args:
        x: dict mapping edge -> value; must contain every edge of ``H``.
        H: graph exposing ``H.edges`` and ``H.remove_edges_from``.
        tol: absolute tolerance for the 0/1 tests. ``math.isclose`` with its
            default ``abs_tol=0`` reports "close to 0" only for an exact
            zero, so an explicit absolute tolerance is passed.

    Returns:
        ``H`` itself, modified in place.
    """
    integral_edges = [
        e for e in H.edges
        if isclose(x[e], 0, abs_tol=tol) or isclose(x[e], 1, abs_tol=tol)
    ]
    H.remove_edges_from(integral_edges)
    return H
        
def pipage_help(x, R):
    """Perform one deterministic rounding step along the path or cycle ``R``.

    The edges are oriented with ``format_path``, the step sizes and matchings
    come from ``calc_eps``, and ``round`` picks the resulting solution.
    """
    oriented = format_path(R)
    return round(x, *calc_eps(x, oriented))

def rand_pipage_help(x, R):
    """Perform one randomized rounding step along the path or cycle ``R``.

    The edges are oriented with ``format_path``, the step sizes and matchings
    come from ``calc_eps``, and ``rand_round`` picks the resulting solution.
    """
    oriented = format_path(R)
    return rand_round(x, *calc_eps(x, oriented))

def pipage(x, setting="deterministic"):
    """Round the fractional solution ``x`` by repeated pipage steps.

    A graph of fractional variables is built once. In each round a cycle
    (or, failing that, a path) is looked up, one rounding step is applied
    along it, and edges that became integral are removed from the graph.

    Args:
        x: dict mapping ``(node, project)`` to a value.
        setting: ``'deterministic'`` uses ``pipage_help``; any other value
            uses ``rand_pipage_help``.

    Returns:
        The cleaned integral solution from ``remove_dec_error`` once no
        cycle or path is left. If rounding steps were still being applied
        after ``numOfEdges + 1`` rounds, a message is printed and the string
        ``"error"`` is returned instead.
    """
    H = construct_graph(x)
    edge_budget = len(H.edges)
    round_once = pipage_help if setting == 'deterministic' else rand_pipage_help

    for _ in range(edge_budget + 1):
        R = find_cycle(H) or find_path(H)
        if not R:
            return remove_dec_error(x)
        x = round_once(x, R)
        clean_edges(x, H)

    print("exceeded numOfEdges iterations... Error")
    return "error"

def copy_solution(sol):
    """Extract the non-zero variable values from a solved model.

    For every pair from the module-level ``nodes`` and ``projects`` the ``.X``
    attribute of ``sol[(node, project)]`` is read; pairs whose value is
    exactly 0 are left out of the returned dict.
    """
    x = dict()
    for key in ((u, p) for u in nodes for p in projects):
        value = sol[key].X
        if value != 0:
            x[key] = value
    return x

In [6]:
# Helper function
def teams_to_x(teams):
    """Convert ``{team: [members]}`` into ``{(member, team): 1}``."""
    return {(member, t): 1 for t in teams for member in teams[t]}

def baselines():
    """Build two baseline assignments: a random one and a greedy one.

    Random: each node is placed in exactly one project, drawn uniformly from
    the projects that still have spare capacity.

    Greedy: nodes are processed in order; each one joins the project (with
    spare capacity) that yields the largest increase of the objective ``f``.

    In both baselines a node is left unassigned if every project is full.

    Returns:
        ``(x_random, x_greedy)``, each a dict ``{(node, project): 1}``.

    Reads the module-level ``nodes``, ``projects`` and ``capacities``.
    """
    # Random
    teams_random = {t: [] for t in projects}
    for u in nodes:
        open_projects = [t for t in teams_random
                         if len(teams_random[t]) < capacities[t]]
        if open_projects:
            # exactly one project per node
            teams_random[random.choice(open_projects)].append(u)
    x_random = teams_to_x(teams_random)

    # Greedy
    teams_greedy = {t: [] for t in projects}
    x_current = dict()            # running partial assignment
    current_value = f(x_current)  # objective of the running assignment
    for u in nodes:
        t_max = None
        increase = float("-inf")
        value_at_max = current_value
        for t in teams_greedy:
            if len(teams_greedy[t]) >= capacities[t]:
                continue
            x_tmp = dict(x_current)
            x_tmp[(u, t)] = 1
            value = f(x_tmp)  # evaluated once per candidate project
            if value - current_value > increase:
                increase = value - current_value
                t_max = t
                value_at_max = value
        if t_max is None:
            # every project is full; nothing to assign for this node
            continue
        teams_greedy[t_max].append(u)
        x_current[(u, t_max)] = 1
        current_value = value_at_max
    x_greedy = teams_to_x(teams_greedy)

    return x_random, x_greedy

def run_experiments():
    """Solve all models for the current lambda, round, and save the results.

    Steps, in order:
      1. optimise the quadratic model ``m`` and copy its solution;
      2. optimise ``m3``, copy its solution and round it with ``pipage``;
      3. optimise ``m2``, copy its solution and round it with ``pipage``;
      4. compute the random and greedy baselines;
      5. pickle the settings, the results and the timing dictionary, in that
         order, to ``output_file``.

    Reads the module-level ``lambda_``, ``lambda_mul``, ``output_file``,
    ``pipageSetting``, the models ``m``/``m2``/``m3`` with their variables and
    objective parts, and writes timing entries into the shared ``times`` dict.
    """
    # ===== Quadratic algorithm ======
    print(f'Running the quadratic algorithm for lambda = {lambda_mul} ...')
    F = lambda_ * F1 + F2
    m.setObjective(F, GRB.MAXIMIZE)

    start = time()
    m.optimize()
    x_opt = copy_solution(X)
    times["optimize_quad"] = time() - start
    print(f'optimize_quad time {times["optimize_quad"]}')

    # ==== 3/4 pipage rounding algorithm
    print(f'Running the 3/4 pipage algorithm for lambda = {lambda_mul}')
    start = time()
    W = lambda_ * W1 + W2
    m3.setObjective(W, GRB.MAXIMIZE)

    m3.optimize()
    x_frac34 = copy_solution(N)
    x_round34 = pipage(x_frac34, pipageSetting)
    times["optimize_relaxation34"] = time() - start

    # ===== 1/2 pipage rounding algorithm
    print(f'Running the 1/2 pipage algorithm for lambda = {lambda_mul}')
    start = time()
    L = lambda_ * L1 + L2
    m2.setObjective(L, GRB.MAXIMIZE)

    m2.optimize()
    x_frac12 = copy_solution(Y)
    x_round12 = pipage(x_frac12, pipageSetting)
    times["optimize_relaxation12"] = time() - start
    print(f'optimize_relaxation12 time {times["optimize_relaxation12"]}')

    # ==== Baselines =====
    x_random, x_greedy = baselines()

    print(f'Saving results for lambda = {lambda_mul}')
    # ==== Save results ======
    results = {
        "quad_sol": x_opt,
        "rounded_sol34": x_round34,
        "rounded_sol12": x_round12,
        "random_sol": x_random,
        "greedy_sol": x_greedy,
    }

    settings = {
        "lambda_mul": lambda_mul,
        "lambda_": lambda_,
    }

    # The with-statement closes the file; no explicit close() is needed.
    with open(output_file, 'wb') as file:
        pickle.dump(settings, file)
        pickle.dump(results, file)
        pickle.dump(times, file)

### Quadratic model

In [7]:
# Create a new model
# Binary quadratic program over assignment variables X[(u, p)]. Only the
# objective parts F1 and F2 are built here; they are combined and installed
# on the model later, inside run_experiments().
print(f'Creating quadratic model ...')
m = gp.Model("quadratic", env=env)
m.Params.timeLimit = timeLimit

# Create variables
# One binary variable per (node, project) pair.
print(f'Creating variables X[(u, p)] ...')
X = dict()
for u in nodes:
    for p in projects:
        X[(u, p)] = m.addVar(vtype=GRB.BINARY, name=f"X({u},{p})")

# Linear (project preference) term
# F1 = sum of c[(u, p)] * X[(u, p)] over the pairs present in c.
print(f'Creating objective ...')
F1 = gp.LinExpr()
for u in nodes:
    for p in projects:
        if (u, p) in c:
            F1 += c[(u, p)] * X[(u, p)]

# max-cut term
# For each edge e = (u, v): F3[e] = w[(u, v)] * (1 - sum_p X[(u, p)] * X[(v, p)]).
# F2 is the sum of F3[e] over all edges.
F3 = dict()
F2 = gp.QuadExpr()
for e in edges:
    u = e[0]; v = e[1]
    F3[e] = gp.QuadExpr()
    for p in projects:
        F3[e] -=  X[(u, p)] * X[(v, p)]
    F3[e] += 1
    F3[e] *= w[(u, v)]
for e in edges:
    F2 += F3[e]
    
# Add constraints
print(f'Adding constraints ...')
# Each student assigned to exactly one project
for u in nodes:
    expr = gp.LinExpr(numOfProjects*[1], [X[(u, t)] for t in projects])
    m.addConstr(expr == 1)

# Project max capacity constraints
for p in projects:
    expr = gp.LinExpr(n*[1], [X[(u, p)] for u in nodes])
    m.addConstr(expr <= capacities[p])

print(f'Done creating quadratic model.')

Creating quadratic model ...
Creating variables X[(u, p)] ...
Creating objective ...
Adding constraints ...
Done creating quadratic model.


### 1/2 pipage model

In [8]:
# Model with continuous variables Y[(u, p)] in [0, 1]. Its objective parts L1
# and L2 are combined and installed on the model inside run_experiments().
print(f'Creating model for 1/2 pipage algorithm ...')
start = time()
m2 = gp.Model("linear", env=env)
# m2.Params.timeLimit = timeLimit

print(f'Creating variables Y[(u, p)] ...')
Y = dict()
for u in nodes:
    for p in projects:
        Y[(u, p)] = m2.addVar(lb=0.0, ub=1.0, vtype=GRB.CONTINUOUS, name=f"Y({u},{p})")

print(f'Creating auxiliary variables ...')
# Auxiliary variables
# For every edge (u, v) and project p: S[(u, v, p)] = 2 - Y[(u, p)] - Y[(v, p)].
# Z[(u, v)] is the minimum of the constant 1 and all S[(u, v, p)] of that edge.
Z = dict()
S = dict()
for e in edges:
    u = e[0]
    v = e[1]
    Z[(u, v)] = m2.addVar(vtype=GRB.CONTINUOUS, name=f"Z({u}, {v})")
    for p in projects:
        S[(u, v, p)] = m2.addVar(vtype=GRB.CONTINUOUS, name=f"S({u}, {v}, {p})")
        m2.addConstr(S[(u, v, p)] == 2 - Y[(u, p)] - Y[(v, p)])

    m2.addConstr(Z[(u, v)] == gp.min_([S[(u, v, p)] for p in projects], constant = 1))
        
# Add constraints
print(f'Adding constraints')
# Each student assigned to exactly one project
for u in nodes:
    expr = gp.LinExpr(numOfProjects*[1], [Y[(u, t)] for t in projects])
    m2.addConstr(expr == 1)
    
# Project capacity constraints
for p in projects:
    expr = gp.LinExpr(n*[1], [Y[(u, p)] for u in nodes])
    m2.addConstr(expr <= capacities[p])

print(f'Creating the objective ...')
# Linear (project preference) term
# L1 = sum of c[(u, p)] * Y[(u, p)] over the pairs present in c.
L1 = gp.LinExpr()
for u in nodes:
    for p in projects:
        if (u, p) in c:
            L1 += c[(u, p)] * Y[(u, p)]


# max-cut (conflicts) term
# L2 = sum over edges of w[e] * Z[e]; Z is indexed with the edge itself here,
# so each edge must equal the (u, v) key used when Z was filled.
L2 = gp.LinExpr()
for e in edges:
    L2 += w[e] * Z[e]

times["relax_model12"] = time() - start
print(f'Done creating 1/2 pipage model')

Creating model for 1/2 pipage algorithm ...
Creating variables Y[(u, p)] ...
Creating auxiliary variables ...
Adding constraints
Creating the objective ...
Done creating 1/2 pipage model


### 3/4 pipage model

In [9]:
# Model with continuous variables N[(u, p)] in [0, 1]. Its objective parts W1
# and W2 are combined and installed on the model inside run_experiments().
print(f'Creating 3/4 pipage model')
start = time()
m3 = gp.Model("linear", env=env)
m3.Params.timeLimit = timeLimit

N = dict()
for u in nodes:
    for p in projects:
        N[(u, p)] = m3.addVar(lb=0.0, ub=1.0, vtype=GRB.CONTINUOUS, name=f"N({u},{p})")
        
# Auxiliary variables
# For every edge (u, v) and project p:
#   H[(u, v, p)] = N[(u, p)] + N[(v, p)]
#   G[(u, v, p)] = min(H[(u, v, p)], 1)
# NOTE: this rebinds the module-level name G, which held the networkx conflict
# graph created when the input was read; H is also bound at module level here.
G = dict()
H = dict()
for e in edges:
    u = e[0]
    v = e[1]
    for p in projects:
        G[(u, v, p)] = m3.addVar(vtype=GRB.CONTINUOUS, name=f"G({u}, {v}, {p})")
        H[(u, v, p)] = m3.addVar(vtype=GRB.CONTINUOUS, name=f"H({u}, {v}, {p})")
        m3.addConstr(H[(u, v, p)] == N[(u, p)] + N[(v, p)])
        m3.addConstr(G[(u, v, p)] == gp.min_(H[(u, v, p)], constant = 1))

# Add constraints
print(f'Adding constraints')
# Each student assigned to exactly one project
for u in nodes:
    expr = gp.LinExpr(numOfProjects*[1], [N[(u, p)] for p in projects])
    m3.addConstr(expr == 1)
    
# Project capacity constraints
for p in projects:
    expr = gp.LinExpr(n*[1], [N[(u, p)] for u in nodes])
    m3.addConstr(expr <= capacities[p])
    
# Relaxed objective L(x)
W1 = gp.LinExpr()

# Linear (project preference) term
# W1 = sum of c[(u, p)] * N[(u, p)] over the pairs present in c.
for u in nodes:
    for p in projects:
        if (u, p) in c:
            W1 += c[(u, p)] * N[(u, p)]

W2 = gp.LinExpr()
# Max-cut term
# W2 = sum over edges and projects of w[(u, v)] * G[(u, v, p)], minus the
# constant sum(w.values()).
for e in edges:
    u = e[0]; v = e[1]
    for p in projects:
        W2 += w[(u, v)] * G[(u, v, p)]      
W2 -= sum(w.values())
times["relax_model34"] = time() - start
print(f'Done creating 3/4 pipage model.')

Creating 3/4 pipage model
Adding constraints
Done creating 3/4 pipage model.


### Run code

In [10]:
# Sweep over the lambda multipliers. run_experiments() reads lambda_mul,
# lambda_ and output_file as module-level globals, so they are rebound on
# every pass before the call.
for l in [0, 0.5, 1, 3, 4, 5, 10]:
    lambda_mul = l
    # Scale the multiplier by the sum of all w values divided by the node count.
    lambda_ = lambda_mul * sum(w.values()) / n
    output_file = f'./diversity/results/DEI_lambda{lambda_mul}.pickle'
    print(f'Running experiments for lambda = {lambda_mul}')
    run_experiments()

Running experiments for lambda = 3
Running the quadratic algorithm for lambda = 3 ...
optimize_quad time 1.5273714065551758
Running the 3/4 pipage algorithm for lambda = 3
Running the 1/2 pipage algorithm for lambda = 3
optimize_relaxation12 time 0.8772315979003906
Saving results for lambda = 3
Running experiments for lambda = 4
Running the quadratic algorithm for lambda = 4 ...
optimize_quad time 1.4627385139465332
Running the 3/4 pipage algorithm for lambda = 4
Running the 1/2 pipage algorithm for lambda = 4
optimize_relaxation12 time 0.7272117137908936
Saving results for lambda = 4
Running experiments for lambda = 5
Running the quadratic algorithm for lambda = 5 ...
optimize_quad time 1.2314434051513672
Running the 3/4 pipage algorithm for lambda = 5
Running the 1/2 pipage algorithm for lambda = 5
optimize_relaxation12 time 0.6929490566253662
Saving results for lambda = 5
Running experiments for lambda = 10
Running the quadratic algorithm for lambda = 10 ...
optimize_quad time 1.243