In [1]:
import cvxpy as cp
import networkx as nx
from networkx.algorithms import bipartite
import numpy as np
from collections import deque
from math import isclose
import random
import pickle
from time import time

## Read input

In [2]:
input_file = './diversity/DEI.pickle'
pipageSetting = "randomized"

times = dict() # dictionary to keep time for different stages
start = time()
# Read graph
with open(input_file, 'rb') as file:
    projects = pickle.load(file)
    capacities = pickle.load(file)
    numOfProjects = len(capacities)
    c = pickle.load(file)
    w = pickle.load(file)
    edges = pickle.load(file)
    nodes = pickle.load(file)
    assignment = pickle.load(file)

G = nx.Graph()
G.add_edges_from(edges)
n = len(nodes)

print(f'number of nodes: {n}')
print(f'number of (conflict) edges: {len(edges)}')
print(f'number of projects: {numOfProjects}')
print(f'projects {projects}')

number of nodes: 4000
number of (conflict) edges: 10166
number of projects: 4
projects ['PR', 'IT', 'Sales', 'HR']


In [3]:
capacities

{'PR': 1000, 'IT': 1000, 'Sales': 1000, 'HR': 1000}

In [4]:
len(c)

16000

In [5]:
lambda_mul = 1
lambda_ = lambda_mul * sum(w.values()) / n

## Helper functions

In [41]:
# Define objective function
def f(x):
    res = 0
    # project preference term
    for u in nodes:
        for p in projects:
            if (u, p) in c and (u, p) in x:
                res += c[(u, p)] * x[(u, p)]
    res *= lambda_
    
    # conflict term
    for e in edges:
        u, v = e
        inner_sum = 0
        for p in projects:
            if (u, p) in x and (v, p) in x:
                inner_sum += x[(u, p)] * x[(v, p)]
        res += w[(u, v)] * (1 - inner_sum)
    return res

# Pipage rounding
def construct_graph(x):
    H = nx.Graph()
    H.add_nodes_from(nodes, bipartite=0)
    H.add_nodes_from(projects, bipartite=1)
    edges = [(u, p) for u in nodes for p in projects if (u, p) in x and not isclose(x[(u, p)], 0) and not isclose(x[(u, p)], 1)]
    H.add_edges_from(edges)
    return H

def find_cycle(H):
    try:
        cycle = nx.find_cycle(H)
    except Exception:
        cycle = []
    return cycle

def find_path(graph):
    def dfs_with_backtracking(vertex, path):
        nonlocal max_path
        path.append(vertex)

        for neighbor in graph[vertex]:
            if neighbor not in path:
                dfs_with_backtracking(neighbor, path)

        if len(path) > len(max_path):
            max_path = path.copy()

        path.pop()

    max_path = []
    for start_vertex in graph.nodes:
        dfs_with_backtracking(start_vertex, [])
        if max_path:
            return list(zip(max_path, max_path[1:]))
    
    print("No path found...")
    return max_path

def format_path(R):
    R_new = deque()
    for e in R:
        u, p = e if e[0] in nodes else tuple(reversed(e)) # (student, project) edge
        R_new.append((u, p))
    return list(R_new)

def calc_eps(x, R):
    # Divide R into M1 and M2 matchings
    M1 = R[::2]
    M2 = R[1::2]
    
    # Calculate eps1, eps2
    eps1 = min(min([x[e] for e in M1]), min([1 - x[e] for e in M2]))
    eps2 = min(min([1 - x[e] for e in M1]), min([x[e] for e in M2]))
    
    return eps1, eps2, M1, M2

def remove_dec_error(x):            
    #only keep the keys corresponding to value 1
    x_new = dict()
    for e in x:
        if isclose(x[e], 1):
            x_new[e] = 1
    return x_new

def step(x, eps, M1, M2):
    x_new = x.copy()
    for e in M1:
        x_new[e] += eps
    for e in M2:
        x_new[e] -= eps
    return x_new

def round(x, eps1, eps2, M1, M2):
    x1 = step(x, -eps1, M1, M2)
    x2 = step(x, eps2, M1, M2)
    if f(x1) > f(x2):
        return x1
    return x2

def rand_round(x, eps1, eps2, M1, M2):
    rand = rand = random.uniform(0, 1)
    if rand < eps1 / (eps1 + eps2):
        x_new = step(x, -eps1, M1, M2)
    else:
        x_new = step(x, eps2, M1, M2)
    return x_new
    
def clean_edges(x, H):
    integral_edges = [e for e in H.edges if isclose(x[e], 0) or isclose(x[e], 1)]
    H.remove_edges_from(integral_edges)
    return H
        
def pipage_help(x, R):
    R = format_path(R)
    eps1, eps2, M1, M2 = calc_eps(x, R)
    x_new = round(x, eps1, eps2, M1, M2)
    return x_new

def rand_pipage_help(x, R):
    R = format_path(R)
    eps1, eps2, M1, M2 = calc_eps(x, R)
    x_new = rand_round(x, eps1, eps2, M1, M2)
    return x_new

def pipage(x, setting="deterministic"):
    H = construct_graph(x)    
    numOfEdges = len(H.edges)
    iterations = 1
    while True:
        R = find_cycle(H)
        R = R if R else find_path(H)
        if R:
            x = pipage_help(x, R) if setting == 'deterministic' else rand_pipage_help(x, R)
            clean_edges(x, H)
        else:
            return remove_dec_error(x)
        
        iterations += 1
        if iterations > numOfEdges + 1:
            print("exceeded numOfEdges iterations... Error")
            break
    return "error"

## 1/2 approximation algorithm using Convex Optimization

In [18]:
print(f'Creating model for 1/2 pipage algorithm ...')
start = time()

print(f'Creating variables X[(u, p)] ...')
X = dict()
for u in nodes:
    for p in projects:
        X[(u, p)] = cp.Variable()
        
# Add constraints
print(f'Adding constraints')
constr = list()

# Each variable between 0 and 1
for u in nodes:
    for p in projects:
        constr.append(X[(u, p)] >= 0)
        constr.append(X[(u, p)] <= 1)
        
# Each student assigned to exactly one project
for u in nodes:
    constr.append(sum([X[(u, p)] for p in projects]) == 1)
    
# Project capacity constraints
for p in projects:
    constr.append(sum([X[(u, p)] for u in nodes]) <= capacities[p])

print(f'Creating the objective ...')
# Linear (project preference) term
L1 = sum(c[(u, p)] * X[(u, p)] for u in nodes for p in projects)

# max-cut (conflicts) term
L2 = 0
for e in edges:
    u, v = e
    L3 = cp.min(cp.vstack([2 - X[(u, p)] - X[(v, p)] for p in projects]))
    L2 += w[e] * cp.minimum(1, L3)
    
obj = lambda_ * L1 + L2
times["relax_model12"] = time() - start
print(f'Done creating 1/2 pipage model')

Creating model for 1/2 pipage algorithm ...
Creating variables X[(u, p)] ...
Adding constraints
Creating the objective ...
Done creating 1/2 pipage model


In [20]:
pipage_model = cp.Problem(cp.Maximize(obj), constr)
pipage_model.solve(verbose=True)

                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Sep 22 02:30:42 PM: Your problem has 16000 variables, 36004 constraints, and 0 parameters.
(CVXPY) Sep 22 02:30:42 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Sep 22 02:30:42 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Sep 22 02:30:42 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Sep 22 02:30:42 PM: Using cached ASA map, for faster compilation (bypassing reduction chain).
(CVXPY) Sep 22 02:30:42 PM: Finished problem compilatio

18413.801647266508

In [37]:
if pipage_model.status == cp.OPTIMAL:
    optimal_solution = pipage_model.value
    x_frac = {key: var.value for key, var in X.items()}
else:
    print("Optimization problem is infeasible or failed to converge.")

In [49]:
x_pipage = pipage(x_frac, 'deterministic')

In [50]:
obj_opt = 17484.095541401275
obj_pipage = f(x_pipage)
approx_pipage = obj_pipage / obj_opt
print(f'Approximation ratio of 1/2-pipage algorithm = {approx_pipage}')

Approximation ratio of 1/2-pipage algorithm = 0.864972109320099


## 3/4 approximation algorithm using Convex Optimization

In [53]:
print(f'Creating model for 3/4 approximation algorithm ...')
start = time()

print(f'Creating variables X[(u, p)] ...')
X = dict()
for u in nodes:
    for p in projects:
        X[(u, p)] = cp.Variable()
        
# Add constraints
print(f'Adding constraints')
constr = list()

# Each variable between 0 and 1
for u in nodes:
    for p in projects:
        constr.append(X[(u, p)] >= 0)
        constr.append(X[(u, p)] <= 1)
        
# Each student assigned to exactly one project
for u in nodes:
    constr.append(sum([X[(u, p)] for p in projects]) == 1)
    
# Project capacity constraints
for p in projects:
    constr.append(sum([X[(u, p)] for u in nodes]) <= capacities[p])

print(f'Creating the objective ...')
# Linear (project preference) term
L1 = sum(c[(u, p)] * X[(u, p)] for u in nodes for p in projects)

# max-cut (conflicts) term
L2 = 0
for e in edges:
    u, v = e
    L3 = sum([cp.minimum(1, X[(u, p)] + X[(v, p)]) for p in projects])
    L2 += w[e] * L3
    
obj = lambda_ * L1 + L2
print(f'Done creating 3/4 pipage model.')

Creating model for 3/4 approximation algorithm ...
Creating variables X[(u, p)] ...
Adding constraints
Creating the objective ...
Done creating 3/4 pipage model.


In [54]:
randpipage_model = cp.Problem(cp.Maximize(obj), constr)
randpipage_model.solve(verbose=True)

                                     CVXPY                                     
                                     v1.3.2                                    
(CVXPY) Sep 22 04:02:41 PM: Your problem has 16000 variables, 36004 constraints, and 0 parameters.
(CVXPY) Sep 22 04:02:43 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Sep 22 04:02:43 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Sep 22 04:02:43 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Sep 22 04:02:45 PM: Compiling problem (target solver=ECOS).
(CVXPY) Sep 22 04:02:45 PM: Reduction chain: FlipObjective -> Dcp2Cone -> CvxAttr2Constr 

28579.80164733043

In [56]:
if pipage_model.status == cp.OPTIMAL:
    optimal_solution = randpipage_model.value
    x_frac34 = {key: var.value for key, var in X.items()}
else:
    print("Optimization problem is infeasible or failed to converge.")

In [58]:
x_round34 = pipage(x_frac34, pipageSetting)
obj_round34 = f(x_round34)
approx_round34 = obj_round34 / obj_opt
print(f'Approximation ratio of 1/2-pipage algorithm = {approx_round34}')

Approximation ratio of 1/2-pipage algorithm = 0.8590163823136076
