In [4]:
import cvxpy as cp
import gurobipy as gp
from gurobipy import GRB
import networkx as nx
from networkx.algorithms import bipartite
import numpy as np
from collections import deque
import random
import pickle
from time import time
from dsd import *

In [9]:
# Pickled problem instance that is read below.
input_file = './yahoo.pickle'
# presumably the rounding mode handed to pipage() later on — verify against the rounding cell
pipageSetting = "randomized"

# The Gurobi environment is created empty so parameters could be set before start().
env = gp.Env(empty=True)
# env.setParam("Threads", 16)
# env.setParam("OutputFlag",0) # suppress gurobi console output
env.start()
# NOTE(review): presumably a solver time limit in seconds — confirm where it is consumed.
timeLimit = 1000

times = dict() # dictionary to keep time for different stages
start = time()
# Read graph
# SECURITY: pickle.load can execute arbitrary code; only open instance files from a trusted source.
# The objects are read back one at a time, in exactly the order of the load calls below.
with open(input_file, 'rb') as file:
    projects = pickle.load(file)
    capacities = pickle.load(file)
    numOfProjects = len(capacities)
    c = pickle.load(file)
    w = pickle.load(file)
    edges = pickle.load(file)

# Conflict graph built from the loaded edge list.
G = nx.Graph()
G.add_edges_from(edges)
nodes = list(G.nodes)
n = len(nodes)
# capacities is used as a mapping here (its values are the per-project limits).
max_capacity = max(capacities.values())

# print(f'projects = {projects}')
print(f'number of nodes: {n}')
print(f'number of (conflict) edges: {len(edges)}')
print(f'number of projects: {numOfProjects}')
times["read_input"] = time() - start
# Complement of the conflict graph: an edge joins every pair of nodes that do NOT conflict.
G_c = nx.complement(G) # friends graph
# nx.draw(G_c, pos=nx.spring_layout(G_c))

Set parameter GURO_PAR_SPECIAL
Set parameter TokenServer to value "sccsvc"
number of nodes: 8438
number of (conflict) edges: 35598
number of projects: 4772


In [10]:
# helper function
def copy_solution(X):
    """Snapshot the solved value of every (super node, project) variable.

    X maps (u, p) to an object exposing the solved value as its ``.X``
    attribute. The result is a plain dict with one entry for each pair of the
    global ``new_nodes`` and ``projects`` collections.
    """
    return {(u, p): X[(u, p)].X for u in new_nodes for p in projects}

## Compaction

In [14]:
start = time()

# Components of the friends graph with at most this many nodes are merged
# into a single super node without running the dense-subgraph search.
SMALL_COMPONENT_MAX_SIZE = 5

print('Removing isolated nodes ...')
# Friends graph = complement of the conflict graph G. This private copy is
# pruned in place below.
H = nx.complement(G)
superNodeSize = dict()  # super-node label -> number of original nodes it stands for
isolated_nodes = [node for node in H.nodes() if H.degree(node) == 0]
# Copy the list: new_nodes keeps growing below and must not alias isolated_nodes.
new_nodes = list(isolated_nodes)
for node in new_nodes:
    superNodeSize[node] = 1
H.remove_nodes_from(isolated_nodes)

print('Finding connected components ...')
# Materialise the components first because H is modified inside the loop.
connected_comps = []
for comp in list(nx.connected_components(H)):
    members = list(comp)
    if len(members) <= SMALL_COMPONENT_MAX_SIZE:  # compacting small components
        H.remove_nodes_from(members)
        # Super nodes are labelled by the comma-joined member names; the label
        # is split on ',' again further down, so node names must be strings
        # without commas.
        super_node = ','.join(members)
        superNodeSize[super_node] = len(members)
        new_nodes.append(super_node)
    else:
        connected_comps.append(H.subgraph(members))

print('Finding dense subgraphs ...')
# Repeatedly peel the subgraph returned by greedy_charikar off each large
# component; every peeled node set becomes one super node.
for cc in connected_comps:
    # Graph.subgraph() returns a read-only view, so nodes cannot be removed
    # from it. Work on an independent copy instead.
    cc_tmp = H.subgraph(cc.nodes).copy()
    while cc_tmp.nodes:
        print('start')
        greedy_R = greedy_charikar(cc_tmp)
        print('test', len(cc_tmp.nodes))
        dense_nodes = list(greedy_R[0])
        if not dense_nodes:
            # An empty result would never shrink cc_tmp and the loop would not
            # terminate. Keep whatever is left as singleton super nodes.
            for node in list(cc_tmp.nodes):
                superNodeSize[node] = 1
                new_nodes.append(node)
            break
        super_node = ",".join(dense_nodes)
        superNodeSize[super_node] = len(dense_nodes)
        new_nodes.append(super_node)
        cc_tmp.remove_nodes_from(dense_nodes)

print('Setting up the compact problem ...')
# The compact friends graph has the super nodes and no edges, so its
# complement (the compact conflict graph) joins every pair of super nodes.
Gc_compact = nx.Graph()
Gc_compact.add_nodes_from(new_nodes)
G_compact = nx.complement(Gc_compact)
# Each compact edge is weighted by the product of the two super-node sizes.
w_compact = dict()
for e in G_compact.edges():
    u, v = e
    w_compact[e] = superNodeSize[u] * superNodeSize[v]

# Preference of a super node for a project = sum of its members' entries in c
# (pairs missing from c contribute nothing, so the default is 0).
c_compact = dict()
for superNode in new_nodes:
    simple_nodes = superNode.split(',')
    for p in projects:
        c_compact[(superNode, p)] = sum(
            c[(u, p)] for u in simple_nodes if (u, p) in c
        )
times['compaction'] = time() - start

Removing isolated nodes ...
Finding connected components ...
Finding dense subgraphs ...
start


KeyboardInterrupt: 

In [None]:
# Size summary of the compacted instance.
# G_compact is the complement of Gc_compact (see the compaction cell), so the
# first line counts compact conflict edges and the second compact friend edges.
print(f'# of (compact) conflict edges = {len(G_compact.edges)}')
print(f'# of (compact) friend edges = {len(Gc_compact.edges)}')
print(f'# of (compact) nodes = {len(G_compact.nodes)}')

In [None]:
# Displays the size of every super node (one value per super node, so the
# output can be long for large instances).
superNodeSize.values()

## Baselines

In [None]:
# Helper function
def teams_to_x(teams):
    """Turn a ``{project: [members]}`` mapping into an indicator dict.

    Every (member, project) pair that occurs in ``teams`` is mapped to 1;
    pairs that do not occur are simply absent from the result.
    """
    return {
        (member, project): 1
        for project, roster in teams.items()
        for member in roster
    }

def baselines():
    """Build two baseline assignments: a random one and a greedy one.

    Reads the globals ``projects``, ``nodes``, ``capacities`` and the helpers
    ``teams_to_x`` and ``f``.

    Returns:
        (x_random, x_greedy): two dicts mapping (student, project) -> 1.

    Every student is placed in at most one project. A student stays
    unassigned only when no project has free capacity left.
    """
    # Random: shuffle projects and students, then put each student into the
    # first project that still has room.
    teams_random = {t: [] for t in random.sample(list(projects), len(projects))}
    for u in random.sample(list(nodes), len(nodes)):
        for t in teams_random:
            if len(teams_random[t]) < capacities[t]:
                teams_random[t].append(u)
                break  # one project per student; without this u joins every open team
    x_random = teams_to_x(teams_random)

    # Greedy: place each student where the objective f increases the most.
    teams_greedy = {t: [] for t in projects}
    for u in nodes:
        # The current assignment and its value do not depend on the candidate
        # project, so evaluate them once per student.
        x_greedy = teams_to_x(teams_greedy)
        f_greedy = f(x_greedy)
        t_max = None
        increase = -2**60
        for t in teams_greedy:
            if len(teams_greedy[t]) < capacities[t]:
                x_tmp = dict(x_greedy)
                x_tmp[(u, t)] = 1
                inc = f(x_tmp) - f_greedy
                if inc > increase:
                    increase = inc
                    t_max = t
        if t_max is None:
            # No project can take this student (all full); leave unassigned.
            continue
        teams_greedy[t_max].append(u)
    x_greedy = teams_to_x(teams_greedy)

    return x_random, x_greedy

## Helper functions

In [None]:
# Define objective function
def f(x):
    """Objective value of a (possibly fractional) assignment.

    Args:
        x: dict mapping (student, project) -> value; missing pairs count as 0.

    Returns:
        lambda_ * sum over x of c[(u, p)] * x[(u, p)]
        + sum over conflict edges (u, v) of (1 - sum_p x[(u, p)] * x[(v, p)]).

    Reads the globals ``c``, ``edges`` and ``lambda_``. Only the entries that
    are actually present in ``x`` are visited, instead of scanning every
    student/project combination. The keys of ``x`` are expected to be
    (student, project) pairs drawn from the instance.
    """
    # project preference term
    preference = 0
    for key, value in x.items():
        if key in c:
            preference += c[key] * value
    res = preference * lambda_

    # Index the assignment by student so each edge only looks at the projects
    # its two endpoints actually have entries for.
    by_node = {}
    for (u, p), value in x.items():
        by_node.setdefault(u, {})[p] = value

    # conflict term
    for e in edges:
        u, v = e
        x_u = by_node.get(u)
        x_v = by_node.get(v)
        inner_sum = 0
        if x_u and x_v:
            # Walk the smaller of the two project maps.
            if len(x_v) < len(x_u):
                x_u, x_v = x_v, x_u
            for p, value in x_u.items():
                if p in x_v:
                    inner_sum += value * x_v[p]
        res += (1 - inner_sum)
    return res

# Pipage rounding
def construct_graph(x):
    """Build the bipartite student/project graph of fractional entries of x.

    All global ``nodes`` (bipartite=0) and ``projects`` (bipartite=1) become
    vertices. An edge (u, p) is added when x[(u, p)] exists and is close to
    neither 0 nor 1.
    """
    fractional_graph = nx.Graph()
    fractional_graph.add_nodes_from(nodes, bipartite=0)
    fractional_graph.add_nodes_from(projects, bipartite=1)

    fractional_pairs = []
    for u in nodes:
        for p in projects:
            pair = (u, p)
            if pair not in x:
                continue
            value = x[pair]
            if np.isclose(value, 0) or np.isclose(value, 1):
                continue  # already integral, nothing left to round
            fractional_pairs.append(pair)

    fractional_graph.add_edges_from(fractional_pairs)
    return fractional_graph

def find_cycle(H):
    """Return one cycle of H as a list of edges, or [] when H has no cycle.

    Only the "no cycle" signal from networkx is turned into an empty list.
    Any other error is a genuine problem and is allowed to propagate instead
    of being silently read as "acyclic".
    """
    try:
        return nx.find_cycle(H)
    except nx.NetworkXNoCycle:
        return []

def find_path(graph):
    """Return a simple path of ``graph`` as a list of consecutive edges.

    Start vertices are tried in ``graph.nodes`` order. From the first one that
    has a neighbour, a backtracking DFS finds a longest simple path starting
    there, and that path is returned as ``[(v0, v1), (v1, v2), ...]``.

    Vertices without neighbours are skipped. Stopping at an isolated first
    vertex would report "no path" even though fractional edges remain
    elsewhere in the graph.

    Returns [] (after printing a notice) when the graph has no edge at all.
    """
    max_path = []

    def dfs_with_backtracking(vertex, path):
        nonlocal max_path
        path.append(vertex)

        for neighbor in graph[vertex]:
            if neighbor not in path:
                dfs_with_backtracking(neighbor, path)

        # Checked after the recursion, so the deepest extension is recorded first.
        if len(path) > len(max_path):
            max_path = path.copy()

        path.pop()

    for start_vertex in graph.nodes:
        if not graph[start_vertex]:
            continue  # isolated vertex: cannot start a path
        max_path = []
        dfs_with_backtracking(start_vertex, [])
        if len(max_path) > 1:
            return list(zip(max_path, max_path[1:]))

    print("No path found...")
    return []

def format_path(R):
    """Orient every edge of R as (student, project).

    An edge whose first endpoint is in the global ``nodes`` list is kept
    as is; every other edge is flipped.
    """
    oriented = []
    for edge in R:
        first, second = edge
        if first in nodes:
            oriented.append((first, second))
        else:
            oriented.append((second, first))  # (project, student) -> (student, project)
    return oriented

def calc_eps(x, R):
    """Split R into two alternating edge sets and compute the two step sizes.

    M1 takes the edges at even positions of R, M2 those at odd positions.
    eps1 is the largest amount by which M1 can go down and M2 up while all
    values stay in [0, 1]; eps2 is the same for the opposite direction.

    Returns:
        (eps1, eps2, M1, M2)
    """
    M1, M2 = R[0::2], R[1::2]

    eps1 = min(min(x[e] for e in M1), min(1 - x[e] for e in M2))
    eps2 = min(min(1 - x[e] for e in M1), min(x[e] for e in M2))

    return eps1, eps2, M1, M2

def remove_dec_error(x):
    """Keep only the entries of x that are (numerically) equal to 1.

    Values close to 1 are snapped to exactly 1; everything else is dropped.
    """
    return {key: 1 for key, value in x.items() if np.isclose(value, 1)}

def step(x, eps, M1, M2):
    """Return a copy of x with the M1 entries raised and the M2 entries lowered by eps.

    The input dict is left untouched.
    """
    shifted = dict(x)
    for edge_set, delta in ((M1, eps), (M2, -eps)):
        for edge in edge_set:
            shifted[edge] += delta
    return shifted

def round(x, eps1, eps2, M1, M2):
    """Deterministic pipage step: take whichever of the two moves scores higher under f.

    The candidate obtained with ``step(x, -eps1, ...)`` wins only when its
    objective is strictly larger; on a tie the ``step(x, eps2, ...)``
    candidate is returned.

    NOTE(review): this name shadows the builtin ``round`` for the whole
    notebook; it is kept because other cells call it by this name.
    """
    candidate_down = step(x, -eps1, M1, M2)
    candidate_up = step(x, eps2, M1, M2)
    return candidate_down if f(candidate_down) > f(candidate_up) else candidate_up

def rand_round(x, eps1, eps2, M1, M2):
    """Randomized pipage step that preserves every entry of x in expectation.

    Two moves are possible: shift by -eps1 (M1 down, M2 up) or by +eps2
    (M1 up, M2 down). For the expected shift to be zero the -eps1 move must be
    taken with probability eps2 / (eps1 + eps2), and the +eps2 move with
    probability eps1 / (eps1 + eps2):

        -eps1 * eps2 / (eps1 + eps2) + eps2 * eps1 / (eps1 + eps2) = 0

    Returns:
        A new dict; x itself is not modified.
    """
    total = eps1 + eps2
    if total == 0:
        # Neither direction can move; avoid dividing by zero.
        return x.copy()

    rand = random.uniform(0, 1)
    if rand < eps2 / total:
        x_new = step(x, -eps1, M1, M2)
    else:
        x_new = step(x, eps2, M1, M2)
    return x_new
    
def clean_edges(x, H):
    """Remove from H every edge whose x value has become (numerically) 0 or 1.

    H is modified in place and also returned.
    """
    settled = []
    for edge in H.edges:
        value = x[edge]
        if np.isclose(value, 0) or np.isclose(value, 1):
            settled.append(edge)
    H.remove_edges_from(settled)
    return H
        
def pipage_help(x, R):
    """One deterministic pipage step along the cycle/path R.

    Orients the edges with format_path, derives the step sizes with calc_eps
    and returns the assignment chosen by round().
    """
    oriented = format_path(R)
    eps1, eps2, M1, M2 = calc_eps(x, oriented)
    return round(x, eps1, eps2, M1, M2)

def rand_pipage_help(x, R):
    """One randomized pipage step along the cycle/path R.

    Orients the edges with format_path, derives the step sizes with calc_eps
    and returns the assignment drawn by rand_round().
    """
    oriented = format_path(R)
    eps1, eps2, M1, M2 = calc_eps(x, oriented)
    return rand_round(x, eps1, eps2, M1, M2)

def pipage(x, setting="deterministic"):
    """Round a fractional assignment with pipage rounding.

    Args:
        x: dict mapping (student, project) -> value.
        setting: 'deterministic' uses pipage_help (keeps the better of the two
            moves under f); any other value uses rand_pipage_help.

    Returns:
        The rounded assignment after remove_dec_error, i.e. only the entries
        that ended up (numerically) equal to 1, each mapped to 1.

    The loop repeatedly takes a cycle of the fractional graph (or, if there is
    none, a path), applies one rounding step along it and drops the edges that
    became integral. It stops when no cycle or path with at least two edges is
    left. The only exit is the return inside the loop.
    """
    H = construct_graph(x)
    while True:
        R = find_cycle(H)
        R = R if R else find_path(H)
        if len(R) == 1:
            # A lone fractional edge cannot be split into two edge sets;
            # report it before finishing.
            print(f'R = {R}')
            print(f'x = {x}')
        if R and len(R) > 1:
            x = pipage_help(x, R) if setting == 'deterministic' else rand_pipage_help(x, R)
            clean_edges(x, H)
        else:
            return remove_dec_error(x)


## Solve compacted problem

In [None]:
def round34():
    """Build and solve the relaxed assignment model on the compacted instance.

    Reads the globals new_nodes, projects, numOfProjects, G_compact,
    superNodeSize, capacities, c_compact, w_compact, lambda_ and times.

    Model:
      * X[(u, p)]: continuous variable per (super node, project).
      * For every compact edge (u, v) and project t:
        S[(u, v, t)] == X[(u, t)] + X[(v, t)] and Z[(u, v, t)] == min(S, 1).
      * Each super node's X values sum to 1.
      * Per project, the super-node-size-weighted sum of X is at most the
        project's capacity.
      * Maximise lambda_ * sum(c_compact * X) + sum(w_compact * Z) - sum(w_compact).

    Side effects: stores the model-building time in times["relax_model34"] and
    the optimize-plus-copy time in times["optimize_relaxation34"].

    Returns:
        dict mapping (super node, project) -> solved value of X, as produced
        by copy_solution.
    """
    print(f'Running round34 ...')

    start = time()

    # Create a new model
    # NOTE(review): the model uses the default Gurobi environment (env=env is
    # commented out) and a hard-coded limit of 300 rather than the global
    # timeLimit — confirm this is intended.
    m = gp.Model("linear") #, env=env)
    m.Params.timeLimit = 300 #timeLimit

    print('Creating variables ...')
    # Create variables
    X = dict()
    for u in new_nodes:
        for p in projects:
            X[(u, p)] = m.addVar(vtype=GRB.CONTINUOUS)

    print('Creating auxiliary variables ...')
    # Auxiliary variables
    Z = dict()
    S = dict()
    print(f'# of G_compact edges = {len(G_compact.edges())}')
    print(f'# of nodes of G_compact = {len(G_compact.nodes())}')
    for e in G_compact.edges():
        u = e[0]
        v = e[1]
        for t in projects:
            Z[(u, v, t)] = m.addVar(vtype=GRB.CONTINUOUS)
            S[(u, v, t)] = m.addVar(vtype=GRB.CONTINUOUS)
            # S is the combined mass of the two endpoints on project t ...
            m.addConstr(S[(u, v, t)] == X[(u, t)] + X[(v, t)])
            # ... and Z caps it at 1 through a Gurobi min_ general constraint.
            m.addConstr(Z[(u, v, t)] == gp.min_(S[(u, v, t)], constant = 1))

    print('Adding constraints ...')   
    # Add constraints
    # Each student assigned to exactly one project
    for u in new_nodes:
        expr = gp.LinExpr(numOfProjects*[1], [X[(u, t)] for t in projects])
        m.addConstr(expr == 1)

    # Project capacity constraints
    # A super node counts with its size (number of merged original nodes).
    for p in projects:
        expr = gp.LinExpr([superNodeSize[u] for u in new_nodes], [X[(u, p)] for u in new_nodes])
        m.addConstr(expr <= capacities[p])

    # Relaxed objective L(x)
    L = gp.LinExpr()

    # Linear (project preference) term
    for u in new_nodes:
        for p in projects:
            if (u, p) in c_compact:
                L += c_compact[(u, p)] * X[(u, p)]
    # Scale the whole preference term accumulated so far by lambda_.
    L *= lambda_

    # Max-cut term
    for e in G_compact.edges():
        u = e[0]; v = e[1]
        for p in projects:
            L += w_compact[(u, v)] * Z[(u, v, p)] 
            
    # Constant offset: subtract the total compact edge weight once.
    L -= sum(w_compact.values())

    m.setObjective(L, GRB.MAXIMIZE)    
    times["relax_model34"] = time() - start

    print(f'Optimizing ...')
    start = time()
    # Optimize model
    m.optimize()

    # Convert solution to dictionary format
    # NOTE(review): the solver status is not checked before the values are
    # read — confirm a solution is always available when the time limit hits.
    x_frac34 = copy_solution(X)
    times["optimize_relaxation34"] = time() - start

    return x_frac34

In [None]:
# Trade-off weight of the preference term in the objective:
# lambda_mul times the total compact edge weight divided by the number of original nodes.
lambda_mul = 1
lambda_ = lambda_mul * sum(w_compact.values()) / n
x_frac34 = round34()
# NOTE(review): round12 is not defined in any visible cell — presumably it
# comes from `from dsd import *` or a removed cell; verify before Run All.
x_frac12 = round12()

## Unroll solution

In [None]:
def unroll(x_compact):
    """Expand a super-node assignment back to the original nodes.

    Every member of a comma-joined super-node label inherits the super node's
    value for each project; entries whose value is exactly 0 are left out.
    """
    x_unroll = {}
    for (superNode, p), value in x_compact.items():
        members = superNode.split(',')
        if value != 0:
            for u in members:
                x_unroll[(u, p)] = value
    return x_unroll

In [None]:
# Map both fractional super-node solutions back to the original nodes.
x_unroll34 = unroll(x_frac34)
x_unroll12 = unroll(x_frac12)

In [None]:
def construct_teams(x):
    """Group an integral assignment into sorted member lists per project.

    Every project of the global ``projects`` collection gets a (possibly
    empty) list; a student is added to project p when x[(u, p)] equals 1.
    """
    teams = {p: [] for p in projects}

    for (u, p), value in x.items():
        if value == 1:
            teams[p].append(u)

    for roster in teams.values():
        roster.sort()

    return teams

## Round solution

In [None]:
print('Rounding 3/4 solution ...')
start = time()
# Use the rounding mode chosen in the configuration cell instead of a second,
# hard-coded copy of the same value.
x_round34 = pipage(x_unroll34, pipageSetting)
times['rounding_34'] = time() - start
# print('Rounding 1/2 solution ...')
# start = time()
# x_round12 = pipage(x_unroll12, 'deterministic')
# times['rounding_12'] = time() - start
print('Constructing teams ...')
teams34 = construct_teams(x_round34)
# teams12 = construct_teams(x_round12)

In [None]:
# Objective value of the rounded 3/4 solution.
val_34 = f(x_round34)
# Last expression of the cell: shows the timing dictionary collected so far.
times
# val_12 = f(x_round12)

In [None]:
# Build the random and greedy baseline assignments for comparison.
x_random, x_greedy = baselines()

In [None]:
# Objective values of the two baselines, evaluated with the same f as the rounded solution.
val_random, val_greedy = f(x_random), f(x_greedy)