In [None]:
import numpy as np
import random
import gurobipy as gp
import time

Parameters used to generate the nodes and the clusters

In [None]:
# --- Instance size ---
n = 200  # number of nodes
p = 2    # number of clusters

# --- Normal-distribution parameters for the cluster capacities ---
capacity_mean = 10
capacity_stddev = 2

# --- Normal-distribution parameters for the node weights ---
weight_mean = 1
weight_stddev = 0.1

# Multiplier applied to the capacity and weight terms of the CCP objective
lambda_param = 0.5

# Seed NumPy's global generator so the random instances are reproducible
np.random.seed(42)

Generate a random instance: uniform 2D node positions, and cluster capacities drawn from a normal distribution with the given mean and standard deviation

In [None]:
def generate_instances(n, p, capacity_mean, capacity_stddev):
    """Draw a random clustering instance.

    Parameters
    ----------
    n : int
        Number of nodes.
    p : int
        Number of clusters.
    capacity_mean, capacity_stddev : float
        Mean and standard deviation of the normal distribution the
        cluster capacities are sampled from.

    Returns
    -------
    tuple
        ``(nodes, capacities)``: an ``(n, 2)`` array of coordinates drawn
        uniformly from [0, 1) and a length-``p`` array of capacities.

    Both draws consume NumPy's global random state, positions first.
    """
    positions = np.random.rand(n, 2)
    cluster_capacities = np.random.normal(
        loc=capacity_mean, scale=capacity_stddev, size=p
    )
    return positions, cluster_capacities

Generate random node weights drawn from a normal distribution with the given mean and standard deviation

In [None]:
def generate_weights(n, weight_mean, weight_stddev):
    """Sample one weight per node from a normal distribution.

    Parameters
    ----------
    n : int
        Number of nodes (length of the returned array).
    weight_mean, weight_stddev : float
        Mean and standard deviation of the normal distribution.

    Returns
    -------
    numpy.ndarray
        Length-``n`` array of weights drawn from NumPy's global random state.
    """
    return np.random.normal(loc=weight_mean, scale=weight_stddev, size=n)

 Euclidean distance between two points p and q

In [None]:
def euclidean_distance(p, q):
    """Return the Euclidean (L2) distance between points ``p`` and ``q``.

    The operands are combined with ``-``, so they must support element-wise
    subtraction (for example NumPy arrays of equal shape).
    """
    delta = p - q
    squared_length = np.sum(delta ** 2)
    return np.sqrt(squared_length)

In [None]:
def solve_ccp(nodes, capacities, weights, lambda_param):
    """Build and solve the clustering MIP with Gurobi.

    Model
    -----
    * ``x[i, j]`` (binary): node ``i`` is assigned to cluster ``j``.
    * ``y[j]`` (binary): cluster ``j`` is open.
    * Objective (minimised):
      ``sum_ij dist(nodes[i], nodes[j]) * x[i, j]``
      ``+ lambda_param * sum_j capacities[j] * y[j]``
      ``- lambda_param * sum_ij weights[i] * x[i, j]``.
      Cluster ``j`` is anchored at ``nodes[j]``, i.e. the first ``p`` nodes
      act as cluster centres.
    * Constraints: every node belongs to exactly one cluster, a node can
      only be assigned to an open cluster, and at most ``p`` clusters are
      open (always true, since there are exactly ``p`` binary ``y``).

    NOTE(review): no constraint bounds the total weight assigned to a
    cluster by its capacity; ``capacities`` only enters through the opening
    cost in the objective. Confirm whether a constraint such as
    ``sum_i weights[i] * x[i, j] <= capacities[j] * y[j]`` is intended. It
    is not added here because it changes the feasible set of the model.

    Parameters
    ----------
    nodes : numpy.ndarray
        Node coordinates, one row per node.
    capacities : sequence of float
        One capacity per cluster; its length defines ``p``.
    weights : sequence of float
        One weight per node.
    lambda_param : float
        Multiplier of the capacity and weight terms of the objective.

    Returns
    -------
    tuple
        ``(clusters, elapsed)`` where ``clusters[j]`` lists the indices of
        the nodes assigned to cluster ``j`` (empty for an unused cluster)
        and ``elapsed`` is the wall-clock time in seconds, including model
        construction and solution extraction.

    Raises
    ------
    ValueError
        If there are more clusters than nodes (cluster ``j`` is anchored at
        ``nodes[j]``, so ``p`` cannot exceed ``n``).
    RuntimeError
        If Gurobi finishes without any feasible solution.
    """
    start = time.time()
    n = nodes.shape[0]
    p = len(capacities)

    if p > n:
        raise ValueError(
            f'solve_ccp needs at least as many nodes as clusters (got n={n}, p={p})'
        )

    # Create gurobi model
    model = gp.Model('ccp')

    # Assignment variables: one per (node, cluster) pair
    x = {}
    for i in range(n):
        for j in range(p):
            x[i, j] = model.addVar(vtype=gp.GRB.BINARY, name=f'x[{i},{j}]')

    # Opening variables: one per cluster (they are only ever indexed by j)
    y = {}
    for j in range(p):
        y[j] = model.addVar(vtype=gp.GRB.BINARY, name=f'y[{j}]')

    # Set objective function
    obj = gp.quicksum(euclidean_distance(nodes[i], nodes[j]) * x[i, j] for i in range(n) for j in range(p))
    obj += lambda_param * gp.quicksum(capacities[j] * y[j] for j in range(p))
    # With the assignment constraints below this term is constant (every
    # node is assigned exactly once); it is kept to preserve the objective value.
    obj -= lambda_param * gp.quicksum(weights[i] * x[i, j] for i in range(n) for j in range(p))
    model.setObjective(obj, gp.GRB.MINIMIZE)

    # Each node is assigned to exactly one cluster
    for i in range(n):
        model.addConstr(gp.quicksum(x[i, j] for j in range(p)) == 1, name=f'assign[{i}]')

    # At most p clusters may be open
    model.addConstr(gp.quicksum(y[j] for j in range(p)) <= p, name='num_clusters')

    # A node can only be assigned to an open cluster
    for i in range(n):
        for j in range(p):
            model.addConstr(x[i, j] <= y[j], name=f'x_c[{i},{j}]')

    # Solve model
    model.optimize()

    # Reading .X without a stored solution fails with an opaque attribute
    # error, so report the solver status explicitly instead.
    if model.SolCount == 0:
        raise RuntimeError(
            f'Gurobi found no feasible solution for the CCP model (status {model.Status})'
        )

    # Extract solution (binary values are compared to 0.5 to absorb solver tolerance)
    clusters = []
    for j in range(p):
        cluster = [i for i in range(n) if x[i, j].X > 0.5]
        clusters.append(cluster)
    end = time.time()
    return clusters, end - start

In [None]:
def jaccard_similarity(clusters_1, clusters_2):
    """Mean best-match Jaccard index between two clusterings.

    For every cluster in ``clusters_1`` the Jaccard index
    ``|A & B| / |A | B|`` is computed against every cluster in
    ``clusters_2`` and the best value is kept; the result is the mean of
    those best values.

    Edge cases
    ----------
    * Two empty clusters are treated as identical (index 1.0) instead of
      dividing by zero; the solver returns an empty list for every
      unopened cluster, so this case does occur.
    * If ``clusters_2`` contains no clusters, every best match is 0.0.
    * If ``clusters_1`` contains no clusters, 0.0 is returned.

    Parameters
    ----------
    clusters_1, clusters_2 : iterable of iterables
        Each inner iterable holds the (hashable) members of one cluster.

    Returns
    -------
    float
        Similarity in ``[0, 1]``.
    """
    # Build the sets once instead of once per comparison
    sets_2 = [set(cluster) for cluster in clusters_2]

    best_scores = []
    for cluster in clusters_1:
        members = set(cluster)
        scores = []
        for other in sets_2:
            union_size = len(members | other)
            if union_size == 0:
                # Both clusters are empty: identical by convention
                scores.append(1.0)
            else:
                scores.append(len(members & other) / union_size)
        best_scores.append(max(scores, default=0.0))

    if not best_scores:
        return 0.0
    return np.mean(best_scores)

In [None]:
def _label_map(clustering):
    """Return an ``{item: cluster label}`` dict for either input format."""
    clustering = list(clustering)
    group_types = (list, tuple, set, frozenset, np.ndarray)
    if any(isinstance(entry, group_types) for entry in clustering):
        # List of clusters: entry k holds the members of cluster k
        return {
            item: label
            for label, members in enumerate(clustering)
            for item in members
        }
    # Label vector: entry k is the label of item k
    return dict(enumerate(clustering))


def rand_index(clusters_1, clusters_2):
    """Rand index between two clusterings of the same items.

    The Rand index is the fraction of item pairs on which both clusterings
    agree: the pair is together in both, or separated in both.

    Each argument may be given in either of two formats:

    * a label vector, where entry ``k`` is the cluster label of item ``k``;
    * a list of clusters, where entry ``k`` is a list/tuple/set/array of the
      items in cluster ``k`` (the format returned by the solvers in this
      notebook).

    Only items present in both clusterings are compared. For two label
    vectors of equal length the result is the same as counting pairs
    directly on the vectors.

    Returns
    -------
    float
        Value in ``[0, 1]``; 1.0 when fewer than two common items exist
        (there is no pair on which the clusterings could disagree).
    """
    labels_1 = _label_map(clusters_1)
    labels_2 = _label_map(clusters_2)

    # One (label in clustering 1, label in clustering 2) pair per common item
    paired = [(labels_1[item], labels_2[item]) for item in labels_1 if item in labels_2]

    count = len(paired)
    total_pairs = count * (count - 1) // 2
    if total_pairs == 0:
        return 1.0

    agreements = 0
    for i in range(count):
        first_1, first_2 = paired[i]
        for j in range(i + 1, count):
            second_1, second_2 = paired[j]
            together_1 = first_1 == second_1
            together_2 = first_2 == second_2
            # Agreement covers both "same/same" and "different/different"
            if together_1 == together_2:
                agreements += 1

    return agreements / total_pairs


In [None]:
def insample_stability(clusters_reference, nodes, capacities, weights, lambda_param, num_runs=100):
    """Average Jaccard similarity between repeated solves and a reference.

    The CCP is solved ``num_runs`` times on the given instance and each
    resulting clustering is scored against ``clusters_reference`` with
    ``jaccard_similarity``; the mean score is returned.

    NOTE(review): every run passes exactly the same inputs to ``solve_ccp``,
    so unless the solver itself is nondeterministic all runs yield the same
    clusters -- confirm that this is the intended notion of stability.
    """
    per_run_scores = [
        jaccard_similarity(
            solve_ccp(nodes, capacities, weights, lambda_param)[0],
            clusters_reference,
        )
        for _ in range(num_runs)
    ]
    return np.mean(per_run_scores)


In [None]:
def split_data(nodes, weights, train_ratio=0.7):
    """Randomly partition nodes and weights into train and validation parts.

    The row indices are shuffled with NumPy's global random state; the first
    ``int(n * train_ratio)`` shuffled indices form the training part and the
    remainder the validation part.

    Returns
    -------
    tuple
        ``(nodes_train, weights_train, nodes_val, weights_val)``.
    """
    total = nodes.shape[0]
    order = np.arange(total)
    np.random.shuffle(order)

    cut = int(total * train_ratio)
    train_idx, val_idx = order[:cut], order[cut:]

    return nodes[train_idx], weights[train_idx], nodes[val_idx], weights[val_idx]


In [None]:
def out_sample_stability(num_runs=10):
    """Estimate out-of-sample stability as a mean Rand index.

    Each run draws a fresh instance from the notebook-level settings
    (``n``, ``p``, ``capacity_*``, ``weight_*``, ``lambda_param``), splits
    it into a training and a validation part and solves the CCP on both.
    The validation nodes are then labelled in two ways:

    * by the clusters obtained when solving on the validation part itself;
    * by the nearest centroid of the non-empty clusters obtained on the
      training part.

    The Rand index between these two labelings of the *same* validation
    nodes is the score of the run. The two solver outputs cannot be compared
    directly: they index different node sets, and ``rand_index`` needs one
    label per item.

    NOTE(review): nearest-centroid assignment is one reasonable way to carry
    the training clusters over to unseen nodes; confirm it matches the
    intended definition of out-of-sample stability.

    Parameters
    ----------
    num_runs : int
        Number of random instances to average over.

    Returns
    -------
    float
        Mean Rand index over the runs.
    """
    rand_index_scores = []

    for _run in range(num_runs):

        nodes, capacities = generate_instances(n, p, capacity_mean, capacity_stddev)
        weights = generate_weights(n, weight_mean, weight_stddev)

        nodes_train, weights_train, nodes_val, weights_val = split_data(nodes, weights)

        clusters_train, _ = solve_ccp(nodes_train, capacities, weights_train, lambda_param)
        clusters_val, _ = solve_ccp(nodes_val, capacities, weights_val, lambda_param)

        # Labels of the validation nodes according to their own clustering
        labels_val = [-1] * len(nodes_val)
        for label, members in enumerate(clusters_val):
            for node in members:
                labels_val[node] = label

        # Labels of the validation nodes according to the training clusters:
        # nearest centroid among the clusters that actually received nodes
        centroids = np.array(
            [nodes_train[members].mean(axis=0) for members in clusters_train if members]
        )
        distances = np.linalg.norm(
            nodes_val[:, None, :] - centroids[None, :, :], axis=2
        )
        labels_from_train = distances.argmin(axis=1).tolist()

        rand_index_scores.append(rand_index(labels_val, labels_from_train))
    return np.mean(rand_index_scores)

In [None]:
def greedy_cluster_nodes(nodes, capacities, weights):
    """First-fit-decreasing assignment of nodes to capacitated clusters.

    Nodes are visited from heaviest to lightest and each one is placed in
    the lowest-indexed cluster whose remaining capacity is at least the
    node's weight. A node that fits in no cluster is left unassigned and
    does not appear in the result. Node positions are not used for the
    assignment.

    Parameters
    ----------
    nodes : numpy.ndarray
        Node coordinates (only ``nodes.shape`` is read).
    capacities : sequence of float
        One capacity per cluster; a copy is taken, the input is not modified.
    weights : sequence of float
        One weight per node.

    Returns
    -------
    tuple
        ``(clusters, elapsed)`` where ``clusters[j]`` lists the node indices
        placed in cluster ``j`` and ``elapsed`` is the wall-clock time in
        seconds.
    """
    started_at = time.time()
    num_nodes = nodes.shape[0]  # not used below; kept so the function's behaviour is unchanged
    num_clusters = len(capacities)

    # One (initially empty) member list per cluster
    assignment = [[] for _ in range(num_clusters)]

    # Work on a copy so the caller's capacities stay untouched
    free_capacity = capacities.copy()

    # (node index, weight) pairs, heaviest first; ties keep their index order
    heaviest_first = sorted(enumerate(weights), key=lambda pair: pair[1], reverse=True)

    for node, weight in heaviest_first:
        # First cluster that can still take this node, if any
        target = next(
            (j for j in range(num_clusters) if free_capacity[j] >= weight),
            None,
        )
        if target is not None:
            assignment[target].append(node)
            free_capacity[target] -= weight

    elapsed = time.time() - started_at
    return assignment, elapsed


In [None]:
# Draw one random instance: 2D node positions and one capacity per cluster.
# The calls below consume NumPy's global random state in this order, so
# reordering them changes the generated instance.
nodes, capacities = generate_instances(n, p, capacity_mean, capacity_stddev)
# Draw one random weight per node
weights = generate_weights(n, weight_mean, weight_stddev)

# Solve the instance with the Gurobi model; time1 is the elapsed time in seconds
clusters, time1 = solve_ccp(nodes, capacities, weights, lambda_param)

# Cluster the same instance with the greedy heuristic; time2 is its elapsed time in seconds
greedy_clusters, time2 = greedy_cluster_nodes(nodes, capacities, weights)


In [None]:
# in_sample_stability_score = insample_stability(clusters,nodes, capacities, weights, lambda_param, num_runs=100)
# print("Insample Stability:",in_sample_stability_score)

In [None]:

# out_sample_stability_score = out_sample_stability(num_runs=100)
# print("Outsample Stability:",out_sample_stability_score)

In [None]:
# Display the node indices assigned to each cluster by the greedy heuristic
greedy_clusters

In [None]:
# Display the node indices assigned to each cluster by the Gurobi model
clusters