In [8]:
#!pip3 install pulp
#!pip install csv

In [9]:
import pulp
import pandas as pd
import csv
import itertools

In [10]:
df = pd.read_csv("clean_data_actualitzat.csv")


distances = []

with open('distance_matrix.csv', 'r') as fitxer:
    lector = csv.reader(fitxer, delimiter=',')
    for fila in lector:
        # Si vols convertir els valors a números enters o flotants
        distances.append([float(element) for element in fila])

In [11]:
df_4 = df[(df['preferred_team_size'] == 4) | (df['preferred_team_size'] == 0)]
df_3 = df[df['preferred_team_size'] == 3]
df_2 = df[df['preferred_team_size'] == 2]
df_1 = df[df['preferred_team_size'] == 1]

In [12]:
n = len(df)
n4 = len(df_4)
n3 = len(df_3)
n2 = len(df_2)
n1 = len(df_1)

print(n)
print(n4)
print(n3)
print(n2)
print(n1)


persones = list(range(n4))

924
801
98
19
6


## CLUSTERITZACIÓ 4

In [13]:
mida_grup = 4
num_grups = n4 // mida_grup

while n4 % mida_grup != 0:
    random_row = df_3.sample(n=1)
    df2 = pd.concat([df_4, random_row], ignore_index=True)
    df1 = df_3.drop(random_row.index)

    n4 += 1
    n3 -= 1

num_grups = n4 // mida_grup


individuals = range(n4)
clusters = range(num_grups)


In [14]:
prob = pulp.LpProblem("ClusteringProblem", pulp.LpMinimize)

# Decision variables
X = pulp.LpVariable.dicts(
    "X", (individuals, clusters), cat="Binary"
)
Y = pulp.LpVariable.dicts(
    "Y", ([(i, j) for i in individuals for j in individuals if i < j], clusters), cat="Binary"
)

# Constraints
# Each individual is assigned to exactly one cluster
for i in individuals:
    prob += pulp.lpSum(X[i][k] for k in clusters) == 1, f"Assignment_{i}"

# Each cluster has exactly 'cluster_size' individuals
for k in clusters:
    prob += pulp.lpSum(X[i][k] for i in individuals) == mida_grup, f"ClusterSize_{k}"

# Linearization constraints for Y_{ijk} = X_{ik} * X_{jk}
for (i, j) in itertools.combinations(individuals, 2):
    for k in clusters:
        prob += Y[(i, j)][k] <= X[i][k], f"YleX_{i}_{j}_{k}_1"
        prob += Y[(i, j)][k] <= X[j][k], f"YleX_{i}_{j}_{k}_2"
        prob += Y[(i, j)][k] >= X[i][k] + X[j][k] - 1, f"YgeX_{i}_{j}_{k}"

# Objective function
prob += pulp.lpSum(
    distances[i][j] * Y[(i, j)][k]
    for (i, j) in itertools.combinations(individuals, 2)
    for k in clusters
), "TotalSquaredDistance"


solver = pulp.CPLEX_CMD(options=['miplimits/iterlim=10000'], msg=True) 

prob.solve(solver)

: 

Problema d'optimització

In [None]:
x[0,0]

x_(0,_0)

In [None]:
optim = pulp.LpProblem("Clustering_de_Persones", pulp.LpMinimize)

# Funció objectiu
optim += pulp.lpSum()

TypeError: Non-constant expressions cannot be multiplied

## Restriccions

Restricció 1: Cada persona ha d'estar en un únic grup

In [None]:
for i in df_4:
    optim += pulp.lpSum(x[i, j] for j in range(num_grups)) == 1

Restricció 2: Cada grup ha de tenir exactament 4 persones (o menys si hi ha persones sobrants)


In [None]:
for j in range(num_grups):
    optim += pulp.lpSum(x[i, j] for i in df_4) <= mida_grup
    optim += pulp.lpSum(x[i, j] for i in df_4) >= mida_grup


In [None]:
optim.solve()

In [None]:
from collections import defaultdict

grups = defaultdict(list)
for i in df_4:
    for j in range(num_grups):
        if pulp.value(x[i, j]) == 1:
            grups[j].append(i)

# Imprimir els grups
for j in sorted(grups):
    print(f"Grup {j+1}: {grups[j]}")

In [None]:
print("Status:", pulp.LpStatus[optim.status])
print("Cost total:", pulp.value(optim.objective))