Esempio ottimizzazione a numeri interi per minimizzare il numero di gruppi da creare 

In [1]:
import pulp
import json

# 1.0 Data Retrieval

Recupero dati generati da Lu tramite API

In [2]:
res = {"query": "services.http.response.html_title: \"X Acceleration Codec\" or services.http.response.body: \"X Acceleration Codec\"","field": "location.country","total": 2194,"duration": 1099,"total_omitted": 0,"potential_deviation": 0,"buckets": [{"key": "Netherlands","count": 454},{"key": "Spain","count": 392},{"key": "Germany","count": 267},{"key": "France","count": 246},{"key": "United States","count": 139},{"key": "Brazil","count": 108},{"key": "United Kingdom","count": 59},{"key": "Finland","count": 55},{"key": "Bulgaria","count": 41},{"key": "Italy","count": 39},{"key": "Romania","count": 32},{"key": "Ukraine","count": 32},{"key": "Canada","count": 25},{"key": "Serbia","count": 23},{"key": "Bosnia and Herzegovina","count": 21},{"key": "Thailand","count": 19},{"key": "India","count": 18},{"key": "Turkey","count": 16},{"key": "Singapore","count": 15},{"key": "Colombia","count": 14},{"key": "Pakistan","count": 14},{"key": "Poland","count": 14},{"key": "Lithuania","count": 12},{"key": "Israel","count": 10},{"key": "Portugal","count": 10},{"key": "Iraq","count": 9},{"key": "Russia","count": 8},{"key": "Croatia","count": 7},{"key": "Sweden","count": 7},{"key": "Austria","count": 6},{"key": "Switzerland","count": 6},{"key": "Indonesia","count": 5},{"key": "Belgium","count": 4},{"key": "Costa Rica","count": 4},{"key": "Palestinian Territory","count": 4},{"key": "Albania","count": 3},{"key": "Australia","count": 3},{"key": "Czech Republic","count": 3},{"key": "Estonia","count": 3},{"key": "Macedonia","count": 3},{"key": "Peru","count": 3},{"key": "South Africa","count": 3},{"key": "Tunisia","count": 3},{"key": "Argentina","count": 2},{"key": "Cyprus","count": 2},{"key": "Dominican Republic","count": 2},{"key": "Hungary","count": 2},{"key": "Iceland","count": 2},{"key": "Ireland","count": 2},{"key": "Paraguay","count": 2},{"key": "United Arab Emirates","count": 2},{"key": "Armenia","count": 1},{"key": "Bolivia","count": 1},{"key": "Chile","count": 1},{"key": "Denmark","count": 1},{"key": "Ecuador","count": 1},{"key": "Egypt","count": 1},{"key": "El Salvador","count": 1},{"key": "French Polynesia","count": 1},{"key": "Honduras","count": 1},{"key": "Japan","count": 1},{"key": "Jordan","count": 1},{"key": "Malta","count": 1},{"key": "Montenegro","count": 1},{"key": "Morocco","count": 1},{"key": "Myanmar","count": 1},{"key": "Nepal","count": 1},{"key": "Norway","count": 1},{"key": "Slovakia","count": 1},{"key": "Vietnam","count": 1}]}

In [3]:
data = res['buckets']

In [4]:
len(data)

70

In [5]:
data[0:4]

[{'key': 'Netherlands', 'count': 454},
 {'key': 'Spain', 'count': 392},
 {'key': 'Germany', 'count': 267},
 {'key': 'France', 'count': 246}]

# 2.0 Definizione problema

In [6]:
# Capacità massima per gruppo
capacity = 1000

# Prepara i dati
countries = [item['key'] for item in data]
counts = [item['count'] for item in data]
num_items = len(countries)

# Stima superiore del numero di gruppi (ogni paese in un gruppo separato)
num_bins = num_items

In [7]:
# Creazione del problema
prob = pulp.LpProblem("Ottimizzare chiamate API a Censys", pulp.LpMinimize)

# Variabili di decisione
x = pulp.LpVariable.dicts('x', ((i,j) for i in range(num_items) for j in range(num_bins)), cat='Binary')
y = pulp.LpVariable.dicts('y', (j for j in range(num_bins)), cat='Binary')

# Funzione obiettivo: minimizzare il numero di gruppi utilizzati
prob += pulp.lpSum([y[j] for j in range(num_bins)])

# Vincolo: ogni paese deve essere assegnato esattamente a un gruppo
for i in range(num_items):
    prob += pulp.lpSum([x[(i,j)] for j in range(num_bins)]) == 1

# Vincolo: la somma dei conteggi in ogni gruppo non deve superare la capacità
for j in range(num_bins):
    prob += pulp.lpSum([counts[i] * x[(i,j)] for i in range(num_items)]) <= capacity * y[j]

# Risoluzione del problema
prob.solve()



Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /opt/conda/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/988c1c2c7b354ee9820413fe39c92abc-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/988c1c2c7b354ee9820413fe39c92abc-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 145 COLUMNS
At line 20026 RHS
At line 20167 BOUNDS
At line 25138 ENDATA
Problem MODEL has 140 rows, 4970 columns and 9870 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 2.194 - 0.01 seconds
Cgl0004I processed model has 140 rows, 4970 columns (4970 integer (4970 of which binary)) and 9870 elements
Cutoff increment increased from 1e-05 to 0.9999
Cbc0045I 70 integer variables out of 4970 objects (4970 integer) have cost of 1 - high priority
Cbc0045I branch on satisfied N create fake objective Y random cost Y
Cbc0038I Initial state - 5 

1

In [8]:
# Stato della soluzione
print("Status:", pulp.LpStatus[prob.status])

# Raggruppamento dei paesi per gruppo
bins = {}
for j in range(num_bins):
    if y[j].varValue > 0.5:
        bins[j] = []
        for i in range(num_items):
            if x[(i,j)].varValue > 0.5:
                bins[j].append((countries[i], counts[i]))

Status: Optimal


In [9]:
# Visualizzazione dei gruppi
total_groups = len(bins)
print(f"Numero totale di gruppi individuati: {total_groups}")
print("=" * 50)
for bin_index, bin_items in bins.items():
    total_count = sum([count for country, count in bin_items])
    print(f"Gruppo {bin_index + 1}: Totale = {total_count}")
    for country, count in bin_items:
        print(f" - {country}: {count}")
    print("-" * 50)

Numero totale di gruppi individuati: 3
Gruppo 1: Totale = 899
 - Germany: 267
 - Brazil: 108
 - United Kingdom: 59
 - Finland: 55
 - Bulgaria: 41
 - Italy: 39
 - Canada: 25
 - Serbia: 23
 - Bosnia and Herzegovina: 21
 - Thailand: 19
 - India: 18
 - Turkey: 16
 - Singapore: 15
 - Colombia: 14
 - Pakistan: 14
 - Poland: 14
 - Lithuania: 12
 - Israel: 10
 - Portugal: 10
 - Iraq: 9
 - Russia: 8
 - Croatia: 7
 - Sweden: 7
 - Austria: 6
 - Switzerland: 6
 - Indonesia: 5
 - Belgium: 4
 - Costa Rica: 4
 - Palestinian Territory: 4
 - Albania: 3
 - Australia: 3
 - Czech Republic: 3
 - Estonia: 3
 - Macedonia: 3
 - Peru: 3
 - South Africa: 3
 - Tunisia: 3
 - Argentina: 2
 - Cyprus: 2
 - Dominican Republic: 2
 - Hungary: 2
 - Iceland: 2
 - Ireland: 2
 - Paraguay: 2
 - United Arab Emirates: 2
 - Armenia: 1
 - Bolivia: 1
 - Chile: 1
 - Denmark: 1
 - Ecuador: 1
 - Egypt: 1
 - El Salvador: 1
 - French Polynesia: 1
 - Honduras: 1
 - Japan: 1
 - Jordan: 1
 - Malta: 1
 - Montenegro: 1
 - Morocco: 1
 - My