versione per confrontro con v3: quale costa di più?

In [1]:
import pulp
import math

# Dati forniti
data = [
    {'key': 'Netherlands', 'count': 454},
    {'key': 'Spain', 'count': 392},
    {'key': 'Germany', 'count': 267},
    {'key': 'France', 'count': 246},
    {'key': 'United States', 'count': 139},
    {'key': 'Brazil', 'count': 108},
    {'key': 'United Kingdom', 'count': 59},
    {'key': 'Finland', 'count': 55},
    {'key': 'Bulgaria', 'count': 41},
    {'key': 'Italy', 'count': 39},
    {'key': 'Romania', 'count': 32},
    {'key': 'Ukraine', 'count': 32},
    {'key': 'Canada', 'count': 25},
    {'key': 'Serbia', 'count': 23},
    {'key': 'Bosnia and Herzegovina', 'count': 21},
    {'key': 'Thailand', 'count': 19},
    {'key': 'India', 'count': 18},
    {'key': 'Turkey', 'count': 16},
    {'key': 'Singapore', 'count': 15},
    {'key': 'Colombia', 'count': 14},
    {'key': 'Pakistan', 'count': 14},
    {'key': 'Poland', 'count': 14},
    {'key': 'Lithuania', 'count': 12},
    {'key': 'Israel', 'count': 10},
    {'key': 'Portugal', 'count': 10},
    {'key': 'Iraq', 'count': 9},
    {'key': 'Russia', 'count': 8},
    {'key': 'Croatia', 'count': 7},
    {'key': 'Sweden', 'count': 7},
    {'key': 'Austria', 'count': 6},
    {'key': 'Switzerland', 'count': 6},
    {'key': 'Indonesia', 'count': 5},
    {'key': 'Belgium', 'count': 4},
    {'key': 'Costa Rica', 'count': 4},
    {'key': 'Palestinian Territory', 'count': 4},
    {'key': 'Albania', 'count': 3},
    {'key': 'Australia', 'count': 3},
    {'key': 'Czech Republic', 'count': 3},
    {'key': 'Estonia', 'count': 3},
    {'key': 'Macedonia', 'count': 3},
    {'key': 'Peru', 'count': 3},
    {'key': 'South Africa', 'count': 3},
    {'key': 'Tunisia', 'count': 3},
    {'key': 'Argentina', 'count': 2},
    {'key': 'Cyprus', 'count': 2},
    {'key': 'Dominican Republic', 'count': 2},
    {'key': 'Hungary', 'count': 2},
    {'key': 'Iceland', 'count': 2},
    {'key': 'Ireland', 'count': 2},
    {'key': 'Paraguay', 'count': 2},
    {'key': 'United Arab Emirates', 'count': 2},
    {'key': 'Armenia', 'count': 1},
    {'key': 'Bolivia', 'count': 1},
    {'key': 'Chile', 'count': 1},
    {'key': 'Denmark', 'count': 1},
    {'key': 'Ecuador', 'count': 1},
    {'key': 'Egypt', 'count': 1},
    {'key': 'El Salvador', 'count': 1},
    {'key': 'French Polynesia', 'count': 1},
    {'key': 'Honduras', 'count': 1},
    {'key': 'Japan', 'count': 1},
    {'key': 'Jordan', 'count': 1},
    {'key': 'Malta', 'count': 1},
    {'key': 'Montenegro', 'count': 1},
    {'key': 'Morocco', 'count': 1},
    {'key': 'Myanmar', 'count': 1},
    {'key': 'Nepal', 'count': 1},
    {'key': 'Norway', 'count': 1},
    {'key': 'Slovakia', 'count': 1},
    {'key': 'Vietnam', 'count': 1}
]

# Capacità massima per gruppo
capacity = 1000

# Prepara i dati
countries = [item['key'] for item in data]
counts = [item['count'] for item in data]
num_items = len(countries)
num_bins = num_items  # Stima superiore del numero di gruppi

# Creazione del problema
prob = pulp.LpProblem("Ottimizzare chiamate API a Censys", pulp.LpMinimize)

# Variabili di decisione
x = pulp.LpVariable.dicts('x', ((i, j) for i in range(num_items) for j in range(num_bins)), cat='Binary')
y = pulp.LpVariable.dicts('y', (j for j in range(num_bins)), cat='Binary')

# Funzione obiettivo: minimizzare il numero di gruppi utilizzati
prob += pulp.lpSum([y[j] for j in range(num_bins)])

# Vincolo: ogni paese deve essere assegnato esattamente a un gruppo
for i in range(num_items):
    prob += pulp.lpSum([x[(i, j)] for j in range(num_bins)]) == 1

# Vincolo: la somma dei conteggi in ogni gruppo non deve superare la capacità
for j in range(num_bins):
    prob += pulp.lpSum([counts[i] * x[(i, j)] for i in range(num_items)]) <= capacity * y[j]

# Risoluzione del problema
prob.solve()

# Raccolta dei gruppi utilizzati
bins = {}
for j in range(num_bins):
    if y[j].varValue > 0.5:
        bin_items = []
        total_count = 0
        for i in range(num_items):
            if x[(i, j)].varValue > 0.5:
                bin_items.append((countries[i], counts[i]))
                total_count += counts[i]
        if total_count > 0:
            bins[j] = bin_items

# Calcolo del costo totale
total_cost = 0
for bin_items in bins.values():
    total_count = sum([count for country, count in bin_items])
    group_cost = 1  # Costo per gruppo
    ip_cost = math.ceil(total_count / 100)  # Costo per 100 IP
    total_group_cost = group_cost + ip_cost
    total_cost += total_group_cost

# Visualizzazione dei gruppi
total_groups = len(bins)
print(f"Numero totale di gruppi individuati: {total_groups}")
print(f"Costo totale (numero di crediti spesi): {total_cost}")
print("=" * 50)
group_number = 1
for bin_index, bin_items in bins.items():
    total_count = sum([count for country, count in bin_items])
    group_cost = 1  # Costo per gruppo
    ip_cost = math.ceil(total_count / 100)
    total_group_cost = group_cost + ip_cost
    print(f"Gruppo {group_number}: Totale = {total_count} indirizzi IP | Costo gruppo = {group_cost} credito | Costo IP = {ip_cost} crediti | Costo totale gruppo = {total_group_cost} crediti")
    for country, count in bin_items:
        print(f" - {country}: {count}")
    print("-" * 50)
    group_number += 1


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /opt/conda/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/78ceae8dfbaf409da862ea0937c26550-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/78ceae8dfbaf409da862ea0937c26550-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 145 COLUMNS
At line 20026 RHS
At line 20167 BOUNDS
At line 25138 ENDATA
Problem MODEL has 140 rows, 4970 columns and 9870 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 2.194 - 0.01 seconds
Cgl0004I processed model has 140 rows, 4970 columns (4970 integer (4970 of which binary)) and 9870 elements
Cutoff increment increased from 1e-05 to 0.9999
Cbc0045I 70 integer variables out of 4970 objects (4970 integer) have cost of 1 - high priority
Cbc0045I branch on satisfied N create fake objective Y random cost Y
Cbc0038I Initial state - 5 

