In [433]:
%load_ext autoreload
%autoreload 2
%load_ext line_profiler

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [434]:
from scipy.optimize import linprog
from tqdm import tqdm

import os
import torch
from scipy.linalg import LinAlgWarning, LinAlgError
from scipy.optimize._optimize import OptimizeWarning
from scipy.optimize._linprog_util import _clean_inputs, _get_Abc
import warnings
import numpy as np

from generate_instances_lp import generate_setcover, Graph, generate_indset, generate_cauctions, generate_capacited_facility_location

In [435]:
rng = np.random.RandomState(1)

In [438]:
root = 'datasets/gen_20_30_0.2'
os.mkdir(root)
os.mkdir(os.path.join(root, 'processed'))

### Generic

In [774]:
density = 0.2
nrows = 20
ncols = 30

def surrogate_gen():
    A = np.random.randn(nrows, ncols)
    A[np.random.rand(nrows, ncols) > density] = 0.
    x_feas = np.abs(np.random.randn(ncols))  # Ensure x_feas is non-negative
    b = A @ x_feas + np.abs(np.random.randn(nrows))  # Ensure feasibility

    c = np.abs(np.random.randn(ncols))
    return A, b, c

bounds = None

### Setcover

In [720]:
density = 0.01
nrows_l = 300
nrows_u = 400
ncols_l = 200
ncols_u = 300

bounds = (0., 1.)

def surrogate_gen():
    nrows = rng.randint(nrows_l, nrows_u)
    ncols = rng.randint(ncols_l, ncols_u)
    nnzrs = int(nrows * ncols * density)
    A, b, c = generate_setcover(nrows, ncols, nnzrs, rng)
    return A, b, c

### Indset

In [None]:
def surrogate_gen():
    # nnodes = rng.randint(10, 20)
    nnodes = rng.randint(250, 300)
    edge_probability = 0.01
    graph = Graph.erdos_renyi(number_of_nodes=nnodes, edge_probability=edge_probability, random=rng)
    A, b, c = generate_indset(graph=graph, nnodes=nnodes)
    return None, None, A, b, c

bounds = (0., 1.)

### Cauctions

In [None]:
def surrogate_gen():
    # n_items=rng.randint(15, 20)
    # n_bids=rng.randint(15, 20)
    n_items=rng.randint(300, 400)
    n_bids=rng.randint(300, 400)
    A, b, c = generate_cauctions(n_items=n_items, n_bids=n_bids, rng=rng, min_value=0.5, max_value=1., add_item_prob=0.5)
    # c = np.ones_like(c, dtype=np.float32) * -1.
    return A, b, c

bounds = (0., 1.)

### Facilities

In [None]:
def surrogate_gen():
    n_customers = rng.randint(60, 70)
    n_facilities = 5
    ratio = 0.5
    # min would be like 0.2-ish
    A_eq, b_eq, A_ub, b_ub, c = generate_capacited_facility_location(n_customers=n_customers, 
                                                                     n_facilities=n_facilities, 
                                                                     ratio=ratio, rng=rng)
    return A_eq, b_eq, A_ub, b_ub, c

bounds = (0., 1.)

# create ineq

In [633]:
from scipy.linalg import qr
from torch_geometric.data import Batch, HeteroData, InMemoryDataset
from collections import namedtuple

_LPProblem = namedtuple('_LPProblem',
                        'c A_ub b_ub A_eq b_eq bounds x0 integrality')
_LPProblem.__new__.__defaults__ = (None,) * 7  # make c the only required arg

In [775]:
def normalize_cons(A, b):
    if A is None or b is None:
        return A, b
    Ab = np.concatenate([A, b[:, None]], axis=1)
    max_logit = np.abs(Ab).max(axis=1)
    max_logit[max_logit == 0] = 1.
    Ab = Ab / max_logit[:, None]
    A = Ab[:, :-1]
    b = Ab[:, -1]
    return A, b

In [903]:
A, b, c = surrogate_gen()
c = c / (np.abs(c).max() + 1.e-10)  # does not change the result
A, b = normalize_cons(A, b)

In [904]:
res = linprog(c, A_ub=A, b_ub=b, bounds=bounds, method='highs')

In [905]:
obj = res.fun

In [906]:
def active_idx(A, x, b, eps=1.e-8):
    vio = A @ res.x - b
    assert vio.max() <= eps
    vio_mask = np.abs(vio) < eps
    return np.where(vio_mask)[0]

In [907]:
actives = active_idx(A, res.x, b)

In [908]:
actives

array([ 1,  7,  9, 12, 15, 16, 17, 19])

In [909]:
inactives = np.setdiff1d(np.arange(A.shape[0]), actives)

In [943]:
# heur = np.where(A @ c < 0, A @ c + b, 10000)
heur = A @ c + b

In [944]:
heur

array([-1.46408196, -1.12194763,  1.03493377,  0.89820058, -0.58505166,
        1.64286862,  0.56432216, -1.33292076,  1.12612255, -1.44090078,
        1.78040329,  1.07957904,  1.28507853,  1.00785969,  1.46256524,
        0.98091567, -1.29380029, -1.44830598,  0.79459833, -1.11974753])

In [945]:
sortd_Ac_add_b = np.argsort(heur)

In [973]:
np.isin(sortd_Ac_add_b, actives).astype(np.int32)

array([0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0],
      dtype=int32)

In [457]:
for row in inactives:
    if A[row].dot(c) > 0:
        for i in range(100):
            A_ = A.copy()
            # if A_[row, i] == 0.:
            A_[row, i] -= 100000
            # A_[row, i] = 0
            res = linprog(c, A_ub=A_, b_ub=b, bounds=bounds, method='highs')
            assert res.fun == obj

AssertionError: 

In [458]:
warnings.filterwarnings("error")

ips = []
graphs = []
pkg_idx = 0
success_cnt = 0

max_iter = 15000
num = 10000

pbar = tqdm(range(max_iter))
for i in pbar:
    A, b, c = surrogate_gen()
    c = c / (np.abs(c).max() + 1.e-10)  # does not change the result
    A, b = normalize_cons(A, b)

    # process LP into standard form Ax=b, x>=0
    # lp = _LPProblem(c, A_ub, b_ub, A_eq, b_eq, bounds, None, None)
    # lp = _clean_inputs(lp)
    # A, b, c, *_ = _get_Abc(lp, 0.)
    # bounds = None
    
    try:
        assert np.linalg.matrix_rank(A) == A.shape[0]
        res = linprog(c, A_ub=A, b_ub=b, bounds=bounds, method='highs')
    except (LinAlgWarning, OptimizeWarning, AssertionError, LinAlgError):
        continue
    else:
        if res.success and not np.isnan(res.fun) and res.fun != 0.:
            # calculate active set score
            heur = A @ c / np.linalg.norm(A, ord=None, axis=1) / np.linalg.norm(c) + b
            active_sort_idx = np.argsort(heur)
            
            A = torch.from_numpy(A).to(torch.float)
            b = torch.from_numpy(b).to(torch.float)
            c = torch.from_numpy(c).to(torch.float)
            x = torch.from_numpy(res.x).to(torch.float)

            A_where = torch.where(A)
            data = HeteroData(
                cons={
                    'num_nodes': b.shape[0],
                    'x': torch.empty(b.shape[0]),
                     },
                vals={
                    'num_nodes': c.shape[0],
                    'x': torch.empty(c.shape[0]),
                },
                obj={
                    'num_nodes': 1,
                    'x': torch.zeros(1, 1).float(),
                },
                cons__to__vals={'edge_index': torch.vstack(A_where),
                                'edge_attr': A[A_where][:, None]},
                obj__to__vals={'edge_index': torch.vstack([torch.zeros(A.shape[1]).long(),
                                                           torch.arange(A.shape[1])]),
                                'edge_attr': c[:, None]},
                obj__to__cons={'edge_index': torch.vstack([torch.zeros(A.shape[0]).long(),
                                                           torch.arange(A.shape[0])]),
                                'edge_attr': b[:, None]},
                x_solution=x,
                obj_solution=c.dot(x),
                q=c,
                b=b,
                active_sort_idx=torch.from_numpy(active_sort_idx).long()
            )
            success_cnt += 1
            graphs.append(data)

    if len(graphs) >= 1000 or success_cnt == num:
        torch.save(Batch.from_data_list(graphs), f'{root}/processed/batch{pkg_idx}.pt')
        pkg_idx += 1
        graphs = []

    if success_cnt >= num:
        break

    pbar.set_postfix({'suc': success_cnt, 'obj_solution': c.dot(x).item()})

warnings.resetwarnings()

 68%|██████████████████████████████████████████████████████████████████▎                              | 10248/15000 [00:13<00:06, 764.78it/s, suc=9999, obj_solution=0.549]


In [459]:
from data.dataset import LPDataset

In [460]:
ds = LPDataset(root, 'train')

Processing...
Done!
