In [None]:
import gurobipy as gp
import numpy as np
import pandas as pd
from torch_geometric.data import HeteroData
import torch
from tqdm import tqdm

In [None]:
# Seed NumPy's global RNG so the np.random.rand perturbations drawn later in this notebook are repeatable.
np.random.seed(1)

In [None]:
# Root folder of the local QPLIB copy; the instance table and the `lp/` model files are read from beneath it.
path = './qplib/html'

In [None]:
# Per-instance metadata table; the cells below filter it down to the instances that get converted.
datos = pd.read_csv(f'{path}/instancedata.csv')

Find the LCQPs (quadratic programs whose constraints are all linear)

In [None]:
# Keep the rows whose constraint curvature is 'linear'; the column carries no
# information afterwards, so it is dropped in the same step.
datos = (
    datos
    .loc[datos['conscurvature'].eq('linear')]
    .drop(columns='conscurvature')
)

Remove the indefinite ones; the remaining instances are all convex (none are concave or linear).

In [None]:
# Keep the rows with a convex objective, then discard the objective-curvature
# bookkeeping columns.
datos = (
    datos
    .loc[datos['objcurvature'].eq('convex')]
    .drop(columns=['objcurvature', 'nobjquadnegev', 'convex'])
)

In [None]:
# Require a recorded objective value (not NaN) and at least one constraint.
# The 'solobjvalue' column itself is left in the table.
datos = datos.loc[~np.isnan(datos['solobjvalue']) & (datos['ncons'] > 0)]

In [None]:
# Drop the metadata columns that are not used anywhere below.
datos = datos.drop(
    [
        'nsos1', 'nsos2', 'nintvars', 'nquadfunc', 'objsense',
        'objquadproblevfrac', 'njacobiannlnz', 'objtype', 'nnlfunc',
        'nldensity', 'nnlsemi',
        'ndiagquadcons', 'nnlintvars', 'nindefinitenlcons', 'solinfeasibility',
        'nobjnz', 'nobjnlnz', 'nlnz', 'nz', 'njacobiannz',
        'nlaghessiandiagnz', 'solsource', 'donor', 'nsemi', 'nquadcons',
        'nobjquadnz', 'nlaghessiannz', 'nconvexnlcons', 'nlincons', 'nlinfunc',
        'nobjquaddiagnz', 'laghessianmaxblocksize', 'nconcavenlcons',
        'nlaghessianblocks', 'laghessianminblocksize', 'nobjquadposev',
        'nnlbinvars', 'ncontvars', 'nnlvars', 'nbinvars', 'nsingleboundedvars',
        'nboundedvars', 'laghessianavgblocksize',
    ],
    axis=1,
)

In [None]:
# Remove, by index label, the instances marked as infeasible.
datos = datos.drop(
    index=[70, 76, 370, 376, 377, 380, 389, 444, 445, 430, 423, 432, 438, 446],
)

In [None]:
# Remove, by index label, the instances marked as running out of memory (OOM).
datos = datos.drop(index=[421, 424, 450])

In [None]:
# Display the instances that survived all of the filters above.
datos

In [None]:
import gurobipy as gp
import numpy as np
from scipy.sparse import vstack, csr_matrix, eye
from tqdm import tqdm

def get_array(name):
    """Load one QPLIB instance and return it as scaled arrays ``(Q, c, A, b)``.

    The model is read from ``{path}/lp/{name}.lp`` with Gurobi and replaced by
    its continuous relaxation. Every finite variable bound is appended to the
    constraint system as an extra row (``-x_i <= -lb_i`` and ``x_i <= ub_i``).
    Each row of ``[A | b]`` is then divided by its largest absolute entry, and
    ``Q`` and ``c`` are each divided by their own largest absolute entry.

    Args:
        name: Instance name, i.e. the stem of the ``.lp`` file under ``{path}/lp``.

    Returns:
        Tuple ``(Q, c, A, b)``: ``Q`` and ``A`` as SciPy COO matrices, ``c`` and
        ``b`` as 1-D NumPy arrays.

    Raises:
        AssertionError: If the relaxed model still contains a non-continuous
            variable, or if the model is not a minimisation problem.
    """
    # Gurobi documents every bound beyond +/-1e20 as infinite, so test against
    # that threshold instead of comparing with +/-np.inf exactly.
    bound_inf = 1.e20
    # Added to every divisor so an all-zero row / matrix / vector cannot divide by zero.
    tiny = 1.e-5

    model = gp.read(f"{path}/lp/{name}.lp")
    model = model.relax()
    model.Params.LogToConsole = 0

    assert np.all(np.array(model.getAttr("vtype", model.getVars())) == 'C')
    assert model.ModelSense == 1  # 1 for min, -1 for max

    A = model.getA()
    # NOTE(review): `sense` is read but never used below, so every constraint row
    # is passed on unchanged whether the file declares it as <=, >= or =.
    # Confirm that the downstream consumer interprets the rows accordingly.
    sense = np.array(model.getAttr("Sense", model.getConstrs()))
    b = np.array(model.getAttr("rhs", model.getConstrs()))

    lb = np.array(model.getAttr("LB", model.getVars()))
    ub = np.array(model.getAttr("UB", model.getVars()))
    num_vars = len(lb)

    # Identify where bounds are finite
    has_lb = lb > -bound_inf
    has_ub = ub < bound_inf

    # Create sparse rows for lb: -x_i <= -lb_i  =>  row = -e_i
    A_lb = -eye(num_vars, format='csr')[has_lb]
    b_lb = -lb[has_lb]

    # Create sparse rows for ub: x_i <= ub_i  =>  row = +e_i
    A_ub = eye(num_vars, format='csr')[has_ub]
    b_ub = ub[has_ub]

    # Stack bound constraints
    if A_lb.shape[0] + A_ub.shape[0] > 0:
        A = vstack([A, A_lb, A_ub])
        b = np.concatenate([b, b_lb, b_ub])

    # Per-row scale = largest magnitude among the row's coefficients and its
    # right-hand side. The magnitude of b is used (not its signed value) so that
    # rows with a large negative rhs are normalised as well. The scale is
    # non-negative, so the direction of each inequality is unaffected.
    # ravel() keeps the result 1-D even when A has a single row.
    row_scale = np.maximum(np.abs(A).max(axis=1).toarray().ravel(), np.abs(b))

    # Dividing a sparse matrix by a dense column returns a new sparse matrix,
    # so the result is rebound rather than updated in place.
    A = A / (row_scale[:, None] + tiny)
    b = b / (row_scale + tiny)

    # Reset bounds to (-inf, inf); they are now represented by rows of A.
    # The model object is not returned, so this only affects the local model.
    model.setAttr("LB", model.getVars(), -np.inf)
    model.setAttr("UB", model.getVars(), np.inf)
    model.update()

    Q = model.getQ()
    Q /= np.abs(Q).max() + tiny
    c = np.array(model.getAttr("obj", model.getVars()))
    c /= np.abs(c).max() + tiny

    return Q.tocoo(), c, A.tocoo(), b

In [None]:
from torch_geometric.data import InMemoryDataset

In [None]:
from utils.evaluation import gurobi_solve_qp

## test set, the original instances

In [None]:
# Solve every original QPLIB instance once and store the resulting graphs
# together as the test split.
test_sets = []

for qp_name in tqdm(datos['name']):
    print(qp_name)
    Q, c, A, b = get_array(qp_name)

    solution, duals, model = gurobi_solve_qp(Q, c, A, b)
    obj = model.getObjective().getValue()

    # Rows whose residual |A x - b| is not below 1e-7 at the returned solution.
    residual = np.abs(A @ solution - b)
    inactive_idx = np.flatnonzero(~(residual < 1.e-7))

    n_cons = b.shape[0]
    n_vals = c.shape[0]
    a_index = torch.from_numpy(np.vstack([A.row, A.col])).long()
    a_attr = torch.from_numpy(A.data)[:, None].float()
    q_index = torch.from_numpy(np.vstack([Q.row, Q.col])).long()
    q_attr = torch.from_numpy(Q.data)[:, None].float()

    graph = HeteroData(
        qpid=int(qp_name.split('_')[1]),
        # Both node types carry zero-width feature tensors.
        cons=dict(num_nodes=n_cons, x=torch.empty(n_cons, 0)),
        vals=dict(num_nodes=n_vals, x=torch.empty(n_vals, 0)),
        # Non-zeros of A become cons -> vals edges, non-zeros of Q become vals -> vals edges.
        cons__to__vals=dict(edge_index=a_index, edge_attr=a_attr),
        vals__to__vals=dict(edge_index=q_index, edge_attr=q_attr),
        x_solution=torch.from_numpy(solution).float(),
        duals=torch.ones(1).float(),  # placeholder, the solver's duals are not stored here
        obj_solution=torch.tensor(obj).float(),
        q=torch.from_numpy(c).float(),
        b=torch.from_numpy(b).float(),
        inactive_idx=torch.from_numpy(inactive_idx).long(),
        heur_idx=torch.zeros(1, dtype=torch.long),  # placeholder
    )
    test_sets.append(graph)

collated_test = InMemoryDataset().collate(test_sets)
torch.save(collated_test, f'datasets/qplib/processed/test.pt')

## train, augment each instance with 55 perturbed copies (50 for training, 5 for validation)

In [None]:
def _jitter(values):
    """Return ``values`` scaled elementwise by fresh uniform random factors from [0.5, 1.5)."""
    return values * (np.random.rand(*values.shape) + 0.5)


train_sets = []
train_rate = 55  # enriched graphs per instance

for qp_name in tqdm(datos['name']):
    print(qp_name)
    Q, c, A, b = get_array(qp_name)

    # Only the non-zero values are perturbed, never the sparsity pattern, so the
    # edge indices are built once per instance and shared by all of its graphs.
    Q_index = torch.from_numpy(np.vstack([Q.row, Q.col])).long()
    A_index = torch.from_numpy(np.vstack([A.row, A.col])).long()

    train_sets = []
    fails = 0
    q_flag = True  # cleared after the first failed solve; Q is left unperturbed from then on
    while len(train_sets) < train_rate:
        # Random draws happen in the fixed order Q, A, c, b.
        Q1 = Q.copy()
        if q_flag:
            Q1.data = _jitter(Q1.data)

        A1 = A.copy()
        A1.data = _jitter(A1.data)

        c1 = _jitter(c)
        b1 = _jitter(b)

        solution, duals, model = gurobi_solve_qp(Q1, c1, A1, b1)

        if solution is None:
            # No solution returned: count the failure and give up on this
            # instance once more than 10 attempts have failed.
            fails += 1
            q_flag = False
            if fails > 10:
                break
            print(f'train: {len(train_sets)}')
            continue

        # Rows whose residual |A x - b| is not below 1e-7 at the returned solution.
        residual = np.abs(A1 @ solution - b1)
        inactive_idx = np.flatnonzero(~(residual < 1.e-7))
        obj = model.getObjective().getValue()

        n_cons = b1.shape[0]
        n_vals = c1.shape[0]
        graph = HeteroData(
            cons=dict(num_nodes=n_cons, x=torch.empty(n_cons, 0)),
            vals=dict(num_nodes=n_vals, x=torch.empty(n_vals, 0)),
            cons__to__vals=dict(edge_index=A_index,
                                edge_attr=torch.from_numpy(A1.data)[:, None].float()),
            vals__to__vals=dict(edge_index=Q_index,
                                edge_attr=torch.from_numpy(Q1.data)[:, None].float()),
            x_solution=torch.from_numpy(solution).float(),
            duals=torch.from_numpy(duals).float(),
            obj_solution=torch.tensor(obj).float(),
            q=torch.from_numpy(c1).float(),
            b=torch.from_numpy(b1).float(),
            inactive_idx=torch.from_numpy(inactive_idx).long(),
            heur_idx=torch.zeros(1, dtype=torch.long),  # placeholder
        )
        train_sets.append(graph)
        print(f'train: {len(train_sets)}')

    # A file is written only when the full quota of graphs was produced.
    if len(train_sets) == train_rate:
        collated = InMemoryDataset().collate(train_sets)
        torch.save(collated, f'datasets/qplib/processed/enriched{qp_name}.pt')

In [None]:
# `separate` is not imported anywhere else in this notebook; without this import
# the cell raises NameError on a fresh kernel.
from torch_geometric.data.separate import separate

# Reload each instance's collated file and split it back into individual graphs.
# 55 is the number of graphs stored per instance by the generation cell (its
# `train_rate`); a file only exists for instances that reached that quota.
data_dict = {}
for name in datos['name']:
    data, slices = torch.load(f'datasets/qplib/processed/enriched{name}.pt')
    data_list = [separate(cls=HeteroData,
                        batch=data,
                        idx=i,
                        slice_dict=slices,
                        decrement=False,) for i in range(55)]
    data_dict[name] = data_list

In [None]:
# Training split: graphs 0..49 of every instance, ordered by graph index first
# and instance second.
train_list = [
    data_dict[name][i]
    for i in range(50)
    for name in datos['name']
]

In [None]:
# Number of instances whose enriched graphs were loaded.
len(data_dict)

In [None]:
# Size of the training split: 50 graphs for each name in datos['name'].
len(train_list)

In [None]:
# Collate the training graphs into one object and write it to disk.
train_collated = InMemoryDataset().collate(train_list)
torch.save(train_collated, f'datasets/qplib/processed/train.pt')

In [None]:
# Validation split: graphs 50..54 of every instance, ordered by graph index
# first and instance second.
val_list = [
    data_dict[name][i]
    for i in range(50, 55)
    for name in datos['name']
]

In [None]:
# Size of the validation split: 5 graphs for each name in datos['name'].
len(val_list)

In [None]:
# Collate the validation graphs into one object and write it to disk.
valid_collated = InMemoryDataset().collate(val_list)
torch.save(valid_collated, f'datasets/qplib/processed/valid.pt')

In [None]:
from data.dataset import LPDataset

In [None]:
# Instantiate the project's LPDataset on the qplib folder with the 'train' split.
# NOTE(review): presumably this picks up the processed/train.pt written above — confirm against data/dataset.py.
ds = LPDataset('datasets/qplib', 'train')