In [3]:
# autoreload mode 2: re-import edited modules before each cell executes, so changes
# to the project's own .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Registers the line_profiler IPython extension (provides the %lprun magic).
%load_ext line_profiler

In [4]:
from tqdm import tqdm

import os
import torch
from scipy.linalg import LinAlgError
import numpy as np
from torch_geometric.data import Batch, HeteroData
from scipy.sparse import coo_array

from sklearn.datasets import make_sparse_spd_matrix
from utils.evaluation import normalize_cons, data_inactive_constraints, data_contraint_heuristic, gurobi_solve_qp

In [5]:
# Seeded generator that the surrogate_gen() wrappers below pass to the instance generators.
rng = np.random.RandomState(1)

In [6]:
# Output location of the generated dataset; batch files go to <root>/processed.
root = 'datasets/lasso_50_0.02'
# makedirs creates `root`, any missing parent (e.g. `datasets/`) and the `processed`
# sub-directory in one call; exist_ok=True keeps the cell re-runnable after a kernel
# restart instead of raising FileExistsError.
os.makedirs(os.path.join(root, 'processed'), exist_ok=True)

### SVM

In [None]:
def soft_svm(nums, dims, lamb, density, rng):
    """Generate a random soft-margin SVM problem as QP data.

    The decision vector is ``z = [w, xi]`` with ``dims`` weights followed by
    ``nums`` slack variables. The returned arrays encode

        A z <= b   <=>   y_i * (x_i . w) + xi_i >= 1   for every sample i,

    together with a quadratic term ``P`` that is the identity on ``w`` and zero
    on ``xi``, and a linear term ``q`` that is zero on ``w`` and ``lamb`` on
    ``xi``. Non-negativity of the slacks is NOT encoded in ``A``/``b``; callers
    have to supply it as a variable bound.

    Args:
        nums: number of samples. The first ``nums // 2`` samples get label +1,
            the remaining ``nums - nums // 2`` get label -1 (so odd values work).
        dims: number of features.
        lamb: weight of the slack penalty in ``q``.
        density: probability that a feature entry is kept; the rest are zeroed.
        rng: ``numpy.random.RandomState``-like object providing ``normal`` and ``rand``.

    Returns:
        Tuple ``(A, b, P, q)`` of float64 arrays with shapes
        ``(nums, dims + nums)``, ``(nums,)``, ``(dims + nums, dims + nums)``
        and ``(dims + nums,)``.
    """
    n_pos = nums // 2
    # The negative class absorbs the extra sample when nums is odd, so the stacked
    # feature matrix always has exactly `nums` rows (matching np.eye(nums) below).
    n_neg = nums - n_pos

    # Class means are +/- 1/(dims*density); the standard deviation has the same magnitude.
    A1 = rng.normal(1 / dims / density, 1 / dims / density, (n_pos, dims))
    A2 = rng.normal(-1 / dims / density, 1 / dims / density, (n_neg, dims))
    A = np.vstack([A1, A2])
    # Sparsify: each entry survives with probability `density`.
    A[rng.rand(*A.shape) > density] = 0.

    # labels
    b = np.hstack([np.ones(n_pos) * 1., np.ones(n_neg) * -1.])

    # Fold the label into each sample row: row i becomes y_i * x_i.
    A = A * b[:, None]

    # Append one slack column per sample and negate, turning ">= 1" into "<= -1".
    A = -np.concatenate([A, np.eye(nums) * 1.], axis=1).astype(np.float64)
    b = -np.ones(nums).astype(np.float64)

    P = np.diag(np.concatenate([np.ones(dims), np.zeros(nums)], axis=0)).astype(np.float64)
    q = np.concatenate([np.zeros(dims), lamb * np.ones(nums)], axis=0).astype(np.float64)

    return A, b, P, q

def surrogate_gen():
    """Draw one SVM instance and return it in the 8-slot layout used by the generation loop.

    Slots: (P, A, b, q, Aeq, beq, lb, ub). There are no equality constraints,
    so Aeq and beq are None; the bounds are 0 and +inf.
    """
    ineq_mat, ineq_rhs, quad, lin = soft_svm(100, 100, 0.5, 0.05, rng)
    lower, upper = 0., float('inf')
    return quad, ineq_mat, ineq_rhs, lin, None, None, lower, upper

### Portfolio

In [None]:
def portfolio(n_assets, density, rng):
    """Generate the data of a random portfolio-style QP.

    Args:
        n_assets: number of decision variables (assets).
        density: controls sparsity of the quadratic term; forwarded to
            ``make_sparse_spd_matrix`` as ``alpha = 1 - density / 2``.
        rng: random state used both by ``make_sparse_spd_matrix`` and for the
            random inequality row.

    Returns:
        Tuple ``(P, A, b, q, Aeq, beq)``: a sparse SPD matrix ``P``, a zero
        linear term ``q``, one random inequality row ``A x <= b`` and one
        equality row ``Aeq x = beq``.
    """
    # Keep the draw order (SPD matrix first, then the inequality row): both
    # consume the same random state.
    spd = make_sparse_spd_matrix(
        n_dim=n_assets,
        alpha=1 - density / 2.,
        smallest_coef=0.1,
        largest_coef=0.9,
        random_state=rng,
    )
    P = spd.astype(np.float64)
    q = np.zeros(n_assets, dtype=np.float64)

    # Both constraint rows carry a factor 0.01 while the right-hand sides stay at
    # -1 and 1, so the equality reads sum(x) = 100: the solution is expressed in
    # percent rather than as fractions.
    coeffs = rng.randn(1, n_assets)
    A = -coeffs * 0.01
    b = np.array([-1.])

    Aeq = np.full((1, n_assets), 0.01)
    beq = np.ones(1, dtype=np.float64)

    return P, A, b, q, Aeq, beq

def surrogate_gen():
    """Draw one portfolio instance and append the variable bounds (0, +inf).

    Returns the 8-slot tuple (P, A, b, q, Aeq, beq, lb, ub).
    """
    P, A, b, q, Aeq, beq = portfolio(100, 0.05, rng)
    return (P, A, b, q, Aeq, beq, 0., float('inf'))

### Lasso

Reference (Lasso regression with Gurobi): https://medium.com/@nicolay.huarancay/lasso-regression-with-gurobi-and-setting-coefficients-approach-179be20eea14

In [36]:
import numpy as np

def lasso(n_samples, n_features, density, reg, rng=None):
    """Generate a random Lasso regression problem written as a QP.

    The decision vector is ``z = [w, t]`` with ``n_features`` regression
    weights ``w`` followed by ``n_features`` auxiliary variables ``t`` that
    bound ``|w|`` from above through the inequality rows

        -w - t <= 0   and   w - t <= 0.

    Args:
        n_samples: number of rows of the random design matrix ``X``.
        n_features: number of columns of ``X``.
        density: probability that an entry of ``X`` is kept; the rest are zeroed.
        reg: L1 regularisation weight (coefficient of every ``t`` in ``c``).
        rng: ``numpy.random.RandomState``-like object providing ``randn`` and
            ``rand``. A fresh, unseeded ``RandomState`` is used when omitted.

    Returns:
        Tuple ``(Q, A, b, c)`` with shapes ``(2f, 2f)``, ``(2f, 2f)``, ``(2f,)``
        and ``(2f,)`` where ``f = n_features``.

    NOTE(review): the top-left block of ``Q`` is ``0.5 * X^T X``. Whether that
    factor matches the solver's objective convention cannot be told from this
    function alone; it is kept unchanged here.
    """
    if rng is None:
        # The signature advertises rng as optional; without this the first
        # rng.randn call fails with AttributeError on None.
        rng = np.random.RandomState()

    # Generate random design matrix
    X = rng.randn(n_samples, n_features)
    X[rng.rand(*X.shape) > density] = 0.

    # Generate (dense) ground-truth weights
    w_true = rng.randn(n_features)

    # Generate target with noise
    noise = rng.randn(n_samples) * 0.5
    y = X @ w_true + noise

    # Quadratic and linear terms on w; the t-part of Q is zero and the t-part
    # of c carries the L1 weight.
    Q = 0.5 * X.T @ X
    c = -X.T @ y
    Z = np.zeros_like(Q)
    Q = np.block([[Q, Z], [Z, Z]])
    c = np.hstack([c, np.ones(n_features) * reg])

    eye = np.eye(n_features)
    A = np.block([[-eye, -eye], [eye, -eye]])
    # One right-hand side per row of A, i.e. 2 * n_features entries (this was
    # previously sized by n_samples and only matched when the two were equal).
    b = np.zeros(n_features * 2)

    return Q, A, b, c

def surrogate_gen():
    """Draw one Lasso instance and return it in the 8-slot layout (P, A, b, q, Aeq, beq, lb, ub).

    There are no equality constraints (Aeq and beq are None). The lower bound
    is -inf for the first 50 variables and 0 for the last 50; the upper bound
    is +inf.
    """
    n_samples = n_features = 50
    quad, ineq_mat, ineq_rhs, lin = lasso(n_samples, n_features, 0.02, 0.1, rng)
    free_part = np.full(50, -float('inf'))
    nonneg_part = np.zeros(50)
    lower = np.hstack([free_part, nonneg_part])
    return quad, ineq_mat, ineq_rhs, lin, None, None, lower, float('inf')

# Create instances, solve them, and save them as graph batches

In [None]:
# Generation loop: repeatedly draw an instance from surrogate_gen(), solve it,
# convert the solved instance into a bipartite HeteroData graph and write the
# graphs to <root>/processed/batch<k>.pt in packages of `pkg_size`.
ips = []
graphs = []
pkg_idx = 0
success_cnt = 0

max_iter = 15000  # cap on sampling attempts; a failed solve still consumes an attempt
num = 10000  # stop as soon as this many solved instances have been stored
pkg_size = 1000  # number of graphs written per batch file

pbar = tqdm(range(max_iter))
for i in pbar:
    # `ub` is returned by the generators but is not forwarded to the solver below.
    P, A, b, c, Aeq, beq, lb, ub = surrogate_gen()

    # Rescale so that the largest absolute entry of P and of c is (about) 1.
    # NOTE(review): P and c are divided by different factors, so the stored
    # solution is the optimum of the rescaled problem, not of the raw draw.
    P = P / np.abs(P).max()
    c = c / (np.abs(c).max() + 1.e-10)
    A, b = normalize_cons(A, b)
    # Aeq/beq are None for generators without equality constraints; this relies on
    # normalize_cons accepting None — confirm in utils.evaluation.
    Aeq, beq = normalize_cons(Aeq, beq)

    try:
        solution, duals = gurobi_solve_qp(P, c, A, b, Aeq, beq, lb)
    except (AssertionError, LinAlgError):
        # Solver-side failure: discard this draw and sample again.
        continue
    if solution is None:
        # No solution returned: discard this draw as well. An explicit check
        # (rather than `assert`) keeps this working under `python -O`.
        continue

    # Labels are computed on the inequality block only, before the equality rows
    # are stacked underneath it.
    inactive_idx = data_inactive_constraints(A, b, solution)
    heur_idx = data_contraint_heuristic(P, A, b, c)

    obj = 0.5 * solution @ P @ solution + c.dot(solution)

    P = torch.from_numpy(P).to(torch.float)
    P_where = torch.where(P)

    A = torch.from_numpy(A).to(torch.float)
    b = torch.from_numpy(b).to(torch.float)
    if Aeq is not None:
        A = torch.vstack([A, torch.from_numpy(Aeq).to(torch.float)])
        b = torch.hstack([b, torch.from_numpy(beq).to(torch.float)])
    c = torch.from_numpy(c).to(torch.float)
    x = torch.from_numpy(solution).to(torch.float)

    A_where = torch.where(A)
    data = HeteroData(
        # Node features are placeholders. They are zero-filled rather than
        # torch.empty so that no uninitialized memory ends up in the saved files.
        cons={
            'num_nodes': b.shape[0],
            'x': torch.zeros(b.shape[0]),
        },
        vals={
            'num_nodes': c.shape[0],
            'x': torch.zeros(c.shape[0]),
        },
        # Edges are the non-zero entries of A (constraint -> variable) and of P
        # (variable -> variable), with the entry value as edge attribute.
        cons__to__vals={'edge_index': torch.vstack(A_where),
                        'edge_attr': A[A_where][:, None]},
        vals__to__vals={'edge_index': torch.vstack(P_where),
                        'edge_attr': P[P_where][:, None]},
        x_solution=x,
        duals=torch.from_numpy(duals).float(),
        obj_solution=torch.tensor(obj).float(),
        q=c,
        b=b,
        inactive_idx=torch.from_numpy(inactive_idx).long(),
        heur_idx=torch.from_numpy(heur_idx).long(),
    )
    success_cnt += 1
    graphs.append(data)

    if len(graphs) >= pkg_size or success_cnt == num:
        torch.save(Batch.from_data_list(graphs), f'{root}/processed/batch{pkg_idx}.pt')
        pkg_idx += 1
        graphs = []

    if success_cnt >= num:
        break

    pbar.set_postfix({'suc': success_cnt, 'obj': obj})

# If max_iter was exhausted before `num` successes, the last partial package is
# still in memory; write it out instead of silently dropping it.
if graphs:
    torch.save(Batch.from_data_list(graphs), f'{root}/processed/batch{pkg_idx}.pt')
    pkg_idx += 1
    graphs = []

In [20]:
from data.dataset import LPDataset

In [21]:
# Open the dataset stored under `root` through the project's LPDataset class.
# The meaning of the second argument ('test') is defined in data/dataset.py — not visible here.
ds = LPDataset(root, 'test')

Processing...
  data_list.extend(Batch.to_data_list(torch.load(osp.join(self.processed_dir, f'batch{i}.pt'))))
Done!
  self.data, self.slices = torch.load(path)


In [22]:
# Fetch the sample at index 10 from the dataset.
data = ds[10]