In [1]:
import torch
import gurobipy as gp

In [2]:
from learn.info import ModelInfo
from learn.feature import VarFeature, ConFeature, EdgFeature
from learn.train import Inst

In [3]:
import gurobipy as gp
from gurobipy import GRB
import numpy as np
np.random.seed(0)

def maximum_independent_set_problem(
    num_nodes=10,
    edge_prob=0.3,
    print_output=True
):
    edges = []
    for i in range(num_nodes):
        for j in range(i+1, num_nodes):
            if np.random.rand() < edge_prob:
                edges.append((i, j))
    m = gp.Model("maximum_independent_set")
    x = m.addVars(num_nodes, vtype=GRB.BINARY, name="x")
    for (i, j) in edges:
        m.addConstr(x[i] + x[j] <= 1, name=f"edge_{i}_{j}")
    m.setObjective(gp.quicksum(x[i] for i in range(num_nodes)), GRB.MAXIMIZE)
    m.update()
    return m

In [4]:
%%capture

var_features = []
con_features = []
edg_features = []
solutions = []

infos = []
for i in range(2048):
    m = maximum_independent_set_problem()
    info = ModelInfo.from_model(m)
    var_features.append(VarFeature.from_info(info.var_info, info.obj_info))
    con_features.append(ConFeature.from_info(info.con_info))
    edg_features.append(EdgFeature.from_info(info.con_info))
    m.optimize()
    s = [v.x for v in m.getVars()]
    solutions.append(s)
    infos.append(info)

inst = Inst(var_features, con_features, edg_features, solutions)

In [5]:
c_v_edges, v_c_edges, node_features, edge_features, n_var, n_con = inst.xs
ys = inst.ys

In [15]:
def get_train_mask(size, ratio):
    num_zero = int(round(size * ratio))
    mask = torch.ones(size, dtype=torch.bool)
    idx = torch.randperm(size)[:num_zero]
    mask[idx] = 0
    return mask


def get_solution_mask(pool, ratio):
    mask = pool.clone()
    ones_indices = torch.where(mask == 1)[0]
    num_keep = int(round(len(ones_indices) * ratio))
    
    if num_keep <= 0:
        mask[ones_indices] = 0
        return mask
        
    if num_keep >= len(ones_indices):
        return mask

    selected_indices = torch.randperm(len(ones_indices))[:num_keep]
    keep_indices = ones_indices[selected_indices]
    mask[ones_indices] = 0
    mask[keep_indices] = 1
    return mask


def get_mask_node_feature(node_feature, y, mask):
    node_feature_with_y = torch.hstack([node_feature, y.unsqueeze(1)])
    mask = torch.cat([mask, torch.zeros(len(y) - len(mask), dtype=torch.bool)])
    masked = node_feature_with_y.clone()
    masked[~mask, -1] = 0
    return torch.hstack([masked, mask.unsqueeze(1)]), mask

In [7]:
def solve(problem, max_level):
    sub_problems = partition(problem)
    if max_level == 0:
        partial_solution = solve_exact(sub_problems[0])
        approx_solution = infer(partial_solution, problem)
        fixed = gb_fix(approx_solution)
        return fixed
    sols = solve(sub_problems, max_level-1)
    combined = combine(sols) # crossover
    fixed = gb_fix(combined)
    return fixed
    
    
    

In [8]:
import torch
from learn.model import FocalLoss, SpGAT
import torch.optim as optim

model = SpGAT(
    nfeat=inst.xs[2][0].shape[1] + 2,
    nhid=64,
    nclass=2,
    dropout=0.1,
    nheads=6,
    alpha=0.1
)

optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=5e-3)

In [9]:
from torch import nn
ce = nn.CrossEntropyLoss()

In [11]:
import random

batch_size = 256
for epoch in range(500):
    
    model.train()
    optimizer.zero_grad()
    
    agg_loss = 0
    inst_idxs = list(range(len(ys) - 100))
    random.shuffle(inst_idxs)

    counter = 0
    for i in inst_idxs:
        counter += 1        

        t_mask = train_sets[i]
        s_mask = get_solution_mask(t_mask, 0.5)
        nf, mask = get_mask_node_feature(node_features[i], ys[i], s_mask)
        
        output, _ = model(
            nf, 
            c_v_edges[i], 
            v_c_edges[i], 
            edge_features[i].detach()
        )
        fl = FocalLoss()

        train_mask = torch.cat([~t_mask, torch.zeros(len(mask) - len(t_mask), dtype=torch.bool)])
        loss = fl(output[train_mask, :], ys[i][train_mask])
        agg_loss += loss
        
        if counter >= batch_size:
            (agg_loss/batch_size).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
            print(output[train_mask, :].detach().numpy(), ys[i][train_mask].detach().numpy(), mask.sum())
            print(agg_loss)
            
            print('-'*100)
            test_idx = -1
            
            t_mask = train_sets[test_idx]
            s_mask = get_solution_mask(t_mask, 0.5)
            nf, mask = get_mask_node_feature(node_features[test_idx], ys[test_idx], s_mask)
            output, _ = model(
                nf, 
                c_v_edges[test_idx], 
                v_c_edges[test_idx], 
                edge_features[test_idx].detach()
            )
            test_mask = torch.cat([~t_mask, torch.zeros(len(mask) - len(t_mask), dtype=torch.bool)])
            print(output[test_mask, :].detach().numpy(), ys[test_idx][test_mask].detach().numpy(), mask.sum())
            print('^'*100)
            agg_loss = 0
            counter = 0


[[0.48206437 0.5179356 ]
 [0.48185337 0.51814663]
 [0.48213834 0.51786166]
 [0.47729746 0.5227025 ]
 [0.48028398 0.519716  ]
 [0.48173842 0.5182616 ]
 [0.47925988 0.5207401 ]] [1 1 0 1 0 1 0] tensor(2)
tensor(55.2090, grad_fn=<AddBackward0>)
----------------------------------------------------------------------------------------------------
[[0.47237852 0.52762145]
 [0.47340602 0.526594  ]
 [0.47716433 0.5228357 ]
 [0.4955662  0.5044339 ]
 [0.4687453  0.5312547 ]
 [0.47563374 0.52436626]
 [0.47164354 0.5283565 ]] [0 1 1 1 1 1 0] tensor(2)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[[0.46876177 0.53123826]
 [0.47439408 0.5256059 ]
 [0.47350916 0.52649075]
 [0.47475258 0.52524734]
 [0.47409695 0.525903  ]
 [0.47516564 0.5248344 ]
 [0.47602206 0.52397794]] [1 1 0 0 0 1 1] tensor(2)
tensor(54.9418, grad_fn=<AddBackward0>)
----------------------------------------------------------------------------------------------------
[[0.4681500


KeyboardInterrupt



In [47]:
var_features = []
con_features = []
edg_features = []
solutions = []


infos = []
for i in range(1):
    m = maximum_independent_set_problem()
    info = ModelInfo.from_model(m)
    var_features.append(VarFeature.from_info(info.var_info, info.obj_info))
    con_features.append(ConFeature.from_info(info.con_info))
    edg_features.append(EdgFeature.from_info(info.con_info))
    m.optimize()
    s = [v.x for v in m.getVars()]
    solutions.append(s)
    infos.append(info)

inst = Inst(var_features, con_features, edg_features, solutions)

Gurobi Optimizer version 12.0.0 build v12.0.0rc1 (mac64[x86] - Darwin 22.4.0 22E252)

CPU model: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz
Thread count: 8 physical cores, 16 logical processors, using up to 16 threads

Optimize a model with 18 rows, 10 columns and 36 nonzeros
Model fingerprint: 0xdb44d63d
Variable types: 0 continuous, 10 integer (10 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Found heuristic solution: objective 4.0000000
Presolve removed 8 rows and 2 columns
Presolve time: 0.00s
Presolved: 10 rows, 8 columns, 23 nonzeros
Variable types: 0 continuous, 8 integer (8 binary)

Root relaxation: cutoff, 3 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0     cutoff    0         4.00000    4.00000  0.

In [48]:
c_v_edges, v_c_edges, node_features, edge_features, n_var, n_con = inst.xs
ys = inst.ys

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import dgl.nn as dglnn
from learn.model import FocalLoss

In [31]:
class Model(nn.Module):
    def __init__(self, n_node_feats, n_edge_feats, hidden_size, num_classes):
        super().__init__()
        self.conv1 = dglnn.EdgeGATConv(n_node_feats, n_edge_feats, hidden_size, 8)
        self.conv2 = dglnn.EdgeGATConv(hidden_size * 8, n_edge_feats, hidden_size, 1)
        self.conv3 = dglnn.SAGEConv(hidden_size, num_classes, 'mean')

    def forward(self, graph, node_x, edge_x):
        node_x = self.conv1(graph, node_x, edge_x)
        node_x = F.relu(node_x)
        node_x = self.conv2(graph, node_x.reshape(node_x.size(0), -1), edge_x)
        node_x = node_x.reshape(node_x.size(0), -1)
        node_x = F.relu(node_x)
        node_x = self.conv3(graph, node_x)
        node_x = F.softmax(node_x)
        return node_x

In [49]:
len(ys)

1

In [32]:
def get_free_node(srcs, dsts):
    all_nodes = set(range(max(max(srcs), max(dsts))))
    all_nodes.update(set(dsts))
    for i, j in zip(srcs, dsts):
        if j not in all_nodes:
            continue
        all_nodes.remove(j)
    return list(all_nodes)

In [53]:
train_ratio = 0.0
train_sets = []
for i in range(len(ys)):
    n = n_var[i]
    s = get_train_mask(n, train_ratio)
    train_sets.append(s)

In [54]:
graphs = []
for idx in range(len(ys)):
    srcs = torch.cat([c_v_edges[idx][:, 0], v_c_edges[idx][:, 0]])
    dsts = torch.cat([c_v_edges[idx][:, 1], v_c_edges[idx][:, 1]])
    g = dgl.graph((srcs, dsts))
    
    if (g.in_degrees() == 0).any():
        continue

    t_mask = train_sets[idx]
    s_mask = get_solution_mask(t_mask, 0.5)
    nf, mask = get_mask_node_feature(node_features[idx], ys[idx], s_mask)
    
    g.ndata['feat'] = nf
    g.ndata['label'] = ys[idx]
    g.edata['feat'] = torch.cat([edge_features[idx], edge_features[idx]])
    graphs.append(g)

In [35]:
n_node_feats = node_features[idx].shape[1] + 2
n_edge_feats = edge_features[idx].shape[1]
num_classes = int(ys[idx].max().item()) + 1
hidden_size = 64

In [36]:
model = GNN(n_node_feats, n_edge_feats, hidden_size, num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

In [55]:
g = graphs[0]
model(g, g.ndata['feat'], g.edata['feat'])

  node_x = F.softmax(node_x)


tensor([[5.0340e-01, 4.9660e-01],
        [9.6351e-05, 9.9990e-01],
        [9.9719e-01, 2.8144e-03],
        [7.1648e-01, 2.8352e-01],
        [7.1551e-04, 9.9928e-01],
        [9.9877e-01, 1.2306e-03],
        [9.9888e-01, 1.1214e-03],
        [9.9998e-01, 2.3548e-05],
        [9.2409e-05, 9.9991e-01],
        [9.9999e-01, 1.0090e-05],
        [1.3288e-01, 8.6712e-01],
        [7.4196e-02, 9.2580e-01],
        [3.1725e-02, 9.6827e-01],
        [3.9285e-02, 9.6071e-01],
        [3.4643e-02, 9.6536e-01],
        [2.3017e-02, 9.7698e-01],
        [2.1715e-02, 9.7829e-01],
        [1.8413e-01, 8.1587e-01],
        [8.4325e-02, 9.1567e-01],
        [7.5011e-02, 9.2499e-01],
        [7.7189e-02, 9.2281e-01],
        [1.1860e-01, 8.8140e-01],
        [2.7007e-02, 9.7299e-01],
        [1.3763e-02, 9.8624e-01],
        [5.3681e-02, 9.4632e-01],
        [1.2998e-02, 9.8700e-01],
        [4.5392e-02, 9.5461e-01],
        [6.2112e-03, 9.9379e-01]], grad_fn=<SoftmaxBackward0>)

In [37]:
import random

num_epochs = 500
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    
    cntr = 0
    loss = 0

    random.shuffle(graphs)
    for i, g in enumerate(graphs):
        
        logits = model(g, g.ndata['feat'], g.edata['feat'])
        labels = g.ndata['label']          
        
        loss += FocalLoss()(logits[:n_var[i]], labels[:n_var[i]])
        # loss += F.cross_entropy(logits[:n_var[i]], labels[:n_var[i]].long())
        cntr += 1

        if cntr == 256:
            print(loss.detach().numpy())
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()

            loss = 0
            cntr = 0
            
    print(logits[:n_var[i]].detach().numpy())
    print(labels[:n_var[i]].detach().numpy())
    print(g.ndata['feat'][:n_var[i], -2:].detach().numpy())
    print('^'*78)

# --------------------------------------------------------
# 5) Inference / Prediction
# --------------------------------------------------------
model.eval()
with torch.no_grad():
    logits = model(g, g.ndata['feat'])
    pred = logits.argmax(dim=1)  # predicted class for each node
    print("Predicted class labels:", pred)

  node_x = F.softmax(node_x)


52.137367
48.272274
45.639393
44.32616
44.84309
[[0.5547546  0.44524536]
 [0.5931188  0.4068812 ]
 [0.58292776 0.41707224]
 [0.5690402  0.43095985]
 [0.6426525  0.35734752]
 [0.5981818  0.40181825]
 [0.59656566 0.40343434]
 [0.603205   0.396795  ]
 [0.597875   0.40212497]
 [0.6591281  0.3408719 ]]
[0 0 1 1 0 1 0 1 1 0]
[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 1.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 1.]]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
45.279312
45.27827
44.913006
44.394238
44.064693
[[0.52814054 0.47185943]
 [0.49362332 0.5063767 ]
 [0.46003097 0.539969  ]
 [0.5140481  0.4859519 ]
 [0.57659256 0.4234074 ]
 [0.4719719  0.5280281 ]
 [0.46705812 0.5329418 ]
 [0.52027535 0.47972462]
 [0.48942044 0.5105796 ]
 [0.51995224 0.48004773]]
[1 0 1 0 0 0 1 1 1 0]
[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
43.69509
43.64404

TypeError: GNN.forward() missing 1 required positional argument: 'edge_x'

In [None]:
g.nodes()

In [None]:
g.in_degrees()

In [None]:
assert 1 == 2

In [None]:
var_features = []
con_features = []
edg_features = []
solutions = []

for i in range(1):
    m = maximum_independent_set_problem()
    info = ModelInfo.from_model(m)
    var_features.append(VarFeature.from_info(info.var_info, info.obj_info))
    con_features.append(ConFeature.from_info(info.con_info))
    edg_features.append(EdgFeature.from_info(info.con_info))
    m.optimize()
    s = [v.x for v in m.getVars()]
    solutions.append(s)

inst = Inst(var_features, con_features, edg_features, solutions)

In [None]:
m = build_partial_model(info, const_vars={1: 0.5, 2: 0.5})
m.optimize()

In [None]:
ys[i].shape

In [None]:
assert 1 == 2

In [None]:
inst = Inst(
    [VarFeature.from_info(info.var_info, info.obj_info)],
    [ConFeature.from_info(info.con_info)],
    [EdgFeature.from_info(info.con_info)],
    [[v.x for v in m.getVars()]]
)

In [None]:
import random

for epoch in range(10):
    
    model.train()
    optimizer.zero_grad()
    
    agg_loss = 0
    for i in range(5):
        inst_idx = random.randint(0, len(ys) - 1)
        train_idx = torch.as_tensor(range(2), dtype=torch.int32)
        
        output, edge_features[inst_idx] = model(
            node_features[inst_idx], 
            c_v_edges[inst_idx], 
            v_c_edges[inst_idx], 
            edge_features[inst_idx].detach()
        )
        fl = FocalLoss()
        loss = fl(output[train_idx], ys[inst_idx][train_idx])
        agg_loss += loss
        
    print(output[train_idx].detach().numpy(), ys[inst_idx][train_idx].detach().numpy())
    print(agg_loss)
    
    agg_loss.backward()
    optimizer.step()

In [None]:
output

In [None]:
train_idx

In [None]:
output

In [None]:
assert 1 == 2

In [None]:
def train(epoch, num):
    global data_edge_features
    t = time.time()

    output, data_edge_features[num] = model(data_features[num], data_edge_A[num], data_edge_B[num], data_edge_features[num].detach())
    print(data_solution[num][idx_train])

    lf = Focal_Loss(torch.as_tensor(data_labels[num]))
    loss_train = lf(output[idx_train], data_solution[num][idx_train])

    return loss_train

In [None]:
t_total = time.time()
loss_values = []
for epoch in range(args.epochs):
    model.train()
    optimizer.zero_grad()
    now_loss = 0
    
    for i in range(5):
        now_data = random.randint(0, data_num - 1)
        now_loss += train(epoch, now_data)
        
    loss_values.append(now_loss)
    now_loss.backward()
    optimizer.step()
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(now_loss))

    torch.save(model.state_dict(), '{}.pkl'.format(epoch))
    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1

In [None]:
# Model and optimizer
model = SpGAT(nfeat=data_features[0].shape[1],    # Feature dimension
            nhid=args.hidden,             # Feature dimension of each hidden layer
            nclass=int(data_solution[0].max()) + 1, # Number of classes
            dropout=args.dropout,         # Dropout
            nheads=args.nb_heads,         # Number of heads
            alpha=args.alpha)             # LeakyReLU alpha coefficient

optimizer = optim.Adam(model.parameters(),    
                       lr=args.lr,                        # Learning rate
                       weight_decay=args.weight_decay)    # Weight decay to prevent overfitting

if args.cuda: # Move to GPU
    model.to(device)
    for now_data in range(data_num):
        data_features[now_data] = data_features[now_data].to(device)
        data_labels[now_data] = data_labels[now_data].to(device)
        data_solution[now_data] = data_solution[now_data].to(device)
        data_edge_A[now_data] = data_edge_A[now_data].to(device)
        data_edge_B[now_data] = data_edge_B[now_data].to(device)
        data_edge_features[now_data] = data_edge_features[now_data].to(device)
        data_idx_train[now_data] = data_idx_train[now_data].to(device)


for now_data in range(data_num):
    data_features[now_data] = Variable(data_features[now_data])
    data_edge_A[now_data] = Variable(data_edge_A[now_data])
    data_edge_B[now_data] = Variable(data_edge_B[now_data])
    data_solution[now_data] = Variable(data_solution[now_data])
    # Define computation graph for automatic differentiation

def train(epoch, num):
    global data_edge_features
    t = time.time()

    output, data_edge_features[num] = model(data_features[num], data_edge_A[num], data_edge_B[num], data_edge_features[num].detach())
    print(data_solution[num][idx_train])

    lf = Focal_Loss(torch.as_tensor(data_labels[num]))
    loss_train = lf(output[idx_train], data_solution[num][idx_train])

    return loss_train

t_total = time.time()
loss_values = []
bad_counter = 0
best = args.epochs + 1
best_epoch = 0
for epoch in range(args.epochs):
    model.train()
    optimizer.zero_grad()
    now_loss = 0
    for i in range(5):
        now_data = random.randint(0, data_num - 1)
        now_loss += train(epoch, now_data)
    loss_values.append(now_loss)
    now_loss.backward()
    optimizer.step()
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(now_loss))

    torch.save(model.state_dict(), '{}.pkl'.format(epoch))
    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1