In [1]:
import torch
import gurobipy as gp

In [2]:
from learn.info import ModelInfo
from learn.feature import VarFeature, ConFeature, EdgFeature
from learn.train import Inst

In [3]:
import gurobipy as gp
from gurobipy import GRB
import numpy as np
# Seed NumPy's global RNG: maximum_independent_set_problem() below draws its
# random edges with np.random.rand(), so this fixes the generated graphs when
# the cells are run in order from a fresh kernel.
np.random.seed(0)

def maximum_independent_set_problem(
    num_nodes=64,
    edge_prob=0.3,
    print_output=True
):
    """Build a Gurobi model of maximum independent set on a random graph.

    Every node pair ``(i, j)`` with ``i < j`` becomes an edge when a draw from
    NumPy's global RNG (``np.random.rand()``) is below ``edge_prob``. The
    model has one binary variable per node, one ``x[i] + x[j] <= 1``
    constraint per edge, and maximizes the sum of the node variables. The
    model is returned after ``update()`` but is NOT optimized here.

    Args:
        num_nodes: Number of graph nodes (and binary variables).
        edge_prob: Threshold the uniform draw is compared against for each pair.
        print_output: When False, the model's ``OutputFlag`` parameter is set
            to 0 so that solving it does not log. When True (the default)
            the parameter is left untouched.

    Returns:
        The constructed ``gurobipy.Model``.
    """
    # Pairs are visited in the same (i, then j > i) order as a nested loop, so
    # the sequence of RNG draws -- and therefore the graph -- is unchanged.
    edges = [
        (i, j)
        for i in range(num_nodes)
        for j in range(i + 1, num_nodes)
        if np.random.rand() < edge_prob
    ]

    m = gp.Model("maximum_independent_set")
    if not print_output:
        # Previously this flag was accepted but silently ignored.
        m.Params.OutputFlag = 0

    x = m.addVars(num_nodes, vtype=GRB.BINARY, name="x")
    for i, j in edges:
        # Adjacent nodes cannot both be selected.
        m.addConstr(x[i] + x[j] <= 1, name=f"edge_{i}_{j}")
    m.setObjective(gp.quicksum(x[i] for i in range(num_nodes)), GRB.MAXIMIZE)
    m.update()
    return m

In [4]:
%%capture

# Parallel per-instance lists: entry k of each list belongs to instance k.
var_features, con_features, edg_features, solutions = [], [], [], []

for _ in range(2048):
    m = maximum_independent_set_problem()

    # Features are extracted from the model before it is optimized.
    info = ModelInfo.from_model(m)
    var_features.append(VarFeature.from_info(info.var_info, info.obj_info))
    con_features.append(ConFeature.from_info(info.con_info))
    edg_features.append(EdgFeature.from_info(info.con_info))

    # Solve, then record the value of every variable in model order.
    m.optimize()
    solutions.append([v.x for v in m.getVars()])

inst = Inst(var_features, con_features, edg_features, solutions)

In [5]:
# Inst.xs unpacks into six sequences; where they are used below, each is
# indexed by instance number (e.g. node_features[i], n_var[i]).
c_v_edges, v_c_edges, node_features, edge_features, n_var, n_con = inst.xs
# Per-instance target tensors; the training loop passes slices of ys[i] to the
# loss as targets.
ys = inst.ys

In [6]:
def get_train_mask(size, ratio):
    """Return a random boolean mask of length ``size`` with some entries cleared.

    ``int(round(size * ratio))`` positions, taken from the front of a
    ``torch.randperm(size)`` permutation, are set to False; all remaining
    positions are True.
    """
    n_cleared = int(round(size * ratio))
    cleared_positions = torch.randperm(size)[:n_cleared]
    keep = torch.ones(size, dtype=torch.bool)
    keep[cleared_positions] = 0
    return keep


def get_solution_mask(pool, ratio):
    """Return a copy of ``pool`` in which only a random share of its ones survive.

    Of the positions where ``pool == 1``, ``int(round(count * ratio))`` are
    kept at 1 and the rest are set to 0. Positions that are not 1 in ``pool``
    are copied unchanged, and ``pool`` itself is never modified.

    No random numbers are drawn when the number to keep is 0 or is at least
    the number of ones.
    """
    candidates = torch.nonzero(pool == 1, as_tuple=True)[0]
    total = len(candidates)
    num_keep = int(round(total * ratio))

    result = pool.clone()
    if num_keep <= 0:
        # Nothing survives: clear every candidate.
        result[candidates] = 0
    elif num_keep < total:
        # Clear all candidates, then switch a random subset back on.
        survivors = candidates[torch.randperm(total)[:num_keep]]
        result[candidates] = 0
        result[survivors] = 1
    # Otherwise every candidate survives and the plain copy is returned.
    return result


def get_mask_node_feature(node_feature, y, mask):
    """Append a partially hidden label column and a mask-flag column to the features.

    ``mask`` is padded with False up to ``len(y)``. The returned feature
    matrix is ``node_feature`` followed by two extra columns: ``y`` with the
    entries at True mask positions overwritten by 0, and the padded mask
    itself. Neither ``node_feature`` nor ``y`` is modified.

    Returns:
        ``(features, padded_mask)``.
    """
    padding = torch.zeros(len(y) - len(mask), dtype=torch.bool)
    full_mask = torch.cat([mask, padding])

    # hstack allocates a new tensor, so the in-place write below cannot touch
    # the caller's node_feature or y.
    with_labels = torch.hstack([node_feature, y.unsqueeze(1)])
    with_labels[full_mask, -1] = 0

    features = torch.hstack([with_labels, full_mask.unsqueeze(1)])
    return features, full_mask

In [7]:
def solve(problem, max_level):
    """Recursive partition / solve / repair heuristic (design sketch).

    The problem is partitioned. At ``max_level == 0`` the first sub-problem is
    solved exactly, the result is extended to the whole problem via ``infer``
    and repaired with ``gb_fix``. At higher levels every sub-problem is solved
    recursively with one level less, the solutions are merged with ``combine``
    and the merged solution is repaired with ``gb_fix``.

    NOTE(review): ``partition``, ``solve_exact``, ``infer``, ``combine`` and
    ``gb_fix`` are not defined in the visible notebook, so this cannot run as
    is; their contracts are assumed from how they are called here.

    Args:
        problem: The problem to solve (type defined by ``partition``).
        max_level: Remaining recursion depth; 0 selects the base case.

    Returns:
        Whatever ``gb_fix`` returns for the final candidate solution.
    """
    sub_problems = partition(problem)
    if max_level == 0:
        partial_solution = solve_exact(sub_problems[0])
        approx_solution = infer(partial_solution, problem)
        return gb_fix(approx_solution)

    # Recurse into each sub-problem individually. The previous version passed
    # the whole list to solve() as if it were a single problem, which does not
    # match the base case (sub_problems[0] is one problem) or combine(sols).
    sols = [solve(sub_problem, max_level - 1) for sub_problem in sub_problems]
    combined = combine(sols)  # crossover
    return gb_fix(combined)
    
    
    

In [8]:
import torch
from learn.model import FocalLoss, SpGAT
import torch.optim as optim

# get_mask_node_feature() appends two columns (label and mask flag) to the raw
# node features before they reach the model, hence the "+ 2".
n_input_features = inst.xs[2][0].shape[1] + 2

model = SpGAT(
    nfeat=n_input_features,
    nhid=64,
    nclass=2,
    dropout=0.1,
    nheads=6,
    alpha=0.2,
)

optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
    weight_decay=5e-3,
)

In [9]:
from torch import nn
# Plain cross-entropy criterion.
# NOTE(review): `ce` is not referenced by any other visible cell (the training
# loops use FocalLoss) -- confirm whether it is still needed.
ce = nn.CrossEntropyLoss()

In [10]:
# Share of each instance's variables that get_train_mask() sets to False.
train_ratio = 0.5

# One boolean mask per instance, of length n_var[k]. The training loop
# computes its loss on the False positions (it indexes with ~mask).
train_sets = [
    get_train_mask(n_var[k], train_ratio)
    for k in range(len(ys))
]

In [None]:
import random

batch_size = 256

# The criterion is built once instead of once per instance.
fl = FocalLoss()


def _masked_forward(idx):
    """Run the model on instance ``idx`` with part of its labels hidden.

    A fresh random subset of the True entries of ``train_sets[idx]`` is drawn
    with ``get_solution_mask`` on every call; the label column is zeroed at
    those positions by ``get_mask_node_feature``.

    NOTE(review): the loss positions (``~train_sets[idx]``) can never be part
    of that subset, so their label column is NOT zeroed in the model input --
    confirm that feeding those labels to the model is intended.

    Returns:
        ``(output, loss_mask, hidden_mask)`` where ``loss_mask`` selects the
        rows the loss is computed on (padded with False up to the number of
        rows) and ``hidden_mask`` is the padded mask of zeroed labels.
    """
    t_mask = train_sets[idx]
    s_mask = get_solution_mask(t_mask, 0.5)
    nf, hidden_mask = get_mask_node_feature(node_features[idx], ys[idx], s_mask)
    output, _ = model(
        nf,
        c_v_edges[idx],
        v_c_edges[idx],
        edge_features[idx].detach()
    )
    padding = torch.zeros(len(hidden_mask) - len(t_mask), dtype=torch.bool)
    loss_mask = torch.cat([~t_mask, padding])
    return output, loss_mask, hidden_mask


def _apply_update(total_loss, n_items):
    """Back-propagate the mean loss, clip gradients, step, and reset gradients."""
    (total_loss / n_items).backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
    optimizer.step()
    # Reset after every step. Previously gradients were only cleared once per
    # epoch, so each step also contained the gradients of all earlier batches
    # of that epoch.
    optimizer.zero_grad()


for epoch in range(500):

    model.train()
    optimizer.zero_grad()

    agg_loss = 0
    # The last 100 instances are never trained on.
    inst_idxs = list(range(len(ys) - 100))
    random.shuffle(inst_idxs)

    counter = 0
    for i in inst_idxs:
        counter += 1

        output, train_mask, mask = _masked_forward(i)
        loss = fl(output[train_mask, :], ys[i][train_mask])
        agg_loss += loss

        if counter >= batch_size:
            _apply_update(agg_loss, counter)
            print(output[train_mask, :].detach().numpy(), ys[i][train_mask].detach().numpy(), mask.sum())
            print(agg_loss)

            print('-'*100)
            test_idx = -1

            # Probe the last (held-out) instance without dropout and without
            # building an autograd graph, then return to training mode.
            model.eval()
            with torch.no_grad():
                output, test_mask, mask = _masked_forward(test_idx)
            model.train()
            print(output[test_mask, :].detach().numpy(), ys[test_idx][test_mask].detach().numpy(), mask.sum())
            print('^'*100)
            agg_loss = 0
            counter = 0

    # Use the trailing partial batch as well; previously its forward passes
    # were computed and then discarded at the end of every epoch.
    if counter > 0:
        _apply_update(agg_loss, counter)
        agg_loss = 0
        counter = 0


[[0.54240626 0.45759374]
 [0.53063524 0.46936476]
 [0.53687495 0.46312502]
 [0.5366877  0.46331233]
 [0.54531837 0.4546817 ]
 [0.5369345  0.4630655 ]
 [0.523825   0.47617504]
 [0.53083795 0.46916202]
 [0.5364077  0.4635923 ]
 [0.53413343 0.46586654]
 [0.5336314  0.46636862]
 [0.5461938  0.45380622]
 [0.53969324 0.46030676]
 [0.53477573 0.46522427]
 [0.54295903 0.45704097]
 [0.54177976 0.4582202 ]
 [0.5365297  0.46347022]
 [0.53098625 0.46901372]
 [0.5386334  0.46136662]
 [0.5350969  0.4649031 ]
 [0.5301887  0.46981132]
 [0.5372725  0.46272746]
 [0.5411987  0.45880124]
 [0.5348985  0.46510148]
 [0.54060894 0.45939106]
 [0.541353   0.45864704]
 [0.54069686 0.4593031 ]
 [0.54346263 0.4565374 ]
 [0.5407713  0.45922866]
 [0.53801525 0.46198478]
 [0.54070103 0.45929897]
 [0.5359832  0.46401677]] [0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0] tensor(16)
tensor(40.6731, grad_fn=<AddBackward0>)
----------------------------------------------------------------------------------

In [None]:
# Always-false assertion.
# NOTE(review): presumably a deliberate stop so that "Run All" halts before
# the scratch cells that follow -- confirm, and consider deleting those cells.
assert 1 == 2

In [None]:
# Scratch cell: rebuilds the feature lists and `inst` from a single freshly
# generated instance, overwriting the 2048-instance versions created earlier.
# NOTE(review): the names unpacked earlier from inst.xs / inst.ys (ys,
# node_features, ...) are not refreshed by this cell.
var_features = []
con_features = []
edg_features = []
solutions = []

# The loop variable `i`, `m` and `info` stay bound after the loop and are read
# by the cells that follow.
for i in range(1):
    m = maximum_independent_set_problem()
    # Features are extracted before the model is optimized.
    info = ModelInfo.from_model(m)
    var_features.append(VarFeature.from_info(info.var_info, info.obj_info))
    con_features.append(ConFeature.from_info(info.con_info))
    edg_features.append(EdgFeature.from_info(info.con_info))
    m.optimize()
    # Variable values of the solved model, in model order.
    s = [v.x for v in m.getVars()]
    solutions.append(s)

inst = Inst(var_features, con_features, edg_features, solutions)

In [None]:
# NOTE(review): build_partial_model is neither defined nor imported in the
# visible notebook -- confirm where it comes from. The meaning of the
# const_vars keys/values is defined by that function.
# Rebinds `m` to the model built from the current `info`, then optimizes it.
m = build_partial_model(info, const_vars={1: 0.5, 2: 0.5})
m.optimize()

In [None]:
# Display the shape of the target tensor of instance `i`; `i` is whatever the
# most recently executed loop left it bound to.
ys[i].shape

In [None]:
# Always-false assertion.
# NOTE(review): presumably a deliberate stop so that "Run All" does not run
# the scratch cells below -- confirm.
assert 1 == 2

In [None]:
# Scratch cell: rebuild `inst` as a one-instance dataset from the current
# `info` (features) and the current `m` (variable values read via v.x).
# NOTE(review): reading v.x presumably requires `m` to have been optimized
# already -- verify the cell order this depends on.
inst = Inst(
    [VarFeature.from_info(info.var_info, info.obj_info)],
    [ConFeature.from_info(info.con_info)],
    [EdgFeature.from_info(info.con_info)],
    [[v.x for v in m.getVars()]]
)

In [None]:
import random

# Scratch training loop: 10 epochs, each accumulating the focal loss of five
# randomly chosen instances (rows 0 and 1 only) and taking one optimizer step.
# NOTE(review): node_features[inst_idx] is passed to the model without the two
# extra columns that get_mask_node_feature() appends, while the model built
# earlier uses nfeat = raw feature count + 2 -- confirm the shapes agree.
for epoch in range(10):
    
    model.train()
    optimizer.zero_grad()
    
    agg_loss = 0
    for i in range(5):
        # Instances are sampled with replacement.
        inst_idx = random.randint(0, len(ys) - 1)
        # Loss is evaluated on the first two rows of the output only.
        train_idx = torch.as_tensor(range(2), dtype=torch.int32)
        
        # The model's second return value overwrites the stored edge features
        # of this instance, so later passes start from the updated values.
        output, edge_features[inst_idx] = model(
            node_features[inst_idx], 
            c_v_edges[inst_idx], 
            v_c_edges[inst_idx], 
            edge_features[inst_idx].detach()
        )
        fl = FocalLoss()
        loss = fl(output[train_idx], ys[inst_idx][train_idx])
        agg_loss += loss
        
    # Only the last sampled instance of the epoch is printed.
    print(output[train_idx].detach().numpy(), ys[inst_idx][train_idx].detach().numpy())
    print(agg_loss)
    
    # The summed (not averaged) loss of the five instances is back-propagated.
    agg_loss.backward()
    optimizer.step()

In [None]:
# Display the model output left over from the most recently executed forward pass.
output

In [None]:
# Display the row indices last used to select loss positions.
train_idx

In [None]:
# Display the most recent model output again (repeats an earlier display cell).
output

In [None]:
# Always-false assertion.
# NOTE(review): presumably a deliberate stop before the legacy cells below,
# which use names that are not defined in the visible notebook -- confirm.
assert 1 == 2

In [None]:
def train(epoch, num):
    """Forward instance ``num`` through the model and return its focal loss.

    The model's second return value is written back into
    ``data_edge_features[num]``. The target entries selected by ``idx_train``
    are printed on every call. No backward pass or optimizer step happens
    here; the caller is responsible for both.

    NOTE(review): ``model``, ``Focal_Loss``, ``idx_train`` and the ``data_*``
    containers are module-level names that are not defined in the visible
    notebook; this notebook also defines ``train`` a second time further down.

    Args:
        epoch: Unused; kept so existing call sites keep working.
        num: Index of the instance in the ``data_*`` containers.

    Returns:
        The loss computed by ``Focal_Loss`` on the ``idx_train`` rows.
    """
    # Item assignment mutates the existing container, so no `global` statement
    # is needed. The unused timing local (`time` is never imported in the
    # visible notebook) has been dropped.
    output, data_edge_features[num] = model(
        data_features[num],
        data_edge_A[num],
        data_edge_B[num],
        data_edge_features[num].detach(),
    )
    print(data_solution[num][idx_train])

    lf = Focal_Loss(torch.as_tensor(data_labels[num]))
    loss_train = lf(output[idx_train], data_solution[num][idx_train])

    return loss_train

In [None]:
# Legacy epoch loop: each epoch sums the loss of five randomly chosen
# instances, takes one optimizer step, saves a checkpoint and tracks the best
# loss seen so far.
# NOTE(review): `time`, `args` and `data_num` are not defined in the visible
# notebook, and `best` / `bad_counter` are read below without being
# initialised in this cell (a later cell initialises them) -- confirm the
# intended execution order or remove this cell.
t_total = time.time()
loss_values = []
for epoch in range(args.epochs):
    model.train()
    optimizer.zero_grad()
    now_loss = 0
    
    for i in range(5):
        # Instances are sampled with replacement.
        now_data = random.randint(0, data_num - 1)
        now_loss += train(epoch, now_data)
        
    loss_values.append(now_loss)
    now_loss.backward()
    optimizer.step()
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(now_loss))

    # One checkpoint file per epoch, named "<epoch>.pkl" in the working directory.
    torch.save(model.state_dict(), '{}.pkl'.format(epoch))
    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        # Counted but never used to stop the loop in this cell.
        bad_counter += 1

In [None]:
# Model and optimizer
# NOTE(review): this rebinds `model` and `optimizer` created earlier in the
# notebook. `args`, `device`, `Variable` and the `data_*` containers are not
# defined in the visible notebook -- confirm their origin.
model = SpGAT(nfeat=data_features[0].shape[1],    # Feature dimension
            nhid=args.hidden,             # Feature dimension of each hidden layer
            nclass=int(data_solution[0].max()) + 1, # Number of classes
            dropout=args.dropout,         # Dropout
            nheads=args.nb_heads,         # Number of heads
            alpha=args.alpha)             # LeakyReLU alpha coefficient

optimizer = optim.Adam(model.parameters(),    
                       lr=args.lr,                        # Learning rate
                       weight_decay=args.weight_decay)    # Weight decay to prevent overfitting

if args.cuda: # Move to GPU
    model.to(device)
    # Every per-instance tensor is replaced by its copy on `device`.
    for now_data in range(data_num):
        data_features[now_data] = data_features[now_data].to(device)
        data_labels[now_data] = data_labels[now_data].to(device)
        data_solution[now_data] = data_solution[now_data].to(device)
        data_edge_A[now_data] = data_edge_A[now_data].to(device)
        data_edge_B[now_data] = data_edge_B[now_data].to(device)
        data_edge_features[now_data] = data_edge_features[now_data].to(device)
        data_idx_train[now_data] = data_idx_train[now_data].to(device)


# NOTE(review): presumably torch.autograd.Variable, which is deprecated and no
# longer needed for autograd on tensors -- confirm and consider dropping the wrap.
for now_data in range(data_num):
    data_features[now_data] = Variable(data_features[now_data])
    data_edge_A[now_data] = Variable(data_edge_A[now_data])
    data_edge_B[now_data] = Variable(data_edge_B[now_data])
    data_solution[now_data] = Variable(data_solution[now_data])
    # Define computation graph for automatic differentiation

def train(epoch, num):
    """Forward instance ``num`` through the model and return its focal loss.

    The model's second return value is written back into
    ``data_edge_features[num]``. The target entries selected by ``idx_train``
    are printed on every call. No backward pass or optimizer step happens
    here; the caller is responsible for both.

    NOTE(review): ``Focal_Loss`` and ``idx_train`` are not defined in the
    visible notebook, and an identical ``train`` is also defined in an earlier
    cell; whichever definition runs last wins.

    Args:
        epoch: Unused; kept so existing call sites keep working.
        num: Index of the instance in the ``data_*`` containers.

    Returns:
        The loss computed by ``Focal_Loss`` on the ``idx_train`` rows.
    """
    # Item assignment mutates the existing container, so no `global` statement
    # is needed. The unused timing local has been dropped.
    output, data_edge_features[num] = model(
        data_features[num],
        data_edge_A[num],
        data_edge_B[num],
        data_edge_features[num].detach(),
    )
    print(data_solution[num][idx_train])

    lf = Focal_Loss(torch.as_tensor(data_labels[num]))
    loss_train = lf(output[idx_train], data_solution[num][idx_train])

    return loss_train

# Legacy epoch loop with best-loss bookkeeping: each epoch sums the loss of
# five randomly chosen instances, takes one optimizer step, saves a checkpoint
# and records whether the epoch improved on the best loss so far.
# NOTE(review): `time`, `random`'s seeding, `args` and `data_num` are not set
# up in the visible part of this cell -- confirm their origin.
t_total = time.time()
loss_values = []
bad_counter = 0
# Initial "best" is args.epochs + 1; NOTE(review): this only works as an
# upper bound if the first epoch's loss is below that value -- confirm.
best = args.epochs + 1
best_epoch = 0
for epoch in range(args.epochs):
    model.train()
    optimizer.zero_grad()
    now_loss = 0
    for i in range(5):
        # Instances are sampled with replacement.
        now_data = random.randint(0, data_num - 1)
        now_loss += train(epoch, now_data)
    loss_values.append(now_loss)
    now_loss.backward()
    optimizer.step()
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(now_loss))

    # One checkpoint file per epoch, named "<epoch>.pkl" in the working directory.
    torch.save(model.state_dict(), '{}.pkl'.format(epoch))
    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        # Counted but never used to stop the loop in the visible code.
        bad_counter += 1