In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import cvxpy as cp

import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
import torch.optim as optim
import os
import random

import sys
sys.path.insert(0, './mlopt-micp')
sys.path.insert(0, './mlopt-micp/cartpole')

import optimizer
from problem import Cartpole
from src.ae import Encoder, get_cartpole_encoder

In [3]:
def euclidean_dist(x,y):
    # x: NxD
    # y: MxD
    n = x.size(0)
    m = y.size(0)
    d = x.size(1)
    assert d == y.size(1)
    
    x = x.unsqueeze(1).expand(n, m, d)
    y = y.unsqueeze(0).expand(n, m, d)
    return torch.pow(x-y, 2).sum(2)

In [4]:
pp = Cartpole()

In [5]:
print('Total number of classes: {}'.format(pp.n_strategies))
print('Length of feature vector: {}'.format(pp.n_features))

Total number of classes: 581
Length of feature vector: 13


In [13]:
dim_in, dim_z = pp.n_features, 2

enc = get_cartpole_encoder(dim_in, dim_z).cuda()
enc(torch.from_numpy(pp.features[:2]).float().cuda())

# training parameters
TRAINING_ITERATIONS = int(5000)
BATCH_SIZE = int(64)
CHECKPOINT_AFTER = int(1250)
SAVEPOINT_AFTER = int(2500)

rand_idx = list(np.arange(0, pp.n_strategies-1))

indices = [rand_idx[ii * BATCH_SIZE:(ii + 1) * BATCH_SIZE] for ii in range((len(rand_idx) + BATCH_SIZE - 1) // BATCH_SIZE)]
random.shuffle(indices)

enc_dict = {}
str_dict = {}
for ii in range(len(pp.features)):
    str_idx = int(pp.labels[ii,0])
    str_dict[ii] = str_idx
    if str_idx in enc_dict.keys():
        enc_dict[str_idx] += [ii]
    else:
        enc_dict[str_idx] = [ii]
        
feats = torch.from_numpy(pp.features).float().cuda()
strat_slices = tuple(enc_dict.values())

In [7]:
torch.cuda.empty_cache()
train_inds= int(pp.training_batch_percentage*pp.n_probs)
n = BATCH_SIZE; d = dim_z
m = pp.n_strategies
#for tt in range(training_iters):
#compute centroids
embeds = enc(feats)
centroids = torch.zeros(pp.n_strategies,dim_z).cuda()
for ii in range(pp.n_strategies):
    centroids[ii,:] = torch.mean(embeds[enc_dict[ii],:])
    
#n,d = embeds[batch_inds,:].shape
#m = centroids.size(0)

batch_inds = np.random.randint(0, train_inds,BATCH_SIZE)
diff = embeds[batch_inds,:].unsqueeze(1).expand(n,m,d) - centroids.unsqueeze(0).expand(n,m,d)
l2_dist = torch.norm(diff,dim=-1); l1_dist = torch.norm(diff,p=1,dim=-1)

dists = torch.where(l2_dist <= 1., l2_dist, l1_dist)

# #sample training points
# batch_inds = np.random.randint(0, train_inds,BATCH_SIZE)
# dists = torch.cdist(embeds[batch_inds,:],centroids)
margins = dists[torch.arange(BATCH_SIZE),pp.labels[batch_inds,0].astype(int)][:,None]
loss_mask = torch.zeros(BATCH_SIZE,pp.n_strategies).cuda()
loss_mask[torch.arange(BATCH_SIZE),pp.labels[batch_inds,0].astype(int)] = -1.
losses = margins + torch.sum(torch.max(margins-dists+1,torch.zeros(1).cuda()),dim=1)[:,None]

np.mean(np.equal(torch.argmin(dists,axis=1).cpu().numpy(),pp.labels[batch_inds,0].astype(int)))

#torch.sum(torch.max(loss_mask+margins-dists+1,torch.zeros(1).cuda()),dim=1)[:,None]

tensor([[0.2954],
        [0.2918],
        [0.2977],
        [0.2936],
        [0.2978],
        [0.2947],
        [0.2994],
        [0.2885],
        [0.2985],
        [0.3010],
        [0.2891],
        [0.3057],
        [0.2937],
        [0.2958],
        [0.2929],
        [0.3061],
        [0.2904],
        [0.2994],
        [0.3006],
        [0.2918],
        [0.3008],
        [0.2991],
        [0.2986],
        [0.2955],
        [0.2986],
        [0.3060],
        [0.3073],
        [0.2963],
        [0.2929],
        [0.3032],
        [0.3010],
        [0.3008],
        [0.2903],
        [0.3077],
        [0.2976],
        [0.2940],
        [0.2982],
        [0.2833],
        [0.3009],
        [0.2988],
        [0.2955],
        [0.2947],
        [0.2959],
        [0.2881],
        [0.3004],
        [0.2995],
        [0.2985],
        [0.3008],
        [0.3034],
        [0.2998],
        [0.2936],
        [0.2961],
        [0.2990],
        [0.3010],
        [0.2907],
        [0

In [29]:
optimizer = optim.Adam(enc.parameters(),lr=1e-3)
N = pp.n_strategies # number of classes in training set
Nc = 100 # number of classes per episode
Ns = 20  # number of support examples per class
Nq = 20  # number of query examples per class
BATCH_SIZE = int(2056)
training_iters = 10000
n = BATCH_SIZE; d = dim_z
m = pp.n_strategies
centroids = 10*torch.randn(pp.n_strategies,dim_z).cuda()
tau = 1e-1
for tt in range(training_iters):
    optimizer.zero_grad()

    train_inds= int(pp.training_batch_percentage*pp.n_probs)
    
    #compute centroids
    embeds = enc(feats)
    c_curr = torch.zeros(pp.n_strategies,dim_z).cuda()
    for ii in range(pp.n_strategies):
        c_curr[ii,:] = torch.mean(embeds[enc_dict[ii],:])
        
    centroids = (1-tau)*centroids + tau*c_curr.detach()

    #sample training points
    batch_inds = np.random.randint(0, train_inds,BATCH_SIZE)
    diff = embeds[batch_inds,:].unsqueeze(1).expand(n,m,d) - centroids.unsqueeze(0).expand(n,m,d)
    l2_dist = torch.norm(diff,dim=-1); l1_dist = torch.norm(diff,p=1,dim=-1)

    dists = torch.where(l2_dist <= 1., l2_dist, l1_dist)
    #dists = torch.cdist(embeds[batch_inds,:],centroids)
    margins = dists[torch.arange(BATCH_SIZE),pp.labels[batch_inds,0].astype(int)][:,None]
    loss_mask = torch.zeros(BATCH_SIZE,pp.n_strategies).cuda()
    loss_mask[torch.arange(BATCH_SIZE),pp.labels[batch_inds,0].astype(int)] = -1.
    losses = 0.5*margins + 0.5*torch.sum(torch.max(loss_mask+margins-dists+5,torch.zeros(1).cuda()),dim=1)[:,None]

    if tt % 50 == 0: #print for debug
        acc = np.mean(np.equal(torch.argmin(dists,axis=1).cpu().numpy(),pp.labels[batch_inds,0].astype(int)))
        print(acc,torch.mean(losses))
    
    torch.mean(losses).backward()
    #torch.nn.utils.clip_grad_norm_(enc.parameters(),1.)
    optimizer.step()

0.0014591439688715954 tensor(1431.2437, device='cuda:0', grad_fn=<MeanBackward0>)
0.013618677042801557 tensor(1192.1442, device='cuda:0', grad_fn=<MeanBackward0>)
0.007782101167315175 tensor(1124.6624, device='cuda:0', grad_fn=<MeanBackward0>)
0.0038910505836575876 tensor(1447.4426, device='cuda:0', grad_fn=<MeanBackward0>)
0.00048638132295719845 tensor(1488.0829, device='cuda:0', grad_fn=<MeanBackward0>)
0.0 tensor(1384.6669, device='cuda:0', grad_fn=<MeanBackward0>)
0.004377431906614786 tensor(1137.2689, device='cuda:0', grad_fn=<MeanBackward0>)
0.0009727626459143969 tensor(1026.2563, device='cuda:0', grad_fn=<MeanBackward0>)
0.0024319066147859923 tensor(1125.9775, device='cuda:0', grad_fn=<MeanBackward0>)
0.0009727626459143969 tensor(988.5848, device='cuda:0', grad_fn=<MeanBackward0>)
0.0 tensor(857.2982, device='cuda:0', grad_fn=<MeanBackward0>)
0.0 tensor(813.2253, device='cuda:0', grad_fn=<MeanBackward0>)
0.0014591439688715954 tensor(804.8226, device='cuda:0', grad_fn=<MeanBackwa

In [30]:
#test script
n_train_strategies = pp.n_strategies #store how many strats in train set
c_k = torch.zeros((n_train_strategies,dim_z)) 
embeddings = enc(feats) #embed training points
for ii in range(n_train_strategies): #compute train centroids
    inds = enc_dict[ii]
    c_k[ii,:] = torch.mean(embeddings[inds,:],axis=0).cuda()

#compute strategy dictionary for all problems
pp.training_batch_percentage = 1.
pp.construct_strategies()
strat_lookup = {}
for k, v in pp.strategy_dict.items():
    strat_lookup[v[0]] = v[1:]

#setup for test
test_feats = torch.from_numpy(pp.features[int(0.9*pp.n_probs):,:]).float().cuda()
test_enc = enc(test_feats).cuda()
test_dists = torch.cdist(test_enc,c_k.cuda()).detach().cpu().numpy()
test_start = int(0.9*pp.n_probs)
n_test = int(0.1*pp.n_probs)
ind_max = np.argsort(test_dists)[:,:pp.n_evals]
feasible = np.zeros(n_test)
costs = np.zeros(n_test)

In [31]:
prob_success = False

for ii in range(n_test):
    for jj in range(pp.n_evals):
        y_guess = strat_lookup[ind_max[ii,jj]]
        try:
            prob_success, cost, solve_time = pp.solve_mlopt_prob_with_idx(ii+test_start, y_guess)
            if prob_success:
                feasible[ii] = 1.
                costs[ii] = cost
                print('Succeded at {} with {} tries'.format(ii,jj+1))
                break
        except:
            print('mosek failed at '.format(ii))

Succeded at 2 with 2 tries
Succeded at 3 with 4 tries
Succeded at 5 with 5 tries
Succeded at 6 with 1 tries
Succeded at 8 with 1 tries
Succeded at 11 with 1 tries
Succeded at 13 with 3 tries
Succeded at 14 with 1 tries
Succeded at 17 with 6 tries
Succeded at 18 with 1 tries
Succeded at 19 with 1 tries
Succeded at 20 with 3 tries
Succeded at 23 with 3 tries
Succeded at 24 with 4 tries
Succeded at 26 with 2 tries
Succeded at 27 with 9 tries
Succeded at 28 with 5 tries
Succeded at 29 with 8 tries
Succeded at 32 with 2 tries
Succeded at 34 with 2 tries
Succeded at 35 with 9 tries
Succeded at 37 with 1 tries
Succeded at 39 with 3 tries
Succeded at 40 with 1 tries
Succeded at 41 with 3 tries
Succeded at 42 with 3 tries
Succeded at 43 with 1 tries
Succeded at 44 with 2 tries
Succeded at 45 with 2 tries
Succeded at 46 with 7 tries
Succeded at 47 with 5 tries
Succeded at 48 with 6 tries
Succeded at 49 with 5 tries
Succeded at 51 with 3 tries
Succeded at 52 with 3 tries
Succeded at 56 with 3 tri

In [None]:
global_acc = sum(sum(np.equal(ind_max,pp.labels[test_start:,0][:,None])))/(0.1*pp.n_probs)
global_acc

In [33]:
np.mean(feasible[:ii])

0.7764776477647765