In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.version

'3.6.3 (default, Mar  6 2020, 14:15:08) \n[GCC 5.4.0 20160609]'

In [4]:
import numpy as np
import cvxpy as cp

import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
import torch.optim as optim
import os
import random

import sys
sys.path.insert(0, './mlopt-micp')
sys.path.insert(0, './mlopt-micp/cartpole')

import optimizer
from problem import Cartpole
from src.ae import Encoder, get_cartpole_encoder

In [5]:
def euclidean_dist(x,y):
    # x: NxD
    # y: MxD
    n = x.size(0)
    m = y.size(0)
    d = x.size(1)
    assert d == y.size(1)
    
    x = x.unsqueeze(1).expand(n, m, d)
    y = y.unsqueeze(0).expand(n, m, d)
    return torch.pow(x-y, 2).sum(2)

In [6]:
pp = Cartpole()

In [7]:
print('Total number of classes: {}'.format(pp.n_strategies))
print('Length of feature vector: {}'.format(pp.n_features))

Total number of classes: 581
Length of feature vector: 13


tensor([[ 0.1533,  0.0948,  0.0562,  0.1049],
        [ 0.1557,  0.0248, -0.0311,  0.1343]], device='cuda:0',
       grad_fn=<AddmmBackward>)

In [71]:
dim_in, dim_z = pp.n_features, 4

enc = get_cartpole_encoder(dim_in, dim_z).cuda()
enc(torch.from_numpy(pp.features[:2]).float().cuda())

# training parameters
TRAINING_ITERATIONS = int(5000)
BATCH_SIZE = int(64)
CHECKPOINT_AFTER = int(1250)
SAVEPOINT_AFTER = int(2500)

rand_idx = list(np.arange(0, pp.n_strategies-1))

indices = [rand_idx[ii * BATCH_SIZE:(ii + 1) * BATCH_SIZE] for ii in range((len(rand_idx) + BATCH_SIZE - 1) // BATCH_SIZE)]
random.shuffle(indices)

enc_dict = {}
str_dict = {}
for ii in range(len(pp.features)):
    str_idx = int(pp.labels[ii,0])
    str_dict[ii] = str_idx
    if str_idx in enc_dict.keys():
        enc_dict[str_idx] += [ii]
    else:
        enc_dict[str_idx] = [ii]
        
feats = torch.from_numpy(pp.features).float().cuda()

In [None]:
optimizer = optim.Adam(enc.parameters(),lr=3e-4)
N = pp.n_strategies # number of classes in training set
Nc = 100 # number of classes per episode
Ns = 20  # number of support examples per class
Nq = 20  # number of query examples per class
training_iters = 10000
for tt in range(training_iters):
    optimizer.zero_grad()

    #sample classes for this iter
    V = np.random.randint(0, pp.n_strategies, Nc)
    Sk = {}  # support examples
    Qk = {}  # query examples
    ck = torch.zeros((Nc, dim_z))

    for ii, v in enumerate(V):
        if len(enc_dict[v]) <= Ns: #if not enough examples for support
            Sk[v] = enc_dict[v]
            Qk[v] = enc_dict[v]
        else:
            Sk[v] = random.sample(enc_dict[v], Ns)
            Qk[v] = [kk for kk in enc_dict[v] if kk not in Sk[v]]
            if len(Qk[v]) > Nq: #if not enough examples for query
                Qk[v] = random.sample(Qk[v], Nq)
        enc_support = enc(feats[Sk[v],:])
        ck[ii,:] = torch.mean(enc_support, axis=0).float().cuda()
        
    losses = torch.zeros(len(V))
    correct = torch.zeros(len(V))
    total = torch.zeros(len(V))
    for ii, v in enumerate(V):
        fx = enc(feats[Qk[v],:]) #current features
        dists = euclidean_dist(fx.cuda(),ck.cuda()) #compute distance between centroid & query embeds
        log_p_y = dists[:,ii] + torch.log(torch.sum(torch.exp(-dists)+1e-6, axis=1))
        losses[ii] = log_p_y.mean()
        #compute accuracy
        correct[ii] = torch.sum(torch.argmin(dists,axis=1)==ii)
        total[ii] = len(Qk[v])
        
    acc = torch.sum(correct)/torch.sum(total)
    
    if tt % 50 == 0: #print for debug
        print(acc, torch.mean(losses))
    
    torch.mean(losses).backward()
    torch.nn.utils.clip_grad_norm_(enc.parameters(), 1.)
    total_norm = 0.
    optimizer.step()

In [17]:
#dummy train script
itr = 0
for epoch in range(TRAINING_ITERATIONS):
    iter_count = 0
    for ii, idx in enumerate(indices):
        optimizer.zero_grad()
        action = torch.empty(NUM_DATA, dim_u)

        encodings_idx = enc(torch.from_numpy(pp.features[idx]).float())
        loss = compute_loss(encodings_idx, str_dict)

        loss.backward()
        optimizer.step()

        if itr % CHECKPOINT_AFTER == 0:
            print('Avg. loss: {}'.format(loss.item()))

        if itr % SAVEPOINT_AFTER == 0:
            torch.save(model.state_dict(), fn_pt_model)

        iter_count += 1
        itr += 1

NameError: name 'NUM_DATA' is not defined

In [85]:
#test script
n_train_strategies = pp.n_strategies #store how many strats in train set
c_k = torch.zeros((n_train_strategies,4)) 
embeddings = enc(feats) #embed training points
for ii in range(n_train_strategies): #compute train centroids
    inds = enc_dict[ii]
    c_k[ii,:] = torch.mean(embeddings[inds,:],axis=0).cuda()

#compute strategy dictionary for all problems
pp.training_batch_percentage = 1.
pp.construct_strategies()
strat_lookup = {}
for k, v in pp.strategy_dict.items():
    strat_lookup[v[0]] = v[1:]

#setup for test
test_feats = torch.from_numpy(pp.features[int(0.9*pp.n_probs):,:]).float().cuda()
test_enc = enc(test_feats).cuda()
test_dists = torch.cdist(test_enc,c_k.cuda()).detach().cpu().numpy()
test_start = int(0.9*pp.n_probs)
n_test = int(0.1*pp.n_probs)
ind_max = np.argsort(test_dists)[:,:pp.n_evals]
feasible = np.zeros(n_test)
costs = np.zeros(n_test)

In [None]:
prob_success = False
for ii in range(n_test):
    for jj in range(pp.n_evals):
        y_guess = strat_lookup[ind_max[ii,jj]]
        try:
            prob_success, cost, solve_time = pp.solve_mlopt_prob_with_idx(ii+test_start, y_guess)
            print(ii,jj)
            if prob_success:
                feasible[ii] = 1.
                costs[ii] = cost
                break
        except:
            print('mosek failed')

0 0
1 0
1 1
1 2
1 3
2 0
3 0
3 1
3 2
3 3
3 4
3 5
3 6
3 7
3 8
3 9
4 0
4 1
4 2
4 3
5 0
5 1
5 2
5 3
5 4
6 0
7 0
8 0
9 0
9 1
9 2
9 3
9 4
9 5
10 0
10 1
10 2
10 3
10 4
11 0
12 0
12 1
12 2
13 0
13 1
14 0
14 1
15 0
16 0
16 1
16 2
17 0
17 1
17 2
17 3
18 0
18 1
18 2
18 3
18 4
18 5
18 6
18 7
18 8
18 9
19 0
20 0
20 1
20 2
20 3
20 4
20 5
20 6
20 7
20 8
20 9
21 0
21 1
21 2
21 3
21 4
21 5
22 0
22 1
22 2
22 3
23 0
23 1
23 2
23 3
23 4
23 5
23 6
23 7
23 8
23 9
24 0
24 1
25 0
25 1
25 2
25 3
25 4
25 5
25 6
25 7
25 8
25 9
26 0
27 0
27 1
27 2
27 3
28 0
28 1
29 0
29 1
29 2
29 3
29 4
29 5
30 0
30 1
30 2
30 3
30 4
30 5
30 6
30 7
30 8
30 9
31 0
32 0
33 0
34 0
35 0
35 1
35 2
35 3
35 4
35 5
35 6
35 7
36 0
36 1
36 2
37 0
38 0
38 1
39 0
40 0
40 1
40 2
40 3
40 4
41 0
41 1
41 2
41 3
41 4
41 5
41 6
41 7
41 8
41 9
42 0
42 1
42 2
42 3
42 4
42 5
42 6
42 7
42 8
43 0
44 0
44 1
45 0
46 0
46 1
47 0
47 1
48 0
49 0
49 1
49 2
49 3
49 4
49 5
49 6
49 7
49 8
50 0
50 1
51 0
51 1
51 2
51 3
51 4
51 5
51 6
52 0
53 0
53 1
53 2
53 3
53 4

Exception ignored in: <bound method IntVector2D.<lambda> of <cvxpy.cvxcore.python.cvxcore.IntVector2D; proxy of <Swig Object of type 'std::vector< std::vector< int,std::allocator< int > > > *' at 0x7f1c3c3c6390> >>
Traceback (most recent call last):
  File "/home/pculbertson/proto/lib/python3.6/site-packages/cvxpy/cvxcore/python/cvxcore.py", line 686, in <lambda>
    __del__ = lambda self: None
KeyboardInterrupt


1394 0
1395 0
1396 0
1397 0
1398 0
1399 0
1400 0
1401 0
1402 0
1403 0
1404 0
1404 1
1404 2
1404 3
1404 4
1404 5
1404 6
1404 7


In [71]:
global_acc = sum(sum(np.equal(ind_max,pp.labels[test_start:,0][:,None])))/(0.1*pp.n_probs)
global_acc

0.7898

In [94]:
np.mean(feasible[:ii])

0.8639455782312925