In [1]:
import torch

In [2]:
torch.__version__

'0.4.0'

In [3]:
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import torch.nn.functional as F
from itertools import chain
from cub_data import *
# `np` is used throughout this notebook; import it explicitly (after the star
# import, so the name is guaranteed to be numpy) instead of relying on
# cub_data re-exporting it.
import numpy as np
import pandas as pd
from tqdm import tqdm
import multiprocessing

In [4]:
%matplotlib inline

In [5]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
def to_var(x):
    """Return `x` moved to the notebook-wide `device`."""
    # `device` is only read here, so plain name lookup finds the
    # module-level binding.
    moved = x.to(device)
    return moved

def idx2onehot(idx, n):
    """One-hot encode a column of class indices.

    Params:
    -------
    idx: integer tensor of shape (batch, 1) with values in [0, n).
    n: width of the one-hot encoding (number of classes).
    Returns:
    --------
    onehot: float tensor of shape (batch, n), moved to the global `device`.
    """
    assert idx.size(1) == 1
    # .item() extracts the scalar; indexing a 0-dim tensor with [0] is
    # deprecated in PyTorch 0.4 and an error in later releases.
    assert torch.max(idx).item() < n

    # Allocate on the same device as `idx` so scatter_ never mixes devices.
    onehot = torch.zeros(idx.size(0), n, device=idx.device)
    onehot.scatter_(1, idx.data, 1)
    onehot = to_var(onehot)

    return onehot

In [7]:
class VAE(nn.Module):
    """Conditional VAE whose decoder is conditioned on per-class attribute vectors.

    Bundles an Encoder, a Decoder (which looks class attributes up through an
    embedding table built from `attributes`) and a Regressor, and exposes
    grouped parameter lists for building separate optimizers.
    """

    def __init__(self, encoder_layer_sizes, latent_size, decoder_layer_sizes, regressor_layer_sizes, attributes):
        """
        Params:
        -------
        encoder_layer_sizes: list of layer widths for the encoder MLP.
        latent_size: dimensionality of the latent code z.
        decoder_layer_sizes: list of layer widths for each decoder MLP.
        regressor_layer_sizes: layer widths for the regressor MLP.
        attributes: 2-D array of shape (num_labels, attribute_size).
        """
        super().__init__()

        assert isinstance(encoder_layer_sizes, list)
        assert isinstance(latent_size, int)
        assert isinstance(decoder_layer_sizes, list)

        self.latent_size = latent_size
        self.num_labels = attributes.shape[0]
        self.attribute_size = attributes.shape[1]
        self.embeddings = nn.Embedding.from_pretrained(torch.FloatTensor(attributes))

        self.encoder = Encoder(encoder_layer_sizes, latent_size, self.num_labels)
        self.decoder = Decoder(decoder_layer_sizes, latent_size, self.attribute_size, self.embeddings)
        self.regressor = Regressor(regressor_layer_sizes, attributes)

        # Group the model's parameters: encoder, decoder, and regressor.
        # These are materialized as lists: the previous chain()/filter()
        # objects were one-shot iterators, so building `vae_params` drained
        # `encoder_params`/`decoder_params`, and every group was empty after
        # its first traversal.
        self.encoder_params = list(self.encoder.parameters())
        self.decoder_params = list(self.decoder.parameters())
        self.vae_params = [
            p for p in self.encoder_params + self.decoder_params if p.requires_grad
        ]
        self.regressor_params = [
            p for p in self.regressor.parameters() if p.requires_grad
        ]

    def sample_z(self, mu, logvar):
        """
        Reparameterization trick: z = mu + std*eps; eps ~ N(0, I)
        """
        # Draw the noise directly with mu's shape, dtype and device instead of
        # sampling on the host and copying it over.
        eps = torch.randn_like(mu)
        return mu + torch.exp(logvar/2) * eps

    def sample_z_prior(self, bsize):
        """
        Sample z ~ p(z) = N(0, I)
        """
        # No input tensor to borrow a device from here, so rely on to_var.
        z = to_var(torch.randn(bsize, self.latent_size))
        return z

    def sample_c_prior(self, bsize):
        """
        Sample c uniformly from the `num_labels` classes.
        Returns a LongTensor of shape (bsize, 1).
        """
        c = to_var(
            torch.LongTensor(np.random.randint(0, self.num_labels, (bsize,1)))
        )
        return c

    def forward(self, x, c=None):
        """
        Params:
        -------
        x: batch of input feature vectors.
        c: class indices to condition the decoder on; when None they are
           sampled from the prior over classes.
        Returns:
        --------
        recon_loss: reconstruction loss of VAE.
        kl_loss: KL-div loss of VAE.
        """
        batch_size = x.size(0)

        means, log_var = self.encoder(x)

        z = self.sample_z(means, log_var)

        if c is None:
            c = self.sample_c_prior(batch_size)

        recon_x = self.decoder(z, c)

        # Mean reduction is the default; the explicit `size_average=True`
        # keyword is deprecated in later PyTorch releases.
        recon_loss = F.mse_loss(recon_x, x)
        # KL(q(z|x) || N(0, I)), summed over latent dims, averaged over batch.
        kl_loss = torch.mean(0.5 * torch.sum(torch.exp(log_var) + means**2 - 1 - log_var, 1))

        return recon_loss, kl_loss

    def synthesize_examples(self, n=1):
        """Decode `n` samples with z and c both drawn from their priors."""
        batch_size = n
        z = self.sample_z_prior(batch_size)
        c = self.sample_c_prior(batch_size)

        recon_x = self.decoder(z, c)

        return recon_x



class Encoder(nn.Module):
    """MLP encoder that maps an input to the mean and log-variance of q(z|x)."""

    def __init__(self, layer_sizes, latent_size, num_labels):
        super().__init__()

        # Hidden stack: Linear followed by ReLU for each consecutive size pair.
        self.MLP = nn.Sequential()
        for idx in range(len(layer_sizes) - 1):
            fan_in, fan_out = layer_sizes[idx], layer_sizes[idx + 1]
            self.MLP.add_module("L%i" % idx, nn.Linear(fan_in, fan_out))
            self.MLP.add_module("A%i" % idx, nn.ReLU())

        # Two heads on top of the last hidden layer.
        hidden_size = layer_sizes[-1]
        self.linear_means = nn.Linear(hidden_size, latent_size)
        self.linear_var = nn.Linear(hidden_size, latent_size)

    def forward(self, x):
        hidden = self.MLP(x)
        means = self.linear_means(hidden)
        # Softplus keeps the variance positive; the log turns it into the
        # log-variance expected by the caller.
        variances = F.softplus(self.linear_var(hidden))
        return means, torch.log(variances)


class Decoder(nn.Module):
    """Attribute-conditioned decoder: two parallel MLPs whose outputs are summed."""

    def __init__(self, layer_sizes, latent_size, attribute_size, embeddings):
        super().__init__()

        self.embeddings = embeddings
        self.MLP1 = nn.Sequential()
        self.MLP2 = nn.Sequential()

        # Each branch consumes the latent code concatenated with the class
        # attribute vector.
        widths = [latent_size + attribute_size] + layer_sizes
        last = len(layer_sizes) - 1

        # Both branches share one architecture. MLP1 is populated completely
        # before MLP2, so layers are created in the same order as before.
        for branch in (self.MLP1, self.MLP2):
            for idx in range(len(layer_sizes)):
                branch.add_module("L%i" % idx, nn.Linear(widths[idx], widths[idx + 1]))
                # No activation after the output layer.
                if idx != last:
                    branch.add_module("A%i" % idx, nn.ReLU())

    def forward(self, z, c):
        # Look up one attribute vector per class index and append it to z.
        attrs = self.embeddings(c.view(-1))
        joint = torch.cat((z, attrs), dim=-1)
        return self.MLP1(joint) + self.MLP2(joint)
    
class Regressor(nn.Module):
    """Predicts an attribute vector from a feature vector and scores it
    against every class's attributes with cosine similarity."""

    def __init__(self, layer_sizes, attributes):
        """
        Params:
        -------
        layer_sizes: widths of the MLP layers; the final projection to
                     `attribute_size` is appended automatically.
        attributes: 2-D array of shape (num_labels, attribute_size).
        """
        super().__init__()

        self.num_labels = attributes.shape[0]
        self.attribute_size = attributes.shape[1]
        # Deliberately a plain attribute rather than a registered buffer:
        # a buffer would add an `attributes` key to the state_dict and break
        # strict loading of existing checkpoints. forward() moves it to the
        # input's device instead.
        self.attributes = torch.FloatTensor(attributes).view(
            (1, self.num_labels, self.attribute_size)
        )

        self.MLP = nn.Sequential()
        # `i` is pre-set so the output layer's name is defined even when the
        # loop body never runs (single-element layer_sizes).
        i = 0
        for i, (in_size, out_size) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
            self.MLP.add_module(name="L%i"%(i), module=nn.Linear(in_size, out_size))
            self.MLP.add_module(name="A%i"%(i), module=nn.ReLU())

        self.MLP.add_module(name="L%i"%(i+1), module=nn.Linear(layer_sizes[-1], self.attribute_size))

    def forward(self, x):
        """Return log-probabilities over classes, shape (batch_size, num_labels)."""
        # Predicted attributes, reshaped to (batch_size, 1, attribute_size) so
        # they broadcast against (1, num_labels, attribute_size).
        a = self.MLP(x).view((-1, 1, self.attribute_size))
        # Module.to()/cuda() does not move plain tensor attributes, so bring
        # the class attributes to the activations' device (a no-op once they
        # already live there).
        self.attributes = self.attributes.to(a.device)
        # Logits of shape (batch_size, num_labels).
        logits = F.cosine_similarity(a, self.attributes, dim=2, eps=1e-6)
        c_hat = F.log_softmax(logits, dim=-1)
        return c_hat

In [8]:
# Load the dataset through the project-local CUB_data helper.
# NOTE(review): judging by the file names, the first file holds the attribute
# splits and the second the ResNet-101 features — confirm against cub_data.
data = CUB_data('xlsa17/data/CUB/att_splits.mat', 'xlsa17/data/CUB/res101.mat')

In [9]:
# Train+val split. c_train is used as integer class labels below (augment()
# compares it against each label; the classifiers are scored against it).
x_train, c_train = data.sets['trainval_X'], data.sets['trainval_a']

In [10]:
# Validation split; only used below to score the SVM.
x_val, c_val = data.sets['val_X'], data.sets['val_a']

In [11]:
# 'test_seen' split: inputs and labels used for the "test seen accuracy" figures.
test_seen_x, test_seen_c = data.sets['test_seen_X'], data.sets['test_seen_a']

In [12]:
# 'test_unseen' split: inputs and labels used for the "test unseen accuracy"
# figures; synthesize_uns() also derives its class list from test_unseen_c.
test_unseen_x, test_unseen_c = data.sets['test_unseen_X'], data.sets['test_unseen_a']

In [13]:
# Arguments in order: encoder layer sizes, latent size, decoder layer sizes,
# regressor layer sizes, class attribute matrix.
model = VAE([2048, 512, 512], 78, [512, 2048], [2048, 512], data.class_attributes).to(device)

In [14]:
# Deserialize the checkpoint onto the CPU first; load_state_dict then copies
# the values into the model's parameters wherever they live. This lets a
# checkpoint saved on a GPU load on a CPU-only machine as well.
model.load_state_dict(
    torch.load('weights/vae_relu_wu3_40_40_40.pt', map_location=lambda storage, loc: storage)
)

In [15]:
# The model is only used for sampling from here on: switch to evaluation mode.
model.eval()

VAE(
  (embeddings): Embedding(200, 312)
  (encoder): Encoder(
    (MLP): Sequential(
      (L0): Linear(in_features=2048, out_features=512, bias=True)
      (A0): ReLU()
      (L1): Linear(in_features=512, out_features=512, bias=True)
      (A1): ReLU()
    )
    (linear_means): Linear(in_features=512, out_features=78, bias=True)
    (linear_var): Linear(in_features=512, out_features=78, bias=True)
  )
  (decoder): Decoder(
    (embeddings): Embedding(200, 312)
    (MLP1): Sequential(
      (L0): Linear(in_features=390, out_features=512, bias=True)
      (A0): ReLU()
      (L1): Linear(in_features=512, out_features=2048, bias=True)
    )
    (MLP2): Sequential(
      (L0): Linear(in_features=390, out_features=512, bias=True)
      (A0): ReLU()
      (L1): Linear(in_features=512, out_features=2048, bias=True)
    )
  )
  (regressor): Regressor(
    (MLP): Sequential(
      (L0): Linear(in_features=2048, out_features=512, bias=True)
      (A0): ReLU()
      (L1): Linear(in_features=512,

In [16]:
def augment(per_class=100):
    """Top every class up to `per_class` samples with decoder-generated features.

    Reads the notebook globals `model`, `x_train` and `c_train`.

    Params:
    -------
    per_class: target number of samples per class (real + synthetic).
    Returns:
    --------
    (X, c): numpy arrays. X stacks, per class, the synthetic rows followed by
            the real rows; c is an int32 column of matching class labels.
    """
    X_syn_train = []
    c_syn_train = []
    train_labels = c_train.reshape(-1)
    # Pure sampling: no need to record an autograd graph.
    with torch.no_grad():
        for label in range(model.num_labels):
            real_idx = np.where(train_labels == label)[0]
            x_real = x_train[real_idx]
            n_real = x_real.shape[0]
            # A class that already has `per_class` or more real samples gets
            # no synthetic ones (the count used to go negative and crash).
            n_syn = max(per_class - n_real, 0)

            if n_syn > 0:
                syn_labels = np.full((n_syn, 1), label, dtype='int32')
                z = model.sample_z_prior(n_syn)
                c = to_var(torch.LongTensor(syn_labels))
                x = model.decoder(z, c)
                X_syn_train.append(x.detach().cpu().numpy())
            X_syn_train.append(x_real)
            # One label per emitted row, so X and c always stay aligned.
            c_syn_train.append(np.full((n_syn + n_real, 1), label, dtype='int32'))
    return np.concatenate(X_syn_train, axis=0), np.concatenate(c_syn_train, axis=0)

def synthesize(bs):
    """Generate `bs` synthetic feature vectors for every class known to the model.

    Reads the notebook global `model`.

    Returns:
    --------
    (X, c): numpy arrays; X has one row per generated sample, c is an int32
            column of the corresponding class labels.
    """
    X_syn_train = []
    c_syn_train = []
    # Pure sampling: no need to record an autograd graph.
    with torch.no_grad():
        # model.num_labels is the number of rows in the attribute table, i.e.
        # the full range of labels the decoder's embedding can look up.
        for label in range(model.num_labels):
            label_array = np.full((bs, 1), label, dtype='int32')

            z = model.sample_z_prior(bs)
            c = to_var(torch.LongTensor(label_array))
            x = model.decoder(z, c)
            # .cpu() is a no-op for tensors that already live on the host.
            X_syn_train.append(x.detach().cpu().numpy())
            c_syn_train.append(label_array)
    return np.concatenate(X_syn_train, axis=0), np.concatenate(c_syn_train, axis=0)

def synthesize_uns(bs):
    """Generate `bs` synthetic feature vectors for each class in `test_unseen_c`.

    Reads the notebook globals `model` and `test_unseen_c`.

    Returns:
    --------
    (X, c): numpy arrays; X has one row per generated sample, c is an int32
            column of the corresponding class labels. Classes appear in the
            order they first occur in `test_unseen_c`.
    """
    # Distinct labels in first-appearance order. Only the labels are needed,
    # so there is no reason to group the feature rows (and no bare `except`
    # that could hide unrelated errors).
    un_cls = list(dict.fromkeys(int(y[0]) for y in test_unseen_c))

    X_syn_train = []
    c_syn_train = []
    # Pure sampling: no need to record an autograd graph.
    with torch.no_grad():
        for label in un_cls:
            label_array = np.full((bs, 1), label, dtype='int32')

            z = model.sample_z_prior(bs)
            c = to_var(torch.LongTensor(label_array))
            x = model.decoder(z, c)
            # .cpu() is a no-op for tensors that already live on the host.
            X_syn_train.append(x.detach().cpu().numpy())
            c_syn_train.append(label_array)
    return np.concatenate(X_syn_train, axis=0), np.concatenate(c_syn_train, axis=0)

In [17]:
# 100 synthetic feature vectors for each of the 200 classes (20000 rows in total).
x, c = synthesize(100)

In [18]:
x.shape

(20000, 2048)

In [19]:
c.shape

(20000, 1)

In [None]:
# Weight 1.0 for every class in seen_cls.
# NOTE(review): seen_cls is not defined anywhere in the visible notebook, so
# this cell raises NameError on a fresh kernel. Presumably it is the set of
# training class indices — TODO define it explicitly.
cls_weights = {i:1.0 for i in seen_cls}

In [None]:
# Every class index in [0, 200) that is not in seen_cls gets weight 0.2.
# NOTE(review): cls_weights is never passed to a classifier in the visible
# notebook — TODO wire it in or remove these cells.
for i in range(200):
    if i not in seen_cls:
        cls_weights[i] = 0.2

In [19]:
from sklearn.svm import LinearSVC
from sklearn.externals import joblib

In [20]:
from sklearn.neural_network import MLPClassifier

In [21]:
# One hidden layer of 512 units; early stopping holds out part of the training
# data for validation (validation_fraction=0.1 in the repr below).
# NOTE(review): per the scikit-learn docs `learning_rate` is only used with
# solver='sgd', while the repr below shows solver='adam' — verify whether
# 'adaptive' has any effect here.
nnc = MLPClassifier(hidden_layer_sizes=512, learning_rate='adaptive', early_stopping=True, verbose=True)

In [22]:
%%time
# Train on the synthetic features only; c has shape (N, 1), so flatten it to
# the 1-D label vector.
nnc.fit(x, c.reshape(-1))

Iteration 1, loss = 3.99299838
Validation score: 0.220500
Iteration 2, loss = 2.69652770
Validation score: 0.327500
Iteration 3, loss = 2.21362648
Validation score: 0.412500
Iteration 4, loss = 1.85767722
Validation score: 0.470500
Iteration 5, loss = 1.57754455
Validation score: 0.511000
Iteration 6, loss = 1.34994003
Validation score: 0.546000
Iteration 7, loss = 1.15472538
Validation score: 0.586500
Iteration 8, loss = 1.01374667
Validation score: 0.624500
Iteration 9, loss = 0.88019628
Validation score: 0.639000
Iteration 10, loss = 0.77421853
Validation score: 0.674000
Iteration 11, loss = 0.69601373
Validation score: 0.685000
Iteration 12, loss = 0.62783942
Validation score: 0.698000
Iteration 13, loss = 0.56067752
Validation score: 0.723500
Iteration 14, loss = 0.50965769
Validation score: 0.724000
Iteration 15, loss = 0.46895467
Validation score: 0.722000
Iteration 16, loss = 0.42861956
Validation score: 0.740500
Iteration 17, loss = 0.39184919
Validation score: 0.742500
Iterat

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=512, learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=True, warm_start=False)

In [23]:
# Accuracy of the fitted classifier on the real features of each split.
print(
    nnc.score(x_train, c_train.reshape(-1)), 'x train accuracy \n',
    nnc.score(test_seen_x, test_seen_c.reshape(-1)), 'test seen accuracy \n',
    nnc.score(test_unseen_x, test_unseen_c.reshape(-1)), 'test unseen accuracy \n',
)

0.5286949128524869 x train accuracy 
 0.41043083900226757 test seen accuracy 
 0.18233906302662622 test unseen accuracy 



In [34]:
# NOTE(review): duplicate of the evaluation cell above, with a different
# recorded output (0.0 on both seen splits). Presumably `nnc` was re-fit on
# unseen-class-only synthetic data (e.g. from synthesize_uns) in a cell that
# is not part of the visible notebook — TODO confirm and restore that cell.
print(nnc.score(x_train, c_train.reshape(-1)), 'x train accuracy \n', \
      nnc.score(test_seen_x, test_seen_c.reshape(-1)), 'test seen accuracy \n', \
      nnc.score(test_unseen_x, test_unseen_c.reshape(-1)), 'test unseen accuracy \n')

0.0 x train accuracy 
 0.0 test seen accuracy 
 0.40613414223120997 test unseen accuracy 



In [24]:
# NOTE(review): another copy of the same evaluation; the recorded output again
# shows 0.0 on the seen splits, so `nnc` was presumably trained on a different
# synthetic set in a cell not shown here — TODO confirm.
print(nnc.score(x_train, c_train.reshape(-1)), 'x train accuracy \n', \
      nnc.score(test_seen_x, test_seen_c.reshape(-1)), 'test seen accuracy \n', \
      nnc.score(test_unseen_x, test_unseen_c.reshape(-1)), 'test unseen accuracy \n')

0.0 x train accuracy 
 0.0 test seen accuracy 
 0.47960903269295585 test unseen accuracy 



In [37]:
# NOTE(review): fourth copy of the same evaluation, executed out of order
# (count 37). The training data behind this output is not recoverable from the
# visible notebook — TODO replace the copies with one helper called after each fit.
print(nnc.score(x_train, c_train.reshape(-1)), 'x train accuracy \n', \
      nnc.score(test_seen_x, test_seen_c.reshape(-1)), 'test seen accuracy \n', \
      nnc.score(test_unseen_x, test_unseen_c.reshape(-1)), 'test unseen accuracy \n')

0.401161966841434 x train accuracy 
 0.35090702947845803 test seen accuracy 
 0.20896528479946072 test unseen accuracy 



In [None]:
# NOTE(review): `svm` is first assigned in the next cell, so on a fresh kernel
# run top to bottom this raises NameError — TODO remove or move this cell.
del svm

In [15]:
# Linear SVM solved in the primal (dual=False) with at most 20 iterations.
# NOTE(review): the scores recorded below are about 0.005, i.e. roughly 1/200
# (chance for 200 classes), even on the data it was fit on. Possibly
# max_iter=20 stops far short of convergence — TODO investigate.
svm = LinearSVC(dual=False, C=1, max_iter=20)

In [16]:
%%time
# Fit on the synthetic features; the recorded wall time is about 53 minutes.
svm.fit(x, c.reshape(-1))

CPU times: user 52min 42s, sys: 14.3 s, total: 52min 56s
Wall time: 52min 56s


LinearSVC(C=1, class_weight=None, dual=False, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=20,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [17]:
# Accuracy on the synthetic data the SVM was fit on.
svm.score(x, c.reshape(-1))

0.00555

In [18]:
# Accuracy on the real train+val features.
svm.score(x_train, c_train.reshape(-1))

0.0042510982003684285

In [20]:
# Accuracy on the validation split.
svm.score(x_val, c_val.reshape(-1))

0.002036659877800407

In [22]:
# Accuracy on the 'test_seen' split.
svm.score(test_seen_x, test_seen_c.reshape(-1))

0.002834467120181406

In [24]:
# Accuracy on the 'test_unseen' split, kept in `a` and displayed in the next cell.
a = svm.score(test_unseen_x, test_unseen_c.reshape(-1))

In [25]:
a

0.0003370407819346141

In [46]:
# Mean per-class accuracy of `nnc` over the unseen test classes.
# `un_cls` used to exist only as a local inside synthesize_uns(), so this cell
# failed on a fresh kernel; derive the class list here from the test labels.
unseen_labels = test_unseen_c.reshape(-1)
un_cls = np.unique(unseen_labels)
per_class_acc = []
for label in un_cls:
    idx = np.where(unseen_labels == label)[0]
    per_class_acc.append(nnc.score(test_unseen_x[idx], unseen_labels[idx]))
print(sum(per_class_acc) / len(per_class_acc))

0.20805838344247857


In [None]:
# Persist the fitted SVM to disk.
# NOTE(review): the cell below loads a different file name ('..._4_2_10.pkl')
# than the one written here — confirm which is intended.
joblib.dump(svm, 'svc_vae_relu_l1_7_1_10.pkl')

In [None]:
# SECURITY: joblib.load unpickles arbitrary objects and can execute code; only
# load files produced by a trusted run of this notebook.
# NOTE(review): this is not the file written by the dump cell above
# ('..._7_1_10.pkl') — confirm which model is meant to be restored.
svm = joblib.load('svc_vae_relu_l1_4_2_10.pkl')