In [65]:
import torch
import torchvision
from torchvision import transforms, datasets
import numpy as np
import matplotlib.pyplot as plt
import time
import os
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import math
import h5py
import pickle

### Define ResNet

In [34]:
## Basic ResNet model

def init_layer(L):
    # Initialization using fan-in
    if isinstance(L, nn.Conv2d):
        n = L.kernel_size[0]*L.kernel_size[1]*L.out_channels
        L.weight.data.normal_(0,math.sqrt(2.0/float(n)))
    elif isinstance(L, nn.BatchNorm2d):
        L.weight.data.fill_(1)
        L.bias.data.fill_(0)




# Simple ResNet Block
class SimpleBlock(nn.Module):
    def __init__(self, indim, outdim, half_res):
        super(SimpleBlock, self).__init__()
        self.indim = indim
        self.outdim = outdim
        self.C1 = nn.Conv2d(indim, outdim, kernel_size=3, stride=2 if half_res else 1, padding=1, bias=False)
        self.relu1 = nn.ReLU(inplace=True)
        self.relu2 = nn.ReLU(inplace=True)
        self.BN1 = nn.BatchNorm2d(outdim)
        self.C2 = nn.Conv2d(outdim, outdim,kernel_size=3, padding=1,bias=False)
        self.BN2 = nn.BatchNorm2d(outdim)

        self.parametrized_layers = [self.C1, self.C2, self.BN1, self.BN2]

        self.half_res = half_res

        # if the input number of channels is not equal to the output, then need a 1x1 convolution
        if indim!=outdim:
            self.shortcut = nn.Conv2d(indim, outdim, 1, 2 if half_res else 1, bias=False)
            self.parametrized_layers.append(self.shortcut)
            self.BNshortcut = nn.BatchNorm2d(outdim)
            self.parametrized_layers.append(self.BNshortcut)
            self.shortcut_type = '1x1'
        else:
            self.shortcut_type = 'identity'

        for layer in self.parametrized_layers:
            init_layer(layer)

    def forward(self, x):
        out = self.C1(x)
        out = self.BN1(out)
        out = self.relu1(out)
        out = self.C2(out)
        out = self.BN2(out)
        short_out = x if self.shortcut_type == 'identity' else self.BNshortcut(self.shortcut(x))
        out = out + short_out
        out = self.relu2(out)
        return out



# Bottleneck block
class BottleneckBlock(nn.Module):
    def __init__(self, indim, outdim, half_res):
        super(BottleneckBlock, self).__init__()
        bottleneckdim = int(outdim/4)
        self.indim = indim
        self.outdim = outdim
        self.C1 = nn.Conv2d(indim, bottleneckdim, kernel_size=1,  bias=False)
        self.relu = nn.ReLU()
        self.BN1 = nn.BatchNorm2d(bottleneckdim)
        self.C2 = nn.Conv2d(bottleneckdim, bottleneckdim, kernel_size=3, stride=2 if half_res else 1,padding=1)
        self.BN2 = nn.BatchNorm2d(bottleneckdim)
        self.C3 = nn.Conv2d(bottleneckdim, outdim, kernel_size=1, bias=False)
        self.BN3 = nn.BatchNorm2d(outdim)

        self.parametrized_layers = [self.C1, self.BN1, self.C2, self.BN2, self.C3, self.BN3]
        self.half_res = half_res


        # if the input number of channels is not equal to the output, then need a 1x1 convolution
        if indim!=outdim:
            self.shortcut = nn.Conv2d(indim, outdim, 1, stride=2 if half_res else 1, bias=False)
            self.parametrized_layers.append(self.shortcut)
            self.shortcut_type = '1x1'
        else:
            self.shortcut_type = 'identity'

        for layer in self.parametrized_layers:
            init_layer(layer)


    def forward(self, x):

        short_out = x if self.shortcut_type == 'identity' else self.shortcut(x)
        out = self.C1(x)
        out = self.BN1(out)
        out = self.relu(out)
        out = self.C2(out)
        out = self.BN2(out)
        out = self.relu(out)
        out = self.C3(out)
        out = self.BN3(out)
        out = out + short_out

        out = self.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self,block,list_of_num_layers, list_of_out_dims, num_classes=1000, only_trunk=False ):
        # list_of_num_layers specifies number of layers in each stage
        # list_of_out_dims specifies number of output channel for each stage
        super(ResNet,self).__init__()
        self.grads = []
        self.fmaps = []
        assert len(list_of_num_layers)==4, 'Can have only four stages'
        conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                                               bias=False)
        bn1 = nn.BatchNorm2d(64)
        relu = nn.ReLU()
        pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        init_layer(conv1)
        init_layer(bn1)


        trunk = [conv1, bn1, relu, pool1]
        indim = 64
        for i in range(4):

            for j in range(list_of_num_layers[i]):
                half_res = (i>=1) and (j==0)
                B = block(indim, list_of_out_dims[i], half_res)
                trunk.append(B)
                indim = list_of_out_dims[i]



        self.only_trunk=only_trunk
        if not only_trunk:
            avgpool = nn.AvgPool2d(7)
            trunk.append(avgpool)

        self.trunk = nn.Sequential(*trunk)
        self.final_feat_dim = indim
        if not only_trunk:
            self.classifier = nn.Linear(indim, num_classes)
            self.classifier.bias.data.fill_(0)

    def forward(self, x):
        out = self.trunk(x)
        if self.only_trunk:
            return out
        out = out.view(out.size(0),-1)
        scores = self.classifier(out)
        return scores, out


def ResNet10(num_classes=1000, only_trunk=False):
    return ResNet(SimpleBlock, [1,1,1,1],[64,128,256,512], num_classes, only_trunk)

In [35]:
data_transform = transforms.Compose([
        transforms.Resize((300,300)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
train_dataset = datasets.ImageFolder(root='../aligned-data/train',
                                           transform=data_transform)
train_dataset_loader = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=8, shuffle=True, 
                                            num_workers=4)
test_dataset = datasets.ImageFolder(root='../aligned-data/test',
                                           transform=data_transform)
test_dataset_loader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=90, shuffle=True,
                                             num_workers=4)
test_x, test_y = next(iter(test_dataset_loader))
test_x, test_y = Variable(test_x), Variable(test_y)

In [36]:
net = ResNet10(9)

In [37]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0001)

In [40]:
def calc_accuracy(mdl, X, Y):
    # TODO: why can't we call .data.numpy() for train_acc as a whole?
    outputs, _ = mdl(X)
    max_vals, max_indices = torch.max(outputs,1)
    train_acc = (max_indices == Y).sum().data.numpy()/max_indices.size()[0]
    return train_acc

### Train ResNet

In [41]:
for epoch in range(10):  # loop over the dataset multiple times
    acc = 0
    running_loss = 0.0
    test_acc = 0
    train_dataset_loader = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=20, shuffle=True,
                                             num_workers=4)
    
    for i, data in enumerate(train_dataset_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs, _ = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        acc += calc_accuracy(net, inputs, labels)
        # print statistics
        running_loss += loss.data[0]
        #if i % 5 == 4:    # print every 2000 mini-batches
            #print('[%d, %5d] loss: %.3f' %
            #      (epoch + 1, i + 1, running_loss / 5))
        #    running_loss = 0.0
        
    # Compute test accuracy
    test_acc += calc_accuracy(net, test_x, test_y)

    print('Loss: ', running_loss)
    print('Training accuracy: ', acc/(len(train_dataset_loader)))
    print('Test accuracy: ', test_acc)

print('Finished Training')

Loss:  46.823426485061646
Training accuracy:  [0.34347826]
Test accuracy:  [0.35555556]
Loss:  37.87575137615204
Training accuracy:  [0.46304348]
Test accuracy:  [0.44444444]
Loss:  30.224955201148987
Training accuracy:  [0.61304348]
Test accuracy:  [0.52222222]
Loss:  22.2606044113636
Training accuracy:  [0.72826087]
Test accuracy:  [0.61111111]
Loss:  17.46561226248741
Training accuracy:  [0.81521739]
Test accuracy:  [0.68888889]
Loss:  11.944070979952812
Training accuracy:  [0.88043478]
Test accuracy:  [0.72222222]
Loss:  9.013656049966812
Training accuracy:  [0.94130435]
Test accuracy:  [0.78888889]
Loss:  7.262864649295807
Training accuracy:  [0.95869565]
Test accuracy:  [0.75555556]
Loss:  5.758986331522465
Training accuracy:  [0.96956522]
Test accuracy:  [0.8]
Loss:  5.11051269993186
Training accuracy:  [0.96956522]
Test accuracy:  [0.76666667]
Finished Training


### Save features

In [59]:
def save_features(model, data_loader, outfile ):

    f = h5py.File(outfile, 'w')
    max_count = len(data_loader)*data_loader.batch_size
    all_labels = f.create_dataset('all_labels',(max_count,), dtype='i')
    all_feats=None
    count=0
    for i, (x,y) in enumerate(data_loader):
        if i%10 == 0:
            print('{:d}/{:d}'.format(i, len(data_loader)))
        x_var = Variable(x)
        scores, feats = model(x_var)
        if all_feats is None:
            all_feats = f.create_dataset('all_feats', (max_count, feats.size(1)), dtype='f')
        all_feats[count:count+feats.size(0),:] = feats.data.cpu().numpy()
        all_labels[count:count+feats.size(0)] = y.cpu().numpy()
        print(all_labels[count:count+feats.size(0)])
        count = count + feats.size(0)

    print(all_labels)
    count_var = f.create_dataset('count', (1,), dtype='i')
    count_var[0] = count

    f.close()

In [61]:
train_dataset = datasets.ImageFolder(root='../novel-data/train',
                                           transform=data_transform)
train_dataset_loader = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=20, shuffle=True,
                                             num_workers=4)
save_features(net, train_dataset_loader, 'resnet_features.hdf5')

0/35
[ 1  0  3  4  7  1 13  9 11  7  9  7  7 11  5  3  4  7  3  8]
[11  1  1  8 11  2 10  0  9  9  8 11  7  5  9  9 11  3  3  3]
[ 1 11 12 10  6 12  9  0  7  0  9 13  6 11  2  9  0  9 10  3]
[ 2  6  5 12  3  7  4  2  0  9  6  8  0 13  5  9 13  2 13  8]
[ 7 13 12  9 10  4 13  8  9  4  9  9  6 11  2  2  1  8  7 12]
[ 2  1  6  2 10  1 10 13  2 12  3 10  3  9  8 13 13  5  1  7]
[ 6  1  4  8  8 10  9  2  4  4  1  8 11  0 11 12  7  4  5 13]
[10  1  3  9 10 11  1 11 10  5  4 11  4  5 11 13  0  6 13  3]
[11 13 11 10  1  9  4 13  2 13 10  2 13  7  8 13  6 13  9  4]
[10 11  2 11  9  7 10  1  7  6  5  0 12 12  0  7 10  6  7  8]
10/35
[ 5  0  5 10  3 12  0  6  4 13  8  5  4 13 11  2  7 10  5  9]
[ 0  1  8 12 10  7 12  3  9  8  6  5 11 12 13  9  3  4 12  4]
[13  9  8  8  8  6  6  7 13 10  3  0  1 11  6 12  6  0  6 12]
[ 2 12  6  8  9  3  6 13  7  9 11 11  1  5  2 11 13  8  0  7]
[10  2  7  1  0  4 10  8 11  6  5  8  1  8  3 10  1 10  4  8]
[ 6  6  8  4  6  2  7 10  7 13 13  8  4  8  3  0  5  0 13 1

### K Means

In [62]:
def kmeans(x, k, niter=1, batchsize=1000):
    batchsize = min(batchsize, x.shape[0])

    nsamples = x.shape[0]
    ndims = x.shape[1]

    x2 = np.sum(x**2, axis=1)
    centroids = np.random.randn(k, ndims)
    centroidnorm = np.sqrt(np.sum(centroids**2, axis=1, keepdims=True))
    centroids = centroids / centroidnorm
    totalcounts = np.zeros(k)

    for i in range(niter):
        c2 = np.sum(centroids**2, axis=1,keepdims=True)*0.5
        summation = np.zeros((k, ndims))
        counts = np.zeros(k)
        loss = 0

        for j in range(0, nsamples, batchsize):
            lastj = min(j+batchsize, nsamples)
            batch = x[j:lastj]
            m = batch.shape[0]

            tmp = np.dot(centroids, batch.T)
            tmp = tmp - c2
            val = np.max(tmp,0)
            labels = np.argmax(tmp,0)
            loss = loss + np.sum(np.sum(x2[j:lastj])*0.5 - val)

            S = np.zeros((k, m))
            S[labels, np.arange(m)] = 1
            summation = summation + np.dot(S, batch)
            counts = counts + np.sum(S, axis=1)

        for j in range(k):
            if counts[j]>0:
                centroids[j] = summation[j] / counts[j]

        totalcounts = totalcounts + counts
        for j in range(k):
            if totalcounts[j] == 0:
                idx = np.random.choice(nsamples)
                centroids[j] = x[idx]



    return centroids

### Cluster features

In [63]:
def cluster_feats(filehandle, base_classes, cachefile, n_clusters=5):
    if os.path.isfile(cachefile):
        with open(cachefile, 'rb') as f:
            centroids = pickle.load(f)
    else:
        centroids = []
        all_labels = filehandle['all_labels'][...]
        all_feats = filehandle['all_feats']

        count = filehandle['count'][0]
        for j, i in enumerate(base_classes):
            print('Clustering class {:d}:{:d}'.format(j,i))
            idx = np.where(all_labels==i)[0]
            idx = idx[idx<count]
            X = all_feats[idx,:]
            # use a reimplementation of torch kmeans for reproducible results
            # TODO: Figure out why this is important
            centroids_this = kmeans(X, n_clusters, 20)
            centroids.append(centroids_this)
        with open(cachefile, 'wb') as f:
            pickle.dump(centroids, f)
    return centroids

In [104]:
base_classes = [0, 1, 2, 3, 4, 5, 6, 7, 8]
novel_classes = [9, 10, 11, 12, 13]
with h5py.File('resnet_features.hdf5', 'r') as features_file:
    centroids = cluster_feats(features_file, base_classes, 'centroids.pkl', 5)

Clustering class 0:0
Clustering class 1:1
Clustering class 2:2
Clustering class 3:3
Clustering class 4:4
Clustering class 5:5
Clustering class 6:6
Clustering class 7:7
Clustering class 8:8


### Mine Analogies

In [105]:
def get_difference_vectors(c_i):
    diff_i = c_i[:,np.newaxis,:] - c_i[np.newaxis,:,:]
    diff_i = diff_i.reshape((-1, diff_i.shape[2]))
    diff_i_norm = np.sqrt(np.sum(diff_i**2,axis=1, keepdims=True))
    diff_i = diff_i / (diff_i_norm + 0.00001)
    return diff_i

def mine_analogies(centroids):
    n_clusters = centroids[0].shape[0]

    analogies = np.zeros((n_clusters*n_clusters*len(centroids),4), dtype=int)
    analogy_scores = np.zeros(analogies.shape[0])
    start=0

    I, J = np.unravel_index(np.arange(n_clusters**2), (n_clusters, n_clusters))
    # for every class
    for i, c_i in enumerate(centroids):

        # get normalized difference vectors between cluster centers
        diff_i = get_difference_vectors(c_i)
        diff_i_t = torch.Tensor(diff_i)


        bestdots = np.zeros(diff_i.shape[0])
        bestdotidx = np.zeros((diff_i.shape[0],2),dtype=int)

        # for every other class
        for j, c_j in enumerate(centroids):
            if i==j:
                continue
            print(i,j)

            # get normalized difference vectors
            diff_j = get_difference_vectors(c_j)
            diff_j = torch.Tensor(diff_j)

            #compute cosine distance and take the maximum
            dots = diff_i_t.mm(diff_j.transpose(0,1))
            maxdots, argmaxdots = dots.max(1)
            maxdots = maxdots.cpu().numpy().reshape(-1)
            argmaxdots = argmaxdots.cpu().numpy().reshape(-1)

            # if maximum is better than best seen so far, update
            betteridx = maxdots>bestdots
            bestdots[betteridx] = maxdots[betteridx]
            bestdotidx[betteridx,0] = j*n_clusters + I[argmaxdots[betteridx]]
            bestdotidx[betteridx,1] = j*n_clusters + J[argmaxdots[betteridx]]


        # store discovered analogies
        stop = start+diff_i.shape[0]
        analogies[start : stop,0]=i*n_clusters + I
        analogies[start : stop,1]=i*n_clusters + J
        analogies[start : stop,2:] = bestdotidx
        analogy_scores[start : stop] = bestdots
        start = stop

    #prune away trivial analogies
    good_analogies = (analogy_scores>0) & (analogies[:,0]!=analogies[:,1]) & (analogies[:,2]!=analogies[:,3])
    return analogies[good_analogies,:], analogy_scores[good_analogies]

In [106]:
analogies, analogy_scores = mine_analogies(centroids)
np.save('analogies.npy', analogies.astype(int))

0 1
0 2
0 3
0 4
0 5
0 6
0 7
0 8
1 0
1 2
1 3
1 4
1 5
1 6
1 7
1 8
2 0
2 1
2 3
2 4
2 5
2 6
2 7
2 8
3 0
3 1
3 2
3 4
3 5
3 6
3 7
3 8
4 0
4 1
4 2
4 3
4 5
4 6
4 7
4 8
5 0
5 1
5 2
5 3
5 4
5 6
5 7
5 8
6 0
6 1
6 2
6 3
6 4
6 5
6 7
6 8
7 0
7 1
7 2
7 3
7 4
7 5
7 6
7 8
8 0
8 1
8 2
8 3
8 4
8 5
8 6
8 7


### Train classifier

In [107]:
def train_classifier(filehandle, base_classes, cachefile, networkfile, total_num_classes=14, lr=0.1, wd=0.0001, momentum=0.9, batchsize=20, niter=500):
    # either use pre-existing classifier or train one
    all_labels = filehandle['all_labels'][...]
    all_labels = all_labels.astype(int)
    all_feats = filehandle['all_feats']
    base_class_ids = np.where(np.in1d(all_labels, base_classes))[0]
    loss = nn.CrossEntropyLoss()
    model = nn.Linear(all_feats[0].size, total_num_classes)
    if os.path.isfile(cachefile):
        tmp = torch.load(cachefile)
        model.load_state_dict(tmp)
    elif os.path.isfile(networkfile):
        tmp = torch.load(networkfile)
        if 'module.classifier.bias' in tmp['state']:
            state_dict = {'weight':tmp['state']['module.classifier.weight'], 'bias':tmp['state']['module.classifier.bias']}
        else:
            model = nn.Linear(all_feats[0].size, total_num_classes, bias=False).cuda()
            state_dict = {'weight':tmp['state']['module.classifier.weight']}
        model.load_state_dict(state_dict)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=wd, dampening=0)
        for i in range(niter):
            optimizer.zero_grad()
            idx = np.sort(np.random.choice(base_class_ids, batchsize, replace=False))
            F = all_feats[idx,:]
            F = Variable(torch.Tensor(F))
            L = Variable(torch.LongTensor(all_labels[idx]))
            S = model(F)
            loss_val = loss(S, L)
            loss_val.backward()
            optimizer.step()
            if i % 100 == 0:
                print('Classifier training {:d}: {:f}'.format(i, loss_val.data[0]))
        torch.save(model.state_dict(), cachefile)
        
    return model

In [108]:
with h5py.File('resnet_features.hdf5', 'r') as features_file:
    classification_model = train_classifier(features_file, base_classes, 'classifier.pkl', 'random.npy')

Classifier training 0: 2.664518
Classifier training 100: 0.767853
Classifier training 200: 0.000521
Classifier training 300: 0.000106
Classifier training 400: 0.000510


### Analogy regressor train

In [114]:
class AnalogyRegressor(nn.Module):
    def __init__(self, featdim, innerdim=512):
        super(AnalogyRegressor,self).__init__()
        self.featdim = featdim
        self.innerdim = innerdim
        self.fc1 = nn.Linear(featdim*3, innerdim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(innerdim, innerdim)
        self.fc3 = nn.Linear(innerdim, featdim)

    def forward(self, a,c,d):
        x = torch.cat((a,c,d), dim=1)
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.relu(out)
        return out

def train_analogy_regressor(analogies, centroids, base_classes, trained_classifier, lr=0.1, wt=10, niter=5000, step_after=5000, batchsize=40, momentum=0.9, wd=0.0001):
    # pre-permute analogies
    permuted_analogies = analogies[np.random.permutation(analogies.shape[0])]

    # create model and init
    featdim = centroids[0].shape[1]
    model = AnalogyRegressor(featdim)
    model = model
    trained_classifier = trained_classifier
    optimizer = torch.optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=wd, dampening=momentum)
    loss_1 = nn.CrossEntropyLoss()
    loss_2 = nn.MSELoss()


    num_clusters_per_class = centroids[0].shape[0]
    centroid_labels = (np.array(base_classes).reshape((-1,1)) * np.ones((1, num_clusters_per_class))).reshape(-1)
    concatenated_centroids = np.concatenate(centroids, axis=0)


    start=0
    avg_loss_1 = avg_loss_2 = count = 0.0
    for i in range(niter):
        # get current batch of analogies
        stop = min(start+batchsize, permuted_analogies.shape[0])
        #print(start+batchsize, permuted_analogies.shape[0])
        to_train = permuted_analogies[start:stop,:]
        optimizer.zero_grad()

        # analogy is A:B :: C:D, goal is to predict B from A, C, D
        # Y is the class label of B (and A)
        A = concatenated_centroids[to_train[:,0]]
        B = concatenated_centroids[to_train[:,1]]
        C = concatenated_centroids[to_train[:,2]]
        D = concatenated_centroids[to_train[:,3]]
        Y = centroid_labels[to_train[:,1]]

        A = Variable(torch.Tensor(A))
        B = Variable(torch.Tensor(B))
        C = Variable(torch.Tensor(C))
        D = Variable(torch.Tensor(D))
        Y = Variable(torch.LongTensor(Y.astype(int)))

        Bhat = model(A,C,D)

        lossval_2 = loss_2(Bhat, B) # simple mean squared error loss

        # classification loss
        predicted_classprobs = trained_classifier(Bhat)
        lossval_1 = loss_1(predicted_classprobs, Y)
        loss = lossval_1 + wt * lossval_2

        loss.backward()
        optimizer.step()

        avg_loss_1 = avg_loss_1 + lossval_1.data[0]
        avg_loss_2 = avg_loss_2 + lossval_2.data[0]
        count = count+1.0


        if i % 50 == 0:
            print('{:d} : {:f}, {:f}, {:f}'.format(i, avg_loss_1/count, avg_loss_2/count, count))
            avg_loss_1 = avg_loss_2 = count = 0.0

        if (i+1) % step_after == 0:
            lr = lr / 10.0
            print(lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        start = stop
        if start==permuted_analogies.shape[0]:
            start=0

    return dict(model_state=model.state_dict(), concatenated_centroids=torch.Tensor(concatenated_centroids),
            num_base_classes=len(centroids), num_clusters_per_class=num_clusters_per_class)

In [110]:
print(analogies.shape[0])

180


In [115]:
generator = train_analogy_regressor(analogies, centroids, base_classes, classification_model, lr=0.1)

0 : 3.284535, 0.413273, 1.000000
50 : 4.267699, 0.371373, 50.000000
100 : 1.325531, 0.257671, 50.000000
150 : 0.574396, 0.189520, 50.000000
200 : 0.082800, 0.148434, 50.000000
250 : 0.019898, 0.125422, 50.000000
300 : 0.019362, 0.115872, 50.000000
350 : 0.002575, 0.109401, 50.000000
400 : 0.002543, 0.107121, 50.000000
450 : 0.002537, 0.105595, 50.000000
500 : 0.002618, 0.103619, 50.000000
550 : 0.002618, 0.101775, 50.000000
600 : 0.002523, 0.100765, 50.000000
650 : 0.002433, 0.099796, 50.000000
700 : 0.002405, 0.098065, 50.000000
750 : 0.002250, 0.096721, 50.000000
800 : 0.002110, 0.095772, 50.000000
850 : 0.001966, 0.093493, 50.000000
900 : 0.001882, 0.092033, 50.000000
950 : 0.001823, 0.090690, 50.000000
1000 : 0.001753, 0.090101, 50.000000
1050 : 0.001709, 0.089590, 50.000000
1100 : 0.001679, 0.089119, 50.000000
1150 : 0.001665, 0.088665, 50.000000
1200 : 0.001648, 0.088224, 50.000000
1250 : 0.001623, 0.087804, 50.000000
1300 : 0.001590, 0.087399, 50.000000
1350 : 0.001577, 0.087006

In [116]:
print(generator['num_base_classes'])

9
