In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.python.ops.nn_impl import _compute_sampled_logits, _sum_rows, sigmoid_cross_entropy_with_logits
from tensorflow.python.ops import nn_ops, embedding_ops

In [7]:
# ------------------------------------
# Parameters
# ------------------------------------
# (in the future can pass these in from the command line)
learning_rate = 0.01
training_epochs = 10
batch_size = 100
display_step = 1

In [9]:
# ------------------------------------
# Load data
# ------------------------------------

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data

# mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
train_set_size = 55000
num_classes = 10.0

In [11]:
# ------------------------------------
# Define variables
# ------------------------------------

# tf Graph Input
x = tf.placeholder(tf.float32, [None, 784])  # mnist data image of shape 28*28=784
y_one_hot = tf.placeholder(tf.float32, [None, 10])  # 0-9 digits recognition => 10 classes
y = tf.placeholder(tf.int64, [None, 1])  # 0-9 digits recognition => 10 classes
idx = tf.placeholder(tf.int64, [None, 1])  # data point indices

# Set model weights
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
u = tf.Variable(tf.ones([train_set_size]) * tf.log(num_classes)) # Initialize u_i = log(K)


In [30]:
# ------------------------------------
# Loss functions
# ------------------------------------

# Softmax without sampling
pred_softmax = tf.nn.softmax(tf.matmul(x, W) + b)
cost_softmax = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred_softmax),
                                             reduction_indices=1))

# Negative sampling without sampling
pred_negative_sampling = tf.nn.sigmoid(tf.matmul(x, W) + b)
cost_negative_sampling = tf.reduce_mean(-tf.reduce_sum((
    y * tf.log(pred_negative_sampling)
    + (1 - y) * tf.log(1 - pred_negative_sampling)
),
    reduction_indices=1))

# Sampled softmax = Importance sampling
cost_sampled_softmax = tf.nn.sampled_softmax_loss(weights=tf.transpose(W),
                         biases=b,
                         inputs=x,
                         labels=y_int,
                         num_sampled=5,
                         num_classes=10)

# Noise Contrastive Estimation
cost_nce = tf.nn.nce_loss(weights=tf.transpose(W),
                         biases=b,
                         inputs=x,
                         labels=y_int,
                         num_sampled=5,
                         num_classes=10)

# One vs Each
cost_ove = custom_sampled_loss(OVE)(weights=tf.transpose(W),
                         biases=b,
                         inputs=x,
                         labels=y_int,
                         num_sampled=5,
                         num_classes=10)



def debais_cost_fn(W, b, x, y):
    
    pred_softmax = tf.nn.softmax(tf.matmul(x, W) + b)
    cost_softmax = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred_softmax),
                                                 reduction_indices=1))
    def f1(): return 0.0*cost_softmax
    def f2(): return 10.0*cost_softmax
    return tf.cond(tf.less(tf.random_uniform([]), tf.constant(0.999)), f1, f2)

debais_cost = debais_cost_fn(W, b, x, y)

In [31]:
def ld_loss(weights,
                         biases,
                         datapoint_weights,
                         labels,
                         inputs,
                         idx,
                         num_sampled,
                         num_classes,
                         num_true=1,
                         sampled_values=None,
                         remove_accidental_hits=True,
                         partition_strategy="mod",
                         name="ld_loss"):

    logits, labels = _compute_sampled_logits(
      weights=weights,
      biases=biases,
      labels=labels,
      inputs=inputs,
      num_sampled=num_sampled,
      num_classes=num_classes,
      num_true=num_true,
      sampled_values=sampled_values,
      subtract_log_q=False,
      remove_accidental_hits=remove_accidental_hits,
      partition_strategy=partition_strategy)
    
    
    sampled_dp_weight = tf.transpose(embedding_ops.embedding_lookup(
        datapoint_weights, idx, partition_strategy=partition_strategy))
    
    true_logit = _sum_rows(labels*logits)
    repeated_true_logit = tf.tile(tf.reshape(true_logit, [-1, 1]), [1, tf.shape(logits)[1]] )
    logit_difference = logits - repeated_true_logit
    
    # - sampled_dp_weight + tf.exp(sampled_dp_weight) * 
    # 
    # (1.0 + _sum_rows((1 - labels) * stable_logistic(logit_difference)))
    return sampled_dp_weight + tf.exp(-sampled_dp_weight) * _sum_rows((1 - labels) * stable_logistic(logit_difference))

# Learned Denominator
cost_ld = ld_loss(weights=tf.transpose(W),
                         biases=b,
                         datapoint_weights=u,
                         inputs=x,
                         idx=idx,
                         labels=y_int,
                         num_sampled=5,
                         num_classes=10)

clip_u = u.assign(tf.maximum(0., u))

In [34]:
# Minimize error using cost
cost = cost_ld
sampled_loss = (cost in {cost_nce, cost_sampled_softmax, cost_ove, cost_ld})

# Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:
    # Run the initializer
    print("Initializing")
    sess.run(init)

    print("Optimization started!")
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)

            # Run optimization op (backprop) and cost op (to get loss value)
            if sampled_loss:
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs,
                                                              y_int: np.argmax(batch_ys, axis=1).reshape((-1,1)),
                                                             idx: 0*np.ones(batch_xs.shape[0])[:,None]})
                sess.run(clip_u)
                c = np.mean(c)  # Average loss over the batch

            else:
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs,
                                                              y: batch_ys})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if (epoch + 1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished!")

    # ------------------------------------
    # Print results
    # ------------------------------------
    # Test model
    correct_prediction = tf.equal(tf.argmax(pred_softmax, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
    
    print(u.eval())

Initializing
Optimization started!
Epoch: 0001 cost= 0.493090546
Epoch: 0002 cost= 0.293873282
Epoch: 0003 cost= 0.261314185
Epoch: 0004 cost= 0.254083936
Epoch: 0005 cost= 0.262350118
Epoch: 0006 cost= 0.257801616
Epoch: 0007 cost= 0.238646907
Epoch: 0008 cost= 0.243439019
Epoch: 0009 cost= 0.229165989
Epoch: 0010 cost= 0.235063174
Optimization Finished!
Accuracy: 0.896
[ 0.          2.30258512  2.30258512 ...,  2.30258512  2.30258512
  2.30258512]


In [6]:
def stable_logistic(x):
    """Calculates log(1+exp(x)) in a stable way.
    https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits
    """
    return tf.maximum(x, 0.0) + tf.log(1.0   + tf.exp(-tf.abs(x)))

In [7]:
def OVE(labels, logits):
    true_logit = _sum_rows(labels*logits)
    repeated_true_logit = tf.tile(tf.reshape(true_logit, [-1, 1]), [1, tf.shape(logits)[1]] )
    logit_difference = logits - repeated_true_logit
    return _sum_rows((1 - labels) * stable_logistic(logit_difference))

In [8]:
def my_sigmoid_cross_entropy_with_logits(labels, logits):
    """My implementation of nn_ops.softmax_cross_entropy_with_logits
    Used to make sure I can do this right"""
    return _sum_rows(tf.maximum(logits, 0.0) - logits * labels + tf.log(1.0 + tf.exp(-abs(logits))))

In [9]:
def custom_sampled_loss(custom_loss_function):
    def loss(weights,
                             biases,
                             labels,
                             inputs,
                             num_sampled,
                             num_classes,
                             num_true=1,
                             sampled_values=None,
                             remove_accidental_hits=True,
                             partition_strategy="mod",
                             name="ove_loss"):

        logits, labels = _compute_sampled_logits(
          weights=weights,
          biases=biases,
          labels=labels,
          inputs=inputs,
          num_sampled=num_sampled,
          num_classes=num_classes,
          num_true=num_true,
          sampled_values=sampled_values,
          subtract_log_q=False,
          remove_accidental_hits=remove_accidental_hits,
          partition_strategy=partition_strategy,
          name=name)
        return custom_loss_function(labels=labels, logits=logits)

    return loss

In [1]:

# logits = tf.constant([[5.8, 3.0, 4.0],
#                       [2.0, 6.0, 1.0]])
# labels = tf.constant([[0.0, 1.0, 0.0],
#                       [1.0, 0.0, 0.0]])

# a = tf.range(30, dtype=tf.float32) + 100.0
# W = tf.zeros([784, 10])
# b = tf.zeros([10])
    
batch_xs, batch_ys = mnist.train.next_batch(1)
print(batch_xs)

# sess = tf.InteractiveSession()



# print(W.eval())
# print(b.eval())
# print(logits.eval())
# print(labels.eval())

# logits, labels = _compute_sampled_logits(
#   weights=tf.transpose(W),
#   biases=b,
#   labels=tf.convert_to_tensor(np.argmax(batch_ys, axis=1).reshape((-1,1)), dtype=tf.int64),
#   inputs=tf.convert_to_tensor(batch_xs, dtype=tf.float32),
#   num_sampled=5,
#   num_classes=10,
#  num_true=1,
#  sampled_values=None,
#  remove_accidental_hits=True,
#  partition_strategy="mod")

#     sampled_dp_weight = embedding_ops.embedding_lookup(
#         datapoint_weights, idx, partition_strategy=partition_strategy)
    
#     true_logit = _sum_rows(labels*logits)
#     repeated_true_logit = tf.tile(tf.reshape(true_logit, [-1, 1]), [1, tf.shape(logits)[1]] )
#     logit_difference = logits - repeated_true_logit
    
#     # - sampled_dp_weight + tf.exp(sampled_dp_weight) * 
#     # 
#     # (1.0 + _sum_rows((1 - labels) * stable_logistic(logit_difference)))
#     return sampled_dp_weight + _sum_rows((1 - labels) * stable_logistic(logit_difference))

# # Sampled softmax
# print(nn_ops.softmax_cross_entropy_with_logits(labels=labels, logits=logits).eval())
# print(my_sigmoid_cross_entropy_with_logits(labels, logits).eval())
# print(my_OVE(labels, logits).eval())

# print(tf.random_uniform([]).shape)
# print(tf.random_uniform([]).eval())
# print(tf.constant(0.7).shape)
# print(tf.random_uniform([1]).eval())

# print(a.eval())
# print(embedding_ops.embedding_lookup(a, 0).eval())


NameError: name 'mnist' is not defined

In [39]:
mnist.train.labels.shape

(55000, 10)

In [127]:
"""Defines how all data is to be loaded"""
import numpy as np
from sklearn.datasets import load_svmlight_file


class MNLDataset():
    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.num_examples = x.shape[0]
        self.batch_index = 0

    def next_batch(self, batch_size):
        batch_indices = self.batch_index + np.arange(batch_size)
        batch_indices = np.mod(batch_indices, self.num_examples)
        self.batch_index = (self.batch_index + batch_size) % self.num_examples

        return [self.x[batch_indices, :], self.y[batch_indices, :], batch_indices[:, None]]


def loadLIBSVMdata(file_path, train_test_split):
    # Load the data
    data = load_svmlight_file(file_path, multilabel=True)

    # Separate into x and y
    # Remove data with no y value
    # and if multiple y values, take the first one
    y = data[1]
    y_not_empty = [i for i, y_val in enumerate(y) if y_val != ()]
    y = np.array([y[i][0] for i in y_not_empty])
    x = data[0].toarray()[y_not_empty, :]

    # Find point to split training and test sets
    n_samples = len(y)
    split_point = int(train_test_split * n_samples)

    # Create train and test sets
    train = MNLDataset(x[:split_point, :], y[:split_point])
    test = MNLDataset(x[split_point:, :], y[split_point:])
    return train, test


def load_data(dataset_name, train_test_split):
    if dataset_name == 'mnist':
        from tensorflow.examples.tutorials.mnist import input_data
        mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)
        train = MNLDataset(mnist.train.images, mnist.train.labels) #[:,None]
        test = MNLDataset(mnist.test.images, mnist.test.labels) #[:,None]
    if dataset_name in {'Bibtex'}:
        file_path = '/Users/francoisfagan/Documents/UnbiasedSoftmaxData/LIBSVM/' + dataset_name + '.txt'
        train, test = loadLIBSVMdata(file_path, train_test_split)

    dim = train.x.shape[1]
    num_classes = int(max(train.y)) + 1 #train.y.shape[1]#
    num_train_points = train.x.shape[0]
    return [train, test, dim, num_classes, num_train_points]



In [132]:
minst2 = input_data.read_data_sets("/tmp/data/", one_hot=False)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [159]:
train = MNLDataset(minst2.train.images, minst2.train.labels[:,None])

In [160]:
train.y.shape

(55000, 1)

In [143]:
batch_xs, batch_ys, batch_idx = train.next_batch(5)

In [148]:
batch_idx

array([[0],
       [1],
       [2],
       [3],
       [4]])

In [150]:
def one_hot(y, num_classes):
    return np.eye(num_classes)[y]

In [153]:
one_hot(batch_ys[:,0], 10)

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])

In [154]:
dataset_name = 'Bibtex'
file_path = '/Users/francoisfagan/Documents/UnbiasedSoftmaxData/LIBSVM/' + dataset_name + '.txt'
data = load_svmlight_file(file_path, multilabel=True)

In [164]:
y = data[1]
y_not_empty = [i for i, y_val in enumerate(y) if y_val != ()]
y = np.array([y[i][0] for i in y_not_empty])[:, None]
x = data[0].toarray()[y_not_empty, :]

# Find point to split training and test sets
n_samples = len(y)
split_point = int(train_test_split * n_samples)

# Create train and test sets
train = MNLDataset(x[:split_point, :], y[:split_point])
test = MNLDataset(x[split_point:, :], y[split_point:])

In [166]:
train.y

array([[  48.],
       [  75.],
       [  52.],
       ..., 
       [ 131.],
       [ 119.],
       [  13.]])

In [2]:
"""Defines how all data is to be loaded"""
import numpy as np
from sklearn.datasets import load_svmlight_file


class MNLDataset():
    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.num_examples = x.shape[0]
        self.batch_index = 0

    def next_batch(self, batch_size):
        batch_indices = self.batch_index + np.arange(batch_size)
        batch_indices = np.mod(batch_indices, self.num_examples)
        self.batch_index = (self.batch_index + batch_size) % self.num_examples

        return [self.x[batch_indices, :], self.y[batch_indices, :], batch_indices[:, None]]


def loadLIBSVMdata(file_path, train_test_split):
    # Load the data
    data = load_svmlight_file(file_path, multilabel=True)

    # Separate into x and y
    # Remove data with no y value
    # and if multiple y values, take the first one
    y = data[1]
    y_not_empty = [i for i, y_val in enumerate(y) if y_val != ()]
    y = np.array([int(y[i][0]) for i in y_not_empty])[:, None]
    x = data[0].toarray()[y_not_empty, :]

    # Find point to split training and test sets
    n_samples = len(y)
    split_point = int(train_test_split * n_samples)

    # Create train and test sets
    train = MNLDataset(x[:split_point, :], y[:split_point])
    test = MNLDataset(x[split_point:, :], y[split_point:])
    return train, test


def load_data(dataset_name, train_test_split):
    print('Loading data')
    if dataset_name == 'mnist':
        from tensorflow.examples.tutorials.mnist import input_data
        mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)
        train = MNLDataset(mnist.train.images, mnist.train.labels[:,None]) #
        test = MNLDataset(mnist.test.images, mnist.test.labels[:,None]) #[:,None]
    if dataset_name in {'Bibtex', 'Delicious', 'Eurlex'}:
        file_path = '../UnbiasedSoftmaxData/LIBSVM/' + dataset_name + '.txt'
        train, test = loadLIBSVMdata(file_path, train_test_split)

    dim = train.x.shape[1]
    num_classes = int(max(train.y)) + 1
    num_train_points = train.x.shape[0]
    return [train, test, dim, num_classes, num_train_points]

In [3]:
train, test, dim, num_classes, num_train_points = load_data('mnist', 0.7)

Loading data
Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [80]:
batch_xs, batch_ys, batch_idx = train.next_batch(1)
xx = batch_xs.reshape((batch_xs.shape[1]))
sess = tf.InteractiveSession()
init = tf.global_variables_initializer()
sess.run(init)
i=0
y_i = train.y[i][0]
y_i_one_hot = np.eye(int(num_classes))[y_i]
x_i = train.x[i, :]
denominator_i = (1 + np.exp(-(np.dot(x_i, W[:, y_i].eval()) + b.eval()[y_i]))
                 * np.dot(1 - y_i_one_hot, np.exp(np.dot(x_i, W.eval()) + b.eval())))
difference_i = abs(np.exp(u.eval()[i]) - denominator_i)
print(difference_i)

# WW = W[:,batch_ys].eval()
# print(np.dot(xx,WW))
# uu = u.eval()[batch_idx][0][0]
# print(uu)
# print(np.dot(xx,xx))
# for i in range(train.x.shape[0]):
#     label = np.eye(int(num_classes))[train.y[i][0]]
#     print(1+np.exp(-np.dot(train.x[i,:],W[:,batch_ys].eval()))*np.dot(1-label,np.exp(np.dot(train.x[i,:],W.eval()))))



#                     if i_batch == 2:
#                         xx = batch_xs.reshape((batch_xs.shape[1]))
#                         dot_old = np.dot(xx, W[:,batch_ys].eval()) / np.dot(xx,xx)
#                         u_old = u.eval()[batch_idx][0][0]
#                         print(u_old)

#                     if i_batch == 2:
#                         xx = batch_xs.reshape((batch_xs.shape[1]))
#                         dot_new = np.dot(xx, W[:,batch_ys].eval()) / np.dot(xx,xx)
#                         u_new = u.eval()[batch_idx][0][0]

#                         print('dot difference:', dot_old - dot_new)
#                         print('u difference:', u_old - u_new)

0.0


In [62]:
a = tf.Variable(tf.range(30, dtype=tf.float32)+100.0, dtype=tf.float32)
W = tf.zeros([784, 10])
b = tf.zeros([10])

init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)
# a_feed = sess.run([a],feed_dict={a: np.arange(30)-5})
# print(a_feed)
# sess.run(a.assign(tf.range(30, dtype=tf.float32)))
sess.run(a.assign(tf.constant(np.arange(30, dtype=tf.float32))))
print(a.eval())



TypeError: data type not understood

In [31]:
from numpy.random import randint
num_classes = 10
num_sampled = 5
batch_size = 4
batch_ys = np.array([[1],[2],[3],[4]])

In [58]:
samples = randint(num_classes-1, size=(batch_size,num_sampled))
print(samples)
repeated_batch_ys = np.tile(batch_ys, (1, num_sampled))
print(repeated_batch_ys)
sum_samples = samples + repeated_batch_ys + 1
print(sum_samples)
samples_mod = np.mod(sum_samples,num_classes)
print(samples_mod)

[[4 1 4 2 3]
 [2 3 0 0 2]
 [6 6 3 0 3]
 [0 2 0 5 3]]
[[1 1 1 1 1]
 [2 2 2 2 2]
 [3 3 3 3 3]
 [4 4 4 4 4]]
[[ 6  3  6  4  5]
 [ 5  6  3  3  5]
 [10 10  7  4  7]
 [ 5  7  5 10  8]]
[[6 3 6 4 5]
 [5 6 3 3 5]
 [0 0 7 4 7]
 [5 7 5 0 8]]


In [111]:

a = tf.Variable(tf.constant(1.),name="a")
b = tf.Variable(tf.constant(2.),name="b")
c = tf.Variable(tf.constant([2.,4.]),name="c")
with tf.Session() as s:
    s.run(tf.global_variables_initializer())
#     s.run(a.assign(5.0*b))
#     print(a.eval())
    result = a + b
    stored  = tf.Variable(tf.constant(0.),name="stored_sum")
    assign_op=stored.assign(result)
    _ = s.run(assign_op,{a:1.,b:2.})
    print(stored.eval()) # ok, still 3 
    _ = s.run(assign_op,{a:4.,b:5.})
    print(stored.eval()) # ok, still 3 
    print(val) # 3
    val=s.run(result,{a:4.,b:5.})
    print(val) # 9
    print(stored.eval()) # ok, still 3 
    print(c[0].eval())
    
    print(tf.nn.uniform_candidate_sampler(tf.constant([[1],[2]]), 1, 5, unique=True, range_max=10).eval())

3.0
9.0
9.0
9.0
9.0
2.0


TypeError: Input 'true_classes' of 'UniformCandidateSampler' Op has type float32 that does not match expected type of int64.

In [78]:
import numpy as np
import tensorflow as tf

npc = np.array([[1.,2.],[3.,4.]])
tfc = tf.Variable(npc) # Use variable 

row = np.array([[.1,.2]])

with tf.Session() as sess:   
    tf.initialize_all_variables().run() # need to initialize all variables

    print('tfc:\n', tfc.eval())
    print('npc:\n', npc)
    for i in range(2):
        for j in range(2):
            npc[i,j] += row[0,j]
    tfc.assign(npc).eval() # assign_sub/assign_add is also available.
    print('modified tfc:\n', tfc.eval())
    print('modified npc:\n', npc)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
tfc:
 [[ 1.  2.]
 [ 3.  4.]]
npc:
 [[ 1.  2.]
 [ 3.  4.]]
modified tfc:
 [[ 1.1  2.2]
 [ 3.1  4.2]]
modified npc:
 [[ 1.1  2.2]
 [ 3.1  4.2]]


In [109]:
np.random.randint(10**18)

980297104634894595