In [1]:
# -*- coding: utf-8 -*-
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Dense, Activation, BatchNormalization
from keras.layers import activations, initializers, regularizers, constraints, Lambda
from keras.engine import InputSpec
import tensorflow as tf
import numpy as np


class AMSoftmax(Layer):
    def __init__(self, units, s, m,
                 kernel_initializer='glorot_uniform',
                 kernel_regularizer=None,
                 kernel_constraint=None,
                 **kwargs
                 ):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(AMSoftmax, self).__init__(**kwargs)
        self.units = units
        self.s = s
        self.m = m
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.input_spec = InputSpec(min_ndim=2)
        self.supports_masking = True


    def build(self, input_shape):
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]

        self.kernel = self.add_weight(shape=(input_dim, self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        self.bias = None

        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True


    def call(self, inputs, **kwargs):
        inputs = tf.nn.l2_normalize(inputs, dim=-1)
        self.kernel = tf.nn.l2_normalize(self.kernel, dim=(0, 1))   # W归一化

        dis_cosin = K.dot(inputs, self.kernel)
        # psi = dis_cosin - self.m

        # e_costheta = K.exp(self.s * dis_cosin)
        # e_psi = K.exp(self.s * psi)
        # sum_x = K.sum(e_costheta, axis=-1, keepdims=True)

        # temp = e_psi - e_costheta
        # temp = temp + sum_x

        # output = e_psi / temp
        return dis_cosin


def amsoftmax_loss(y_true, y_pred):
    d1 = K.sum(y_true * y_pred, axis=-1)
    d1 = K.log(K.clip(d1, K.epsilon(), None))
    loss = -K.mean(d1, axis=-1)
    return loss

Using TensorFlow backend.


In [4]:
import numpy as np
np.random.seed(1337)
from keras import backend as K
from keras.layers import Dense,Input,Conv2D,MaxPooling2D,Dropout,BatchNormalization
from keras.models import Model
from keras.optimizers import SGD, Adam
import os
from keras.backend.tensorflow_backend import set_session

batch_size = 200
nb_classes = 10
nb_epoch = 2


os.environ["CUDA_VISIBLE_DEVICES"] = '1'
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.8
set_session(tf.Session(config=config))

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

X_train, Y_train = mnist.train.images,mnist.train.labels
X_test, Y_test = mnist.test.images, mnist.test.labels
X_train = X_train.reshape(-1, 28, 28,1).astype('float32')
X_test = X_test.reshape(-1,28, 28,1).astype('float32')

#打印训练数据和测试数据的维度
print(X_train.shape,X_test.shape,Y_train.shape,Y_test.shape)

#修改维度
X_train = X_train.reshape(55000,784)
X_test = X_test.reshape(10000,784)
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

# 将X_train, X_test的数据格式转为float32存储
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
# 归一化
X_train /= 255
X_test /= 255
# 打印出训练集和测试集的信息
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')


x_input = Input(shape=(784,))
y = Dense(500, activation='relu')(x_input)
y = Dropout(0.2)(y)
y = Dense(500, activation='relu')(y)
y = Dropout(0.2)(y)

output = AMSoftmax(10, 10, 0.35)(y)
#output = Dense(10, activation='softmax')(y)
model = Model(inputs=x_input, outputs=output)
model.summary()

adam = Adam()
model.compile(loss=amsoftmax_loss,
              optimizer=adam,
              metrics=['accuracy'])

history = model.fit(X_train, Y_train,
                    batch_size=batch_size,
                    epochs=10,
                    verbose=1,
                    validation_data=(X_test, Y_test))

score = model.evaluate(X_test, Y_test, verbose=0)

print('Test score:', score[0])
print('Test accuracy:', score[1])

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use urllib or similar directly.
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py fr

In [34]:
from keras import *

model = models.Sequential()

model.add(layers.Convolution2D(32, 3, padding='same',
                               input_shape=(32, 32, 3)))
model.add(layers.Activation('relu'))
model.add(layers.Convolution2D(32, 3))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Dropout(0.25))

model.add(layers.Convolution2D(64, 3, padding='same'))
model.add(layers.Activation('relu'))
model.add(layers.Convolution2D(64, 3))
model.add(layers.Activation('relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Dropout(0.25))

model.add(layers.Flatten())
model.add(layers.Dense(512))
model.add(layers.Activation('relu'))
model.add(layers.Dropout(0.5))
model.add(AMSoftmax(100, 10, 0.35))
# model.add(layers.Dense(100))

Using TensorFlow backend.


NameError: name 'AMSoftmax' is not defined

In [3]:
(train_images, train_labels), (test_images, test_labels) = datasets.cifar100.load_data()

train_images = train_images.reshape((50000, 32, 32, 3))
test_images = test_images.reshape((10000, 32, 32, 3))

# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0
# train_images, test_images = train_images - 0.5, test_images - 0.5
# train_images, test_images = train_images * 2, test_images * 2

# To one-hot
train_labels = utils.to_categorical(train_labels, 100)
test_labels = utils.to_categorical(test_labels, 100)

In [38]:
from tensorflow.python.ops import array_ops


def softmax_loss(t=1.0, s=10):

    t = float(t)
    s = float(s)
    
    def softmax_loss_fixed(y_true, logits):
        """Softmax loss for multi-classification
        FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t)
        Notice: y_pred is raw logits
        Focal Loss for Dense Object Detection
        https://arxiv.org/abs/1708.02002

        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls]

        Keyword Arguments:

        Returns:
            [tensor] -- loss.
        """
        epsilon = 1.e-9
        zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
        ones = array_ops.ones_like(logits, dtype=logits.dtype)
        
        # Возможно косяк здесь! Хз как правильно искать значение логита на тру-классе
        logit_y = tf.reduce_sum(tf.multiply(y_true, logits), axis=-1, keepdims=True)
        I_k = array_ops.where(logit_y >= logits, zeros, ones)
        
        h = tf.exp(s*tf.multiply(t - 1., tf.multiply(logits + ones, I_k)))
        
        # softmax = tf.exp(logits) / tf.reduce_sum(tf.multiply(tf.exp(logits), h))
#         softmax = tf.exp(logits) / (tf.reshape(
#             tf.reduce_sum(tf.multiply(tf.exp(logits), h)), 
#             [-1, 1]) + epsilon)
        softmax = tf.exp(s*logits) / (tf.reduce_sum(tf.multiply(tf.exp(s*logits), h), axis=-1, keepdims=True) + epsilon)
        # softmax = tf.nn.softmax(logits)
        # softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits))
        
        # We add epsilon because log(0) = nan
        softmax = tf.add(softmax, epsilon)
        ce = tf.multiply(y_true, -tf.log(softmax))
        return tf.reduce_mean(ce)
    
    return softmax_loss_fixed

In [9]:
model.compile(optimizer='adam',
              loss=softmax_loss(t=1.1, s=30),
              metrics=['accuracy'])

history1 = model.fit(train_images, train_labels, epochs=50,
                     validation_data=(test_images, test_labels));

Train on 50000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
https://github.com/Joker316701882/Additive-Margin-Softmax/issues/9

In [10]:
model.add(layers.Activation('softmax'))
model.compile(optimizer='adam',
              loss=softmax_loss(),
              metrics=['accuracy'])
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(test_acc)

0.4396


In [69]:
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import array_ops

t = tf.placeholder(tf.float32)
m = tf.placeholder(tf.float32)
logits = tf.placeholder(tf.float32)
y_true = tf.placeholder(tf.float32)
zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
ones = array_ops.ones_like(logits, dtype=logits.dtype)

logit_y = tf.reduce_sum(tf.multiply(y_true, logits), axis=-1, keepdims=True)
I_k = array_ops.where(logit_y >= logits, zeros, ones)
I_k = array_ops.where(logit_y - m >= logits, zeros, ones)
I_k_ = I_k * tf.cast(tf.not_equal(y_true, 1), tf.float32)
h = tf.exp(tf.multiply(t - 1., tf.multiply(logits + 1., I_k)))

# logits = logits - m * y_true
softmax = tf.exp(logits- m * y_true) / tf.reduce_sum(tf.multiply(tf.exp(logits- m * y_true), h), 
                                         axis=-1, keepdims=True)
# softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis=-1, keepdims=True)

# softmax = tf.nn.softmax(logits)

# ce = tf.multiply(y_true, -tf.log(softmax))
# ce = tf.reduce_sum(ce, axis=1)
# ce = tf.reduce_mean(ce)
ce = tf.reduce_mean(-tf.reduce_sum(y_true * tf.log(softmax), reduction_indices=[1]))
# ce = tf.losses.softmax_cross_entropy(y_true, softmax)
# ce = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=logits) 
# ce = tf.reduce_mean(ce)
with tf.Session() as sess:
  logits_array = np.array([[2., 3., 1.], [1., 2.1, 2.]])
  y_true_array = np.array([[0., 1., 0.], [0., 0., 1.]])
  print(sess.run(ce, feed_dict={t: 1., m: 0., logits: logits_array, y_true: y_true_array}))

0.656529


In [None]:
tf.lo