# 参考
[Siamese Neural Networks for One-shot Image Recognition (Koch, Zemel, Salakhutdinov, 2015)](http://www.cs.cmu.edu/~rsalakhu/papers/oneshot1.pdf)

In [1]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as img
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [16]:
import helper.load_batch as lb
import helper.load_music as ld
import pygame
# Load audio clips from the 'Music_sample' directory through the project helper.
# NOTE(review): the meaning/units of `length=8000` are defined in helper.load_music — confirm.
songs = ld.music_load(dir='Music_sample',length=8000)
# Reshape the first clip into two columns (presumably left/right channels, matching the 2-channel mixer below).
song_test = songs[0].reshape(-1,2)
# Positional arguments follow pygame.mixer.init's documented order: frequency, size, channels.
pygame.mixer.init(44100,-16,2)
# Wrap the array as a pygame Sound; it is played after the training loop as a notification.
sound = pygame.sndarray.make_sound(song_test)

In [3]:
import tensorflow as tf
from keras.datasets import fashion_mnist

Using TensorFlow backend.


In [4]:
# Fetch the Fashion-MNIST train/test splits through Keras.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Add a trailing channel axis and divide by 255 (pixel values are presumably 0-255).
X_train = X_train.reshape((-1, 28, 28, 1)) / 255.
X_test = X_test.reshape((-1, 28, 28, 1)) / 255.

# Hold out the last 5000 training examples as the validation split.
# Both right-hand sides are evaluated before X_train / y_train are rebound.
X_train, X_val = X_train[:-5000], X_train[-5000:]
y_train, y_val = y_train[:-5000], y_train[-5000:]

In [5]:
# Dataset splits plus the class count, bundled as the keyword arguments
# that are later unpacked into lb.BatchLoader(**data_dict).
data_dict = dict(
    X_train=X_train,
    X_val=X_val,
    X_test=X_test,
    y_train=y_train,
    y_val=y_val,
    y_test=y_test,
    num_class=10,
)

# 初期値
論文により、
- Convolution kernels are initialized from a normal distribution with mean 0 and standard deviation 0.01

同様に以下の通りに初期値を与えます。

In [6]:
# Kernel initializer used by the convolution layers: normal, mean 0, stddev 0.01.
Winit = tf.random_normal_initializer(mean=0,stddev=0.01)
# Bias initializer used by the convolution layers: normal, mean 0.5, stddev 0.01.
binit = tf.random_normal_initializer(mean=0.5,stddev=0.01)
# Kernel initializer used by the dense layers: normal, mean 0, stddev 0.2.
Denseinit = tf.random_normal_initializer(mean=0,stddev=0.2)

In [7]:
# Same as Before
def model(X, reg_pow=0.001):
    '''
    Embedding network applied to one input of the siamese pair.

    Two stages of conv(2x2, stride 1, ReLU) + max-pool(2x2, stride 2),
    then flatten and a 512-unit sigmoid dense layer. Every kernel carries
    an L2 regularizer; the penalties are collected by TensorFlow in
    tf.GraphKeys.REGULARIZATION_LOSSES.

    Input
    X: image batch tensor (this notebook feeds (None,28,28,1) placeholders)
    reg_pow: scale of the L2 penalty on the kernels
    Return
    (N,512) embedding tensor
    '''
    reg = tf.contrib.layers.l2_regularizer(scale=reg_pow)
    with tf.variable_scope('conv1'):
        X = tf.layers.conv2d(X,32,kernel_size=[2,2],strides=[1,1],activation=tf.nn.relu
                     ,kernel_initializer=Winit,bias_initializer=binit,kernel_regularizer=reg)
        X = tf.layers.max_pooling2d(X,[2,2],2)
    with tf.variable_scope('conv2'):
        X = tf.layers.conv2d(X,128,kernel_size=[2,2],strides=[1,1],activation=tf.nn.relu
                        ,kernel_initializer=Winit,bias_initializer=binit,kernel_regularizer=reg)
        X = tf.layers.max_pooling2d(X,[2,2],2)
        X = tf.contrib.layers.flatten(X)
    with tf.variable_scope('full1'):
        X = tf.layers.dense(X,512,kernel_initializer=Denseinit,kernel_regularizer=reg,activation=tf.sigmoid)
    return X

# 距離
$$ p = \sigma\left(\sum_{j}\alpha_j\left|h_{1,L-1}^{(j)}-h_{2,L-1}^{(j)}\right|\right) $$
実装上は、$\alpha_j$ を重みとするバイアスなしの dense layer 一層でこの重み付き和を計算する。

In [8]:
def combine_predict(X1,X2):
    '''
    Compare two batches of embeddings element-wise.

    Input
    X1,X2: (N,M)
    Return
    (dist, out)
    dist: (N,) sum over the feature axis of |X1-X2| (unweighted L1 distance)
    out: (N,1) output of a 1-unit, bias-free dense layer applied to |X1-X2|.
         No activation is applied here, so this is a logit, not a probability.
    '''
    diff = tf.abs(X1-X2)
    # The dense kernel plays the role of the learned per-feature weights on |X1-X2|.
    out = tf.layers.dense(diff,1,kernel_initializer=Denseinit, use_bias=False)
    return tf.reduce_sum(diff,axis=1), out
    

In [9]:
# Clear the default graph before any ops are defined in this section.
tf.reset_default_graph()

# Graph inputs: the two images of each pair and one integer label per pair.
X1 = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])
X2 = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])
y = tf.placeholder(dtype=tf.int32, shape=[None, 1])
# Boolean flag placeholder (no shape specified).
is_training = tf.placeholder(dtype=tf.bool)

In [10]:
# Build both branches under one variable scope; after reuse_variables() the
# second model() call shares the variables created by the first.
with tf.variable_scope('bottleneck') as scope:
    Y1 = model(X1)
    scope.reuse_variables()
    Y2 = model(X2)
# dist: unweighted L1 distance between the embeddings; p: raw output (logit) of the comparison layer.
dist, p = combine_predict(Y1,Y2)
# Penalties registered through the kernel_regularizer arguments inside model().
reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
# Total loss = mean sigmoid cross-entropy computed on the logits + summed regularization penalties.
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(y,tf.float32),logits=p)) \
    + tf.reduce_sum(reg_losses)
optimizer = tf.train.AdamOptimizer()
train_step = optimizer.minimize(loss)


# A logit greater than 0 is equivalent to sigmoid(p) > 0.5, so threshold the logit directly.
predict = tf.cast(tf.greater(p,0.), dtype=tf.int32)
correct_pred = tf.equal(predict, y)
# Fraction of pairs in the batch whose thresholded prediction equals the label.
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))

In [11]:
# Open a session and load variable values from the checkpoint prefix 'siamese_model/siamese'.
sess = tf.Session()
saver = tf.train.Saver()
saver.restore(sess,'siamese_model/siamese')

INFO:tensorflow:Restoring parameters from siamese_model/siamese


In [12]:
# Batch loader from the project helper, constructed from the dataset splits and class count in data_dict.
data = lb.BatchLoader(**data_dict)

In [15]:
# Sanity-check the loader's output shapes; the recorded output shows 8 rows for make_batch(4).
t1,t2,yt = data.make_batch(4)
t1.shape, yt.shape

((8, 28, 28, 1), (8, 1))

In [11]:
# Fresh-start path: create a new session and run every variable's initializer.
# NOTE(review): this rebinds `sess`; a session opened by an earlier cell is not closed here — confirm that is intended.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [15]:
batch_size = 100
val_size = 500
num_steps = 10000
log_every = 100  # print metrics (and run validation) once every this many steps

for i in range(num_steps):
    log_now = (i % log_every == 0)
    # Validation metrics are only ever printed on logging steps, so the
    # validation batch is built and evaluated only on those steps instead of
    # on every iteration.
    phases = ['train', 'val'] if log_now else ['train']
    for phase in phases:
        if phase == 'train':
            # Fetching train_step applies one optimizer update in this run.
            step = [loss, accuracy, train_step]
            first, second, expect = data.make_batch(batch_size, dat_type='train')
        else:
            # No train_step in the fetches: evaluation only, no weight update.
            step = [loss, accuracy, correct_pred]
            first, second, expect = data.make_batch(val_size, dat_type='val')
        feed = {X1:first,X2:second,y:expect}
        current_loss, acc, _ = sess.run(step, feed_dict=feed)
        if log_now:
            print('{} loss is {} and accuracy is {}'.format(phase, current_loss, acc))

# Audible notification that the loop has run to completion.
sound.play()

train loss is 0.24887996912002563 and accuracy is 0.9149999618530273
val loss is 0.25411179661750793 and accuracy is 0.9210000038146973
train loss is 0.2455081045627594 and accuracy is 0.9149999618530273
val loss is 0.26558002829551697 and accuracy is 0.9039999842643738
train loss is 0.22822628915309906 and accuracy is 0.9399999380111694
val loss is 0.2257498949766159 and accuracy is 0.9390000700950623
train loss is 0.37398648262023926 and accuracy is 0.8900001049041748
val loss is 0.2609494924545288 and accuracy is 0.9070000052452087
train loss is 0.28646329045295715 and accuracy is 0.9049999713897705
val loss is 0.25355684757232666 and accuracy is 0.9109999537467957
train loss is 0.22070737183094025 and accuracy is 0.9399999380111694
val loss is 0.24210910499095917 and accuracy is 0.9240000247955322
train loss is 0.27953603863716125 and accuracy is 0.9049999713897705
val loss is 0.24856387078762054 and accuracy is 0.9179999828338623
train loss is 0.26773956418037415 and accuracy is 0

KeyboardInterrupt: 

In [14]:
# Stop playback of the notification sound.
sound.stop()

In [60]:
# Save the session's current variable values under the checkpoint prefix 'siamese_model/siamese';
# `path` holds the value returned by saver.save.
saver = tf.train.Saver()
path = saver.save(sess,'siamese_model/siamese')

In [29]:
# Run the loader's one-shot evaluation helper with the comparison logit `p` and both input placeholders.
# NOTE(review): the evaluation protocol and the meaning of repeat=6 are defined in helper.load_batch — confirm.
data.do_test_oneshot(sess, p,X1,X2,repeat=6)

0.8642