In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import os, random, cv2


import keras as keras
import tensorflow as tf
from keras.preprocessing import image
from keras.applications.resnet import preprocess_input
from keras.models import Sequential,Model
from keras.layers import Input, Activation, Dropout, Flatten, Dense, GlobalAveragePooling2D,Lambda
import keras.backend as K
from keras.optimizers import RMSprop,Adam
import gc

In [None]:
# Load the pre-split train / test index tables. Later cells read the
# `landmark_id` and `id` columns from these frames.
tr_data, te_data = map(
    pd.read_csv,
    (
        "../input/siamese-split-data-csv/tr_data.csv",
        "../input/siamese-split-data-csv/te_data.csv",
    ),
)


In [None]:
# Seed Python's built-in `random` module so pair sampling is repeatable.
# Only `random` is seeded here; NumPy / TensorFlow generators are untouched.
random.seed(123)

In [None]:
class SiameseDataLoader(object):
    """Samples positive / negative image pairs for a Siamese network.

    The index table is grouped by ``landmark_id``; every landmark with at
    least two images becomes one "class" (a list of image file paths).
    Pairs are then drawn at random from those classes with Python's
    ``random`` module.

    Args:
        data_pd: DataFrame with at least the columns ``landmark_id`` and
            ``id``. The first three characters of ``str(id)`` are used as
            nested directory names when building the image path.
        sample_class_num: Number of classes drawn per call when creating
            positive pairs, and again when creating negative pairs.
        samples_per_class: Number of pairs created for every drawn class.
    """

    # Root directory of the training images; per-image sub-folders are
    # derived from the first three characters of the image id.
    _IMAGE_ROOT = "../input/landmark-retrieval-2020/train/"

    def __init__(self, data_pd, sample_class_num , samples_per_class):
        self._data_pd = data_pd
        self._sample_class_num = sample_class_num
        self._samples_per_class = samples_per_class
        self._sample_file_names = self._get_samples()

    @classmethod
    def _image_path(cls, image_id):
        """Return the on-disk path of the image with the given id."""
        name = str(image_id)
        return cls._IMAGE_ROOT + name[0] + "/" + name[1] + "/" + name[2] + "/" + name + ".jpg"

    @staticmethod
    def _load_image(path):
        """Read one image as an int16 RGB array with a leading batch axis of size 1."""
        # The context manager closes the underlying file handle once the
        # pixel data has been copied into the NumPy array.
        with Image.open(path) as handle:
            pixels = np.array(handle.convert('RGB')).astype("int16")
        return np.expand_dims(pixels, axis=0)

    def _get_samples(self):
        """Group image paths by landmark id.

        Returns:
            A list with one entry per landmark that has more than one image;
            each entry is the list of file paths of that landmark, in table
            order. Landmarks appear in order of first occurrence.
        """
        # One pass over the table instead of one boolean scan per landmark.
        groups = self._data_pd.groupby("landmark_id", sort=False)
        total = groups.ngroups
        sample_file_names = []
        reported_decile = -1
        for index, (_, rows) in enumerate(groups):
            # Progress output in 10 % steps; integer maths keeps this safe
            # for tables with fewer than ten landmarks.
            decile = (10 * index) // total
            if decile > reported_decile:
                print(decile * 10, "%")
                reported_decile = decile
            # A class needs at least two images to form a positive pair.
            if len(rows) > 1:
                sample_file_names.append(
                    [self._image_path(image_id) for image_id in rows["id"]]
                )
        return sample_file_names

    def get_train_data(self):
        """Sample positive and negative pairs and load their images.

        Returns:
            ``([X1, X2], Y)`` where ``X1[i]`` / ``X2[i]`` are the two images
            of pair ``i`` (int16 arrays with a leading axis of size 1) and
            ``Y[i]`` is ``np.array([1.])`` for a same-class pair or
            ``np.array([0.])`` for a different-class pair. Positive pairs
            come first, followed by the negative pairs.
        """
        pairs, labels = self._create_pairs(self._sample_file_names, self._samples_per_class)
        X1 = [self._load_image(first) for first, _ in pairs]
        X2 = [self._load_image(second) for _, second in pairs]
        Y = list(labels)
        return [X1,X2], Y

    def get_test_data(self, test_image_path, sample_class_num, samples_per_class):
        """Pair one query image with randomly drawn reference images.

        Args:
            test_image_path: Path of the query image.
            sample_class_num: Number of classes to draw (with replacement).
            samples_per_class: Number of distinct images taken from every
                drawn class.

        Returns:
            ``[X1, X2]`` where every ``X1[i]`` is the query image and
            ``X2[i]`` is the sampled reference image of pair ``i``.

        Raises:
            ValueError: If a drawn class holds fewer than
                ``samples_per_class`` images (raised by ``random.sample``).
        """
        candidates = []
        for _ in range(sample_class_num):
            class_files = random.sample(self._sample_file_names, 1)[0]
            candidates.extend(random.sample(class_files, samples_per_class))
        if not candidates:
            return [[], []]
        # Decode the query image once; each pair receives its own copy so
        # the returned arrays stay independent of each other.
        query_image = self._load_image(test_image_path)
        X1 = [query_image.copy() for _ in candidates]
        X2 = [self._load_image(candidate) for candidate in candidates]
        return [X1,X2]

    def _create_pairs(self, sample_file_names, samples_per_class):
        """Return positive pairs followed by negative pairs, with matching labels."""
        positive_pairs, positive_labels = self._create_positive_pairs(sample_file_names, samples_per_class)
        negative_pairs, negative_labels = self._create_negative_pairs(sample_file_names, samples_per_class)
        return positive_pairs + negative_pairs, positive_labels + negative_labels

    def _create_positive_pairs(self, sample_file_names, samples_per_class):
        """Create pairs whose two images share the same label (label 1)."""
        positive_pairs = []
        labels = []
        for _ in range(self._sample_class_num):
            class_files = random.sample(sample_file_names, 1)[0]
            for _ in range(samples_per_class):
                # Two distinct images of the same class.
                positive_pairs.append(random.sample(class_files, 2))
                labels.append(np.array([1.]))
        return positive_pairs, labels

    def _create_negative_pairs(self, sample_file_names, samples_per_class):
        """Create pairs whose two images carry different labels (label 0).

        Raises:
            ValueError: If fewer than two classes are available.
        """
        class_count = len(sample_file_names)
        if class_count < 2:
            raise ValueError("negative pairs need at least two classes, got %d" % class_count)

        negative_pairs = []
        labels = []
        for _ in range(self._sample_class_num):
            anchor_class = random.randint(0, class_count - 1)
            anchor_files = sample_file_names[anchor_class]
            for _ in range(samples_per_class):
                # Draw uniformly from the other classes: pick an index in
                # [0, class_count - 2] and step over the anchor class.
                other_class = random.randrange(class_count - 1)
                if other_class >= anchor_class:
                    other_class += 1
                negative_pairs.append([
                    random.choice(anchor_files),
                    random.choice(sample_file_names[other_class]),
                ])
                labels.append(np.array([0.]))
        return negative_pairs, labels

In [None]:
# Preview batch drawn from the test split: 50 classes x 2 pairs each, once for
# positive and once for negative pairs.
loader = SiameseDataLoader(data_pd=te_data, sample_class_num=50, samples_per_class=2)
X, y = loader.get_train_data()

In [None]:
# Show one sampled pair side by side together with its label.
NUM = 98
print(y[NUM])

# Ask for the 1x2 grid directly. Calling plt.subplots(figsize=...) on its own
# creates a full-figure axes that later plt.subplot(121/122) calls overlap.
fig, (ax_first, ax_second) = plt.subplots(1, 2, figsize=(10, 10))
# Each entry has a leading axis of size 1, hence the extra [0].
ax_first.imshow(X[0][NUM][0].astype("int16"))
ax_second.imshow(X[1][NUM][0].astype("int16"))

In [None]:
# Build the feature encoder: ResNet101 without its classification head,
# followed by global average pooling.
# Height and width are left as None, so the input accepts variable-size
# 3-channel images.
input_tensor = Input(shape=(None,None, 3))
# include_top=False drops the final classifier; weights='imagenet' requests the
# pretrained ImageNet weights.
resnet101=tf.keras.applications.ResNet101(
    include_top=False, weights='imagenet',input_tensor=input_tensor
)
# The pooling layer is wrapped in its own Sequential model.
AVP = Sequential(GlobalAveragePooling2D())
# NOTE(review): later cells slice Encoder.layers[:313] and look this model up by
# the name "functional_1" (presumably auto-generated), so changing how it is
# constructed may break those cells -- confirm before restructuring.
Encoder=Model(inputs=resnet101.input,outputs=AVP(resnet101.output))
Encoder.summary()

In [None]:
# Smoke-test the encoder on the first image of the preview batch. The loader
# already added a leading axis of size 1, so no extra expand_dims is needed.
x = X[0][0].astype(np.float32)
Encoder.predict(x)

In [None]:
# Freeze the first 313 entries of Encoder.layers; the remaining layers stay trainable.
for frozen_layer in Encoder.layers[:313]:
    frozen_layer.trainable = False

In [None]:
class SiameseNet(object):
    """Two-input model that outputs the Euclidean distance between two encodings.

    Both inputs are passed through the same ``encoder`` (shared weights);
    the model output is the distance between the two resulting feature
    vectors, with shape ``(batch, 1)``.

    Args:
        encoder: Keras model mapping an image batch to one feature vector
            per image.
    """

    def __init__(self,encoder):
        input_a = Input(shape=[None,None,3])
        input_b = Input(shape=[None,None,3])

        # Cast on separate names so the model is still built from the
        # original Input tensors rather than from the cast results.
        processed_a = encoder(tf.cast(input_a, tf.float32))
        processed_b = encoder(tf.cast(input_b, tf.float32))

        # The distance has one value per sample; report that shape through
        # the helper instead of a hard-coded tuple.
        distance = Lambda(
            self._euclidean_distance,
            output_shape=self._eucl_dist_output_shape,
        )([processed_a, processed_b])
        self._model = Model(inputs=[input_a, input_b], outputs=distance)

    def _euclidean_distance(self, vects):
        """Return the per-sample Euclidean distance between two feature batches."""
        x, y = vects
        squared = K.sum(K.square(x - y), axis=1, keepdims=True)
        # Clamp before the square root: the gradient of sqrt at 0 is
        # infinite, which would produce NaNs for identical encodings.
        return K.sqrt(K.maximum(squared, K.epsilon()))

    def _eucl_dist_output_shape(self, shapes):
        """Map the two input shapes to the distance shape ``(batch, 1)``."""
        shape1, shape2 = shapes
        return (shape1[0], 1)

    def get_model(self):
        """Return the assembled Keras model."""
        return self._model
    
def contrastive_loss(y_true, y_pred):
    """Contrastive loss on predicted pair distances, with a margin of 1.

    Args:
        y_true: Pair labels, cast to float32; 1 marks a pair that should be
            close, 0 a pair that should be at least ``margin`` apart.
        y_pred: Predicted distance for every pair.

    Returns:
        Mean over all elements of
        ``y_true * d**2 + (1 - y_true) * max(margin - d, 0)**2``.
    """
    margin = 1
    targets = tf.cast(y_true, tf.float32)
    # Pairs labelled 1 are penalised by their squared distance.
    pull_term = targets * K.square(y_pred)
    # Pairs labelled 0 are penalised only while closer than the margin.
    push_term = (1 - targets) * K.square(K.maximum(margin - y_pred, 0))
    return K.mean(pull_term + push_term)

In [None]:
# Build the pair samplers used by the training loop: 150 classes per batch for
# training, 75 for validation, one pair per drawn class.
print("loader_prepare")
loader_train = SiameseDataLoader(data_pd=tr_data, sample_class_num=150, samples_per_class=1)
print("loader_train:ok")
loader_test = SiameseDataLoader(data_pd=te_data, sample_class_num=75, samples_per_class=1)
print("loader_test:ok")

In [None]:
# Adam did not work well for this Siamese net, so RMSprop is used instead
# (alternative that was tried: Adam(lr=0.0001, decay=1e-4, amsgrad=True)).
optim = RMSprop(decay=1e-4)
siamese = SiameseNet(encoder=Encoder).get_model()
siamese.compile(loss=contrastive_loss, optimizer=optim)

In [None]:
#siamese.load_weights('./weights_I709_L0.06329984217882156.h5')

In [None]:
# Training loop: one freshly sampled batch per iteration, validation on a
# freshly sampled test batch every 5th iteration, and a weight checkpoint
# whenever the validation loss reaches a new minimum.
iterations = 1000000
min_loss = 9999
min_iter = -1
print("start")
for iteration in range(iterations):
    step = iteration + 1  # 1-based counter used for logging

    # Release the previous batch before the next one is loaded.
    X, y = [], []
    X, y = loader_train.get_train_data()
    loss_train = siamese.train_on_batch(X, y)
    print(step, ",", end="")

    if step % 5 == 0:
        X, y = [], []
        X, y = loader_test.get_train_data()
        loss_val = siamese.evaluate(X, y, verbose=0)
        if loss_val < min_loss:
            # The checkpoint name carries the 0-based iteration and the loss.
            min_iter, min_loss = iteration, loss_val
            siamese.save_weights('weights_M1_I'+str(min_iter)+'_L'+str(min_loss)+'.h5', True)
        report = '///////////loss@' + str(step) + ' = ' + str(loss_train) + ',' + str(loss_val)
        report += ' (' + str(min_loss) + '@' + str(min_iter) + ')'
        print(report)

    # NOTE(review): clear_session() runs while `siamese` is still in use --
    # presumably to limit memory growth; confirm it is safe for the TF version used.
    keras.backend.clear_session()
    gc.collect()

In [None]:
# Build a second Siamese model around the same Encoder object that `siamese`
# was built from; the two models therefore share the encoder instance.
load_model = SiameseNet(Encoder).get_model()

In [None]:
# Restore saved weights into the model.
# NOTE(review): the file name follows the 'weights_M1_I<iter>_L<loss>.h5' pattern
# written by the training loop, so it presumably comes from one specific run and
# will not exist after a fresh run -- update the path accordingly.
load_model.load_weights('./weights_M1_I469_L0.3352832496166229.h5')

In [None]:
# Print the layer overview of the restored model (useful for checking the name
# of the nested encoder that the next cell looks up with get_layer).
load_model.summary()

In [None]:
# Extract the layer named "functional_1" from the restored Siamese model as a
# stand-alone model that maps an image batch to that layer's output.
# NOTE(review): "functional_1" looks like an auto-generated Keras name (presumably
# the shared encoder); it can change between sessions or TF versions -- verify
# against the summary printed above.
Decoder = Model(inputs=load_model.get_layer("functional_1").input,
                                 outputs=load_model.get_layer("functional_1").output)

In [None]:
# Run the extracted model on the first image of the current batch and print
# every component of the resulting feature vector, one per line.
x = X[0][0].astype(np.float32)
f = Decoder(x).numpy().copy()
for component in f[0]:
    print(component)

In [None]:
class MyModel(tf.keras.Model):
    """Export wrapper that turns a single uint8 image into a global descriptor.

    Wraps the module-level ``Decoder`` model. ``call`` is a ``tf.function``
    with a fixed input signature: one ``[height, width, 3]`` uint8 tensor
    named ``input_image``.
    """

    def __init__(self):
        super().__init__()
        self.model = Decoder

    @tf.function(input_signature=[
      tf.TensorSpec(shape=[None, None, 3], dtype=tf.uint8, name='input_image')
    ])
    def call(self, im):
        """Return ``{'global_descriptor': features}`` for one image."""
        as_float = tf.cast(im, tf.float32)
        # Wrap the single image in a batch of size 1, then take the first
        # (only) row of the model output.
        image_batch = tf.convert_to_tensor([as_float], dtype=tf.float32)
        descriptor = self.model(image_batch)[0]
        return {
            'global_descriptor': tf.identity(descriptor, name='global_descriptor'),
        }

In [None]:
# Instantiate the export wrapper; it captures the current module-level `Decoder`.
my_model=MyModel()

In [None]:
# Export the wrapper as a TensorFlow SavedModel under ./my_model, registering
# the tf.function `call` as the 'serving_default' signature.
served_function = my_model.call
tf.saved_model.save(
      my_model, export_dir="./my_model", signatures={'serving_default': served_function})

In [None]:
from zipfile import ZipFile

# Pack the exported SavedModel files into submission.zip, keeping their paths
# relative to the export directory.
# The archive handle is deliberately not called `zip`: at notebook scope that
# name would shadow the builtin and break any later zip(...) call, such as the
# one in the data loader.
with ZipFile('submission.zip', 'w') as archive:
    for member in (
        'saved_model.pb',
        'variables/variables.data-00000-of-00001',
        'variables/variables.index',
    ):
        archive.write('./my_model/' + member, arcname=member)

In [None]:
import tensorflow as tf
# Stand-alone Keras example on MNIST, independent of the Siamese pipeline above.
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Divide by 255.0 (presumably to scale 8-bit pixel values into [0, 1]).
x_train = x_train / 255.0
x_test = x_test / 255.0
print(len(x_train[0]), len(x_test[0]))
print(x_train)

# Small fully connected classifier: flatten -> 512 ReLU units -> dropout -> 10-way softmax.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)