In [None]:
import os
from functools import partial

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import losses
from tensorflow.keras import metrics
from tensorflow.keras import optimizers
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.layers import (Dense,Layer,Input,Dropout,
                                     BatchNormalization,Activation,
                                     GRU,LSTM,GlobalMaxPool2D,Flatten,
                                     GlobalAvgPool2D,GlobalMaxPool1D,Conv2D,
                                     MaxPooling2D)
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.preprocessing import image
from tqdm import tqdm

import LossCalculation
import WordEmbedding

# 讀取資料

In [None]:
# Location of the saved word-embedding array (placeholder: point it at your own file).
path_word_embedding = 'path of your own word embedding results'

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# so only load files from a trusted source.
word_embedding = pd.DataFrame(
    np.load(path_word_embedding, allow_pickle=True),
    columns=['Image_name', '0', '1', '2', '3', '4', 'image_features', 'WordEmbedding'],
)

# 觀察圖像 features的分佈

In [None]:
# Draw the first ten slices (along the last axis) of the image features stored
# in row 10 on a 2x5 grid of subplots.
feature_map = word_embedding['image_features'][10][0]
for channel in range(10):
    plt.subplot(2, 5, channel + 1)
    plt.imshow(feature_map[..., channel])

In [None]:
# Same view as the previous cell, for the image features stored in row 100.
feature_map = word_embedding['image_features'][100][0]
for channel in range(10):
    plt.subplot(2, 5, channel + 1)
    plt.imshow(feature_map[..., channel])

In [None]:
# ImageNamePath : path of every original image, one per row of `word_embedding`
#                 and in the same row order
# AllImage      : the loaded and preprocessed images, in that same order
pathFolder_modify = 'path of your images'

# os.path.join inserts the path separator when the folder string lacks a trailing
# one (plain concatenation silently produced a wrong path in that case), and
# iterating the column values avoids depending on the frame's index labels.
ImageNamePath = [os.path.join(pathFolder_modify, image_name)
                 for image_name in word_embedding['Image_name']]

AllImage = []
for img_path in ImageNamePath:
    img = image.load_img(img_path, target_size=(224, 224))
    img = image.img_to_array(img) / 255.0   # rescale the pixel values by 1/255
    img = img[5:200, 5:200]                 # keep rows/columns 5..199 of the 224x224 image
    img = tf.image.resize(img, [227, 227])  # 227x227 is the input size declared by model_image
    AllImage.append(img)

In [None]:
# Quick visual check of the preprocessed image stored at position 9.
sample_image = AllImage[9]
plt.imshow(sample_image)

In [None]:
def gen(df):
    """
    Yield randomly paired (word embedding, image) samples with a match label.

    The original data set is small (1833 rows according to the original note),
    so more combinations are produced by pairing rows at random. The rows of
    ``df`` are shuffled and consumed two at a time; the word embedding of the
    first row of each pair is combined, with equal probability, with either

      - (case 1) the image belonging to that same row       -> label 1.0
      - (case 2) the image belonging to the pair's other row -> label 0.0

    Images are read from the module-level ``AllImage`` list. That list is
    ordered like the full ``word_embedding`` frame, whereas ``df`` is usually a
    shuffled subset of it (train/validation split). A position inside ``df``
    is therefore translated through ``df.index`` before ``AllImage`` is
    indexed; using the position directly would pair embeddings with unrelated
    images and label them as matches.

    Args:
        df: DataFrame with a ``WordEmbedding`` column whose index labels are
            the positions of the corresponding images in ``AllImage``.

    Yields:
        ``((word_embedding, image), label)`` tuples, one per pair of rows.
    """
    order = np.random.permutation(len(df))
    for pair in range(len(order) // 2):
        w_emb = df.iloc[order[2 * pair]]["WordEmbedding"]
        # idx == 0 -> take the image of the same row, idx == 1 -> of the other row.
        idx = (np.random.rand() > 0.5) * 1
        # Map the position inside df back to the row label used to build AllImage.
        image_id = df.index[order[2 * pair + idx]]
        img = AllImage[image_id]
        yield (w_emb, img), 1. - idx
        
        
def model_image():
    """
    Build the image branch that turns a raw image into a 128-value embedding.

    Layout: two convolution + max-pooling + local-response-normalisation
    stages, three further convolutions followed by max-pooling, then two
    1024-unit dense layers with 30% dropout and a final 128-unit ReLU layer.

    ``MaxPooling2D`` must be imported from ``tensorflow.keras.layers`` together
    with the other layers used here.

    Returns:
        A Keras ``Model`` named ``imageEmbbeding`` mapping inputs of shape
        (227, 227, 3) to outputs of shape (128,).
    """
    # Both normalisation stages share the same hyper-parameters.
    lrn_kwargs = dict(depth_radius=2, bias=2, alpha=1e-4, beta=0.75)

    inputs = Input(shape=(227, 227, 3))

    # first stage: convolution, pooling, LRN
    x = Conv2D(filters=96, kernel_size=11, strides=4, padding='same', activation='relu')(inputs)
    x = MaxPooling2D(pool_size=3, strides=2, padding='same')(x)
    x = tf.nn.local_response_normalization(x, **lrn_kwargs)

    # second stage: convolution, pooling, LRN
    x = Conv2D(filters=256, kernel_size=5, padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=3, strides=2, padding='same')(x)
    x = tf.nn.local_response_normalization(x, **lrn_kwargs)

    # three stacked convolutions followed by a last pooling step
    x = Conv2D(filters=384, kernel_size=3, padding='same', activation='relu')(x)
    x = Conv2D(filters=384, kernel_size=3, padding='same', activation='relu')(x)
    x = Conv2D(filters=256, kernel_size=3, padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=3, strides=2, padding='same')(x)

    # fully connected head
    x = Flatten()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.3)(x)
    outputs = Dense(128, activation='relu')(x)

    embedding = Model(inputs=inputs, outputs=outputs, name='imageEmbbeding')
    return embedding


def model_word():
    """
    Build the word branch: a stack of tanh dense layers (5120 -> 1024 -> 128
    units) with 25% dropout after the first two.

    Per the original note, the input is the BERT result, which this model
    refines and reduces to the embedding size.

    Returns:
        An unbuilt Keras ``Sequential`` model.
    """
    return Sequential([
        Dense(5120, activation='tanh'),
        Dropout(0.25),
        Dense(1024, activation='tanh'),
        Dropout(0.25),
        Dense(128, activation='tanh'),
    ])



class DistanceLayer(Layer):
    """
    Layer computing the squared Euclidean distance between two embeddings.

    The sum of squared differences is taken over the last axis, so the result
    has one value per leading-axis entry of the inputs.
    """

    def __init__(self, **kwargs):
        # Was misspelled ``__int__``, which defined an unused int-conversion
        # hook instead of the constructor.
        super().__init__(**kwargs)

    def call(self, word, image):
        """Return ``sum((word - image) ** 2)`` over the last axis."""
        distance = tf.reduce_sum(tf.square(word - image), -1)
        return distance
    
    
class SiameseModel(Model):
    """
    Training wrapper around a distance-producing siamese network.

    The wrapped network maps ``(word, image)`` inputs to a distance; this class
    adds a contrastive loss, a running mean of that loss and a threshold-based
    accuracy.

    Args:
        siamese_network: model returning one distance per input pair.
        margin: distance below which non-matching pairs are still penalised.
        threshold: a pair is predicted as matching when its distance is below
            this value (used by ``test_step`` only).
    """

    def __init__(self, siamese_network, margin=0.25, threshold=0.05):
        super(SiameseModel, self).__init__()
        self.siamese_network = siamese_network
        self.margin = margin
        self.threshold = threshold
        self.loss_tracker = metrics.Mean(name='loss')

    def call(self, inputs):
        """Forward the inputs to the wrapped network and return its distances."""
        return self.siamese_network(inputs)

    @staticmethod
    def _unpack(data, label):
        """Accept both ``step(x, y)`` and the Keras-style ``step((x, y))`` call."""
        if label is None:
            data, label = data
        return data, label

    def train_step(self, data, label=None):
        """
        Run one optimisation step and return ``{'loss': running mean loss}``.

        ``data``/``label`` may be passed separately (manual loop) or as a single
        ``(data, label)`` tuple in ``data``, which is how Keras ``fit`` calls it.
        """
        data, label = self._unpack(data, label)
        with tf.GradientTape() as tape:
            # training=True so the Dropout layers of the branches are active;
            # without it they behave as identity in a custom step.
            loss = self._compute_loss(data, label, training=True)
        gradients = tape.gradient(loss, self.siamese_network.trainable_weights)
        self.optimizer.apply_gradients(zip(gradients, self.siamese_network.trainable_weights))
        self.loss_tracker.update_state(loss)
        return {'loss': self.loss_tracker.result()}

    def test_step(self, data, label=None):
        """
        Return the fraction of pairs whose thresholded distance agrees with the label.

        A pair counts as correct when ``label == 1`` and ``distance < threshold``
        hold together or fail together. Accepts the same two calling
        conventions as ``train_step``.
        """
        data, label = self._unpack(data, label)
        dist = self.siamese_network(data)
        return K.mean((label == 1) == (dist < self.threshold))

    def _compute_loss(self, data, label, training=None):
        """
        Contrastive loss averaged over the batch.

        Matching pairs (label 1) are penalised by ``dist ** 2``; non-matching
        pairs (label 0) by ``max(margin - dist, 0) ** 2``.
        NOTE(review): ``dist`` is whatever the wrapped network returns; with the
        DistanceLayer of this notebook that is already a squared distance, so it
        gets squared a second time here. Confirm this is intended.
        """
        dist = self.siamese_network(data, training=training)
        matching_term = label * K.square(dist)
        non_matching_term = (1 - label) * K.square(K.maximum(self.margin - dist, 0.0))
        return K.mean(matching_term + non_matching_term)

    @property
    def metrics(self):
        # Exposes the tracker through the Model.metrics property.
        return [self.loss_tracker]

# 切割訓練測試資料集

In [None]:
# A fixed seed keeps the train/validation split identical across kernel restarts,
# so weights reloaded later are evaluated on rows that were really held out.
SPLIT_SEED = 42


def make_dataloader(df):
    """Wrap ``gen(df)`` in a tf.data pipeline yielding batches of 32 ((word, image), label) samples."""
    return tf.data.Dataset.from_generator(partial(gen, df),
                                          output_types=((tf.float32, tf.float32), tf.float32)).\
                                          prefetch(10).batch(32)


train_df, val_df = train_test_split(word_embedding, test_size=0.2, random_state=SPLIT_SEED)

train_dataloader = make_dataloader(train_df)
val_dataloader = make_dataloader(val_df)

# Training

In [None]:
# embedding_image : image branch built by model_image
# embedding_word  : word branch built by model_word
#
# siamese_network : the model used in the end
#   inputs : [word vector of length 5120, image of shape 227x227x3]
#   output : distance between the word and image embeddings

embedding_image = model_image()
embedding_word = model_word()

# `(5120)` is just the int 5120; a shape has to be a tuple, hence the trailing comma.
word_input = Input(name='word', shape=(5120,))
image_input = Input(name='image', shape=(227, 227, 3))
distances = DistanceLayer()(embedding_word(word_input),
                            embedding_image(image_input))

siamese_network = Model(inputs=[word_input, image_input], outputs=distances, name='final_model')
siamese_network.summary()

In [None]:
# Training hyper-parameters.
learning_rate = 1e-6
epochs = 100

# Wrap the network in the training model and attach an Adam optimizer.
adam = optimizers.Adam(learning_rate)
siamese_model = SiameseModel(siamese_network)
siamese_model.compile(optimizer=adam)

In [None]:
# Manual training loop: every epoch iterates once over train_dataloader, whose
# generator draws a fresh random pairing each time it is restarted.
session = tqdm(range(epochs))
for e in session:
    for x, y in train_dataloader:
        loss = siamese_model.train_step(x, y)
        # train_step returns {'loss': running mean}; show the scalar itself
        # instead of the nested dict / tensor repr.
        session.set_postfix(loss=float(loss['loss']))

In [None]:
# Materialise one pass over the validation loader so that x_val / y_val exist on
# a fresh kernel and every later evaluation cell sees the same pairs.
val_words, val_images, val_labels = [], [], []
for (word_batch, image_batch), label_batch in val_dataloader:
    val_words.append(word_batch)
    val_images.append(image_batch)
    val_labels.append(label_batch)

# x_val[0] holds the word embeddings, x_val[1] the images, y_val the match labels.
x_val = (tf.concat(val_words, 0), tf.concat(val_images, 0))
y_val = tf.concat(val_labels, 0)

accuracy = siamese_model.test_step(x_val, y_val)
distance = siamese_model.siamese_network(x_val)
model_training_loss = siamese_model.loss_tracker.result()

In [None]:
# Histogram of the predicted distances over the validation pairs; in principle,
# the more separated the distribution, the better.
# `d` collects one distance tensor per validation batch (it was never defined before).
d = [siamese_model.siamese_network(inputs) for inputs, _ in val_dataloader]
plt.hist(tf.concat(d, 0).numpy())

In [None]:
# Inspect what the image branch has learned; in theory, the more spread out
# the values, the better.
plt.figure(figsize=(14, 8))
image_latents = embedding_image(x_val[1]).numpy()
plt.imshow(image_latents)

In [None]:
# Inspect what the word branch has learned; in theory, the more spread out
# the values, the better.
plt.figure(figsize=(14, 8))
word_latents = embedding_word(x_val[0]).numpy()
plt.imshow(word_latents)

# Save Model's weights

In [None]:
# Persist the weights of the wrapper model and of both embedding branches.
for trained_model, weights_path in (
    (siamese_model, 'path of saving psuedo  siamese network'),
    (embedding_image, 'path of saving image embedding model trained'),
    (embedding_word, 'path of saving word embedding model trained'),
):
    trained_model.save_weights(weights_path)

# Load Model's Weights

In [None]:
# Instantiate SiameseModel again under a new variable name, then restore the
# trained weights into it with load_weights.
saved_weights_path = 'path of saving psuedo  siamese network'
siamese_model2 = SiameseModel(siamese_network)
siamese_model2.load_weights(saved_weights_path)

In [None]:
# Accuracy of the reloaded model on x_val / y_val.
reloaded_accuracy = siamese_model2.test_step(x_val, y_val)
reloaded_accuracy