In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
import seaborn as sns
from sklearn.manifold import TSNE
import random
from itertools import permutations
import os
import cv2

from keras.optimizers import SGD,Adam
from keras.layers import Input, Conv2D, Lambda, Dense, Flatten,MaxPooling2D, concatenate
from keras.models import Model, Sequential
from keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping
from keras import backend as K

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
LEARNING_RATE=0.00001
MARGIN = 1.3

In [3]:
def load_data(path, row, column, channel):
    x_train=[]
    y_train=[]
    filename_train=[]
    x_test=[]
    y_test=[]
    filename_test=[]
    # Get directory
    DataPaths = [path+'/'+folder_name for folder_name in os.listdir(path)]
    print(DataPaths)
    # Check each image folders. each image folder contains images with same yoga posture 
    for label_idx in range(len(DataPaths)):
        if os.path.isdir(DataPaths[label_idx]) == False:
            continue
        LabelPath = DataPaths[label_idx]
        
        # Check each kinds of data.
        ImgPaths = [LabelPath+ '/'+image_name for image_name in os.listdir(LabelPath)]
        
        #each image
        for idx in range(len(ImgPaths)):
            imagePath = ImgPaths[idx]
            # print(imagePath)
            if ".DS_Store" in imagePath:
                continue
            img = cv2.imread(imagePath, cv2.IMREAD_COLOR)
            if channel==1:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                
            img = cv2.resize(img, (row,column), interpolation = cv2.INTER_AREA)
            if idx%4==0:
                x_test.append(img)
                y_test.append(label_idx)
                filename_test.append(imagePath)
            else : 
                x_train.append(img)
                y_train.append(label_idx)
                filename_train.append(imagePath)
    return np.array(x_train), np.array(y_train), filename_train, np.array(x_test), np.array(y_test), filename_test

In [4]:
#data load
img_row=28
img_column=28
img_channel=1
path =  '../../images/data'
#사진 1장당 28*28*1 (가로*세로*채널)로 입력된다.
x_train, y_train, filename_train, x_test, y_test, filename_test = load_data(path,img_row, img_column, img_channel)
#make total data set
x_total = np.array(np.concatenate((x_train, x_test), axis = 0))
y_total = np.array(np.concatenate((y_train, y_test), axis = 0))
filename_total = np.concatenate((filename_train, filename_test), axis = 0)
y_total = np.concatenate((y_train,y_test), axis = 0)
print("before shape : ",x_train.shape, x_test.shape, x_total.shape)

['../../images/data/1865', '../../images/data/g7', '../../images/data/Barton', '../../images/data/Montes_Alpha', '../../images/data/1', '../../images/data/Diablo', '../../images/data/Canti_Brachetto', '../../images/data/3', '../../images/data/2', '../../images/data/Canti_Moscato']


KeyboardInterrupt: 

In [None]:
def checkImgSet(msg, x_sample, y_sample, filename_sample, classes, maxnum):

    print(msg)
    for k in range(classes):
        plt.figure(figsize=(20,20))
    
        num=0
        for i in range(len(y_sample)):
            if num==maxnum:
                break
            if y_sample[i]==k:
                num+=1
                plt.subplot(10,10,num)
                plt.xticks([])
                plt.yticks([])
                plt.grid(False)
                plt.imshow(x_sample[i], cmap='gray', vmin=0, vmax=255)
                #plt.imshow(cv2.cvtColor(x_sample[i], cv2.COLOR_BGR2RGB))
                plt.xlabel(str(y_sample[i])+filename_sample[i][-10:])
        plt.show()

In [None]:
checkImgSet("Training Samples", x_train, y_train, filename_train, 10, 5)
checkImgSet("Validation Samples", x_test, y_test, filename_test, 10, 5)

In [None]:
print("before shape : ",x_train.shape, x_test.shape, x_total.shape)
x_train_flat = x_train.reshape(-1,img_row*img_column*img_channel)
x_test_flat = x_test.reshape(-1,img_row*img_column*img_channel)                           
x_total_flat = x_total.reshape(-1,img_row*img_column*img_channel)
print("after flat shape : ",x_train_flat.shape, x_test_flat.shape, x_total_flat.shape)

In [None]:
# Define our own plot function
def scatter(x, labels, num_of_labels, subtitle=None):
    # We choose a color palette with seaborn.
    palette = np.array(sns.color_palette("hls", num_of_labels))

    # We create a scatter plot.
    f = plt.figure(figsize=(8, 8))
    ax = plt.subplot(aspect='equal')
    
    
    sc = ax.scatter(x[:,0], x[:,1], lw=0, s=40,
                    c=palette[labels.astype(np.int)])
    plt.xlim(-25, 25)
    plt.ylim(-25, 25)
    # 그래프 규격선, 여백공간 제거
    ax.axis('off') 
    ax.axis('tight')

    # We add the labels for each digit.
    txts = []
    for i in range(num_of_labels):
        # Add Label with txt type on plotting image
        xtext, ytext = np.median(x[labels == i, :], axis=0)
        txt = ax.text(xtext, ytext, str(i), fontsize=24)
        txt.set_path_effects([
            PathEffects.Stroke(linewidth=5, foreground="w"),
            PathEffects.Normal()])
        txts.append(txt)
        
    if subtitle != None:
        plt.suptitle(subtitle)
        
    plt.savefig(subtitle)

In [None]:
tsne = TSNE()
#train_tsne_embeds = tsne.fit_transform(x_train_flat[])
#scatter(train_tsne_embeds, y_train[:1641], 10, "Training Data Before TNN")

eval_tsne_embeds = tsne.fit_transform(x_test_flat)
print(eval_tsne_embeds)
scatter(eval_tsne_embeds, y_test, 10, "Validation Data Before TNN")

#total_tsne_embeds = tsne.fit_transform(x_total_flat[:1921])
#scatter(total_tsne_embeds, y_total[:1921], 10,"Total Data Before TNN")

In [None]:
def generate_triplet(x,y,ap_pairs=100,an_pairs=100, trainsize=0.7):
    data_xy = tuple([x,y])
    
    triplet_train_pairs = []
    triplet_test_pairs = []
    
    for data_class in sorted(set(data_xy[1])):
        same_class_idx = np.where(data_xy[1] == data_class)[0]
        diff_class_idx = np.where(data_xy[1] != data_class)[0]
        print('same and diff : ', len(same_class_idx), len(diff_class_idx))
        
        ap_pairs = int((len(same_class_idx) * (len(same_class_idx)-1))/2)
        an_pairs = int(len(diff_class_idx))
        
        #print("data_class : ",data_class)
        # num of each: ap_pairs, an_pairs
        A_P_pairs = random.sample(list(permutations(same_class_idx,2)),ap_pairs) #Generating Anchor-Positive pairs
        Neg_idx = random.sample(list(diff_class_idx),an_pairs)
        #print(len(A_P_pairs),len(Neg_idx))
        #total data: ap_pairs*an_pairs
        A_P_len = ap_pairs
        Neg_len = an_pairs
        ng_idx=0
        print('ap' + str(len(A_P_pairs)))
        print('neg' + str(len(Neg_idx)))
        
        
        for ap in A_P_pairs[:int(A_P_len)]:
            Anchor = data_xy[0][ap[0]]
            Positive = data_xy[0][ap[1]]
            Negative = data_xy[0][Neg_idx[ng_idx%Neg_len]]
            if (ng_idx%Neg_len)%4 == 0:
                triplet_test_pairs.append([Anchor,Positive,Negative])
            else: 
                triplet_train_pairs.append([Anchor,Positive,Negative])
            ng_idx+=1
                
    return np.array(triplet_train_pairs), np.array(triplet_test_pairs)

In [None]:
print(len(y_test))

In [None]:

x_train_triplet, x_test_triplet = generate_triplet(x_train,y_train,1600,100)
print(x_train.shape , x_test.shape)
print(x_train_triplet.shape , x_test_triplet.shape)

In [None]:
def create_base_network(in_dims):
    
    model = Sequential()
    model.add(Conv2D(128,(7,7),padding='same',input_shape=(in_dims[0],in_dims[1],in_dims[2],),activation='relu',name='conv1'))
    model.add(MaxPooling2D((2,2),(2,2),padding='same',name='pool1'))
    model.add(Conv2D(256,(10,10),padding='same',activation='relu',name='conv2'))
    model.add(MaxPooling2D((2,2),(2,2),padding='same',name='pool2'))
    model.add(Flatten(name='flatten'))
    model.add(Dense(4,name='embeddings'))
    return model

In [None]:
def triplet_loss(y_true, y_pred, alpha = MARGIN):
    
    print('y_pred.shape = ',y_pred)
    
    total_lenght = y_pred.shape.as_list()[-1]
#     print('total_lenght=',  total_lenght)
#     total_lenght =12
    
    anchor = y_pred[:,0:int(total_lenght*1/3)]
    positive = y_pred[:,int(total_lenght*1/3):int(total_lenght*2/3)]
    negative = y_pred[:,int(total_lenght*2/3):int(total_lenght*3/3)]

    # distance between the anchor and the positive
    pos_dist = K.sum(K.square(anchor-positive),axis=1)

    # distance between the anchor and the negative
    neg_dist = K.sum(K.square(anchor-negative),axis=1)

    # compute loss
    basic_loss = pos_dist-neg_dist+alpha
    loss = K.maximum(basic_loss,0.0)
 
    return loss

In [None]:
#---------------------------------------------------------------------------------------------------
adam_optim = Adam(lr=LEARNING_RATE, beta_1=0.9, beta_2=0.999, epsilon=None)
#---------------------------------------------------------------------------------------------------
input_row=28
input_column=28
input_channel=1
anchor_input = Input((input_row, input_column, input_channel, ), name='anchor_input')
positive_input = Input((input_row, input_column, input_channel, ), name='positive_input')
negative_input = Input((input_row, input_column, input_channel, ), name='negative_input')

Shared_DNN =create_base_network([input_row, input_column, input_channel,])
encoded_anchor = Shared_DNN(anchor_input)
encoded_positive = Shared_DNN(positive_input)
encoded_negative = Shared_DNN(negative_input)


merged_vector = concatenate([encoded_anchor, encoded_positive, encoded_negative], axis=-1, name='merged_layer')

model = Model(inputs=[anchor_input,positive_input, negative_input], outputs=merged_vector)
#---------------------------------------------------------------------------------------------------
model.compile(loss=triplet_loss, optimizer=adam_optim)
model.summary()
#---------------------------------------------------------------------------------------------------
Anchor = x_train_triplet[:,0,:].reshape(-1,input_row, input_column, input_channel)
Positive = x_train_triplet[:,1,:].reshape(-1,input_row, input_column, input_channel)
Negative = x_train_triplet[:,2,:].reshape(-1,input_row, input_column, input_channel)

Anchor_test = x_test_triplet[:,0,:].reshape(-1,input_row, input_column, input_channel)
Positive_test = x_test_triplet[:,1,:].reshape(-1,input_row, input_column, input_channel)
Negative_test = x_test_triplet[:,2,:].reshape(-1,input_row, input_column, input_channel)



In [None]:
trained_model = Model(inputs=anchor_input, outputs=encoded_anchor)
trained_model.load_weights('1.300001-0.1013-27.hdf5')


In [None]:
#x_train_flat = x_train.reshape(-1,img_row*img_column*img_channel)
#x_test_flat = x_test.reshape(-1,img_row*img_column*img_channel)                           
#x_total_flat = x_total.reshape(-1,img_row*img_column*img_channel)
#print(eval_tsne_embeds.shape)
tsne = TSNE()
X_total_trm = trained_model.predict(x_total.reshape(-1,28,28,1))
X_total_trm=X_total_trm.tolist()
print(type(X_total_trm))


#wine_db.append({'id':i[1], 'filename':i[2], 'vector': i[3], 'label':i[4]})


In [None]:
X_total_trm[0]

In [None]:
wine_db = []
for i in range(len(X_total_trm)):
    wine_db.append({'id':i, 'filename': filename_total[i], 'vector': X_total_trm[i], 'label':y_total[i]})
#print(wine_db)

wine_db_pandas = pd.DataFrame(wine_db)
wine_db_pandas.to_csv("wine_db_jj.csv",mode = "w",sep=',')
#print()

#time_pd.to_csv("filename.csv", mode='w')


In [None]:


#df = pd.DataFrame(list_data,index= )

In [None]:
#a = pd.read_csv("wine_db_dj.csv")

In [None]:
#a['vector']