In [None]:
import numpy as np
import cv2
from sklearn.feature_extraction.image import extract_patches
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Flatten, AveragePooling2D, Dropout, Input
from keras import regularizers, optimizers, models, layers, losses, metrics
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
from skimage import img_as_float

In [None]:
def load_data(file_path):
    """Read an image file and return it as a float array scaled by 255.

    The file is read with ``cv2.IMREAD_UNCHANGED`` (no channel or bit-depth
    conversion by OpenCV), converted with ``skimage.img_as_float`` and then
    multiplied by 255.

    Args:
        file_path: path of the image file to read.

    Returns:
        Floating-point ``numpy`` array holding the image data times 255.

    Raises:
        OSError: if OpenCV could not read ``file_path``.
    """
    data = cv2.imread(file_path, cv2.IMREAD_UNCHANGED)
    # cv2.imread signals a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than deep inside
    # img_as_float.
    if data is None:
        raise OSError('could not read image file: ' + repr(file_path))
    return img_as_float(data) * 255


# divide the painting into non-overlapping square patches (channel by channel,
# then re-assemble the channels) and build the matching labels (painter id)
def get_patches(data, patch_size, painter_id):
    """Tile an image into non-overlapping square patches with painter labels.

    Only the first three channels of ``data`` are used. Rows/columns that do
    not fill a complete patch at the bottom/right edge are dropped. Patches
    are ordered row-major over the tile grid (left to right, then top to
    bottom).

    The channels are emitted in reverse order (2, 1, 0). The previous code
    emitted (2, 2, 1): channel 2 was duplicated and channel 0 was silently
    dropped. Reversing presumably converts OpenCV's BGR layout to the RGB
    layout expected by the VGG16 ``preprocess_input`` -- TODO confirm against
    the data.

    Args:
        data: array of shape (height, width, channels) with >= 3 channels.
        patch_size: side length of the square patches, in pixels (int).
        painter_id: numeric class label assigned to every patch.

    Returns:
        patches: array of shape (n_patches, patch_size, patch_size, 3).
        y_list: float array of shape (n_patches, 1) filled with painter_id.
    """
    def tile_channel(channel):
        # Number of complete tiles along each axis; the remainder is cropped.
        n_rows = channel.shape[0] // patch_size
        n_cols = channel.shape[1] // patch_size
        cropped = channel[:n_rows * patch_size, :n_cols * patch_size]
        # (n_rows, p, n_cols, p) -> (n_rows, n_cols, p, p) -> (n_tiles, p, p)
        grid = cropped.reshape(n_rows, patch_size, n_cols, patch_size)
        return grid.swapaxes(1, 2).reshape(-1, patch_size, patch_size)

    pc1 = tile_channel(data[:, :, 0])
    pc2 = tile_channel(data[:, :, 1])
    pc3 = tile_channel(data[:, :, 2])

    # Re-assemble the three channels in reversed order along a new last axis.
    patches = np.concatenate((pc3[..., np.newaxis],
                              pc2[..., np.newaxis],
                              pc1[..., np.newaxis]), axis=3)

    # One label per patch, as a column vector.
    y_list = painter_id * np.ones((len(patches), 1))

    return patches, y_list  # use this when shuffle=False


# prepare the patches for transfer learning: VGG16 preprocessing, i.e.
# subtracting the mean [103.939, 116.779, 123.68]
def preprocess_patches(patch_list):
    """Run the VGG16 ``preprocess_input`` on a batch of patches.

    Args:
        patch_list: batch of image patches accepted by ``preprocess_input``.

    Returns:
        Whatever ``preprocess_input`` returns for ``patch_list``.
    """
    # NOTE(review): preprocess_input may modify a float numpy input in place;
    # callers here never reuse the un-preprocessed patches -- confirm before
    # relying on the input afterwards.
    return preprocess_input(patch_list)

# bring every patch to 224*224
def resize_patches(patch_list):
    """Resize each patch to 224x224 with ``cv2.resize``.

    Args:
        patch_list: sequence (or array) of image patches.

    Returns:
        ``float64`` array stacking the resized patches in input order.
    """
    resized = [cv2.resize(patch, (224, 224)) for patch in patch_list]
    return np.asarray(resized, dtype=np.float64)
    

def process_pipeline(file_path, patch_size, painter_id):
    """Load one painting and turn it into network-ready patches plus labels.

    Chains ``load_data`` -> ``get_patches`` -> ``preprocess_patches`` ->
    ``resize_patches``.

    Args:
        file_path: path of the painting image.
        patch_size: side length of the square patches cut from the painting.
        painter_id: class label attached to every patch of this painting.

    Returns:
        (patches, labels): the resized, preprocessed patches and the labels
        produced by ``get_patches``.
    """
    raw_patches, labels = get_patches(load_data(file_path), patch_size, painter_id)
    network_ready = resize_patches(preprocess_patches(raw_patches))
    return network_ready, labels

In [None]:
# Experiment: train VGG16-based painter classifier on BACKGROUND patches and
# test it on FOREGROUND patches, repeated `foldnum` times per patch size.

#psizes = [200, 224]
psizes = [100]#140,160,180,200,224,250,300,75]
#psizes = [800]
#psizes=[100,140,160,180,200,224,250,300,350,400,500,600,700,800]#, 900, 1000, 1100, 1200]

foldnum = 100       # random train/validation repetitions per patch size
NUM_PAINTERS = 4    # number of classes (painter ids 0..3)

DATA_DIR = '/home/ml/FINAL_CODE_DATA/height_data/'
MASK_PATH = '/home/ml/Gundeep/main/mask_info_with_borders.csv'
direc_path = '/home/ml/Fang/preprocess_vgg16/fbresult/'
filepath = direc_path + "weights.best.hdf5"

# (file name, painter id, rotate by 180 degrees before patching).
# The position in this list is the row of the painting in the mask file.
PAINTINGS = [
    ('fgp1.png', 0, False), ('fgp2.png', 0, False), ('fgp3.png', 0, False),
    ('fgp4.png', 1, False), ('fgp5.png', 1, False), ('fgp6.png', 1, False),
    ('fgp7.png', 2, False), ('fgp8.png', 2, False), ('fgp9.png', 2, False),
    ('fgp10.png', 3, False), ('fgp11.png', 3, True), ('fgp12.png', 3, False),
]

# One row per painting, one column per patch: 1 selects a background patch,
# 0 a foreground patch; any other value is used by neither set.
# Loaded once: it does not depend on the fold.
masks = np.loadtxt(MASK_PATH, delimiter=",")


for patch_size in psizes:

    for fold in range(0, foldnum):
        print('PATCH SIZE: '+repr(patch_size))

        # ---- load every painting and split its patches by mask ------------
        # Data is reloaded per fold (as before) so that only one copy of the
        # patches is alive while the network trains.
        bckg_x, bckg_y = [], []
        frgrd_x = [[] for _ in range(NUM_PAINTERS)]   # grouped per painter
        frgrd_y = [[] for _ in range(NUM_PAINTERS)]

        for mask_row, (file_name, painter_id, rotate_180) in enumerate(PAINTINGS):
            image_path = DATA_DIR + file_name
            if rotate_180:
                image = cv2.rotate(load_data(image_path), cv2.ROTATE_180)
                patches_x, patches_y = get_patches(image, patch_size, painter_id)
                patches_x = resize_patches(preprocess_patches(patches_x))
                del image
            else:
                patches_x, patches_y = process_pipeline(image_path, patch_size, painter_id)

            # Every painting is split with ITS OWN patches and mask row (the
            # earlier copy-pasted version indexed painting 1a with row 1, so
            # painting 1b was never used).
            is_bckg = masks[mask_row, :] == 1
            is_frgrd = masks[mask_row, :] == 0

            bckg_x.append(patches_x[is_bckg])
            bckg_y.append(patches_y[is_bckg])
            frgrd_x[painter_id].append(patches_x[is_frgrd])
            frgrd_y[painter_id].append(patches_y[is_frgrd])

            del patches_x, patches_y

        # ---- training / validation data: background patches ---------------
        x_train_val = np.concatenate(bckg_x)
        y_train_val = np.concatenate(bckg_y)
        del bckg_x, bckg_y

        x_train, x_val, y_train, y_val = train_test_split(x_train_val, y_train_val, test_size=0.1)
        del (x_train_val, y_train_val)

        # ---- test data: foreground patches, one array per painter ---------
        # (to train on foreground and test on background instead, swap the
        # roles of the bckg_* and frgrd_* collections above)
        test_x = [np.concatenate(parts) for parts in frgrd_x]
        y_test = np.concatenate([part for parts in frgrd_y for part in parts])
        del frgrd_x, frgrd_y

        # one-hot encode y; the width is fixed so that a split lacking the
        # highest painter id still matches the 4-way softmax
        y_train = to_categorical(y_train, num_classes=NUM_PAINTERS)
        y_val = to_categorical(y_val, num_classes=NUM_PAINTERS)
        y_test = to_categorical(y_test, num_classes=NUM_PAINTERS)

        # ---- model: frozen VGG16 base + small classification head ---------
        baseModel = VGG16(weights="imagenet", include_top=False,input_tensor=Input(shape=(224, 224, 3)))

        model = models.Sequential()
        model.add(baseModel)
        model.add(layers.AveragePooling2D(pool_size=(3, 3)))
        model.add(layers.Flatten())
        model.add(layers.Dropout(0.25))
        model.add(layers.Dense(64, activation='relu',kernel_regularizer=regularizers.l2(0.001)))
        model.add(layers.Dropout(0.25))
        model.add(layers.Dense(NUM_PAINTERS, activation="softmax"))

        for layer in baseModel.layers[:]:
            layer.trainable = False

        model.compile(optimizer=optimizers.Adam(lr = 0.001), loss='categorical_crossentropy', metrics=['accuracy'])

        # NOTE(review): the same checkpoint object serves both training
        # stages; presumably its best score carries over, so stage 2 only
        # overwrites the file when it beats stage 1 -- confirm this is wanted.
        checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
        callbacks_list = [checkpoint]

        # Stage 1: short training of ONLY the top layers
        #... so the conv_base weights will not be destroyed by the random initialization of the new weights
        model.fit(x_train, y_train, epochs=25, batch_size=32, validation_data=(x_val,y_val),shuffle=True,callbacks=callbacks_list, verbose=1)
        # load the best top model
        model.load_weights(filepath)

        # Stage 2: make the last block of the conv_base trainable
        for layer in baseModel.layers[:11]:
            layer.trainable = False
        for layer in baseModel.layers[11:]:
            layer.trainable = True

        # Compile frozen conv_base + UNfrozen top block + my top layer ... slower learning rate
        model.compile(optimizer=optimizers.Adam(lr = 0.0001),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        # train the unfrozen model, then restore the best checkpoint
        model.fit(x_train, y_train, epochs=25, batch_size=32, validation_data=(x_val,y_val),shuffle=True,callbacks=callbacks_list,verbose=1)
        model.load_weights(filepath)

        del (baseModel, checkpoint, callbacks_list)

        # ---- evaluation ---------------------------------------------------
        # Predict each painter's test patches once; the per-painter results
        # feed the heat-map files and, concatenated, the confusion matrix.
        predictions = [model.predict(painter_x) for painter_x in test_x]
        del test_x

        y_pred = np.concatenate(predictions)
        ypred = np.argmax(y_pred, axis=1)
        ytest = np.argmax(y_test, axis=1)

        # Fixing `labels` keeps the matrix NUM_PAINTERS x NUM_PAINTERS even
        # when a class never occurs in this fold.
        cm = confusion_matrix(ytest, ypred, labels=list(range(NUM_PAINTERS)))
        ps_cm = np.insert(cm.flatten(), 0, patch_size).reshape(1, -1)

        test_accuracy = (np.trace(cm))/len(ytest)

        print('RESULT: '+repr(patch_size)+', '+repr(test_accuracy)+'\n')
        with open(direc_path+"acc_height_back_train_fore_test.csv", "a") as myfile:
            myfile.write(repr(patch_size)+', '+repr(test_accuracy)+'\n')

        # One heat-map file per painter: patch index followed by the class
        # probabilities. The file name carries the actual patch size.
        for painter_idx, painter_pred in enumerate(predictions):
            patch_index = np.arange(len(painter_pred)).reshape(-1, 1)
            indexed_pred = np.concatenate([patch_index, painter_pred], axis=1)
            heatmap_name = 'heapmap_height_back_train_fore_test_p%d_ps%d.csv' % (painter_idx + 1, patch_size)
            with open(direc_path + heatmap_name, 'a') as f:
                np.savetxt(f, indexed_pred, fmt='%s')
            del patch_index, indexed_pred

        with open(direc_path+'cm_height_back_train_fore_testt.csv','a') as f:
            np.savetxt(f, ps_cm, fmt='%s')

        # Release the large arrays before the next fold reloads the data.
        # NOTE(review): Keras may still accumulate backend state across the
        # 100 model builds; consider keras.backend.clear_session() here.
        del (predictions, x_train, x_val, y_train, y_val, y_test, model, y_pred)

        
        