In [1]:
import numpy as np
import cv2
from sklearn.feature_extraction.image import extract_patches
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Activation, Flatten, AveragePooling2D, Dropout, Input
from tensorflow.keras import regularizers, optimizers, models, layers, losses, metrics
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
from skimage import img_as_float
import os

In [2]:
# load data
def load_data(file_path):
    """Read an image with OpenCV and rescale its values by 255/65535.

    Parameters
    ----------
    file_path : str
        Path handed to ``cv2.imread`` (read with ``cv2.IMREAD_UNCHANGED``,
        i.e. without any depth or channel conversion).

    Returns
    -------
    numpy.ndarray
        Floating-point array holding ``pixel / 65535 * 255`` for every value.
        The divisor assumes 16-bit source images -- TODO confirm against the
        actual files.

    Raises
    ------
    OSError
        If OpenCV could not read the file. ``cv2.imread`` signals a missing
        or undecodable file by returning ``None`` instead of raising, which
        would otherwise surface as an unrelated ``TypeError`` below.
    """
    data = cv2.imread(file_path, cv2.IMREAD_UNCHANGED)
    if data is None:
        raise OSError("cv2.imread could not read image file: %r" % (file_path,))
    # Map the 0..65535 range onto 0..255 (result is floating point).
    data = (data/65535)*255
    return data


# Cut a painting into non-overlapping square patches (first three channels)
# and build the matching column of labels (painter id), one per patch.
def get_patches(data, patch_size, painter_id):
    """Split an image into a grid of ``patch_size`` x ``patch_size`` patches.

    The image is tiled row by row, left to right, with a step equal to the
    patch size, so patches never overlap. Rows/columns that do not fill a
    complete patch at the bottom/right edge are dropped. Only channels 0, 1
    and 2 are used; any further channel is ignored.

    The tiling is done with a plain NumPy reshape/transpose, so the function
    does not depend on scikit-learn's ``extract_patches`` helper.

    Parameters
    ----------
    data : array-like, shape (H, W, C) with C >= 3
        Image to cut up.
    patch_size : int
        Side length of the square patches, in pixels.
    painter_id : int
        Label assigned to every patch of this image.

    Returns
    -------
    patches : numpy.ndarray, shape (n_patches, patch_size, patch_size, 3)
        Freshly allocated array (never a view of ``data``), same dtype as
        ``data``.
    y_list : numpy.ndarray, shape (n_patches, 1), dtype float64
        ``painter_id`` repeated once per patch.

    Raises
    ------
    ValueError
        If ``data`` is not a 3-D array with at least three channels.
    """
    data = np.asarray(data)
    if data.ndim != 3 or data.shape[2] < 3:
        raise ValueError(
            "expected an image of shape (H, W, C) with C >= 3, got shape %r"
            % (data.shape,)
        )

    n_rows = data.shape[0] // patch_size
    n_cols = data.shape[1] // patch_size

    # Crop to a whole number of patches, then view the image as a
    # (row, y, col, x, channel) grid and move the two grid axes to the front.
    grid = data[:n_rows * patch_size, :n_cols * patch_size, :3]
    grid = grid.reshape(n_rows, patch_size, n_cols, patch_size, 3)
    # .copy() guarantees the result owns its memory, so later in-place
    # processing of the patches can never write through to `data`.
    patches = grid.transpose(0, 2, 1, 3, 4).copy()
    patches = patches.reshape(n_rows * n_cols, patch_size, patch_size, 3)

    # One float label per patch, as a column vector.
    y_list = np.full((len(patches), 1), painter_id, dtype=np.float64)

    return patches, y_list  # use this when shuffle=False


# Prepare the patches for transfer learning with the VGG16 input preprocessing
# (per the original note: subtracts the mean [103.939, 116.779, 123.68]).
def preprocess_patches(patch_list):
    """Run ``preprocess_input`` (imported from the Keras VGG16 module) on a batch.

    Parameters
    ----------
    patch_list : array of patches
        Passed to ``preprocess_input`` unchanged.

    Returns
    -------
    Whatever ``preprocess_input`` returns for ``patch_list``.

    NOTE(review): Keras documents the VGG16 preprocessing as taking RGB input
    and converting it to BGR, while images read with ``cv2.imread`` are
    normally already BGR -- confirm the channel order is the intended one.
    """
    return preprocess_input(patch_list)

# Bring every patch to the 224*224 size used for the network input.
def resize_patches(patch_list):
    """Resize each patch to 224 x 224 with ``cv2.resize``.

    Parameters
    ----------
    patch_list : sequence of images
        Each element is passed to ``cv2.resize`` on its own.

    Returns
    -------
    numpy.ndarray
        The resized patches stacked into one ``float64`` array, in the same
        order as the input.
    """
    target_size = (224, 224)
    resized = [cv2.resize(patch, target_size) for patch in patch_list]
    return np.asarray(resized, dtype=np.float64)
    
# Chain all the previous steps together for one image file.
def process_pipeline(file_path, patch_size, painter_id):
    """Load one image and turn it into network-ready patches plus labels.

    Steps, in order: ``load_data`` -> ``get_patches`` -> ``preprocess_patches``
    -> ``resize_patches``.

    Parameters
    ----------
    file_path : str
        Image file passed to ``load_data``.
    patch_size : int
        Patch side length passed to ``get_patches``.
    painter_id : int
        Label passed to ``get_patches``.

    Returns
    -------
    tuple
        ``(resized_patches, labels)`` -- the output of ``resize_patches`` and
        the labels returned by ``get_patches``.
    """
    image = load_data(file_path)
    raw_patches, labels = get_patches(image, patch_size, painter_id)
    return resize_patches(preprocess_patches(raw_patches)), labels

In [3]:

# ---------------------------------------------------------------------------
# Painter classification experiment (VGG16 transfer learning on image patches)
#
# Twelve paintings by four painters (three each: a, b, c). Paintings a and c
# of every painter are used for training/validation, painting b for testing.
# For every fold a new random 90/10 train/validation split is drawn, the new
# top layers are trained on a frozen VGG16 base, the upper VGG16 layers are
# then fine-tuned, and the results are appended to CSV files.
# ---------------------------------------------------------------------------

NUM_CLASSES = 4          # painter ids 0..3
IMFS_PER_PAINTING = 5    # files ..._imf_1.png to ..._imf_5.png


def _imf_files(painting_no):
    """Return the five IMF file names of painting number ``painting_no``."""
    return ['emd_painting_%d_imf_%d.png' % (painting_no, imf_no)
            for imf_no in range(1, IMFS_PER_PAINTING + 1)]


def _build_model():
    """Build a fully frozen ImageNet VGG16 base topped with a new softmax head.

    Returns ``(baseModel, model)``; ``model`` still has to be compiled.
    """
    base = VGG16(weights="imagenet", include_top=False, input_tensor=Input(shape=(224, 224, 3)))

    net = models.Sequential()
    net.add(base)
    net.add(layers.AveragePooling2D(pool_size=(3, 3)))
    net.add(layers.Flatten())
    net.add(layers.Dropout(0.25))
    net.add(layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    net.add(layers.Dropout(0.25))
    net.add(layers.Dense(NUM_CLASSES, activation="softmax"))

    for layer in base.layers:
        layer.trainable = False
    return base, net


def _compile(net, learning_rate):
    """Compile ``net`` with Adam, categorical cross-entropy and accuracy."""
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    net.compile(optimizer=optimizers.Adam(learning_rate=learning_rate),
                loss='categorical_crossentropy',
                metrics=['accuracy'])


def _tagged_row(patch_size, values):
    """Return a 1 x (len(values)+1) float row: the patch size, then ``values``."""
    return np.asarray([[patch_size] + list(values)], dtype=np.float64)


def _append_rows(file_name, rows):
    """Append ``rows`` to ``file_name`` in the text format used by all result files."""
    with open(file_name, 'a') as f:
        np.savetxt(f, rows, fmt='%s')


# File names, one list per painting: p<painter><a|b|c>, paintings numbered 1..12.
p1a_imf_arr = _imf_files(1)
p1b_imf_arr = _imf_files(2)
p1c_imf_arr = _imf_files(3)
p2a_imf_arr = _imf_files(4)
p2b_imf_arr = _imf_files(5)
p2c_imf_arr = _imf_files(6)
p3a_imf_arr = _imf_files(7)
p3b_imf_arr = _imf_files(8)
p3c_imf_arr = _imf_files(9)
p4a_imf_arr = _imf_files(10)
p4b_imf_arr = _imf_files(11)
p4c_imf_arr = _imf_files(12)


for imf_i in [3]: #range(len(p1a_imf_arr)): # loop through imfs, imf_i = 0-4 representing imf1-5

    psizes = [200] #patch size in pixels

    for patch_size in psizes:
        print('PATCH SIZE: '+repr(patch_size))
        # Common tail of every result file name of this run.
        run_tag = '_ps'+repr(patch_size)+'_emd_mike_individual_imf'+repr(imf_i+1)

        # get a list of patches (x) with corresponding painter id (y) for all 12 paintings
        p1a_x, p1a_y = process_pipeline(p1a_imf_arr[imf_i], patch_size, 0)
        p1b_x, p1b_y = process_pipeline(p1b_imf_arr[imf_i], patch_size, 0)
        p1c_x, p1c_y = process_pipeline(p1c_imf_arr[imf_i], patch_size, 0)
        p2a_x, p2a_y = process_pipeline(p2a_imf_arr[imf_i], patch_size, 1)
        p2b_x, p2b_y = process_pipeline(p2b_imf_arr[imf_i], patch_size, 1)
        p2c_x, p2c_y = process_pipeline(p2c_imf_arr[imf_i], patch_size, 1)
        p3a_x, p3a_y = process_pipeline(p3a_imf_arr[imf_i], patch_size, 2)
        p3b_x, p3b_y = process_pipeline(p3b_imf_arr[imf_i], patch_size, 2)
        p3c_x, p3c_y = process_pipeline(p3c_imf_arr[imf_i], patch_size, 2)
        p4a_x, p4a_y = process_pipeline(p4a_imf_arr[imf_i], patch_size, 3)
        p4c_x, p4c_y = process_pipeline(p4c_imf_arr[imf_i], patch_size, 3)
        # this painting is upside down so it is rotated before being cut into patches
        p4_b = cv2.rotate(load_data(p4b_imf_arr[imf_i]), cv2.ROTATE_180)
        p4b_x, p4b_y = get_patches(p4_b, patch_size, 3)
        p4b_x = resize_patches(preprocess_patches(p4b_x))

        x_train_val = np.concatenate((p1a_x, p1c_x,
                                      p2a_x, p2c_x,
                                      p3a_x, p3c_x,
                                      p4a_x, p4c_x))
        y_train_val = np.concatenate((p1a_y, p1c_y,
                                      p2a_y, p2c_y,
                                      p3a_y, p3c_y,
                                      p4a_y, p4c_y))
        # The per-painting training arrays are no longer needed; free the memory.
        del(p1a_x,p1a_y,p2a_x,p2a_y,p3a_x,p3a_y,p4a_x,p4a_y)
        del(p1c_x,p1c_y,p2c_x,p2c_y,p3c_x,p3c_y,p4c_x,p4c_y)

        # The test set does not change between folds, so build it once.
        test_sets = [('p1', p1b_x), ('p2', p2b_x), ('p3', p3b_x), ('p4', p4b_x)]
        x_test = np.concatenate((p1b_x, p2b_x, p3b_x, p4b_x))
        y_test = np.concatenate((p1b_y, p2b_y, p3b_y, p4b_y))
        y_test = to_categorical(y_test, num_classes=NUM_CLASSES)
        ytest = np.argmax(y_test, axis=1)

        foldnum=20

        for fold in range(0, foldnum):
            print('PATCH SIZE: '+repr(patch_size))

            # Unseeded on purpose: every fold draws a different random split.
            x_train,x_val,y_train,y_val = train_test_split(x_train_val, y_train_val, test_size=0.1)

            # one-hot encode y; the class count is fixed so that a split which
            # happens to miss the highest label still yields NUM_CLASSES columns
            y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
            y_val = to_categorical(y_val, num_classes=NUM_CLASSES)

            baseModel, model = _build_model()
            _compile(model, 0.001)

            filepath= "weights.best.hdf5"
            checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
            callbacks_list = [checkpoint]

            try:
                # train ONLY top layers
                history = model.fit(x_train, y_train, epochs=25, batch_size=32, validation_data=(x_val,y_val),shuffle=True,callbacks=callbacks_list, verbose=2)

                #load the best top model
                model.load_weights(filepath)

                # Keep the first 11 layers of the baseModel frozen, unlock the rest
                for layer in baseModel.layers[:11]:
                    layer.trainable = False
                for layer in baseModel.layers[11:]:
                    layer.trainable = True

                # Recompile so the new trainable flags take effect,
                # and fine-tune with a slower learning rate
                _compile(model, 0.0001)
                history = model.fit(x_train, y_train, epochs=25, batch_size=32, validation_data=(x_val,y_val),shuffle=True,callbacks=callbacks_list,verbose=2)

                # Original author's note: a "value error" occurs when the frozen
                # network performed better than the network with layers unlocked.
                # NOTE(review): presumably raised by this load_weights call when
                # the checkpoint was last written during the first stage -- confirm.
                # Such a fold is skipped (see the except clause below).
                model.load_weights(filepath)

                y_pred = model.predict(x_test)
                ypred = np.argmax(y_pred, axis=1)

                # resulting confusion matrix, flattened and prefixed with the patch size
                cm = confusion_matrix(ytest, ypred, labels=list(range(NUM_CLASSES)))
                ps_cm = np.insert(cm.flatten(), 0, patch_size).reshape(1, -1)

                # plain Python float so that repr() is just the number
                test_accuracy = float(np.trace(cm))/len(ytest)
                print('RESULT: '+repr(patch_size)+', '+repr(test_accuracy)+'\n')

                # Per-painting class probabilities, each row prefixed with the
                # index of the patch inside its own painting.
                heatmaps = []
                for name, test_patches in test_sets:
                    scores = model.predict(test_patches)
                    patch_idx = np.arange(len(scores)).reshape(-1, 1)
                    heatmaps.append(('heapmap_' + name, np.concatenate([patch_idx, scores], axis=1)))

                report = classification_report(ytest, ypred,output_dict=True)

                # Collect everything first so that a failure above leaves no
                # half-written set of result files for this fold.
                pending = []
                for label in range(NUM_CLASSES):
                    f1_row = _tagged_row(patch_size, [report[str(label)]['f1-score']])
                    pending.append(('p%d_report' % (label + 1), f1_row))
                overall = _tagged_row(patch_size, [report['accuracy'],
                                                   report['macro avg']['f1-score'],
                                                   report['weighted avg']['f1-score']])
                pending.append(('overall_report', overall))
                pending.extend(heatmaps)
                pending.append(('cm_height_vgg16', ps_cm))

                for stem, rows in pending:
                    _append_rows(stem + run_tag + '.csv', rows)

                with open("accuracy_vgg16" + run_tag + '.csv', "a") as myfile:
                    myfile.write(repr(patch_size)+','+repr(test_accuracy)+'\n')

                del(heatmaps,pending,model,ps_cm,cm,history)

            except Exception as err:
                # Best effort: a failing fold must not stop the remaining folds,
                # but it is reported instead of being dropped silently.
                print('FOLD '+repr(fold)+' SKIPPED: '+repr(err))


PATCH SIZE: 200




PATCH SIZE: 200
Train on 1296 samples, validate on 144 samples
Epoch 1/25

Epoch 00001: val_accuracy improved from -inf to 0.48611, saving model to weights.best.hdf5
1296/1296 - 19s - loss: 1.7954 - accuracy: 0.4005 - val_loss: 1.2740 - val_accuracy: 0.4861
Epoch 2/25

Epoch 00002: val_accuracy improved from 0.48611 to 0.52083, saving model to weights.best.hdf5
1296/1296 - 12s - loss: 1.2314 - accuracy: 0.5008 - val_loss: 1.1648 - val_accuracy: 0.5208
Epoch 3/25

Epoch 00003: val_accuracy improved from 0.52083 to 0.59028, saving model to weights.best.hdf5
1296/1296 - 12s - loss: 1.1326 - accuracy: 0.5579 - val_loss: 1.0726 - val_accuracy: 0.5903
Epoch 4/25

Epoch 00004: val_accuracy did not improve from 0.59028
1296/1296 - 12s - loss: 1.0261 - accuracy: 0.5995 - val_loss: 1.0139 - val_accuracy: 0.5764
Epoch 5/25

Epoch 00005: val_accuracy did not improve from 0.59028
1296/1296 - 12s - loss: 0.9576 - accuracy: 0.6404 - val_loss: 1.0350 - val_accuracy: 0.5833
Epoch 6/25

Epoch 00006: val

Epoch 25/25

Epoch 00025: val_accuracy did not improve from 0.68750
1296/1296 - 17s - loss: 0.1335 - accuracy: 0.9745 - val_loss: 1.7258 - val_accuracy: 0.6181
RESULT: 200, 0.5208333333333334

PATCH SIZE: 200
Train on 1296 samples, validate on 144 samples
Epoch 1/25

Epoch 00001: val_accuracy improved from -inf to 0.40278, saving model to weights.best.hdf5
1296/1296 - 13s - loss: 1.9367 - accuracy: 0.3565 - val_loss: 1.3881 - val_accuracy: 0.4028
Epoch 2/25

Epoch 00002: val_accuracy improved from 0.40278 to 0.56944, saving model to weights.best.hdf5
1296/1296 - 12s - loss: 1.3069 - accuracy: 0.4468 - val_loss: 1.1513 - val_accuracy: 0.5694
Epoch 3/25

Epoch 00003: val_accuracy improved from 0.56944 to 0.58333, saving model to weights.best.hdf5
1296/1296 - 12s - loss: 1.1218 - accuracy: 0.5486 - val_loss: 1.1008 - val_accuracy: 0.5833
Epoch 4/25

Epoch 00004: val_accuracy did not improve from 0.58333
1296/1296 - 12s - loss: 1.0136 - accuracy: 0.6049 - val_loss: 1.0728 - val_accuracy: 0

Epoch 24/25

Epoch 00024: val_accuracy did not improve from 0.74306
1296/1296 - 17s - loss: 0.1530 - accuracy: 0.9522 - val_loss: 2.2128 - val_accuracy: 0.6042
Epoch 25/25

Epoch 00025: val_accuracy did not improve from 0.74306
1296/1296 - 17s - loss: 0.1776 - accuracy: 0.9576 - val_loss: 1.7481 - val_accuracy: 0.6736
RESULT: 200, 0.49722222222222223



In [None]:

# Show the test accuracy of the last fold that completed in the training cell
# above (the variable only exists once that cell has been run).
test_accuracy 
