In [None]:
import numpy as np
import cv2
from sklearn.feature_extraction.image import extract_patches
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Flatten, AveragePooling2D, Dropout, Input
from keras import regularizers, optimizers, models, layers, losses, metrics
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
from skimage import img_as_float

In [None]:
## Load an image file without any changes (cv2.IMREAD_UNCHANGED) and return it as floats.
def load_data(file_path):
    """Read an image from disk and return it as a floating-point array scaled by 255.

    Parameters
    ----------
    file_path : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        ``img_as_float(image) * 255``. skimage's ``img_as_float`` rescales integer
        images to [0, 1], so an 8-bit image ends up back on a 0-255 float scale.

    Raises
    ------
    OSError
        If OpenCV cannot read the file (missing path or undecodable content).
    """
    data = cv2.imread(file_path, cv2.IMREAD_UNCHANGED)
    if data is None:
        # cv2.imread does not raise on failure; it returns None, which would otherwise
        # surface later as an unrelated error inside img_as_float.
        raise OSError('could not read image file: ' + repr(file_path))
    return img_as_float(data) * 255

# Cut a painting into non-overlapping square patches (first three channels) and build the
# matching column of labels (painter id).
def get_patches(data, patch_size, painter_id):
    """Split an image into non-overlapping ``patch_size`` x ``patch_size`` patches.

    Parameters
    ----------
    data : numpy.ndarray
        Image of shape (height, width, channels) with at least three channels; only
        channels 0, 1 and 2 are used.
    patch_size : int
        Side length of each square patch, also used as the step between patches.
        Rows/columns at the bottom/right edge that do not fill a whole patch are dropped.
    painter_id : int
        Class label assigned to every patch of this image.

    Returns
    -------
    patches : numpy.ndarray
        Newly allocated array of shape (n_patches, patch_size, patch_size, 3) with the
        dtype of ``data``. Patches are ordered row by row over the patch grid.
    y_list : numpy.ndarray
        float64 array of shape (n_patches, 1) filled with ``painter_id``.
        Labels line up with ``patches`` as long as the two are not shuffled separately.

    Raises
    ------
    ValueError
        If ``data`` is not a 3-D array with at least three channels, or if
        ``patch_size`` is not positive.
    """
    if data.ndim != 3 or data.shape[2] < 3:
        raise ValueError('expected an image of shape (height, width, >=3 channels), got shape '
                         + repr(data.shape))
    if patch_size <= 0:
        raise ValueError('patch_size must be positive, got ' + repr(patch_size))

    n_rows = data.shape[0] // patch_size
    n_cols = data.shape[1] // patch_size

    # Drop the partial border, then split both spatial axes into (grid index, offset in patch).
    cropped = data[:n_rows * patch_size, :n_cols * patch_size, :3]
    grid = cropped.reshape(n_rows, patch_size, n_cols, patch_size, 3)

    # Copy into a fresh buffer so the result never aliases `data`: later pipeline steps
    # may modify the patches in place.
    patches = np.empty((n_rows, n_cols, patch_size, patch_size, 3), dtype=data.dtype)
    patches[...] = grid.transpose(0, 2, 1, 3, 4)
    patches = patches.reshape(n_rows * n_cols, patch_size, patch_size, 3)

    # One label per patch, as a float64 column vector.
    y_list = painter_id * np.ones((len(patches), 1))

    return patches, y_list


# Prepare patches for transfer learning with Keras' VGG16 preprocessing, which subtracts
# the per-channel mean [103.939, 116.779, 123.68].
def preprocess_patches(patch_list):
    """Run the VGG16 ``preprocess_input`` on a batch of patches and return the result."""
    return preprocess_input(patch_list)

# Bring every patch to 224*224 and stack the results into one float64 array.
def resize_patches(patch_list):
    """Resize each patch in ``patch_list`` to 224x224 with ``cv2.resize``.

    Returns the resized patches, in their original order, as a single
    ``numpy.float64`` array.
    """
    target_size = (224, 224)
    resized = [cv2.resize(patch_list[idx], target_size) for idx in range(len(patch_list))]
    return np.asarray(resized, dtype=np.float64)
    

def process_pipeline(file_path, patch_size, painter_id):
    """Turn one painting file into resized, preprocessed patches plus their labels.

    Steps, in order: load the image, cut it into ``patch_size`` patches labelled with
    ``painter_id``, apply the VGG16 preprocessing, then resize the patches.

    Returns
    -------
    (resized_patches, labels)
    """
    image = load_data(file_path)
    raw_patches, labels = get_patches(image, patch_size, painter_id)
    return resize_patches(preprocess_patches(raw_patches)), labels


In [None]:
# Patch sizes (in pixels) to evaluate; each one gets its own data preparation and training folds.
psizes = [300]

# Number of painters (classes): the width of the one-hot labels and of the softmax layer.
num_painters = 4


def _with_patch_index(predictions):
    """Return `predictions` with a leading column holding each row's index (0..n-1)."""
    patch_idx = np.arange(len(predictions)).reshape(len(predictions), 1)
    return np.concatenate([patch_idx, predictions], axis=1)


for patch_size in psizes:
    print('PATCH SIZE: '+repr(patch_size))

    # Three paintings (a, b, c) per painter; the last argument is the painter's class label.
    p1a_x, p1a_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp1.png', patch_size, 0)
    p1b_x, p1b_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp2.png', patch_size, 0)
    p1c_x, p1c_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp3.png', patch_size, 0)
    p2a_x, p2a_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp4.png', patch_size, 1)
    p2b_x, p2b_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp5.png', patch_size, 1)
    p2c_x, p2c_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp6.png', patch_size, 1)
    p3a_x, p3a_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp7.png', patch_size, 2)
    p3b_x, p3b_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp8.png', patch_size, 2)
    p3c_x, p3c_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp9.png', patch_size, 2)
    p4a_x, p4a_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp10.png', patch_size, 3)
    p4c_x, p4c_y = process_pipeline('/home/ml/FINAL_CODE_DATA/height_data/fgp12.png', patch_size, 3)

    # Painting 4b is rotated by 180 degrees before patching, so it cannot go through
    # process_pipeline; every other step is the same.
    p4_b = load_data('/home/ml/FINAL_CODE_DATA/height_data/fgp11.png')
    p4_b = cv2.rotate(p4_b, cv2.ROTATE_180)
    p4b_x, p4b_y = get_patches(p4_b, patch_size, 3)
    p4b_x = resize_patches(preprocess_patches(p4b_x))
    del p4_b

    # Paintings a and c of every painter form the training+validation pool;
    # painting b of every painter is held out as the test set.
    x_train_val = np.concatenate((p1a_x, p1c_x,
                                  p2a_x, p2c_x,
                                  p3a_x, p3c_x,
                                  p4a_x, p4c_x))
    y_train_val = np.concatenate((p1a_y, p1c_y,
                                  p2a_y, p2c_y,
                                  p3a_y, p3c_y,
                                  p4a_y, p4c_y))
    # Free the per-painting arrays now that they are merged.
    del(p1a_x,p1a_y,p2a_x,p2a_y,p3a_x,p3a_y,p4a_x,p4a_y)
    del(p1c_x,p1c_y,p2c_x,p2c_y,p3c_x,p3c_y,p4c_x,p4c_y)

    # Number of independent random train/validation splits to train and evaluate.
    foldnum=20

    for fold in range(0, foldnum):

        # Random 90/10 split of the A&C patches into training and validation data.
        x_train,x_val,y_train,y_val = train_test_split(x_train_val, y_train_val, test_size=0.1)

        x_test = np.concatenate((p1b_x, p2b_x, p3b_x, p4b_x))

        # One-hot encode y. The width is pinned to num_painters: with num_classes=None it is
        # inferred from the largest label present, so a split without the highest label
        # would produce fewer columns than the softmax layer has units.
        y_train = to_categorical(y_train, num_classes=num_painters)
        y_val = to_categorical(y_val, num_classes=num_painters)

        y_test = np.concatenate((p1b_y, p2b_y, p3b_y, p4b_y))
        y_test = to_categorical(y_test, num_classes=num_painters)

        # Not used below any more (each prediction table builds its own index column);
        # kept because the manual rerun cell further down reads this name.
        test_idx = np.arange(len(p1b_x)).reshape(len(p1b_x),1)

        #################
        # A fold can fail with a ValueError when the frozen network performs better than the
        # network with layers unlocked. Presumably the checkpoint file then still holds
        # weights saved while the whole base was frozen -- TODO confirm.
        # Such a fold is reported and skipped; to redo it by hand, run the rest of the code
        # in a separate cell.
        #################

        baseModel = VGG16(weights="imagenet", include_top=False,input_tensor=Input(shape=(224, 224, 3)))

        # VGG16 convolutional base followed by a small classification head.
        model = models.Sequential()
        model.add(baseModel)
        model.add(layers.AveragePooling2D(pool_size=(3, 3)))
        model.add(layers.Flatten())
        model.add(layers.Dropout(0.25))
        model.add(layers.Dense(64, activation='relu',kernel_regularizer=regularizers.l2(0.001)))
        model.add(layers.Dropout(0.25))
        model.add(layers.Dense(num_painters, activation="softmax"))

        try:
            # Phase 1: freeze the whole base and train only the new head.
            for layer in baseModel.layers[:]:
                layer.trainable = False

            model.compile(optimizer=optimizers.Adam(lr = 0.001), loss='categorical_crossentropy', metrics=['accuracy'])

            # Keep only the weights with the best validation accuracy seen so far.
            filepath="weights.best.hdf5"
            checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
            callbacks_list = [checkpoint]

            # training only top layers
            history = model.fit(x_train, y_train, epochs=25, batch_size=32, validation_data=(x_val,y_val),shuffle=True,callbacks=callbacks_list, verbose=2)
            # load the best top model
            model.load_weights(filepath)

            # Phase 2: keep base layers 0-10 frozen and unfreeze everything from index 11 on.
            for layer in baseModel.layers[:11]:
                layer.trainable = False
            for layer in baseModel.layers[11:]:
                layer.trainable = True

            # Recompile so the new trainable flags take effect; use a slower learning rate.
            model.compile(optimizer=optimizers.Adam(lr = 0.0001),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

            # train the partly unfrozen model (same checkpoint callback, same file)
            history = model.fit(x_train, y_train, epochs=25, batch_size=32, validation_data=(x_val,y_val),shuffle=True,callbacks=callbacks_list,verbose=2)

            model.load_weights(filepath)

            model.compile(optimizer=optimizers.Adam(lr = 0.0001), loss='categorical_crossentropy',
                          metrics=['accuracy'])

            # Evaluate on the held-out B paintings.
            y_pred = model.predict(x_test)
            ypred = np.argmax(y_pred, axis=1)
            ytest = np.argmax(y_test, axis=1)

            # One CSV row per fold: patch size followed by the 16 confusion-matrix cells.
            cm = confusion_matrix(ytest, ypred)
            cm_flatten = cm.flatten()
            ps_cm = np.insert(cm_flatten,0,patch_size)
            ps_cm = ps_cm.reshape(1,17)

            test_accuracy = (np.trace(cm))/len(ytest)
            print('RESULT: '+repr(patch_size)+', '+repr(test_accuracy)+'\n')

            # Per-patch class probabilities for each test painting, one file per painter.
            # The index column is sized per painting because paintings of different
            # dimensions yield different numbers of patches.
            for painter, painter_x in enumerate((p1b_x, p2b_x, p3b_x, p4b_x), start=1):
                indexed_predictions = _with_patch_index(model.predict(painter_x))
                with open('height_p'+str(painter)+'_ps300.csv','a') as f:
                    np.savetxt(f, indexed_predictions, fmt='%s')

            with open('height_cm_ps300.csv','a') as f:
                np.savetxt(f, ps_cm, fmt='%s')

            with open("height_accuracy_ps300.csv", "a") as myfile:
                myfile.write(repr(patch_size)+','+repr(test_accuracy)+'\n')

            # Per-painter F1 score and overall accuracy, each stored as a
            # (patch size, score) row.
            report = classification_report(ytest, ypred,output_dict=True)
            for painter in range(num_painters):
                f1_row = np.asarray([report[str(painter)]['f1-score']])
                f1_row = np.insert(f1_row,0,patch_size).reshape(1,2)
                with open('height_p'+str(painter+1)+'_f1_ps300.csv','a') as f:
                    np.savetxt(f, f1_row, fmt='%s')

            overall = np.asarray([report['accuracy']])
            overall = np.insert(overall,0,patch_size)
            overall = overall.reshape(1,2)
            with open('height_overall_ps300.csv','a') as f:
                np.savetxt(f, overall, fmt='%s')

            # Release the per-fold objects before the next fold.
            del(x_train,x_val,x_test,y_train,y_val,y_test,test_idx,model,ps_cm,cm,history)
            del(y_pred,ypred,ytest,test_accuracy,filepath)
            del(overall)
        except Exception as exc:
            # Best effort: a failed fold must not abort the remaining folds, but it has to
            # be visible. Catching Exception (not a bare except) keeps Ctrl-C working.
            print('FOLD '+repr(fold)+' FAILED: '+repr(exc)+'\n')

In [None]:
# Manual rerun of one training/evaluation round outside the fold loop.
# NOTE: this cell defines none of x_train, y_train, x_val, y_val, x_test, y_test, patch_size
# or p1b_x..p4b_x; it reads whatever the training-loop cell above left in the kernel. That
# cell deletes most of these at the end of every successful fold, so this cell is only
# runnable after a fold has failed.
baseModel = VGG16(weights="imagenet", include_top=False,input_tensor=Input(shape=(224, 224, 3)))

# VGG16 convolutional base followed by a small classification head.
model = models.Sequential()
model.add(baseModel)
model.add(layers.AveragePooling2D(pool_size=(3, 3)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.25))
model.add(layers.Dense(64, activation='relu',kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dropout(0.25))
model.add(layers.Dense(4, activation="softmax"))

# Phase 1: freeze the whole base and train only the new head.
for layer in baseModel.layers[:]:
    layer.trainable = False

model.compile(optimizer=optimizers.Adam(lr = 0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Keep only the weights with the best validation accuracy seen so far.
filepath="weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

# Short training ONLY top layers
#... so the conv_base weights will not be destroyed by the random initialization of the new weights
history = model.fit(x_train, y_train, epochs=25, batch_size=32, validation_data=(x_val,y_val),shuffle=True,callbacks=callbacks_list, verbose=2)
# load the best top model
model.load_weights(filepath)

# Phase 2: keep base layers 0-10 frozen and unfreeze everything from index 11 on.
for layer in baseModel.layers[:11]:
    layer.trainable = False
for layer in baseModel.layers[11:]:
    layer.trainable = True

# Recompile so the new trainable flags take effect; use a slower learning rate.
model.compile(optimizer=optimizers.Adam(lr = 0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# train the partly unfrozen model (same checkpoint callback, same file)
history = model.fit(x_train, y_train, epochs=25, batch_size=32, validation_data=(x_val,y_val),shuffle=True,callbacks=callbacks_list,verbose=2)

model.load_weights(filepath)

model.compile(optimizer=optimizers.Adam(lr = 0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Evaluate on the held-out test patches.
y_pred = model.predict(x_test)
ypred = np.argmax(y_pred, axis=1)
ytest = np.argmax(y_test, axis=1)

cm = confusion_matrix(ytest, ypred)
# Build the CSV row (patch size + 16 confusion-matrix cells) from THIS model's matrix;
# a leftover ps_cm from the loop cell would be missing or belong to another model.
ps_cm = np.insert(cm.flatten(), 0, patch_size).reshape(1, 17)

test_accuracy = (np.trace(cm))/len(ytest)
print('RESULT: '+repr(patch_size)+', '+repr(test_accuracy)+'\n')

p1predict = model.predict(p1b_x)
p2predict = model.predict(p2b_x)
p3predict = model.predict(p3b_x)
p4predict = model.predict(p4b_x)

# Prefix every prediction row with its patch index. The index column is sized per painting
# because paintings of different dimensions yield different numbers of patches.
p1_predict = np.concatenate([np.arange(len(p1predict)).reshape(-1, 1), p1predict], axis=1)
p2_predict = np.concatenate([np.arange(len(p2predict)).reshape(-1, 1), p2predict], axis=1)
p3_predict = np.concatenate([np.arange(len(p3predict)).reshape(-1, 1), p3predict], axis=1)
p4_predict = np.concatenate([np.arange(len(p4predict)).reshape(-1, 1), p4predict], axis=1)

# Append the results of this run to the CSV files.
with open('heapmap_p1_ps140_9010.csv','a') as f:
    np.savetxt(f, p1_predict, fmt='%s')
with open('heapmap_p2_ps140_9010.csv','a') as f:
    np.savetxt(f, p2_predict, fmt='%s')
with open('heapmap_p3_ps140_9010.csv','a') as f:
    np.savetxt(f, p3_predict, fmt='%s')
with open('heapmap_p4_ps140_9010.csv','a') as f:
    np.savetxt(f, p4_predict, fmt='%s')
with open('cm_height_vgg16_ps140_9010.csv','a') as f:
    np.savetxt(f, ps_cm, fmt='%s')

with open("accuracy_vgg16_ps140_9010.csv", "a") as myfile:
    myfile.write(repr(patch_size)+','+repr(test_accuracy)+'\n')

In [None]:
# Scratch cell: load a previously saved CSV with np.loadtxt to inspect its contents.
# NOTE(review): this file name is not written by any cell visible in this notebook --
# presumably it comes from an earlier run; confirm the path before relying on it.
import numpy as np
np.loadtxt('heapmap_p1_200_8020.csv')

In [None]:
# Scratch: draw 3 of the indices 0..2 without replacement and keep the first two.
np.random.choice(np.arange(3), size=3, replace=False)[:2]

In [None]:
# Scratch: for each group of three consecutive indices (0-2, 3-5, 6-8, 9-11) draw the three
# indices without replacement, keep the first two, and join the four picks into one array.
np.concatenate([
    np.random.choice(np.arange(start, start + 3), 3, replace=False)[0:2]
    for start in (0, 3, 6, 9)
])

In [None]:
# One randomly chosen index from each group of three consecutive indices
# (0-2, 3-5, 6-8, 9-11).
pick_test = np.asarray([
    np.random.choice(np.arange(start, start + 3), 3, replace=False)[0]
    for start in (0, 3, 6, 9)
])

In [None]:
# Display the indices sampled into pick_test.
pick_test

In [None]:
# Scratch: one randomly chosen index from each group of three consecutive indices
# (0-2, 3-5, 6-8, 9-11), shown without being stored.
np.asarray([
    np.random.choice(np.arange(start, start + 3), 3, replace=False)[0]
    for start in (0, 3, 6, 9)
])