**Save features from VGG16 Conv layer**

Using first convolutional layer from convolutional block 5\
512 feature maps with a size of 14x14

In [17]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.applications import VGG16

batch_size = 64

def save_image_features():
    # build model from VGG16 network, only up to the first convolutional layer from convolutional block 5
    base_model = VGG16(include_top=False, weights='imagenet')
    base_model.trainable = False
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('block5_conv1').output)
#    model.summary()

    # image generator from directory
    datagen = ImageDataGenerator(rescale=1. / 255)

    generator = datagen.flow_from_directory(
        '../Labels_Style/Train',
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)
    
    # obtain covolutional features
    features_train = model.predict(generator)
    
    # save training features
    np.save(open('../data/vgg16conv51_features_train.npy', 'wb'),
            features_train)

    # save training labels
    labels = [int(filepath.split('/')[-2]) for filepath in generator.filepaths]
    np.save(open('../data/labels_train.npy', 'wb'), labels)
    np.save(open('../data/files_train.npy', 'wb'), generator.filepaths)
    

    generator = datagen.flow_from_directory(
        '../Labels_Style/Test',
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)
    
    # obtain covolutional features
    features_test = model.predict(generator)
    
    # save test features
    np.save(open('../data/vgg16conv51_features_test.npy', 'wb'),
            features_test)

    # save test labels
    labels = [int(filepath.split('/')[-2]) for filepath in generator.filepaths]
    np.save(open('../data/labels_test.npy', 'wb'), labels)
    np.save(open('../data/files_test.npy', 'wb'), generator.filepaths)

    

In [18]:
save_image_features()

Found 1250 images belonging to 13 classes.
Found 1088 images belonging to 13 classes.


**Generate Gram matrix**

In [19]:
import numpy as np
from sklearn import decomposition
from sklearn.preprocessing import StandardScaler

def save_gram_features(scale=False):

    # Load and reshape training data
    # Original shape (, 14, 14, 512)
    # Resulting shape after vectorizing feature maps (, 196, 512)
    train_data = np.load(open('../data/vgg16conv51_features_train.npy', 'rb'))
    train_data = train_data.reshape(train_data.shape[0], -1, 512)
    test_data = np.load(open('../data/vgg16conv51_features_test.npy', 'rb'))
    test_data = test_data.reshape(test_data.shape[0], -1, 512)
    #print(train_data.shape)

    # Calculate Gram matrix values as dot product of feature map vectors: 196x512 -> 512x512
    # Vectorize upper diagonal (of symmetrical matrix) to vector of length 512*513/2 = 131328 

    # should be the same, except for the selection of upper diagonal, as:
    #G_train = np.matmul(np.transpose(train_data, (0, 2, 1)), train_data)
    #G_train = G_train.reshape(G_train.shape[0], -1)

    G_train = np.zeros((train_data.shape[0], 131328))
    for k in range(train_data.shape[0]):
        g = train_data[k,].T.dot(train_data[k,])
        G_train[k,] = g[np.triu_indices_from(g)]

    G_test = np.zeros((test_data.shape[0], 131328))
    for k in range(test_data.shape[0]):
        g = test_data[k,].T.dot(test_data[k,])
        G_test[k,] = g[np.triu_indices_from(g)]

    #print(G_train.shape)

    # Reduce dimensions by PCA
    if scale:
        scaler = StandardScaler().fit(G_train)
        G_train = scaler.transform(G_train)
        G_test = scaler.transform(G_test)

    pca = decomposition.PCA(n_components=1024).fit(G_train)
    x_train = pca.transform(G_train)
    x_test = pca.transform(G_test)

    #print(x_train.shape)
    
    np.save(open('../data/gram_features_train.npy', 'wb'), x_train)
    np.save(open('../data/gram_features_test.npy', 'wb'), x_test)


In [20]:
save_gram_features()

In [6]:
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score


x_train = np.load(open('../data/gram_features_train.npy', 'rb'))
x_test = np.load(open('../data/gram_features_test.npy', 'rb'))

train_labels = np.load(open('../data/labels_train.npy', 'rb'))
test_labels = np.load(open('../data/labels_test.npy', 'rb'))


clf = svm.SVC(kernel='linear', C=1)

print("5-fold CV accuracy using all data (2338 instances)")
data = np.concatenate((x_train, x_test), axis=0)
labels = np.concatenate((train_labels, test_labels), axis=0)
k_fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
print(cross_val_score(clf, data, labels, cv=k_fold, n_jobs=-1))


print("\nAccuracy on test set")
clf.fit(x_train, train_labels)
print(clf.score(x_test, test_labels))




5-fold CV accuracy using all data (2338 instances)
[0.61324786 0.62393162 0.63034188 0.66809422 0.6124197 ]

Accuracy on test set
0.6222426470588235


**Prepare combined data representations**

In [21]:
import numpy as np


def combined_features(as_nparray=False):

    x_train = np.load(open('../data/gram_features_train.npy', 'rb'))
    x_test = np.load(open('../data/gram_features_test.npy', 'rb'))
    train_files = np.load(open('../data/files_train.npy', 'rb'))
    test_files = np.load(open('../data/files_test.npy', 'rb'))

    nc_data_tmp = np.load("../data/nc_data.npy", allow_pickle=True).item()
    nc_data_64 = dict()
    nc_data = dict()
    nc_data_1024 = dict()
    hdc_data = dict()
    alpha_data = dict()
    for i, painting in enumerate(nc_data_tmp['painting']):
        painting = painting.split('.')[0]
        print(painting)
        nc_data_64[painting] = nc_data_tmp['nc_64'][i]
        nc_data[painting] = nc_data_tmp['nc_256'][i]
        nc_data_1024[painting] = nc_data_tmp['nc_1024'][i]
        hdc_data[painting] = nc_data_tmp['hdc'][i]
        alpha_data[painting] = nc_data_tmp['alpha'][i]
    
    x_train_nc_64 = np.zeros((train_files.shape[0], 64))
    x_train_nc_1024 = np.zeros((train_files.shape[0], 1024))
    x_train_nc_256 = np.zeros((train_files.shape[0], 256))
    x_train_hdc = np.zeros((train_files.shape[0], 89))
    x_train_alpha = np.zeros((train_files.shape[0], 1))
    
    for i, painting in enumerate(train_files):
        painting = painting.split('/')[-1].split('.')[0]
        x_train_nc_64[i, ] = nc_data_64[painting]
        x_train_nc_256[i, ] = nc_data[painting]
        x_train_nc_1024[i, ] = nc_data_1024[painting]
        x_train_hdc[i, ] = hdc_data[painting]
        x_train_alpha[i, ] = alpha_data[painting]
    
    x_test_nc_64 = np.zeros((test_files.shape[0], 64))
    x_test_nc_1024 = np.zeros((test_files.shape[0], 1024))
    x_test_nc_256 = np.zeros((test_files.shape[0], 256))
    x_test_hdc = np.zeros((test_files.shape[0], 89))
    x_test_alpha = np.zeros((test_files.shape[0], 1))
    for i, painting in enumerate(test_files):
        painting = painting.split('/')[-1].split('.')[0]
        x_test_nc_64[i, ] = nc_data_64[painting] 
        x_test_nc_256[i, ] = nc_data[painting]
        x_test_nc_1024[i, ] = nc_data_1024[painting]
        x_test_hdc[i, ] = hdc_data[painting]
        x_test_alpha[i, ] = alpha_data[painting]


    if as_nparray:
        X_train = np.concatenate((x_train, x_train_nc_256, x_train_nc_1024, x_train_hdc, x_train_alpha, x_train_nc_64), axis=1)
        X_test = np.concatenate((x_test, x_test_nc_256, x_test_nc_1024, x_test_hdc, x_test_alpha, x_test_nc_64), axis=1)

    else:
        X_train = [x_train, x_train_nc_256, x_train_nc_1024, x_train_hdc, x_train_alpha, x_train_nc_64]
        X_test = [x_test, x_test_nc_256, x_test_nc_1024, x_test_hdc, x_test_alpha, x_test_nc_64]

    return X_train, X_test


**Using ColumnTranformer to combine multiple representations (feature sets)**
\
Need to define function "combined_features" by running the cell above 
 

In [22]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import MaxAbsScaler

from sklearn.pipeline import Pipeline
from sklearn.svm import SVC, LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.ensemble import VotingClassifier

from xgboost import XGBClassifier

def selector(X):
    return X
#GRAM
ct1 = ColumnTransformer(
    transformers=[
        ('gram', FunctionTransformer(selector), slice(0, 1024)),
    ],
)

#NC
ctx =  ColumnTransformer(
    transformers=[
        ('nc', FunctionTransformer(selector), slice(1280, 2304)),
    ],
)

#HDC
ct4 = ColumnTransformer(
     transformers=[
      ('hdc', MaxAbsScaler(), slice(2304, 2393)),
     ],
)

#ALPHA
ct5 = ColumnTransformer(
    transformers=[
        ('alpha', FunctionTransformer(selector), [2393]),
    ],
)


####PIPE########

#GRAM
clf1 = SVC(kernel='linear', gamma='auto', probability=True)
pipe1 = Pipeline(
    steps=[
        ('ct1', ct1),
        ('clf1', clf1),
    ],
)

#NC
clfx = XGBClassifier(max_depth=3, learning_rate=0.16, n_estimators=445)
pipex = Pipeline(
    steps=[
        ('ctx', ctx),
        ('clfx', clfx),
    ],
)

#HDC
clf4 = XGBClassifier(max_depth=3, learning_rate=0.15, n_estimators=480)
pipe4 = Pipeline(
    steps=[
        ('ct4', ct4),
        ('clf4', clf4),
    ],
)

#ALPHA
clf5 = XGBClassifier(max_depth=1, learning_rate=0.3, n_estimators=300)
pipe5 = Pipeline(
    steps=[
        ('ct5', ct5),
        ('clf5', clf5),
    ],
)


clf_gram_nc= VotingClassifier(estimators=[("gram", pipe1), ("nc", pipex)], voting='soft', weights=[7,1])
clfn = VotingClassifier(estimators=[("gram", pipe1),("nc",pipex), ("hdc", pipe4), ("alpha", pipe5)], voting='soft', weights=[280,  21, 12, 3])

X_train, X_test = combined_features(as_nparray=True)


train_labels = np.load(open('../data/labels_train.npy', 'rb'))
test_labels = np.load(open('../data/labels_test.npy', 'rb'))


print("\nAccuracy on test set")

print("\nUsing Gram features")
pipe1.fit(X_train, train_labels)
print(pipe1.score(X_test, test_labels))

print("\nUsing Ensemble (nc + gram features) with Voting")
clf_gram_nc.fit(X_train, train_labels)
print(clf_gram_nc.score(X_test, test_labels))

print("\nUsing Ensemble (gram, nc and hdc) with Voting")
clfn.fit(X_train, train_labels)
print(clfn.score(X_test, test_labels))

AUGUIN_8
PAUL_GAUGUIN_9
PAUL_KLEE_10
PAUL_KLEE_11
PAUL_KLEE_12
PAUL_KLEE_13
PAUL_KLEE_14
PAUL_KLEE_15
PAUL_KLEE_16
PAUL_KLEE_17
PAUL_KLEE_18
PAUL_KLEE_19
PAUL_KLEE_1
PAUL_KLEE_20
PAUL_KLEE_21
PAUL_KLEE_22
PAUL_KLEE_23
PAUL_KLEE_24
PAUL_KLEE_25
PAUL_KLEE_26
PAUL_KLEE_27
PAUL_KLEE_28
PAUL_KLEE_29
PAUL_KLEE_2
PAUL_KLEE_30
PAUL_KLEE_31
PAUL_KLEE_32
PAUL_KLEE_33
PAUL_KLEE_34
PAUL_KLEE_35
PAUL_KLEE_36
PAUL_KLEE_37
PAUL_KLEE_38
PAUL_KLEE_39
PAUL_KLEE_3
PAUL_KLEE_40
PAUL_KLEE_41
PAUL_KLEE_42
PAUL_KLEE_43
PAUL_KLEE_44
PAUL_KLEE_45
PAUL_KLEE_46
PAUL_KLEE_47
PAUL_KLEE_48
PAUL_KLEE_49
PAUL_KLEE_4
PAUL_KLEE_50
PAUL_KLEE_5
PAUL_KLEE_6
PAUL_KLEE_7
PAUL_KLEE_8
PAUL_KLEE_9
PETER_PAUL_RUBENS_10
PETER_PAUL_RUBENS_11
PETER_PAUL_RUBENS_12
PETER_PAUL_RUBENS_13
PETER_PAUL_RUBENS_14
PETER_PAUL_RUBENS_15
PETER_PAUL_RUBENS_16
PETER_PAUL_RUBENS_17
PETER_PAUL_RUBENS_18
PETER_PAUL_RUBENS_19
PETER_PAUL_RUBENS_1
PETER_PAUL_RUBENS_20
PETER_PAUL_RUBENS_21
PETER_PAUL_RUBENS_22
PETER_PAUL_RUBENS_23
PETER_PAUL_RUBENS_24
