**Save features from VGG16 Conv layer**

Using first convolutional layer from convolutional block 5\
512 feature maps with a size of 14x14

In [None]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.applications import VGG16

batch_size = 64

def _extract_and_save_split(model, datagen, image_dir, split):
    """Extract features for one image directory and save them with labels and paths.

    Writes three files under ``../data``:
    ``vgg16conv51_features_author_<split>.npy`` (the model output),
    ``labels_author_<split>.npy`` (integer labels) and
    ``files_author_<split>.npy`` (image file paths).

    Args:
        model: Keras model used to compute the features.
        datagen: ``ImageDataGenerator`` used to read and rescale the images.
        image_dir: Directory with one sub-directory per class; each
            sub-directory name must be parseable as an integer label.
        split: Suffix used in the output file names (``'train'`` or ``'test'``).
    """
    import os

    # shuffle is disabled so that the feature rows stay in the same order as
    # generator.filepaths, from which the labels are derived below
    generator = datagen.flow_from_directory(
        image_dir,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)

    # obtain convolutional features
    features = model.predict(generator)

    # Passing a path (instead of an open file object) lets NumPy close the
    # file as soon as the array has been written.
    np.save('../data/vgg16conv51_features_author_%s.npy' % split, features)

    # The label is the name of the directory that contains the image.
    # os.path is used so that the native path separator is handled.
    labels = [int(os.path.basename(os.path.dirname(filepath)))
              for filepath in generator.filepaths]
    np.save('../data/labels_author_%s.npy' % split, labels)
    np.save('../data/files_author_%s.npy' % split, generator.filepaths)


def save_image_features():
    """Save VGG16 ``block5_conv1`` features for the training and test images.

    Images are read from ``../Labels_Author/Train`` and
    ``../Labels_Author/Test``; features, labels and file paths are written
    to ``../data`` (see ``_extract_and_save_split`` for the file names).
    """
    # build model from VGG16 network, only up to the first convolutional layer
    # from convolutional block 5
    base_model = VGG16(include_top=False, weights='imagenet')
    base_model.trainable = False
    model = Model(inputs=base_model.input,
                  outputs=base_model.get_layer('block5_conv1').output)

    # image generator from directory
    # NOTE(review): pixels are only rescaled to [0, 1]; the ImageNet weights
    # may expect keras.applications.vgg16.preprocess_input instead - confirm.
    datagen = ImageDataGenerator(rescale=1. / 255)

    _extract_and_save_split(model, datagen, '../Labels_Author/Train', 'train')
    _extract_and_save_split(model, datagen, '../Labels_Author/Test', 'test')

    

In [None]:
# Extract and store the VGG16 features, labels and file paths for both splits.
save_image_features()

**Generate Gram matrix**

In [None]:
import numpy as np
from sklearn import decomposition
from sklearn.preprocessing import StandardScaler

def _gram_upper_triangle(features):
    """Return the vectorized upper triangle of every sample's Gram matrix.

    Args:
        features: Array of shape ``(n_samples, n_positions, n_channels)``.

    Returns:
        float64 array of shape
        ``(n_samples, n_channels * (n_channels + 1) // 2)``. Row ``k`` holds
        the upper triangle (diagonal included, row-major order) of
        ``features[k].T @ features[k]``.
    """
    n_samples, _, n_channels = features.shape

    # The Gram matrix is symmetric, so the upper triangle carries all the
    # information. The index arrays are the same for every sample, so they
    # are built once instead of once per iteration.
    rows, cols = np.triu_indices(n_channels)

    grams = np.zeros((n_samples, rows.size))
    for k, maps in enumerate(features):
        gram = maps.T.dot(maps)
        grams[k] = gram[rows, cols]
    return grams


def save_gram_features(scale=False, n_components=2048):
    """Compute PCA-reduced Gram-matrix features from the saved conv features.

    Reads ``../data/vgg16conv51_features_author_{train,test}.npy`` and writes
    ``../data/gram_features_author_{train,test}.npy``.

    Args:
        scale: If True, standardize the Gram features with a
            ``StandardScaler`` fitted on the training data before PCA.
        n_components: Number of principal components to keep.
    """
    # Passing the path lets NumPy open and close the file itself.
    train_data = np.load('../data/vgg16conv51_features_author_train.npy')
    test_data = np.load('../data/vgg16conv51_features_author_test.npy')

    # Vectorize each feature map: (n, h, w, channels) -> (n, h * w, channels).
    # For the stored VGG16 features this is expected to be
    # (n, 14, 14, 512) -> (n, 196, 512).
    train_data = train_data.reshape(train_data.shape[0], -1, train_data.shape[-1])
    test_data = test_data.reshape(test_data.shape[0], -1, test_data.shape[-1])

    # Gram matrix values are the dot products of the feature map vectors:
    # 196x512 -> 512x512, of which the upper triangle is kept as a vector of
    # length 512*513/2 = 131328.
    G_train = _gram_upper_triangle(train_data)
    G_test = _gram_upper_triangle(test_data)

    if scale:
        # the scaler is fitted on the training data only
        scaler = StandardScaler().fit(G_train)
        G_train = scaler.transform(G_train)
        G_test = scaler.transform(G_test)

    # Reduce dimensions by PCA; the projection is fitted on the training data
    # only and then applied to both splits.
    pca = decomposition.PCA(n_components=n_components).fit(G_train)
    x_train = pca.transform(G_train)
    x_test = pca.transform(G_test)

    np.save('../data/gram_features_author_train.npy', x_train)
    np.save('../data/gram_features_author_test.npy', x_test)


In [None]:
# Build the PCA-reduced Gram features from the stored VGG16 features (no standardization).
save_gram_features()

In [None]:
import numpy as np
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score


# Load the PCA-reduced Gram features and the author labels.
# Passing the path (instead of an open file object) lets NumPy close each
# file as soon as it has been read.
x_train = np.load('../data/gram_features_author_train.npy')
x_test = np.load('../data/gram_features_author_test.npy')

train_labels = np.load('../data/labels_author_train.npy')
test_labels = np.load('../data/labels_author_test.npy')


clf = svm.SVC(kernel='linear', C=1)
#clf = RandomForestClassifier(n_estimators=10)
#clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)

# Cross-validation over the union of the training and test instances.
# NOTE(review): the PCA projection behind these features was fitted on the
# training split only, so the two splits are not treated symmetrically here.
data = np.concatenate((x_train, x_test), axis=0)
print("5-fold CV accuracy using all data (%i instances)" %(len(data)))
labels = np.concatenate((train_labels, test_labels), axis=0)
k_fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
print(cross_val_score(clf, data, labels, cv=k_fold, n_jobs=-1))


# Hold-out evaluation: fit on the training split, score on the test split.
print("\nAccuracy on test set")
clf.fit(x_train, train_labels)
print(clf.score(x_test, test_labels))




**Prepare combined data representations**

In [None]:
import numpy as np


def _lookup_rows(files, table, width):
    """Build an ``(len(files), width)`` float array of per-painting features.

    Row ``i`` is ``table[key]`` where ``key`` is the file name of ``files[i]``
    up to its first dot. Raises ``KeyError`` if a painting is not in ``table``.
    """
    import os

    rows = np.zeros((len(files), width))
    for i, path in enumerate(files):
        key = os.path.basename(path).split('.')[0]
        rows[i] = table[key]
    return rows


def combined_features(normalized=False):
    """Concatenate the Gram features with the per-painting descriptors.

    Reads the Gram features and file lists from ``../data`` plus the
    descriptor dictionary stored in ``../data/nc_data.npy`` (or
    ``../data/nc_data_normalized.npy`` when ``normalized`` is True).

    Args:
        normalized: Selects which descriptor file is loaded.

    Returns:
        ``(X_train, X_test)``. The columns of each matrix are, in order: the
        Gram features, ``nc_256`` (256 columns), ``nc_1024`` (1024),
        ``hdc`` (89), ``alpha`` (1) and ``nc_64`` (64).

    Raises:
        KeyError: If a painting from the file lists has no descriptor entry.
    """
    # Passing the path lets NumPy open and close each file itself.
    x_train = np.load('../data/gram_features_author_train.npy')
    x_test = np.load('../data/gram_features_author_test.npy')
    train_files = np.load('../data/files_author_train.npy')
    test_files = np.load('../data/files_author_test.npy')

    if normalized:
        nc_path = "../data/nc_data_normalized.npy"
    else:
        nc_path = "../data/nc_data.npy"
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load files
    # from a trusted source.
    nc_data_tmp = np.load(nc_path, allow_pickle=True).item()

    # Paintings are keyed by their file name up to the first dot, which is
    # the same key _lookup_rows derives from the image paths.
    keys = [painting.split('.')[0] for painting in nc_data_tmp['painting']]

    # (descriptor name, number of columns) in output column order. Downstream
    # code selects features by column position, so neither the order nor the
    # widths may change.
    layout = [('nc_256', 256), ('nc_1024', 1024), ('hdc', 89),
              ('alpha', 1), ('nc_64', 64)]
    tables = {
        name: {key: nc_data_tmp[name][i] for i, key in enumerate(keys)}
        for name, _ in layout
    }

    X_train = np.concatenate(
        [x_train] + [_lookup_rows(train_files, tables[name], width)
                     for name, width in layout],
        axis=1)
    X_test = np.concatenate(
        [x_test] + [_lookup_rows(test_files, tables[name], width)
                    for name, width in layout],
        axis=1)

    return X_train, X_test


**Using ColumnTransformer to combine multiple representations (feature sets)**
\
Need to define function "combined_features" by running the cell above 
 

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import MaxAbsScaler

from sklearn.pipeline import Pipeline
from sklearn.svm import SVC, LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import make_scorer

from xgboost import XGBClassifier

def selector(X):
    """Identity function: hand the selected columns back unchanged.

    Wrapped in a ``FunctionTransformer`` so that a ``ColumnTransformer`` can
    pick a column range without modifying its values.
    """
    return X

# Column layout expected in the combined feature matrix:
#   [0, 2048)     Gram features (PCA-reduced)
#   [2048, 2304)  nc_256
#   [2304, 3328)  nc_1024
#   [3328, 3417)  hdc
#   3417          alpha
#   [3418, 3482)  nc_64

# Gram features only
ct1 = ColumnTransformer(
    transformers=[
        ('gram', FunctionTransformer(selector), slice(0, 2048)),
    ],
)

# All non-Gram feature sets; only hdc is rescaled (by its maximum absolute value)
ct2 = ColumnTransformer(
    transformers=[
        ('nc256', FunctionTransformer(selector), slice(2048, 2304)),
        ('nc1024', FunctionTransformer(selector), slice(2304, 3328)),
        ('hdc', MaxAbsScaler(), slice(3328, 3417)),
        ('alpha', FunctionTransformer(selector), [3417]),
        ('nc64', FunctionTransformer(selector), slice(3418, 3482)),
    ],
    # The keys must match the transformer names above exactly; the previous
    # 'nc_64' / 'nc_256' keys did not correspond to any transformer.
    transformer_weights={
        'nc64': 1.0, 'nc256': 1.0, 'nc1024': 1.0, 'hdc': 1.0, 'alpha': 1.0},
)

# nc_256 features only
ct3 = ColumnTransformer(
    transformers=[
        ('nc256', FunctionTransformer(selector), slice(2048, 2304)),
    ],
)

# probability=True is needed because the pipelines are combined by soft voting
clf1 = SVC(kernel='linear', gamma='auto', probability=True)
pipe1 = Pipeline(
    steps=[
        ('ct1', ct1),
        ('clf1', clf1),
    ],
)

clf2 = XGBClassifier(max_depth=3, learning_rate=0.1, n_estimators=160)
pipe2 = Pipeline(
    steps=[
        ('ct2', ct2),
        ('clf2', clf2),
    ],
)

clf3 = XGBClassifier(max_depth=3, learning_rate=0.16, n_estimators=445)
pipe3 = Pipeline(
    steps=[
        ('ct3', ct3),
        ('clf3', clf3),
    ],
)

# Soft-voting ensembles; the Gram pipeline gets 7 times the weight of the other
clf_gram_nc = VotingClassifier(estimators=[("gram", pipe1), ("nc_256", pipe3)], voting='soft', weights=[7,1])

clf = VotingClassifier(estimators=[("gram", pipe1), ("all_features", pipe2)], voting='soft', weights=[7,1]) #BEST

# Combined feature matrices (Gram features followed by the per-painting descriptors)
X_train, X_test = combined_features(normalized=False)

# Passing the path (instead of an open file object) lets NumPy close each
# file as soon as it has been read.
train_labels = np.load('../data/labels_author_train.npy')
test_labels = np.load('../data/labels_author_test.npy')


# Each model is fitted on the training split and scored on the test split.
print("\nAccuracy on test set")

print("\nUsing Gram features")
pipe1.fit(X_train, train_labels)
print(pipe1.score(X_test, test_labels))

print("\nUsing Gram + nc features nc_256")
clf_gram_nc.fit(X_train, train_labels)
print(clf_gram_nc.score(X_test, test_labels))

print("\nUsing Gram features and NC+HDC, with Voting")
clf.fit(X_train, train_labels)
print(clf.score(X_test, test_labels))
