<a href="https://colab.research.google.com/github/fahim5466/Painting-Artstyle-Detection/blob/main/ResNet50_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Load the Colab helper used to attach Google Drive to this runtime.
from google.colab import drive

# Mount Google Drive at /content/drive; every data path used later in this
# notebook lives under that directory. This will prompt for authorization.
drive.mount('/content/drive')

In [None]:
import os
import numpy as np
import pandas as pd
from random import sample
from scipy.misc import imresize
import pickle, cv2
from sklearn.preprocessing import LabelEncoder

'''
Read in images and accompanying metadata to create a sampled dataset consisting of 200 images from each of the art styles of interest. Save the processed datasets to file.
'''

def make_img_df():
    """Build and pickle a table of readable image paths and their art styles.

    Reads the metadata CSV, keeps the rows whose ``style`` is one of the
    styles of interest, and matches them against the files present in the
    image folder. Files that OpenCV cannot decode are skipped. The result is
    a DataFrame with the columns ``img_path`` and ``class``, written to
    ``paths_classes_10.pkl``.
    """
    img_root = '/content/drive/My Drive/Artsy/Expressionism/'
    img_details = pd.read_csv('/content/drive/My Drive/Artsy/all_data_info.csv')
    keepers = ['Impressionism', 'Expressionism', 'Surrealism']

    df_details = img_details[img_details['style'].isin(keepers)]

    # Map each file name to the style of its first metadata row. A dict gives
    # constant-time lookups instead of rescanning the frame for every file.
    style_by_name = (
        df_details.drop_duplicates('new_filename')
        .set_index('new_filename')['style']
        .to_dict()
    )

    images = []
    styles = []
    for name in os.listdir(img_root):
        if name not in style_by_name:
            continue

        img_path = '{}{}'.format(img_root, name)
        if not os.path.isfile(img_path):
            continue

        # cv2.imread returns None (it does not raise) for unreadable files.
        if cv2.imread(img_path, 1) is None:
            continue

        # The bare file name is used for the lookup as-is. Recovering it from
        # the path with str.lstrip would be wrong: lstrip removes a *set of
        # characters*, not a prefix, and can eat the start of the file name.
        images.append(img_path)
        styles.append(style_by_name[name])

    final_df = pd.DataFrame({'img_path': images, 'class': styles})
    final_df.to_pickle('/content/drive/My Drive/Artsy/paths_classes_10.pkl')


def prepare_data():
    """Sample the image table, preprocess the images, and save the results.

    Loads the path/class table from ``paths_classes_10.pkl``, draws a
    per-class sample via ``sampled_paths_classes``, and writes three files:

    * ``paths_and_classes_small.pkl`` - the sampled ``(path, label)`` pairs,
    * ``class_dict.pkl`` - a dict mapping each integer label to its class
      name,
    * ``images_labels_224.npz`` - array ``x`` of preprocessed images and
      array ``y`` of integer labels, in the same order as the sampled pairs.
    """
    df = pd.read_pickle('/content/drive/My Drive/Artsy/paths_classes_10.pkl')

    paths_and_classes_small, class_names = sampled_paths_classes(df)

    with open('/content/drive/My Drive/Artsy/paths_and_classes_small.pkl', 'wb') as f:
        pickle.dump(paths_and_classes_small, f)

    # enumerate covers however many classes the encoder found, rather than
    # assuming a fixed count.
    class_dict = dict(enumerate(class_names))

    with open('/content/drive/My Drive/Artsy/class_dict.pkl', 'wb') as f:
        pickle.dump(class_dict, f, protocol=pickle.HIGHEST_PROTOCOL)

    # Read and preprocess one image at a time so the full-size originals are
    # never all held in memory together.
    x = np.array([prepare_image(cv2.imread(path, 1))
                  for path, _ in paths_and_classes_small])
    y = np.array([style for _, style in paths_and_classes_small])

    np.savez('/content/drive/My Drive/Artsy/images_labels_224.npz', x=x, y=y)


def sampled_paths_classes(df, n_per_class=200):
    """Draw an equally sized random sample of image paths from every class.

    Args:
        df: DataFrame with an ``img_path`` column and a ``class`` column.
        n_per_class: number of images to draw, without replacement, from
            each class.

    Returns:
        A tuple ``(pairs, class_names)``. ``pairs`` is a shuffled list of
        ``(path, label)`` tuples where ``label`` is the integer code assigned
        by ``LabelEncoder``; ``class_names`` is the encoder's ``classes_``
        array, so ``class_names[label]`` is the original class name.

    Raises:
        ValueError: if any class has fewer than ``n_per_class`` images.
    """
    # encode art categories as numerical values
    encoder = LabelEncoder()
    y = encoder.fit_transform(df['class'].astype('str'))
    n_classes = len(encoder.classes_)

    # Bucket the (path, label) pairs by label in one pass instead of
    # rescanning the whole list once per class.
    by_class = [[] for _ in range(n_classes)]
    for path, style in zip(df['img_path'].tolist(), y):
        by_class[style].append((path, style))

    paths_and_classes_small = []
    for label, members in enumerate(by_class):
        if len(members) < n_per_class:
            raise ValueError(
                'class {!r} has only {} images; {} requested per class'.format(
                    encoder.classes_[label], len(members), n_per_class))
        paths_and_classes_small.extend(sample(members, n_per_class))

    # Mix the classes so the saved arrays are not ordered by label.
    np.random.shuffle(paths_and_classes_small)

    return paths_and_classes_small, encoder.classes_


def prepare_image(image, target_width=224, target_height=224, max_zoom=0.2):
    """Randomly crop, zoom, and mirror an image, then resize and rescale it.

    Args:
        image: image array indexed as ``[row, column, ...]``; the caller in
            this file passes the output of ``cv2.imread``.
        target_width: width of the returned image in pixels.
        target_height: height of the returned image in pixels.
        max_zoom: upper bound of the random zoom. The crop window is shrunk
            by a factor drawn uniformly from ``[1, 1 + max_zoom)``.

    Returns:
        A ``float32`` array ``target_height`` rows by ``target_width``
        columns, with the pixel values divided by 255.
    """
    height, width = image.shape[:2]
    image_ratio = width / height
    target_image_ratio = target_width / target_height

    # Largest window with the target aspect ratio that fits in the image.
    if image_ratio < target_image_ratio:
        crop_width = width
        crop_height = int(width / target_image_ratio)
    else:
        crop_width = int(height * target_image_ratio)
        crop_height = height

    # Shrink the window by a random zoom factor; keep at least one pixel so
    # very small inputs still yield a non-empty crop.
    resize_factor = np.random.rand() * max_zoom + 1.0
    crop_width = max(1, int(crop_width / resize_factor))
    crop_height = max(1, int(crop_height / resize_factor))

    # randint's upper bound is exclusive, so add 1: this makes the last valid
    # offset reachable and keeps the call valid when there is no slack at all
    # (e.g. max_zoom=0), where randint(0, 0) would raise.
    x0 = np.random.randint(0, width - crop_width + 1)
    y0 = np.random.randint(0, height - crop_height + 1)
    image = image[y0:y0 + crop_height, x0:x0 + crop_width]

    if np.random.rand() < 0.5:
        image = np.fliplr(image)

    # scipy.misc.imresize no longer exists in current SciPy releases, so the
    # resize is done with OpenCV. cv2.resize takes the size as
    # (width, height). INTER_AREA is used when shrinking, INTER_LINEAR
    # otherwise. The crop/flip above produce a strided view, so hand OpenCV
    # a contiguous copy.
    shrinking = crop_width > target_width or crop_height > target_height
    interpolation = cv2.INTER_AREA if shrinking else cv2.INTER_LINEAR
    image = cv2.resize(np.ascontiguousarray(image),
                       (target_width, target_height),
                       interpolation=interpolation)

    return image.astype(np.float32) / 255


if __name__ == '__main__':
    # Build the path/class table first: prepare_data reads the pickle that
    # make_img_df writes. Both calls belong inside the guard so nothing runs
    # when this code is imported instead of executed.
    make_img_df()
    prepare_data()

In [None]:

import numpy as np
import pandas as pd
from random import sample
import pickle, cv2
from scipy.misc import imresize
from sklearn.model_selection import train_test_split
from keras import applications, optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense
from keras.utils import np_utils

'''
Train and save a Keras CNN model using the ResNet50 baseline model
'''

def train_validation_split(x, y):
    """Split the data into stratified train, validation, and test subsets.

    Two successive ``train_test_split`` calls are made, each stratified on
    the labels and each with a fixed ``random_state``: the first holds out
    the test set, the second divides the remainder into train and validation.

    Returns:
        ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    """
    # hold out the test set
    x_rest, x_test, y_rest, y_test = train_test_split(
        x, y, random_state=1337, stratify=y)

    # divide what is left into training and validation data
    x_fit, x_val, y_fit, y_val = train_test_split(
        x_rest, y_rest, random_state=42, stratify=y_rest)

    return x_fit, x_val, x_test, y_fit, y_val, y_test


def one_hot(y_train, y_val, y_test, n_classes):
    """One-hot encode the train, validation, and test label arrays.

    Each label array is passed through ``np_utils.to_categorical`` with
    ``n_classes`` columns.

    Returns:
        The encoded ``(y_train, y_val, y_test)``, in that order.
    """
    encoded = [np_utils.to_categorical(labels, n_classes)
               for labels in (y_train, y_val, y_test)]
    return tuple(encoded)


def build_fit_save_cnn(input_shape, n_classes, epochs, batch_size, X_train, X_val, y_train, y_val):
    """Build a ResNet50-based classifier, train it, report scores, save it.

    An ImageNet-initialised ResNet50 without its top layers is followed by a
    small fully connected head (Flatten, Dense(512, relu), Dropout(0.25),
    Dense(n_classes, softmax)). No layer is frozen here. The model is
    compiled with SGD and categorical cross-entropy, fitted on the training
    data with the validation data monitored, evaluated on the validation
    data, and handed to ``save_model``.

    Returns:
        The trained model.
    """
    backbone = applications.ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

    # fully connected classification head
    head = Sequential([
        Flatten(input_shape=backbone.output_shape[1:]),
        Dense(512, activation='relu'),
        Dropout(0.25),
        Dense(n_classes, activation='softmax'),
    ])

    # attach the head to the backbone's output
    final_model = Model(inputs=backbone.input, outputs=head(backbone.output))

    # compile with an SGD optimizer (Nesterov momentum)
    final_model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True),
        metrics=['accuracy'],
    )

    final_model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1,
        validation_data=(X_val, y_val),
    )

    # evaluate returns [loss, accuracy] for the metrics compiled above
    val_loss, val_accuracy = final_model.evaluate(X_val, y_val, verbose=0)
    print('Val. score:', val_loss)
    print('Val. accuracy:', val_accuracy)

    save_model(final_model)

    return final_model


def test_predict_score(model, X_test, y_test):
    """Evaluate the model on the test data and return its predictions.

    Calls ``model.evaluate`` and then ``model.predict`` on ``X_test``, and
    prints the first two entries of the evaluation result as the test score
    and test accuracy.

    Returns:
        ``(test_pred, score)``: the output of ``model.predict`` and the
        output of ``model.evaluate``.
    """
    metrics = model.evaluate(X_test, y_test, verbose=0)
    predictions = model.predict(X_test)

    for label, value in (('Test score:', metrics[0]), ('Test accuracy:', metrics[1])):
        print(label, value)

    return predictions, metrics


def save_model(model):
    """Write the model architecture (JSON) and weights (HDF5) to Drive.

    The architecture comes from ``model.to_json()`` and the weights are
    written with ``model.save_weights``; both files go into the
    ``saved_model`` folder. A confirmation line is printed afterwards.
    """
    architecture_path = '/content/drive/My Drive/Artsy/saved_model/resnet_model.json'
    weights_path = '/content/drive/My Drive/Artsy/saved_model/resnet_model_weights.h5'

    # serialise before opening the file so a failure leaves no empty file
    architecture = model.to_json()
    with open(architecture_path, 'w') as handle:
        handle.write(architecture)

    model.save_weights(weights_path)
    print('Model saved to disk!')


if __name__ == '__main__':
    # Seed NumPy's global random generator before any other work.
    seed = 1337
    np.random.seed(seed)

    epochs = 30
    batch_size = 25
    input_shape = (224,224,3)

    # arrays written by the data-preparation step: x = images, y = labels
    data = np.load('/content/drive/My Drive/Artsy/images_labels_224.npz')
    x = data['x']
    y = data['y']
    n_classes = len(np.unique(y))

    # train/validation/test split
    X_train, X_val, X_test, y_train, y_val, y_test = train_validation_split(x, y)

    # convert y to one-hot encoding
    y_train, y_val, y_test = one_hot(y_train, y_val, y_test, n_classes)

    # build, train, and save CNN model
    final_model = build_fit_save_cnn(input_shape, n_classes, epochs, batch_size, X_train, X_val, y_train, y_val)

    # score the model that was just trained on the held-out test set
    test_pred, score = test_predict_score(final_model, X_test, y_test)