In [1]:
#!/usr/bin/env python

# data tools
import os, cv2, glob, argparse
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# sklearn tools
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# tf tools
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Conv2D,
                                     MaxPool2D,
                                     MaxPooling2D, 
                                     Activation, 
                                     Flatten,
                                     Dropout,
                                     Dense)
from tensorflow.keras.preprocessing.image import (load_img,
                                                  img_to_array,
                                                  ImageDataGenerator)
from tensorflow.keras.applications.vgg16 import (preprocess_input,
                                                 decode_predictions,
                                                 VGG16)

In [2]:
# Read all images from the 4 corpora
def read_corpora_imgs(corpora_parent):
    
    # Get corpora in corpora parent dir
    corpora = os.listdir(corpora_parent)
    
    # Empty list for appending to
    corpora_imgs = []
    
    # For each corpus in corpora
    for corpus in corpora:
        
        # Defining corpus path
        corpus_path = os.path.join("data", corpus)
        
        # Info for terminal use
        print(f"[INFO] Loading images from \"{corpus_path}\" ...")
        # Empty list for appending to
        imgs = []
        
        # For each img within corpus path, read
        for img in glob.glob(os.path.join(corpus_path, "*2.jpg")):
            imgs.append(cv2.imread(img))
        
        # Append imgs in corpora to corpora_imgs
        corpora_imgs.append(imgs)
    
    # Return corpora_imgs
    return corpora_imgs

In [3]:
# Read corpora images 
corpora_imgs = read_corpora_imgs("data")

[INFO] Loading images from "data/content_monet_style_monet"
[INFO] Loading images from "data/content_gauguin_style_gauguin"
[INFO] Loading images from "data/content_monet_style_gauguin"
[INFO] Loading images from "data/content_gauguin_style_monet"


In [11]:
# Get a list of labels
label = ["content_vangogh_style_vangogh"] * len(corpora_imgs[0])
label.extend(["content_monet_style_vangogh"] * len(corpora_imgs[1]))
label.extend(["content_vangogh_style_monet"] * len(corpora_imgs[2]))
label.extend(["content_monet_style_monet"] * len(corpora_imgs[3]))

AttributeError: 'int' object has no attribute 'extend'

In [12]:
# Get a single list, containing all arrays
X = [array for corpus in corpora_imgs for array in corpus]

In [13]:
# Create dataframe with labels and arrays
df = pd.DataFrame.from_dict({"X" : X,
     "y" : label})

In [14]:
# Define function for resizing and making into array
def get_resized_arrays(arrays, width, height):
    # Info for terminal use
    print("[INFO] Resizing images ...")
    
    # Empty list for appending to
    arrays_resized = []
    
    # For every array in the list of arrays
    for array in arrays:
        # Resize array
        resized = cv2.resize(array, (width, height), interpolation = cv2.INTER_AREA)
        
        # Convert to array and values between 0 and 1 to allow to be used in CNN (using list comprehension)
        resized = np.asarray(resized/255.).astype("float32")

        # Append to list
        arrays_resized.append(resized)
            
    # Make into arrays with same dimensions instead of lists
    #arrays_resized = np.array(arrays_resized).reshape(len(arrays_resized), width, height, 3)
    
    # Return
    return arrays_resized

In [8]:
# Resize arrays
df["X"] = get_resized_arrays(df["X"], 224, 224)

[INFO] Resizing images ...


# Classifying original paintings for benchmark performance

In [17]:
# Filtering df to only include the original paintings
df_original = df.loc[(df['y'] == "content_monet_style_monet") | (df['y'] == "content_gauguin_style_gauguin")]

In [18]:
# Make a test-train split
X_train, X_test, y_train, y_test = train_test_split(df_original["X"], 
                                                    df_original["y"], 
                                                    random_state = 9, # for replication purposes
                                                    train_size = .8)

In [153]:
# IS THIS NECESSARY?? **

# Define function for min max scaling
#def min_max_scaling(X_train, X_test):
    # Min-max scaling:
 #   scaler = MinMaxScaler()
  #  scaler = scaler.fit(X_train) # Important to scale not only train data but also test data information from train
    #X_train_scaled = pd.DataFrame(scaler.transform(X_train))
    #X_test_scaled = pd.DataFrame(scaler.transform(X_test))
    
    # Return scaled values
    #return X_train_scaled, X_test_scaled

In [154]:
#X_train_scaled, X_test_scaled = min_max_scaling(X_train, X_test)

In [19]:
# Define function for binarizing labels
def binarize_labels(y_train, y_test):
    # Binarize the labels (getting from e.g. [3,1,2] to [[0,0,1],[1,0,0],[0,1,0], instead of course with numbers from 0-10) 
    y_train = LabelBinarizer().fit_transform(y_train) 
    y_test = LabelBinarizer().fit_transform(y_test)
    
    # Return binarized labels
    return y_train, y_test

In [20]:
# Binarize labels
y_train, y_test = binarize_labels(y_train, y_test)

In [21]:
# Convert to tensor
#y_train = tf.convert_to_tensor(y_train, dtype=tf.int64)
#y_test = tf.convert_to_tensor(y_test, dtype=tf.int64) 

# Classifying original paintings using self-defined model

In [182]:
model = Sequential()
model.add(Conv2D(32,3,padding="same", activation="relu", input_shape=(224,224,3)))
model.add(MaxPool2D())

model.add(Conv2D(32, 3, padding="same", activation="relu"))
model.add(MaxPool2D())

model.add(Conv2D(64, 3, padding="same", activation="relu"))
model.add(MaxPool2D())
model.add(Dropout(0.4))

model.add(Flatten())
model.add(Dense(128,activation="relu"))
model.add(Dense(2, activation="softmax"))

model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 224, 224, 32)      896       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 112, 112, 32)      0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 112, 112, 32)      9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 56, 56, 64)        18496     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 28, 28, 64)        0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 28, 28, 64)       

In [183]:
opt = Adam(lr=0.01)
model.compile(optimizer = opt , loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) , metrics = ['accuracy'])

In [184]:
history = model.fit(X_train, y_train,epochs = 5 , validation_data = (X_test, y_test))

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

# Classifying original paintings using pretrained model (VGG16)

In [163]:
base_model = tf.keras.applications.MobileNetV2(input_shape = (224, 224, 3), include_top = False, weights = "imagenet")
base_model.trainable = False

In [158]:
model = tf.keras.Sequential([base_model,
                                 tf.keras.layers.GlobalAveragePooling2D(),
                                 tf.keras.layers.Dropout(0.2),
                                 tf.keras.layers.Dense(2, activation="softmax")                                     
                                ])

In [159]:
base_learning_rate = 0.00001
model.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [162]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_224 (Functi (None, 7, 7, 1280)        2257984   
_________________________________________________________________
global_average_pooling2d_2 ( (None, 1280)              0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 1280)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 2562      
Total params: 2,260,546
Trainable params: 2,562
Non-trainable params: 2,257,984
_________________________________________________________________


In [160]:
history = model.fit(X_train, y_train, epochs = 5 , validation_data = (X_test, y_test,), verbose = 1)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(5)

plt.figure(figsize=(15, 15))
plt.subplot(2, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()