# Painting Classification by Artist

# Read data

In [None]:
# Imports
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.applications import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import *

In [None]:
# All data locations are resolved relative to the current working directory.
main_direc = os.getcwd()

# CSV file with one row of metadata per artist.
artist_csv_loc = os.path.join(main_direc, 'data/artists.csv')

# Root folder of the images; the notebook later globs one sub-folder per artist.
images_dir = os.path.join(main_direc, 'data/images/images')

In [None]:
# Load the per-artist metadata table from the CSV located above.
# Later cells read its 'name' and 'paintings' columns.
artists = pd.read_csv(filepath_or_buffer=artist_csv_loc)

# Data Processing

In [None]:
"""
Set hyperparameters for the number of classes and image generators
"""

# Every image is resized to IMG_HEIGHT x IMG_WIDTH pixels by the generators
# and the same size is used for the network's input shape.
IMG_HEIGHT = IMG_WIDTH = 128

# Number of images per generator batch.
BATCH_SIZE = 32

# Number of artists (classes) kept for classification.
NUM_ARTISTS = 10

## DataFrame management

### Artists DataFrame

In [None]:
# Sort the artists by number of paintings, most prolific first.
artists_sort = artists.sort_values('paintings', ascending=False)

In [None]:
# Keep the NUM_ARTISTS artists with the most paintings.
# The selection is driven by NUM_ARTISTS alone (no second hard-coded cap) so it
# always agrees with the size of the model's output layer.
# reset_index() renumbers the rows 0..N-1 in descending painting-count order;
# the previous index is kept in an 'index' column.
artists_top = artists_sort.head(NUM_ARTISTS).reset_index()

# Show name and painting count without the index column.
# Styler.hide_index() was removed in pandas 2.0 in favour of
# Styler.hide(axis='index'); fall back to the old method on older releases.
top_styler = artists_top[['name', 'paintings']].style
if hasattr(top_styler, 'hide'):
    top_styler = top_styler.hide(axis='index')
else:
    top_styler = top_styler.hide_index()
display(top_styler)

#### Assigning class weights

In [None]:
# Assign higher weights to under-represented classes.

# Work on an explicit copy: adding a column to a column-subset of another
# frame otherwise triggers pandas' chained-assignment warning.
artists_top = artists_top[['name', 'paintings']].copy()

# "Balanced" weighting: total_paintings / (n_classes * paintings_of_class),
# so an artist with an average number of paintings gets a weight of 1.
total_paintings = artists_top['paintings'].sum()
artists_top['class_weight'] = total_paintings / (len(artists_top) * artists_top['paintings'])

# Map row label -> weight. The keys are the index labels of artists_top
# (0..N-1, most paintings first, when the index was reset upstream), so they
# are only meaningful if the image generators number their classes in this
# same row order.
class_weights = artists_top['class_weight'].to_dict()
class_weights

### Images DataFrame

In [None]:
# Create a DataFrame listing every image of the selected artists.

# Image folders are looked up by artist name with spaces replaced by underscores.
artists_top_name = artists_top['name'].str.replace(' ', '_').values

# Build one small frame per artist (columns: 'path', 'name') and concatenate
# them once at the end; concatenating inside the loop re-copies the growing
# frame on every iteration and starts from an empty frame.
image_frames = [
    pd.DataFrame(data={'path': glob.glob(images_dir + "/" + name + '/*'), 'name': name})
    for name in artists_top_name
]

if image_frames:
    images_df = pd.concat(image_frames, ignore_index=True)
else:
    # No artists selected: keep an empty frame with the expected columns
    # (pd.concat raises on an empty list).
    images_df = pd.DataFrame(columns=['path', 'name'])

## Split data into train and test sets

In [None]:
# Random 80/20 split of the image table; the fixed random_state makes the
# sample reproducible.
train_df = images_df.sample(frac=0.8, random_state=200)

# Every row that was not drawn for training becomes test data.
test_df = images_df.drop(index=train_df.index)

## Data Augmentation

In [None]:
# Image generators for training, validation and testing.

# Explicit label order shared by all generators. Without `classes`, Keras
# numbers the labels alphabetically, which does not match `class_weights`
# (keyed by the row order of the top-artists table) and would apply each
# weight to the wrong artist.
class_names = artists_top_name.tolist()

# Arguments common to every flow_from_dataframe call.
flow_kwargs = dict(x_col='path',
                   y_col='name',
                   target_size=(IMG_HEIGHT, IMG_WIDTH),
                   class_mode='categorical',
                   classes=class_names,
                   batch_size=BATCH_SIZE)

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(validation_split=0.2,
                                   rescale=1./255,
                                   shear_range=1,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   vertical_flip=True)

# Validation images are only rescaled: augmenting them would make the
# monitored validation loss noisy and not comparable with the test score.
# The validation split is a fixed slice of the dataframe, so using the same
# validation_split here selects exactly the rows the training subset excludes.
valid_datagen = ImageDataGenerator(validation_split=0.2,
                                   rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(train_df,
                                                    shuffle=True,
                                                    seed=42,
                                                    subset="training",
                                                    **flow_kwargs)

valid_generator = valid_datagen.flow_from_dataframe(train_df,
                                                    shuffle=False,
                                                    subset="validation",
                                                    **flow_kwargs)

# Test images are only rescaled and kept in dataframe order.
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_dataframe(test_df,
                                                  shuffle=False,
                                                  **flow_kwargs)

# Number of full batches per pass over the training, validation and test data
# (a trailing partial batch is not counted).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

print("Total number of batches =", f"{STEP_SIZE_TRAIN}, {STEP_SIZE_VALID}, and {STEP_SIZE_TEST}")

# Modeling

## Building the Model

In [None]:
# ResNet50 backbone with ImageNet weights and without its classification head,
# sized for IMG_HEIGHT x IMG_WIDTH RGB inputs.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
)

In [None]:
# Fine-tune the whole backbone: mark every pre-trained layer as trainable.
for layer in base_model.layers:
    layer.trainable = True

# Classification head stacked on the backbone output, applied in this order:
# flatten the feature maps, normalise them, then two ReLU dense layers.
head_layers = [
    tf.keras.layers.Flatten(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
]

X = base_model.output
for head_layer in head_layers:
    X = head_layer(X)

# Softmax output with one unit per artist.
outputs = tf.keras.layers.Dense(NUM_ARTISTS, activation='softmax')(X)

# Full model: backbone input -> per-artist probabilities.
model = tf.keras.Model(inputs=base_model.input, outputs=outputs)

In [None]:
# Print the layer-by-layer architecture followed by the total layer count.
model.summary()
print(f"\nTotal layers: {len(model.layers)}")

In [None]:
# Upper bound on training epochs; early stopping may end training sooner.
N_EPOCHS = 75

# Categorical cross-entropy loss, matching the generators' 'categorical'
# class_mode, optimised with Adam at a small learning rate.
LOSS_FUNCTION = tf.losses.CategoricalCrossentropy()
OPTIMIZER = tf.optimizers.Adam(learning_rate=0.0001)

# Stop once validation loss has not improved for 10 epochs and restore the
# weights of the best epoch.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Compile the model, tracking accuracy alongside the loss.
model.compile(
    loss=LOSS_FUNCTION,
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

In [None]:
# Train all layers of the model. The per-class weights counter the class
# imbalance, and early stopping watches the validation generator.
# NOTE(review): `workers` (and `shuffle` for generator input) may not be
# honoured by every Keras release - confirm against the installed version.
history = model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=N_EPOCHS,
    class_weight=class_weights,
    callbacks=[early_stop],
    shuffle=True,
    workers=16,
    verbose=1,
)

## Model Evaluation

In [None]:
# Plot the training curves: accuracy on the left, loss on the right.
# Each panel shows the training and validation series recorded per epoch
# in `history.history`.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))

accuracy_ax, loss_ax = axs

accuracy_ax.plot(history.history['accuracy'], label="Training Accuracy")
# Legend label spelling fixed (was "Valideation Accuracy").
accuracy_ax.plot(history.history['val_accuracy'], label="Validation Accuracy")
accuracy_ax.set_title('Training and Validation Accuracy')
accuracy_ax.legend()

loss_ax.plot(history.history['loss'], label="Training Loss")
loss_ax.plot(history.history['val_loss'], label="Validation Loss")
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()

plt.show()

In [None]:
# Score the trained model on each split. evaluate() returns [loss, accuracy]
# because the model was compiled with metrics=['accuracy'], so element 1 is
# the accuracy that gets reported.
evaluation_runs = (
    ("Prediction accuracy on train data =", train_generator),
    ("Prediction accuracy on validation data =", valid_generator),
    ("Prediction accuracy on test data =", test_generator),
)

for report_label, split_generator in evaluation_runs:
    accuracy = model.evaluate(split_generator, verbose=1)
    print(report_label, accuracy[1])