## Identifying Artist from Art

The goal of this work is build a neural network capable to analyze some painting and classify their respective artists.

For that we are using the dataset [**Best Artworks of All Time**](https://www.kaggle.com/ikarus777/best-artworks-of-all-time).

**Read Data**

In [42]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import models, layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#### Preprocessing

In [49]:
# Remove umbalanced artists and take the artists that only proximate number of paintings
artists = pd.read_csv('archive/artists.csv')
artists = artists.sort_values(by=['paintings'], ascending=False)
artists = artists[artists['paintings'] > 100].reset_index()
artists = artists[artists['paintings'] < 700].reset_index()

class_weights = artists.paintings.sum() / (artists.shape[0] * artists.paintings)

# fix Albrect Durer name
updated_name = "Albrecht_Dürer".replace("_", " ")
artists.iloc[4, 0] = updated_name
artists = list(artists['name'].str.replace(' ', '_').values)

In [50]:
# Choosed artists
artists

['Pablo_Picasso',
 'Pierre-Auguste_Renoir',
 'Albrecht_Dürer',
 'Paul_Gauguin',
 'Francisco_Goya',
 'Rembrandt',
 'Alfred_Sisley',
 'Titian',
 'Marc_Chagall',
 'Rene_Magritte',
 'Amedeo_Modigliani',
 'Paul_Klee',
 'Henri_Matisse',
 'Andy_Warhol',
 'Mikhail_Vrubel',
 'Sandro_Botticelli',
 'Leonardo_da_Vinci',
 'Peter_Paul_Rubens',
 'Salvador_Dali',
 'Hieronymus_Bosch',
 'Pieter_Bruegel',
 'Diego_Velazquez',
 'Kazimir_Malevich',
 'Frida_Kahlo',
 'Giotto_di_Bondone',
 'Gustav_Klimt',
 'Raphael',
 'Joan_Miro']

In [51]:
# Artists weights
class_weights

0     0.448422
1     0.585884
2     0.600174
3     0.632981
4     0.676485
5     0.751363
6     0.760066
7     0.771989
8     0.823670
9     1.014728
10    1.019985
11    1.047112
12    1.058372
13    1.087609
14    1.151211
15    1.200348
16    1.376623
17    1.396150
18    1.416238
19    1.436913
20    1.469083
21    1.537946
22    1.562358
23    1.640476
24    1.654262
25    1.682540
26    1.806029
27    1.929972
Name: paintings, dtype: float64

In [52]:
# Creating the datasets.

image_height = 256
image_width = 256

args = {
    'directory': 'archive/images/images',
    'batch_size': 32,
    'target_size': (image_height, image_width),
    'seed': 123,
    'classes': artists
}

image_data_gen = ImageDataGenerator(validation_split=0.1, rescale=1./255., shear_range=5)
train_ds = image_data_gen.flow_from_directory(**args, subset='training')
validation_ds = image_data_gen.flow_from_directory(**args, subset='validation')

Found 4679 images belonging to 28 classes.
Found 505 images belonging to 28 classes.


In [23]:
# artists_df = pd.read_csv('archive/artists.csv')
# useful_artists = list(artists_df[(artists_df['paintings'] > 100) & (artists_df['paintings'] < 700)]['name'])
# useful_artists = [x.replace(' ', '_') for x in useful_artists]

In [27]:
# # Visualize the data: here are the first 9 images from the training dataset.

# plt.figure(figsize=(10, 10))

# class_names = train_ds.class_names

# for images, labels in train_ds.take(1):
#     for i in range(9):
#         ax = plt.subplot(3, 3, i + 1)
        
#         plt.imshow(images[i].numpy().astype('uint8'))
#         plt.title(class_names[labels[i]])
#         plt.axis('off')

In [28]:
# # Configure the dataset for performance.
# #
# # https://www.tensorflow.org/tutorials/load_data/images#configure_the_dataset_for_performance

# train_ds = train_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
# validation_ds = validation_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

**Data Augmentation**

In [53]:
data_augmentation = []
data_augmentation.append(layers.experimental.preprocessing.RandomFlip('horizontal',
                                                                      input_shape=(image_height, image_width, 3)))
data_augmentation.append(layers.experimental.preprocessing.RandomRotation(0.1))
data_augmentation.append(layers.experimental.preprocessing.RandomZoom(0.1))

**Build Model**

In [54]:
model = models.Sequential()

model.add(models.Sequential(data_augmentation))
model.add(layers.experimental.preprocessing.Rescaling(1./255, input_shape=(image_height, image_width, 3)))

model.add(layers.Conv2D(32, 3, activation='relu'))
model.add(layers.MaxPooling2D(2))
model.add(layers.Conv2D(64, 3, activation='relu'))
model.add(layers.MaxPooling2D(2))
model.add(layers.Conv2D(64, 3, activation='relu'))
model.add(layers.MaxPooling2D(2))

model.add(layers.Flatten())
model.add(layers.Dropout(0.1))

model.add(layers.Dense(64, activation='relu')) # Softmax could be an alternative.
model.add(layers.Dense(len(artists)))

In [None]:
# Compile and train the model.

epochs = 50

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_ds, validation_data=validation_ds, epochs=epochs, class_weight=class_weights.to_dict())

Epoch 1/5
 23/147 [===>..........................] - ETA: 10:32 - loss: 8.9394 - accuracy: 0.0196

In [34]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rescaling_1 (Rescaling)      (None, 256, 256, 3)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 254, 254, 32)      896       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 127, 127, 32)      0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 125, 125, 64)      18496     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 62, 62, 64)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 60, 60, 64)        36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 30, 30, 64)       

**Visualize Results**

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

plt.show()