# Introduction

# Read Data

Make sure to have the data saved locally at `../cs254-final-project/data`.

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import os
import glob
from PIL import Image
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from keras import regularizers, optimizers
from keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator, array_to_img,img_to_array,load_img
from keras import backend as K

In [2]:
# Show the contents of the project root so the data folder can be confirmed
project_root_entries = os.listdir("../cs254-final-project")
print(project_root_entries)

['.git', '.gitignore', '.idea', '.ipynb_checkpoints', 'data', 'first_try.h5', 'main.ipynb', 'README.md']


In [3]:
# Load the artist metadata table and report its (rows, columns) shape
artists = pd.read_csv('../cs254-final-project/data/artists.csv')
n_rows, n_cols = artists.shape
print((n_rows, n_cols))

(50, 8)


# Data Processing

The following approach is a modified version of the approach from [DeepArtists: Identify Artist from Art](https://www.kaggle.com/supratimhaldar/deepartist-identify-artist-from-art).

In [4]:
# Order the artists from the most paintings to the fewest
artists = artists.sort_values('paintings', ascending=False)

In [5]:
# Keep the first ten rows of `artists`, i.e. the ten artists with the most
# paintings once the frame has been sorted by the 'paintings' column
artists_top = artists.iloc[:10]
artists_top

Unnamed: 0,id,name,years,genre,nationality,bio,wikipedia,paintings
8,8,Vincent van Gogh,1853 – 1890,Post-Impressionism,Dutch,Vincent Willem van Gogh (Dutch: [ˈvɪnsɛnt ˈʋɪl...,http://en.wikipedia.org/wiki/Vincent_van_Gogh,877
30,30,Edgar Degas,1834 - 1917,Impressionism,French,Edgar Degas (US: or UK: ; born Hilaire-Germai...,http://en.wikipedia.org/wiki/Edgar_Degas,702
13,13,Pablo Picasso,1881 - 1973,Cubism,Spanish,Pablo Ruiz Picasso (; Spanish: [ˈpaβlo piˈkaso...,http://en.wikipedia.org/wiki/Pablo_Picasso,439
15,15,Pierre-Auguste Renoir,1841 - 1919,Impressionism,French,"Pierre-Auguste Renoir, commonly known as Augus...",http://en.wikipedia.org/wiki/Pierre-Auguste_Re...,336
19,19,Albrecht Dürer,1471 - 1528,Northern Renaissance,German,Albrecht Dürer (; German: [ˈʔalbʁɛçt ˈdyːʁɐ]; ...,http://en.wikipedia.org/wiki/Albrecht_Dürer,328
46,46,Paul Gauguin,1848 – 1903,"Symbolism,Post-Impressionism",French,"Eugène Henri Paul Gauguin (UK: , US: ; French:...",http://en.wikipedia.org/wiki/Paul_Gauguin,311
16,16,Francisco Goya,1746 - 1828,Romanticism,Spanish,Francisco José de Goya y Lucientes (; Spanish:...,http://en.wikipedia.org/wiki/Francisco_Goya,291
31,31,Rembrandt,1606 - 1669,Baroque,Dutch,Rembrandt Harmenszoon van Rijn (; Dutch: [ˈrɛm...,http://en.wikipedia.org/wiki/Rembrandt,262
20,20,Alfred Sisley,1839 - 1899,Impressionism,"French,British",Alfred Sisley (; French: [sislɛ]; 30 October 1...,http://en.wikipedia.org/wiki/Alfred_Sisley,259
32,32,Titian,1488 - 1576,"High Renaissance,Mannerism",Italian,Tiziano Vecelli or Tiziano Vecellio (pronounce...,http://en.wikipedia.org/wiki/Titian,255


## Exploring data and creating a DataFrame of all image paths with their associated artists

In [6]:
# Root folder of the image set; its entries are named after the artists
images_dir = '../cs254-final-project/data/images/images'
artists_dir = list(os.listdir(images_dir))

In [7]:
# Images DataFrame: one row per image file with the columns
#   'Path' - path of the image file
#   'Name' - artist name with spaces replaced by underscores (the folder name)
artists_top_name = artists_top['name'].str.replace(' ', '_').values

# Build one frame per artist and concatenate once at the end. Calling
# pd.concat inside the loop re-copies every previously collected row on each
# iteration.
image_frames = []
for name in artists_top_name:
    # glob returns files in an OS-dependent order; sorting keeps the row order
    # (and therefore the seeded train/test split derived from it) reproducible.
    artist_paths = sorted(glob.glob(images_dir + '/' + name + '/*'))
    if not artist_paths:
        # Make a missing or differently-encoded folder name visible instead of
        # silently dropping the artist from the dataset.
        print('Warning: no images found for ' + repr(name) + ' under ' + images_dir)
        continue
    image_frames.append(pd.DataFrame(data={'Path': artist_paths, 'Name': name}))

if image_frames:
    images_df = pd.concat(image_frames, ignore_index=True)
else:
    # Keep the expected schema even when nothing was found
    images_df = pd.DataFrame(columns=['Path', 'Name'])

images_df

Unnamed: 0,Path,Name
0,../cs254-final-project/data/images/images/Vinc...,Vincent_van_Gogh
1,../cs254-final-project/data/images/images/Vinc...,Vincent_van_Gogh
2,../cs254-final-project/data/images/images/Vinc...,Vincent_van_Gogh
3,../cs254-final-project/data/images/images/Vinc...,Vincent_van_Gogh
4,../cs254-final-project/data/images/images/Vinc...,Vincent_van_Gogh
...,...,...
3727,../cs254-final-project/data/images/images/Titi...,Titian
3728,../cs254-final-project/data/images/images/Titi...,Titian
3729,../cs254-final-project/data/images/images/Titi...,Titian
3730,../cs254-final-project/data/images/images/Titi...,Titian


In [8]:
# NOTE(review): this whole cell is disabled (commented-out) experimental code;
# nothing below executes. Before re-enabling it, be aware that:
#   * `for row in images_df` iterates over the column labels of a DataFrame,
#     not its rows, so `row['Path']` would not receive a row
#     (images_df.itertuples() / iterrows() would be needed instead);
#   * the resized files are saved flat as 'data/images/resized/<count>.jpg',
#     while the glob further down looks inside per-artist sub-folders
#     '../data/images/resized/<name>/*', so it would not find them.

# Converting the Images DataFrame to a .csv
#images_df.to_csv('../cs254-final-project/data/images.csv', index=True)

#resize all images:
#resized dataframe:
#normal_df = pd.DataFrame()
#count = 0
#for row in images_df:
 #   temp_img = Image.open(str(row['Path']))
  #  new_img = temp_img.resize((1000,1000))
   # new_img.save('data/images/resized/' + str(count) + ".jpg")
    #count += 1

#for name in artists_top_name:    
 #   normal_df = pd.concat([normal_df, pd.DataFrame(data={'Path': glob.glob('../data/images/resized/' + name + '/*'), 'Name': name})], ignore_index=True)
#normal_df



In [None]:
# Split the data, build the CNN, train it, and predict on the test split.

# Image dimensions every picture is resized to before entering the network.
# NOTE(review): with 1000x1000 inputs the Flatten layer below produces
# 123 * 123 * 64 = 968,256 features, so Dense(64) alone holds ~62M weights.
# Consider a smaller size if memory or training time becomes a problem.
img_width, img_height = 1000, 1000

# --- Train / validation / test split ---------------------------------------
# 80% of the images go to training+validation, the remaining 20% to testing;
# 10% of the training part is then held out for validation. The fixed
# random_state keeps the split reproducible, and every frame is derived from
# `images_df`, so re-running this cell gives the same result.
train_df = images_df.sample(frac=0.8, random_state=200)
test_df = images_df.drop(train_df.index)

valid_df = train_df.sample(frac=0.1, random_state=200)
train_df = train_df.drop(valid_df.index)

# The classes are the artists actually present in the data. The explicit list
# is passed to both labelled generators so that they assign identical class
# indices even if one split happens to miss an artist.
class_names = sorted(images_df['Name'].unique())
num_classes = len(class_names)

# Keras expects image shapes as (height, width); channel position depends on
# the backend configuration.
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_height, img_width)
else:
    input_shape = (img_height, img_width, 3)

# --- Data generators --------------------------------------------------------
# Only rescale pixel values from [0, 255] to [0, 1]; no augmentation.
train_generator = ImageDataGenerator(rescale=1./255.)
test_generator = ImageDataGenerator(rescale=1./255.)

train_gen = train_generator.flow_from_dataframe(
        dataframe=train_df,
        x_col='Path',
        y_col='Name',
        classes=class_names,
        class_mode='categorical',  # one-hot labels, one column per artist
        batch_size=32,
        shuffle=True,
        target_size=(img_height, img_width)
)

valid_gen = test_generator.flow_from_dataframe(
        dataframe=valid_df,
        x_col='Path',
        y_col='Name',
        classes=class_names,
        class_mode='categorical',
        batch_size=32,
        shuffle=True,
        target_size=(img_height, img_width)
)

# The test generator yields images only (class_mode=None), one at a time and
# in a fixed order, so predictions line up with the rows of test_df.
test_gen = test_generator.flow_from_dataframe(
        dataframe=test_df,
        x_col='Path',
        class_mode=None,
        batch_size=1,
        shuffle=False,
        target_size=(img_height, img_width)
)

# --- Model ------------------------------------------------------------------
# Three Conv -> ReLU -> MaxPool stages followed by a small dense classifier.
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# Multi-class head: one unit per artist with softmax, matching the one-hot
# labels produced by class_mode='categorical'. (A single sigmoid unit with
# binary cross-entropy only fits a two-class problem.)
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# --- Training ---------------------------------------------------------------
# Number of full batches per pass over each split
steps_train = train_gen.n // train_gen.batch_size
steps_valid = valid_gen.n // valid_gen.batch_size
steps_test = test_gen.n // test_gen.batch_size

# Model.fit accepts generators directly; fit_generator is deprecated.
model.fit(
        train_gen,
        steps_per_epoch=steps_train,
        validation_data=valid_gen,
        validation_steps=steps_valid,
        epochs=5
)

#model.save_weights('first_try.h5')

# --- Prediction -------------------------------------------------------------
# Rewind the generator so prediction starts at the first test image. Each row
# of `predict` holds the per-class probabilities, ordered like `class_names`.
test_gen.reset()
predict = model.predict(test_gen,
                        steps=steps_test,
                        verbose=1)


Found 2687 validated image filenames belonging to 9 classes.
Found 299 validated image filenames belonging to 9 classes.
Found 746 validated image filenames.




Epoch 1/5
