In [1]:
import sys
sys.path.append("..")  # append the parent directory to the module search path (existing entries are kept)

In [2]:
# NOTE: `preprocess_input` is imported twice below (from resnet50, then from vgg16).
# The later vgg16 import rebinds the name, so every call further down uses the vgg16 one.
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np

In [3]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.style.use('ggplot')

In [4]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import SGD

# Short alias for the Keras API bundled with TensorFlow.
keras = tf.keras
# tf.data tuning constant; not referenced by any of the cells visible in this notebook section.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [5]:
# Read the balanced dataset and discard the 'Unnamed: 0' column stored in the CSV.
# (An earlier experiment that narrowed the data to two `portrait` values for a demo is disabled.)
df = pd.read_csv('../data/balanced_df.csv').drop(columns='Unnamed: 0')

In [6]:
# Turn every bare image name into its relative path: ../img/<name>.jpg
df['fname'] = ['../img/' + stem + '.jpg' for stem in df['fname']]

In [7]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the portrait names, then store the resulting integer class ids in 'code'.
LE = LabelEncoder().fit(df['portrait'])
df['code'] = LE.transform(df['portrait'])

In [8]:
from sklearn.model_selection import train_test_split

# Fixed seed so that a kernel restart reproduces exactly the same partitions.
SPLIT_SEED = 42

# Features are every column except the encoded label; the label is the integer 'code'.
X = df.drop('code', axis=1)
y = df.code

# Hold out 10% of all rows as the test set, stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=SPLIT_SEED)

# Carve 20% of the remaining rows out as the validation set, again stratified.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=SPLIT_SEED)

In [9]:
# Image paths of each split as plain lists, in the row order of the corresponding frame.
train_filenames = list(X_train['fname'])
val_filenames = list(X_val['fname'])
test_filenames = list(X_test['fname'])

In [10]:
def reset_indices(arr):
    """Renumber the index of `arr` from zero, in place, and return the same object.

    The previous index is discarded (`drop=True`) rather than kept as data.
    """
    arr.reset_index(inplace=True, drop=True)
    return arr

def load_and_preprocess_image(path, target_size=(224, 224)):
    """Read a JPEG file and return it as a resized float32 RGB tensor scaled to [0, 1].

    Args:
        path: Location of the JPEG file to read.
        target_size: (height, width) the image is resized to; defaults to 224x224.

    Returns:
        A float32 tensor of shape (height, width, 3).

    Raises:
        Whatever TensorFlow raises when the file cannot be read or decoded.
    """
    raw_bytes = tf.io.read_file(path)
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    # convert_image_dtype already rescales the decoded uint8 pixels to floats in [0, 1].
    # The extra division by 255 that used to follow squashed everything into [0, 1/255].
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, list(target_size))
    return image

def remove_corrupt_files(filenames, y_arr, loader=None):
    """Load every readable image and keep only the labels that belong to them.

    Files whose loading fails are skipped together with their label, so the
    returned images and labels stay aligned position by position. The caller's
    `y_arr` is never modified.

    Args:
        filenames: Iterable of image paths.
        y_arr: Labels aligned by position with `filenames` (Series, list or array).
        loader: Callable mapping a path to an image array/tensor. Defaults to
            `load_and_preprocess_image`.

    Returns:
        (X, y): array of the loaded images and array of the matching labels.

    Raises:
        ValueError: if `filenames` and `y_arr` differ in length.
    """
    import warnings

    if loader is None:
        loader = load_and_preprocess_image

    filenames = list(filenames)
    # Positional copy of the labels: works for any label container and leaves the input intact.
    labels = np.asarray(y_arr)
    if len(filenames) != len(labels):
        raise ValueError(
            'filenames and y_arr must have the same length, got {} and {}'.format(
                len(filenames), len(labels)))

    imgs, kept_labels, skipped = [], [], []
    for filename, label in zip(filenames, labels):
        try:
            img = np.asarray(loader(filename))
        # `Exception` rather than a bare except, so Ctrl-C / kernel interrupts still propagate.
        except Exception:
            skipped.append(filename)
            continue
        imgs.append(img)
        kept_labels.append(label)

    if skipped:
        warnings.warn('Skipped {} unreadable file(s), e.g. {}'.format(len(skipped), skipped[0]))

    X = np.asarray(imgs)
    # Keep the label dtype even when nothing (or everything) was skipped.
    y = np.asarray(kept_labels, dtype=labels.dtype)

    return X, y

In [11]:
# Load the images of each split (train, validation, test - in that order), dropping
# unreadable files; from here on X_* are image arrays and y_* the matching label arrays.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    remove_corrupt_files(names, labels)
    for names, labels in (
        (train_filenames, y_train),
        (val_filenames, y_val),
        (test_filenames, y_test),
    )
)

In [14]:
from tensorflow.keras import backend as K

# Target image size; the channel axis comes first or last depending on the backend setting.
img_width = img_height = 224
input_shape = (
    (3, img_width, img_height)
    if K.image_data_format() == 'channels_first'
    else (img_width, img_height, 3)
)

In [15]:
# preprocess_input can write its result over a float array passed to it, which would silently
# change X_train (and change it again on every re-run of this cell); work on a copy instead.
# NOTE(review): the fit cell trains on X_train rather than prepared_images, and the Keras docs
# describe preprocess_input as expecting pixel values in [0, 255] - confirm the intended pipeline.
prepared_images = preprocess_input(np.copy(X_train))

In [16]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense

In [17]:
# Convolutional part of VGG16 only (no classifier head), initialised with ImageNet weights.
# `pooling` used to receive the builtin function `max` instead of the string 'max'; that value
# is not a recognised option, so no global pooling was applied (the summary below shows a
# (None, 7, 7, 512) output). pooling=None states that behaviour explicitly; pass
# pooling='max' instead if a globally max-pooled (None, 512) output is wanted.
base_model = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3), pooling=None)
# Freeze the pretrained weights so that only the new layers added on top are trained.
base_model.trainable = False

In [18]:
# Symbolic entry point of the network: 224x224 images with 3 channels.
inpt = Input(name='image_input', shape=(224, 224, 3))

In [19]:
# Run the input tensor through the VGG16 base. Note that the name `output` is rebound to the
# classifier head's tensor in the next cell.
output = base_model(inpt)

In [20]:
# Width of the softmax layer.
# NOTE(review): must equal the number of distinct label codes - confirm against the data.
NUM_CLASSES = 12

# Call the base model here instead of reading the global `output`: this cell rebinds `output`
# to the predictions tensor below, so re-running it would otherwise feed the previous head's
# predictions into a new head.
features = base_model(inpt)
flat1 = Flatten(name='flatten')(features)
class1 = Dense(1024, activation='relu', name='fc1')(flat1)
output = Dense(NUM_CLASSES, activation='softmax', name='predictions')(class1)
# define new model
vgg_model = Model(inputs=inpt, outputs=output)

In [21]:
# Print the layer-by-layer overview (output shapes and parameter counts) of the assembled model.
vgg_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image_input (InputLayer)     [(None, 224, 224, 3)]     0         
_________________________________________________________________
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 1024)              25691136  
_________________________________________________________________
predictions (Dense)          (None, 12)                12300     
Total params: 40,418,124
Trainable params: 25,703,436
Non-trainable params: 14,714,688
_________________________________________________________________


In [22]:
# Plain SGD with a small step size. `learning_rate` is the supported argument name;
# `lr` is only a deprecated alias.
sgd = SGD(learning_rate=0.001)

In [23]:
# Integer class labels with a softmax output -> sparse categorical cross-entropy; track accuracy.
vgg_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

In [24]:
# X_train / y_train are in-memory arrays, so Keras derives the number of steps per epoch from
# their length and the batch size. `steps_per_epoch` is not supported with array inputs and
# has been replaced by an explicit batch size.
vgg_history = vgg_model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_val, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

KeyboardInterrupt: 

In [None]:
# Score the trained model on the held-out test split and report its accuracy as a percentage.
test_loss, test_acc = vgg_model.evaluate(x=X_test, y=y_test, verbose=0)
print('\nTest accuracy {:5.2f}%'.format(test_acc * 100))

In [None]:
training_loss = vgg_history.history['loss']
# This is the per-epoch validation loss; binding it to `test_loss` (as before) overwrote the
# held-out test loss returned by evaluate().
val_loss = vgg_history.history['val_loss']

# Epochs are numbered from 1 on the x-axis.
epoch_count = range(1, len(training_loss) + 1)

plt.plot(epoch_count, training_loss, 'r--')
plt.plot(epoch_count, val_loss, 'b-')
plt.legend(['Training Loss', 'Validation Loss'])
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show();

In [None]:
acc = vgg_history.history['accuracy']
val_acc = vgg_history.history['val_accuracy']

# Build the x-axis from this cell's own data (not `training_loss` from the loss cell) and
# actually pass it to plot(), so epochs are numbered from 1 exactly as in the loss figure.
epoch_count = range(1, len(acc) + 1)

plt.plot(epoch_count, acc, 'r--', label='Training Accuracy')
plt.plot(epoch_count, val_acc, 'b-', label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
# Keep the automatically chosen lower bound but cap the axis at 1.0 (perfect accuracy).
plt.ylim([min(plt.ylim()), 1])
plt.title('Training and Validation Accuracy')

plt.xlabel('Epoch')
plt.show();

## Undersample data