In [1]:
import pandas as pd
import numpy as np
import os
import cv2

from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import Dropout, Flatten, Dense, Conv2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.metrics import classification_report, precision_score

Using TensorFlow backend.


In [2]:
# constants to configure
root = './Processed_Data/'
df_labels = 'combine_10.csv'
df_info = 'dress_info.csv'
image_dir = 'cropped/'
batch_size = 16 # mini-batch training
dropout_rate = 0.25
epochs = 10

In [9]:
df = pd.read_csv(root+df_labels, index_col=0, header=None).fillna('')
orig = pd.read_csv(root+df_info)
cropped = os.listdir(image_dir)

# Resize images in place

In [10]:
img_size = (150, 500)
for f in cropped:
    try:
        img = cv2.imread(image_dir+f)
        img = cv2.resize(img, img_size)
        cv2.imwrite(image_dir+f, img)
    except:
        os.remove(image_dir+f) # remove badly cropped images

cropped = os.listdir(image_dir) # remap list of images

# Load in dataframe of image filenames and labels

In [11]:
# get back column names and replace with 1,0
col_names = []
for col in df.columns:
    col_name = ''.join(set(df[col].tolist()))
    col_names.append(col_name)
    df[col] = np.where(df[col] != '', 1, 0)
df.columns = col_names
df.index.name = 'name'

# filter out images that didn't get cropped successfully
df.index = orig.name.apply(lambda x: x+'.jpg')
df = df[df.index.isin(cropped)]

# extract filepath (X) and labels (Y)
df = df.reset_index()
y_col = df.drop(columns=['name']).columns.tolist()
x_col = 'name'

# Data generator

In [12]:
# stream input images for training and validation
train_datagen = ImageDataGenerator(
    rescale = 1./255, # shouldn't change, to improve convergence by converting pixel scale to 0-1
    # the below parameters applies some random transformations to training images
    horizontal_flip = True,
    rotation_range = 10,
    fill_mode = "nearest",
    zoom_range = 0.1,
    height_shift_range = 0.1,
    validation_split = 0.1 # use 10% (i.e. 1k images) for validation
)

In [14]:
train_generator = train_datagen.flow_from_dataframe(
    df,
    directory = image_dir,
    x_col = x_col, y_col = y_col,
    target_size = img_size,
    batch_size = batch_size, 
    class_mode = "other",
    subset = 'training',
    seed = 12345
)

validation_generator = train_datagen.flow_from_dataframe(
    df,
    directory = image_dir,
    x_col = x_col, y_col = y_col,
    target_size = img_size,
    batch_size = batch_size,
    class_mode = "other",
    subset = 'validation',
    seed = 12345,
)

Found 136 images.
Found 15 images.


# Train model from scratch

### can skip to next section

In [7]:
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 500, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(48, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vector
model.add(Dense(256, activation='relu'))
model.add(Dropout(dropout_rate))
model.add(Dense(256, activation='relu'))
model.add(Dropout(dropout_rate))
model.add(Dense(100, activation='relu'))
model.add(Dropout(dropout_rate))
model.add(Dense(len(y_col), activation='sigmoid')) 

model.compile(loss='binary_crossentropy',
              optimizer=optimizers.Adam(),
              metrics=['accuracy'])

In [10]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 148, 498, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 74, 249, 32)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 72, 247, 48)       13872     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 36, 123, 48)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 34, 121, 64)       27712     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 17, 60, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 65280)             0         
__________

In [9]:
# Save the model according to the conditions  
checkpoint = ModelCheckpoint("./saved_models/baseline_cnn_cropped_3layer_dropout.h5", 
                             monitor='val_acc', 
                             verbose=1, 
                             save_best_only=True,
                             save_weights_only=False,
                             mode='auto',
                             period=1)
early = EarlyStopping(monitor='val_acc', 
                      min_delta=0,
                      patience=4,
                      verbose=1,
                      mode='auto')

# Train the model 
history = model.fit_generator(
    train_generator,
    steps_per_epoch = train_generator.samples // train_generator.batch_size,
    epochs = 10,
    callbacks = [checkpoint, early],
    validation_data = validation_generator,
    validation_steps = validation_generator.samples // validation_generator.batch_size,
)

Epoch 1/10

Epoch 00001: val_acc improved from -inf to 0.96783, saving model to ./baseline_cnn_cropped_3layer_dropout.h5
Epoch 2/10

Epoch 00002: val_acc improved from 0.96783 to 0.96866, saving model to ./baseline_cnn_cropped_3layer_dropout.h5
Epoch 3/10

Epoch 00003: val_acc improved from 0.96866 to 0.96905, saving model to ./baseline_cnn_cropped_3layer_dropout.h5
Epoch 4/10

Epoch 00004: val_acc did not improve from 0.96905
Epoch 5/10

Epoch 00005: val_acc did not improve from 0.96905
Epoch 6/10

Epoch 00006: val_acc did not improve from 0.96905
Epoch 7/10

Epoch 00007: val_acc did not improve from 0.96905
Epoch 00007: early stopping


# Evaluate precision on validation data

In [15]:
# load model if needed
from keras.models import load_model

model = load_model('./saved_models/baseline_cnn_cropped_3layer_dropout.h5')

In [11]:
val_preds = model.predict_generator(validation_generator,
                                    steps = validation_generator.samples // validation_generator.batch_size)

print("Precision@1: %.5f" %precision_score(validation_generator.data,
                                           val_preds >= 0.5,
                                           average='micro'))
print(classification_report(validation_generator.data, val_preds >= 0.5, 
                            target_names=y_col))

Precision@1: 0.42742
                        precision    recall  f1-score   support

                   50s       0.00      0.00      0.00        13
                   60s       0.00      0.00      0.00        15
                   70s       0.00      0.00      0.00        11
                   80s       0.00      0.00      0.00         5
                   90s       0.00      0.00      0.00        12
     None_of_the_above       0.00      0.00      0.00         1
        amusement park       0.00      0.00      0.00         5
           androgynous       0.00      0.00      0.00         5
           anniversary       0.00      0.00      0.00         5
           art opening       0.00      0.00      0.00        16
           baby shower       0.00      0.00      0.00         3
    bachelorette party       0.00      0.00      0.00         2
                 basic       0.00      0.00      0.00        15
             bbq party       0.00      0.00      0.00         8
                 b

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
