In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from keras.applications import VGG16
import keras
import boto3
import botocore
import os
import pickle
from matplotlib import pyplot as plt
%matplotlib inline
import errno
import shutil
from scipy.misc import imsave 


Using TensorFlow backend.


In [2]:
# Load zap.csv from the working directory. Other cells read its brand_name,
# shoe_name, colorway, colorcode and shoe_hash columns.
zap = pd.read_csv('./zap.csv')

In [3]:
# Integer id per distinct brand / shoe name, numbered 0..n-1 in the order
# returned by Series.unique().
brand_dict = {brand: idx for idx, brand in enumerate(zap.brand_name.unique())}
shoe_dict = {shoe: idx for idx, shoe in enumerate(zap.shoe_name.unique())}
# colorway -> colorcode; when a colorway occurs on several rows the last row wins.
color_dict = {way: code for way, code in zip(zap.colorway, zap.colorcode)}

In [4]:
def get_shoe(shoe_dict,view,s3,BUCKET_NAME='capstoneshoes'):
    """Download one S3 object to ``<view>/<shoe_dict>.jpeg``.

    The object key is ``<shoe_dict>/<view>``. Despite its name, ``shoe_dict``
    is used as a plain string identifier (callers pass a shoe hash).

    Parameters
    ----------
    shoe_dict : str
        Identifier used both in the S3 key and in the local file name.
    view : str
        View name; second part of the key and name of the local directory.
    s3 : object
        Object exposing ``Bucket(name).download_file(key, filename)``.
    BUCKET_NAME : str
        Bucket to read from.

    A ``ClientError`` whose error code is "404" is reported with a printed
    message; any other ``ClientError`` is re-raised.
    """
    remote_key = '{}/{}'.format(shoe_dict,view)
    local_path = '{}/{}.jpeg'.format(view,shoe_dict)
    try:
        bucket = s3.Bucket(BUCKET_NAME)
        bucket.download_file(remote_key, local_path)
    except botocore.exceptions.ClientError as err:
        # Only a missing object is tolerated; everything else propagates.
        if err.response['Error']['Code'] != "404":
            raise
        print("The object does not exist.")

In [5]:
def get_data(row,s3):
    """Fetch one shoe's image array from S3 and rewrite it on disk as a JPEG.

    For every entry of ``views`` the object is downloaded by ``get_shoe`` to
    ``<view>/<shoe_hash>.jpeg``, read back with ``np.load`` and written over
    the same path with ``imsave``.

    Parameters
    ----------
    row : tuple
        ``(index, Series)`` pair as yielded by ``DataFrame.iterrows()``; the
        Series must contain ``'shoe_hash'``.
    s3 : object
        Passed unchanged to ``get_shoe``.

    Returns
    -------
    None

    Notes
    -----
    Best effort: a failure for one view is printed and skipped so that a
    single bad object does not abort a whole ``pool.map`` run. A missing
    ``'shoe_hash'`` entry still raises ``KeyError``.
    """
    views = ['lateralfrontleft']
    shoe_hash = row[1]['shoe_hash']
    for view in views:
        target = '{}/{}.jpeg'.format(view,shoe_hash)
        try:
            try:
                os.makedirs(view)
            except OSError as e:
                # Several workers may create the same directory at once;
                # "already exists" is the only error that is expected here.
                if e.errno != errno.EEXIST:
                    raise
            get_shoe(shoe_hash,view,s3)
            # presumably the stored object is a NumPy-serialised array even
            # though it is saved under a .jpeg name -- TODO confirm
            img_arr = np.load(target)
            imsave(target, img_arr)
        except Exception as e:
            print("Skipping {} ({}): {!r}".format(shoe_hash, view, e))

In [6]:
# Resource used by the notebook process itself.
s3 = boto3.resource('s3')

# One S3 resource per process, keyed by pid. boto3 documents that resources
# and sessions should not be shared across processes, so Pool workers each
# build their own from a fresh Session on first use.
_s3_by_pid = {os.getpid(): s3}

def bucket_connect(x):
    """Run ``get_data`` on ``x`` with an S3 resource owned by the calling process.

    ``x`` is passed unchanged to ``get_data``; its return value is returned.
    """
    pid = os.getpid()
    if pid not in _s3_by_pid:
        _s3_by_pid[pid] = boto3.session.Session().resource('s3')
    return get_data(x,_s3_by_pid[pid])

In [43]:
import multiprocessing
from multiprocessing.pool import Pool

In [44]:
# Worker pool sized to the value returned by multiprocessing.cpu_count().
pool = Pool(processes=multiprocessing.cpu_count())

In [None]:
# Run bucket_connect on every (index, row) pair of zap using the worker pool.
# NOTE(review): the pool is never closed or joined in the cells visible here --
# confirm it is cleaned up elsewhere.
pool.map(bucket_connect, list(zap.iterrows()))

In [25]:
# The shoe_hash column of zap on its own (a Series).
no_brand = zap['shoe_hash']

In [26]:
# X holds the shoe_hash values and y the brand_name values, both as arrays.
X = no_brand.values
y = zap.brand_name.values
# Two-fold stratified splitter; only n_splits is set, everything else is default.
skf = StratifiedKFold(n_splits=2)


In [27]:
# Show the splitter's configuration. The call form prints the same text on
# Python 2 and also parses on Python 3, matching the other print calls here.
print(skf)

StratifiedKFold(n_splits=2, random_state=None, shuffle=False)


In [28]:
# Print the row indices of each fold and slice X / y with them.
# X_train, X_test, y_train and y_test are reassigned on every iteration, so
# after the loop they hold the split of the last fold only.
for train_index, test_index in skf.split(X, y):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

('TRAIN:', array([ 3579,  3582,  3584, ..., 26419, 26420, 26421]), 'TEST:', array([    0,     1,     2, ..., 21151, 21154, 21155]))
('TRAIN:', array([    0,     1,     2, ..., 21151, 21154, 21155]), 'TEST:', array([ 3579,  3582,  3584, ..., 26419, 26420, 26421]))


In [7]:
def retrieve(directory):
    """List the files directly inside ``./<directory>``.

    Only the top level is inspected (sub-directories are not descended into)
    and ``.DS_Store`` is left out.

    Parameters
    ----------
    directory : str
        Directory name, resolved relative to the current working directory.

    Returns
    -------
    list of str
        File names without any path component. An empty list is returned when
        the directory does not exist or holds no files.
    """
    # os.walk yields nothing for a missing directory; take just its first
    # (top-level) entry and fall back to an empty result instead of None.
    top_level = next(os.walk('./{}'.format(directory)), None)
    if top_level is None:
        return []
    files = top_level[2]
    return [f for f in files if f != '.DS_Store']


In [8]:
def move_files(splits,name_folder,train_or_test):
    """Move files from ``./<name_folder>/`` into per-brand sub-directories.

    Each entry of ``splits`` is a file name; the text before its first
    underscore is used as the brand directory, so ``<name_folder>/<file>``
    ends up in ``<train_or_test>/<brand>/``.

    Parameters
    ----------
    splits : iterable of str
        File names to move.
    name_folder : str
        Directory the files currently live in.
    train_or_test : str
        Destination root directory.

    Returns
    -------
    None

    Raises
    ------
    OSError
        If a brand directory cannot be created for a reason other than
        already existing.

    Notes
    -----
    Files that cannot be moved (for example because the source is missing or
    the destination already holds a file of that name) are skipped; a one-line
    summary is printed when that happens.
    """
    skipped = 0
    for i in splits:
        brand_val = i.split('_')[0]
        destination = '{}/{}/'.format(train_or_test,brand_val)
        try:
            os.makedirs(destination)
        except OSError as e:
            # The directory is expected to exist from the second file of a
            # brand onwards; anything else is a real error.
            if e.errno != errno.EEXIST:
                raise
        source = './{}/{}'.format(name_folder,i)
        try:
            shutil.move(source,destination)
        except (IOError, OSError, shutil.Error):
            skipped += 1
    if skipped:
        print("move_files: skipped {} file(s) that could not be moved".format(skipped))

In [10]:
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.layers import Input, Flatten, Dense
from keras.models import Model
import numpy as np

# Convolutional part of a VGG16 network pre-trained on ImageNet (no dense top).
model_vgg16_conv = VGG16(weights='imagenet', include_top=False)
model_vgg16_conv.summary()

# Input tensor: 224x224 images with 3 channels, channels last.
# NOTE(review): this name shadows the built-in input(); it is kept unchanged
# because cells outside this one may refer to it.
input = Input(shape=(224,224,3),name = 'image_input')

# Run the convolutional base on the new input tensor.
output_vgg16_conv = model_vgg16_conv(input)

# Fully-connected classifier on top of the convolutional features.
x = Flatten(name='flatten')(output_vgg16_conv)
x = Dense(4096, activation='relu', name='fc1')(x)
x = Dense(4096, activation='relu', name='fc2')(x)
# 8-way softmax -- presumably one unit per brand class; TODO confirm.
x = Dense(8, activation='softmax', name='predictions')(x)

# Assemble the end-to-end model (Keras 2 keyword names: inputs / outputs).
my_model = Model(inputs=input, outputs=x)

#In the summary, weights and layers from VGG part will be hidden, but they will be fit during the training
my_model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________



In [26]:
# Train my_model end to end on the images themselves.
#
# The earlier version passed my_model.predict(...) output back into
# my_model.fit(...); that cannot work because the model's input is an image
# batch, not its own 8-way predictions.
#
# NOTE(review): this assumes X_train / X_val are image arrays of shape
# (n, 224, 224, 3) and y_train / y_val are one-hot labels with 8 columns.
# X_val and y_val are not defined in the cells visible here -- confirm where
# they come from before running.
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
# Cross-entropy is the loss that matches a softmax classification head.
my_model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# `epochs` is the Keras 2 name of the former `nb_epoch` argument.
my_model.fit(X_train, y_train,
          epochs=80,
          batch_size=32,
          validation_data=(X_val, y_val))

ValueError: Error when checking : expected input_9 to have 4 dimensions, but got array with shape (17700, 9)

In [20]:
# Full VGG16 architecture (dense top included) with randomly initialised
# weights, since weights=None.
vgg16 = VGG16(weights=None, include_top=True)

# Replace the final layer: attach a new 8-way softmax to the output of the
# second-to-last layer.
x = Dense(8, activation='softmax', name='predictions')(vgg16.layers[-2].output)

# Build the model from VGG16's own input to the new head
# (Keras 2 keyword names: inputs / outputs).
my_model = Model(inputs=vgg16.input, outputs=x)
my_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

